In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import point

In [2]:
# Read the merged weather/outage CSV into a DataFrame.
MERGED_CSV = '../output/weather_outage_merged.csv'
data = pd.read_csv(MERGED_CSV)


In [3]:
# Quick look at the SAIDI column.
data['SAIDI']

0         0.022069
1         0.000184
2         0.000192
3         0.028930
4         0.023663
            ...   
111207    2.600634
111208    0.029760
111209    0.018633
111210    0.000379
111211    0.000938
Name: SAIDI, Length: 111212, dtype: float64

In [4]:
# List the available columns of the merged table.
data.columns

Index(['DATE', 'time_out', 'day', 'month', 'year', 'date_in', 'time_in',
       'outage_duration', 'number_of_customers_affected', 'city_town',
       'reason_for_outage', 'Latitude', 'Longitude', 'STATION', 'NAME_x',
       'POPULATION', 'NAME_y', 'LATITUDE', 'LONGITUDE', 'Avg_daily_wind',
       'precipitation', 'snow', 'temp_max', 'temp_min',
       'fastest_2_min_wind_speed', 'YEAR', 'MONTH', 'DAY', 'SAIDI'],
      dtype='object')

In [5]:
# Weather stations: one row per distinct (LATITUDE, LONGITUDE) pair.
# The columns keep their original upper-case names because the geometry cell
# further down reads `stations.LONGITUDE` / `stations.LATITUDE`. The previous
# rename step targeted 'station latitude' / 'station longitude', which are not
# columns of this frame, so it silently did nothing and has been removed.
stations = data[['LATITUDE', 'LONGITUDE']].drop_duplicates()

In [6]:
# Renumber rows from 0; the previous row labels are kept in an 'index' column.
stations = stations.reset_index()

In [7]:
# Display the station coordinate table.
stations

Unnamed: 0,index,LATITUDE,LONGITUDE
0,0,42.55495,-71.75699
1,14,41.25407,-70.05893
2,25,41.87561,-71.02083
3,27,42.36057,-71.00975
4,28,42.2123,-71.1137
5,29,42.27054,-71.87312
6,31,42.5841,-70.9153
7,32,42.71249,-71.12558
8,37,41.67908,-70.95911
9,48,42.19123,-71.17326


In [8]:
# Promote the station table to a GeoDataFrame with one point per row.
# points_from_xy takes x (longitude) first, then y (latitude).
# The CRS is set explicitly so the frame (and the exported file) carries a
# spatial reference. EPSG:4326 assumes the coordinates are WGS84 decimal
# degrees -- TODO confirm against the station data source.
stations = gpd.GeoDataFrame(
    stations,
    geometry=gpd.points_from_xy(stations.LONGITUDE, stations.LATITUDE),
    crs='EPSG:4326',
)

In [9]:
# Write the station points out as GeoJSON.
stations.to_file(
    '../output/stations.geojson',
    driver='GeoJSON',
)

# Spatial Analysis

## Top 20 affected towns (by mean SAIDI)

In [10]:
 tem = data.groupby(['city_town'])[['SAIDI']].mean().reset_index()
top_20 = tem.nlargest(20, ['SAIDI'])

In [11]:
# Attach coordinates to the top towns with a left join on city_town.
# The join yields one row per matching record in `data`, so a town can
# appear many times in the result.
town_coords = data[['Latitude', 'Longitude', 'city_town']]
df_final = pd.merge(top_20, town_coords, how='left', on='city_town')

In [12]:
# Collapse the repeated rows produced by the join; original row labels are kept.
df_final = df_final.drop_duplicates()

In [13]:
# Display the top towns with their mean SAIDI and coordinates.
df_final

Unnamed: 0,city_town,SAIDI,Latitude,Longitude
0,hingham,2.900434,42.2418,-70.8898
8,e. bridgewater,0.729567,42.0334,-70.9592
252,royalston,0.646842,42.6831,-72.19
371,west stockbridge,0.631989,42.3045,-73.3926
505,rockland,0.602534,42.1307,-70.9162
688,west bridgewater,0.575932,42.0219,-71.019
894,hinsdale,0.462892,42.4389,-73.1252
1063,cohasset,0.438217,42.2419,-70.8044
1262,avon,0.418868,42.1279,-71.0403
1370,hanover,0.416614,42.1132,-70.812


In [14]:
# Build a GeoDataFrame of the top towns with one point per row.
# points_from_xy takes x (Longitude) first, then y (Latitude).
# The CRS is set explicitly so the frame (and the exported file) carries a
# spatial reference. EPSG:4326 assumes the coordinates are WGS84 decimal
# degrees -- TODO confirm against the outage data source.
outages_top_20 = gpd.GeoDataFrame(
    df_final,
    geometry=gpd.points_from_xy(df_final.Longitude, df_final.Latitude),
    crs='EPSG:4326',
)


In [15]:
# Write the top-town points out as GeoJSON.
outages_top_20.to_file(
    '../output/outages_top_20.geojson',
    driver='GeoJSON',
)