# In [None]:
import geopandas as gpd
import shapely.geometry
import numpy as np
import pandas as pd


data_dir = './'

# Load the capacity-map features. No driver keyword is passed: `drive=` is not
# a read_file parameter and would only be forwarded to the I/O engine as an
# unrecognised option.
grid = gpd.read_file(data_dir + 'capacity_map.geojson')

# Replacement column names, applied positionally to the columns of the file.
newnames = ['capacity_MW', 'section_id', 'section_voltage',
       'circuit_id', 'circuit', 'substation',
       'substation_voltage', 'substation_rating_MVA', 'substation_type',
       'bulk_substation', 'bulk_substation_voltage', 'bulk_substation_rating_MVA',
       'bulk_substation_MW', 'ONLINE_DG_KW', 'IN_QUEUE_DG_KW', 'PHASEDESIGNATION',
       'FERC_JURISDICTION', 'ASO_STUDIES', 'voltage',
       'FEEDS_SEC_NTWRK_CUSTS', 'amps', 'ESRI_OID',
       'SCHEME_3VO_PRESENT', 'BULK_SUB_LOW_VOLT', 'timestamp', 'geometry']
# Build the old -> new lookup once rather than once per column.
rename_map = dict(zip(grid.columns, newnames))
grid.columns = [rename_map[x] for x in grid.columns]

# Normalise missing-value markers to the empty string.
# NOTE(review): with regex=True the string keys are patterns, so 'N/A' matches
# anywhere inside a value while '^/$' only matches a lone '/'. The pd.isnull
# key is a function object, not a predicate -- confirm it is intended.
grid = grid.replace({pd.isnull:'',np.nan:'',None:'','N/A':'','^/$':''},regex=True)

# Keep only the columns used by the exports below.
cols = ['capacity_MW', 'section_id', 'section_voltage', 'circuit_id',
        'substation', 'substation_voltage', 'substation_rating_MVA', 'substation_type',
        'bulk_substation', 'bulk_substation_voltage', 'bulk_substation_rating_MVA',
        'amps', 'timestamp', 'geometry']
grid = grid[cols]

# Rows with a blank substation type default to 'Distribution', except those
# on circuit 'TRM-PTC'.
mask = (grid.substation_type == '') & (grid.circuit_id != 'TRM-PTC')
grid.loc[mask, 'substation_type'] = 'Distribution'

# Convert timestamps to integer epoch seconds. The explicit cast to nanosecond
# resolution keeps the division by 10**9 correct whatever resolution
# to_datetime returns.
grid['timestamp'] = (
    pd.to_datetime(grid.timestamp).values
    .astype('datetime64[ns]')
    .astype(np.int64)
    // 10 ** 9
)

# Flatten every line geometry into parallel per-vertex lists. A None entry is
# appended after each linestring so consecutive lines stay separated in the
# exported table.
lats = []
lons = []
names = []

for feature, name in zip(grid.geometry, grid.section_id):
    if isinstance(feature, shapely.geometry.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.MultiLineString):
        linestrings = feature.geoms
    else:
        # Anything that is not a (multi)linestring is skipped.
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        # Plain lists grow in amortised constant time and keep the values
        # as-is; np.append copies the whole array on every call and coerces
        # dtypes along the way.
        lats.extend(y)
        lons.extend(x)
        names.extend([name] * len(y))
        lats.append(None)
        lons.append(None)
        names.append(None)

# dtype=object keeps the None separators as None instead of converting them.
grid_segments = pd.DataFrame(
    {'lats': lats, 'lons': lons, 'section_id': names},
    dtype=object,
)

# Attach the circuit of each section. Only the two columns needed are merged,
# so the geometry and the remaining attributes are not copied per vertex.
grid_segments = grid_segments.merge(
    grid[['section_id', 'circuit_id']],
    on='section_id',
    how='left',
)

# Rows left without a circuit_id by the merge (the None separators are
# expected to be among them) inherit the value of the preceding row.
grid_segments['circuit_id'] = grid_segments['circuit_id'].ffill()
grid_segments = grid_segments[['lats', 'lons', 'section_id', 'circuit_id']]

grid_segments.to_csv(data_dir + 'grid_segments.tsv', sep='\t', index=False)


# Collapse the per-section rows into one row per unique combination of the
# key columns below, collecting the section-level values into lists.
cols = ['circuit_id', 'amps',
        'substation', 'substation_rating_MVA', 'substation_type',
        'bulk_substation', 'bulk_substation_voltage', 'bulk_substation_rating_MVA', 'timestamp']
grid = grid.groupby(cols).agg({
    'section_id': [len, list],
    'capacity_MW': list,
    'section_voltage': list,
}).reset_index()

# The dict-of-lists aggregation yields two-level column labels. Flatten them
# before touching individual columns so the selections below use plain names.
# 'sections' is the number of sections in the group; the other three are the
# collected lists.
grid.columns = cols + ['sections', 'section_id', 'capacity_MW', 'section_voltage']

# Empty-string placeholders in these columns are replaced by None, which is
# written as an empty field.
for col in ["amps", "substation_rating_MVA", "bulk_substation_voltage", "bulk_substation_rating_MVA"]:
    grid.loc[grid[col] == '', col] = None

grid.to_csv(data_dir + 'grid.tsv', sep='\t', index=False)
