In [None]:
import geopandas as gpd
import shapely.geometry
import numpy as np
import pandas as pd

data_dir = './'
# Load the road layer from the GeoJSON file that sits next to this notebook.
roads = gpd.read_file(data_dir + 'ArlingtonMA_Road.geojson')

# OBJECTID rendered as text; a float-typed id such as 12.0 becomes '12'.
# Computed once and reused for both the comparison and the repair below.
object_ids = roads.OBJECTID.astype(str).str.replace(r'\.0$', '', regex=True)

# Wherever seg_id (ignoring surrounding whitespace) disagrees with the
# OBJECTID text, overwrite it with that text.  The original note here reads
# "missing value in last row", which this repairs.  Rows that already agree
# are left exactly as loaded.
mask = roads.seg_id.str.strip().astype(str) != object_ids
roads.loc[mask, 'seg_id'] = object_ids[mask]


# Flatten every road geometry into four parallel sequences (latitude,
# longitude, OBJECTID, CLASS) with one entry per vertex.  A None entry is
# written after each linestring in all four sequences -- presumably so that
# plotting code treats it as a break between lines.
#
# Values are accumulated in plain lists and converted once at the end;
# calling np.append inside the loop copies the whole array on every call.
lat_values = []
lon_values = []
name_values = []
color_values = []

for feature, name, color in zip(roads.geometry, roads.OBJECTID, roads.CLASS):
    if isinstance(feature, shapely.geometry.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.MultiLineString):
        linestrings = feature.geoms
    else:
        # Any other geometry type (or a missing geometry) is skipped.
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        vertex_count = len(y)
        lat_values.extend(y)
        lon_values.extend(x)
        name_values.extend([name] * vertex_count)
        color_values.extend([color] * vertex_count)
        # Separator entry marking the end of this linestring.
        lat_values.append(None)
        lon_values.append(None)
        name_values.append(None)
        color_values.append(None)

# Object arrays, so the None separators survive alongside the real values.
lats = np.array(lat_values, dtype=object)
lons = np.array(lon_values, dtype=object)
names = np.array(name_values, dtype=object)
colors = np.array(color_values, dtype=object)

        
# One row per vertex (plus the None separator rows): latitude, longitude and
# the id of the segment the vertex belongs to.
road_segments = pd.DataFrame([lats, lons, names]).T
road_segments.columns = ['lats', 'lons', 'seg_id']

# Round the coordinates to 6 decimal places.  The result has to be assigned
# back: astype()/round() return new objects, so calling them without an
# assignment leaves road_segments unchanged.  Separator rows (None) become NaN.
coord_cols = ['lats', 'lons']
road_segments[coord_cols] = road_segments[coord_cols].astype(float).round(6)

# seg_id is normalised as text instead: '12.0' -> '12'.  Separator rows turn
# into the literal string 'None' here.
road_segments.seg_id = road_segments.seg_id.astype(str).str.replace(r'\.0$', '', regex=True)

# Text columns: trim surrounding whitespace only.
# NOTE(review): NAME_FULL is cleaned here, but the column selection further
# down keeps NAME_ABBRV instead -- confirm which one was meant.
for col in ['NAME_FULL', 'FROMSTREET', 'TOSTREETNA', 'SafeRoute', 'OneWay']:
    roads[col] = roads[col].str.strip()

# Integer columns stored as text: blank cells are treated as 0.
for col in ['RT_NUMBER', 'FUNCTIONAL', 'MEDIANTYPE', 'RIGHTSIDEW', 'LEFTSIDEWA', 'SPEEDLIMIT']:
    roads[col] = roads[col].str.strip().replace({'': '0'}).astype(int)

# Integer columns whose text may carry a trailing '.0' (e.g. '1200.0'):
# parse as float, render back to text, drop the '.0', then convert to int.
# A value with a real fractional part makes the final int conversion fail.
for col in ['ADT', 'ADTYEAR']:
    float_text = roads[col].str.strip().replace({'': '0'}).astype(float).astype(str)
    roads[col] = float_text.replace(r'\.0$', '', regex=True).astype(int)

# Float columns stored as text: blank cells are treated as 0.
for col in ['LENGTH']:
    roads[col] = roads[col].str.strip().replace({'': '0'}).astype(float)

# Source column -> output column.  The dict order is the output column order;
# only the columns listed here are kept.
column_names = {
    'seg_id': 'seg_id',
    'NAME_ABBRV': 'streetName',
    'FROMSTREET': 'from_street',
    'TOSTREETNA': 'to_street',
    'CLASS': 'class',
    'RT_NUMBER': 'route',
    'FUNCTIONAL': 'functional',
    'MEDIANTYPE': 'median_type',
    'MEDIANWIDT': 'median_width',
    'RIGHTSIDEW': 'right_sidewalk',
    'LEFTSIDEWA': 'left_sidewalk',
    'L_ADDRFROM': 'left_streetNum_from',
    'L_ADDRTO': 'left_streetNum_to',
    'R_ADDRFROM': 'right_streetNum_from',
    'R_ADDRTO': 'right_streetNum_to',
    'ADT': 'daily_traffic',
    'ADTYEAR': 'daily_traffic_year',
    'SPEEDLIMIT': 'speed_limit',
    'SafeRoute': 'safe_route',
    'OneWay': 'oneway',
    'LENGTH': 'length',
    'MILES': 'miles',
}

roads = roads[list(column_names)].rename(columns=column_names)

# Attach the road class to every vertex row through its segment id.  Rows
# whose seg_id has no match in roads get NaN for 'class' after the left merge.
road_segments = road_segments.merge(roads[['seg_id', 'class']], on='seg_id', how='left')

# Rows without a class inherit the class of the row above them.
# .ffill() replaces the deprecated fillna(method='ffill') spelling.
road_segments['class'] = road_segments['class'].ffill().astype(str)
# Forward-fill any missing ids, then blank out the literal 'None' text.
# NOTE(review): if seg_id was already converted to strings upstream, the
# separator rows hold 'None' rather than NaN and the ffill changes nothing --
# confirm whether those rows were meant to end up blank or to inherit an id.
road_segments.seg_id = road_segments.seg_id.ffill().str.replace('None', '')

road_segments.to_csv(data_dir + 'road_segments.tsv', sep='\t', index=False)

# Collapse the per-segment rows into one row per
# (streetName, from_street, to_street, class) combination.
#
# Per group: the number of segments and the list of their ids, the last
# value of each descriptive attribute, the total length together with the
# individual segment lengths, and the total miles.  The route, functional,
# street-number-range and daily-traffic columns are deliberately not
# carried into the grouped table.
#
# 'sum' is passed as a string: handing the Python builtin sum to agg() is
# deprecated in recent pandas, and the string keeps the current behaviour.
cols = ['streetName', 'from_street', 'to_street', 'class']
roads = roads.groupby(cols).agg({
    'seg_id': [len, list],
    'median_type': 'last', 'median_width': 'last',
    'right_sidewalk': 'last', 'left_sidewalk': 'last',
    'speed_limit': 'last',
    'safe_route': 'last', 'oneway': 'last',
    'length': ['sum', list], 'miles': 'sum',
}).reset_index()

# Flatten the two-level column index produced by agg().  The order must
# mirror the group keys followed by the aggregation spec above.
roads.columns = [
    'streetName', 'from_street', 'to_street', 'class',
    'segments', 'seg_id',
    'median_type', 'median_width',
    'right_sidewalk', 'left_sidewalk',
    'speed_limit', 'safe_route', 'oneway',
    'length', 'segment_lengths', 'miles',
]

# A speed limit of 0 is replaced with 25.
mask = roads.speed_limit == 0
roads.loc[mask, 'speed_limit'] = 25

# median_width is still text at this point: blank -> 0, then integer.
roads.median_width = roads.median_width.str.strip().replace({'': '0'}).astype(int)

roads.to_csv(data_dir + 'roads.tsv', sep='\t', index=False)