In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [39]:
edges = pd.read_csv('fairfax_links.csv')
nodes = pd.read_csv('fairfax_nodes.csv')
display(edges.head(1))
display(nodes.head(1))

Unnamed: 0,eid,osmid,length,type,lanes,maxmph,capacity,fft,nid_s,nid_e,geometry
0,0,7714200,175.637,residential,1,25,950,15.715577,0,2,"LINESTRING (-122.7694487 38.4353358, -122.7692..."


Unnamed: 0,nid,osmid,lon,lat,type
0,0,56098817,-122.769449,38.435336,real


In [40]:
removed_edges = edges[edges['length']<=20].copy()
print(edges.shape, removed_edges.shape)
removed_node_grp = {}
grp_id = 0
for edge in removed_edges.itertuples():
    nid_s = getattr(edge, 'nid_s')
    nid_e = getattr(edge, 'nid_e')
    try:
        nid_s_grp = removed_node_grp[nid_s]
    except KeyError:
        nid_s_grp = grp_id
    try:
        nid_e_grp = removed_node_grp[nid_e]
    except KeyError:
        nid_e_grp = grp_id
    nid_se_grp_id = min(nid_s_grp, nid_e_grp)
    removed_node_grp[nid_s] = nid_se_grp_id
    removed_node_grp[nid_e] = nid_se_grp_id
    if nid_se_grp_id == grp_id: grp_id += 1
removed_node_grp_df = pd.DataFrame(removed_node_grp.items(), columns=['nid', 'node_grp'])
removed_node_grp_df['node_grp'] = removed_node_grp_df['node_grp'].apply(lambda x: 'g{}'.format(x))
print(nodes.shape, removed_node_grp_df.shape)
removed_node_grp_df.tail()

(549010, 11) (31444, 11)
(224224, 5) (31720, 2)


Unnamed: 0,nid,node_grp
31715,224076,g14136
31716,224122,g14137
31717,36587,g14137
31718,224124,g6211
31719,224147,g6211


In [48]:
new_nodes = pd.merge(nodes, removed_node_grp_df, how='left', on='nid')
new_nodes['node_grp'] = np.where(
    pd.isnull(new_nodes['node_grp']), new_nodes['nid'], new_nodes['node_grp'])
new_nodes = new_nodes.groupby('node_grp').agg({'lon': np.mean, 'lat': np.mean}).reset_index()
print(nodes.shape, new_nodes.shape)

(224224, 5) (206427, 3)


In [51]:
new_edges = edges[edges['length']>20].copy()
new_edges = pd.merge(new_edges, removed_node_grp_df, 
                           how='left', left_on='nid_s', right_on='nid')
new_edges = pd.merge(new_edges, removed_node_grp_df, 
                           how='left', left_on='nid_e', right_on='nid', suffixes=['_ns0', '_ne0'])
new_edges['node_grp_ns0'] = np.where(
    pd.isnull(new_edges['node_grp_ns0']), new_edges['nid_s'], new_edges['node_grp_ns0'])
new_edges['node_grp_ne0'] = np.where(
    pd.isnull(new_edges['node_grp_ne0']), new_edges['nid_e'], new_edges['node_grp_ne0'])
new_edges = new_edges[[
    'nid_s', 'nid_e', 'node_grp_ns0', 'node_grp_ne0', 'length', 'lanes', 'type', 'capacity', 'maxmph', 'geometry'
]]
new_edges = new_edges.loc[new_edges['node_grp_ns0']!=new_edges['node_grp_ne0']]

### update nodes
new_nodes = new_nodes.loc[(
    new_nodes['node_grp'].isin(new_edges['node_grp_ns0'])) | 
    (new_nodes['node_grp'].isin(new_edges['node_grp_ne0']))
]
new_nodes['new_nid'] = np.arange(new_nodes.shape[0])

new_edges = pd.merge(new_edges, new_nodes, how='left', left_on='node_grp_ns0', right_on='node_grp')
new_edges = pd.merge(new_edges, new_nodes, how='left', left_on='node_grp_ne0', right_on='node_grp', 
                           suffixes=['_ns', '_ne'])

geometry_list = []
for edge in new_edges.itertuples():
    geometry = getattr(edge, 'geometry').replace('LINESTRING(','').replace(')', '').split(', ')
    geometry = [tuple(xy.split(' ')) for xy in geometry]
    lon_ns, lat_ns = getattr(edge, 'lon_ns'), getattr(edge, 'lat_ns')
    lon_ne, lat_ne = getattr(edge, 'lon_ne'), getattr(edge, 'lat_ne')
    geometry = [(lon_ns, lat_ns)] + geometry[1:-2] + [(lon_ne, lat_ne)]
    geometry_list.append('LINESTRING('+', '.join('{} {}'.format(xy[0], xy[1]) for xy in geometry)+')')
new_edges['geometry'] = geometry_list
new_edges['start_node_id'] = new_edges['new_nid_ns']
new_edges['end_node_id'] = new_edges['new_nid_ne']
new_edges['nid_s_old'] = new_edges['nid_s']
new_edges['nid_e_old'] = new_edges['nid_e']
new_edges = new_edges[['start_node_id', 'end_node_id', 'nid_s_old', 'nid_e_old',
                                  'length', 'lanes', 'type', 'capacity', 'maxmph', 'geometry']]
new_edges = new_edges.loc[new_edges['start_node_id']!=new_edges['end_node_id']]
new_edges = new_edges.drop_duplicates(subset=['start_node_id', 'end_node_id'])
new_edges['link_id'] = np.arange(new_edges.shape[0])
print(new_edges.shape, edges.shape)
new_edges.tail(1)

(507842, 11) (549010, 11)


Unnamed: 0,start_node_id,end_node_id,nid_s_old,nid_e_old,length,lanes,type,capacity,maxmph,geometry,link_id
513882,119384,192503,139862,224223,1000.0,1000,vl_out,1000000,1000,"LINESTRING(-122.5811396 37.9866327, -122.5823 ...",507841


In [54]:
removed_node_grp_df.to_csv('fairfax_nid_grp_conversion.csv', index=False)
new_nodes.to_csv('new_fairfax_nodes.csv', index=False)
new_edges.to_csv('new_fairfax_links.csv', index=False)

In [64]:
### process OD
node_map = nodes[['nid']].merge(
    removed_node_grp_df, how='left', on='nid')
node_map['node_grp'] = np.where(pd.isnull(node_map['node_grp']), node_map['nid'], node_map['node_grp'])
node_map = pd.merge(node_map, new_nodes[['node_grp', 'new_nid']], how='left', on='node_grp')
#display(node_map[node_map['new_nid']==38788])
node_map_dict = {getattr(n, 'nid'): getattr(n, 'new_nid') for n in node_map.itertuples()}

for od_file in ['fairfax_ods_day', 'fairfax_ods_background']:
    print(od_file)
    sub_od = pd.read_csv(od_file+'.csv')
    sub_od['origin_nid'] = sub_od['origin_node_id'].map(node_map_dict)
    sub_od['destin_nid'] = sub_od['destin_node_id'].map(node_map_dict)
    ### remove ODs whose nodes have been contracted
    remove_ods = np.isnan(sub_od['origin_nid']) | np.isnan(sub_od['destin_nid'])
    display(sub_od[remove_ods].head())
    sub_od = sub_od[~remove_ods]
    print('remove {} ods, keep {} ods'.format(np.sum(remove_ods), sub_od.shape[0]))
    sub_od['origin_nid'] = sub_od['origin_nid'].astype(int)
    sub_od['destin_nid'] = sub_od['destin_nid'].astype(int)
    sub_od.to_csv(od_file+'_new.csv', index=False)

fairfax_ods_day


Unnamed: 0,origin_node_id,destin_node_id,origin_nid,destin_nid


remove 0 ods, keep 4054 ods
fairfax_ods_background


Unnamed: 0,origin_node_id,destin_node_id,departure_hour,departure_quarter,origin_nid,destin_nid
3263,33973,110406,6,3,,94842.0
6661,123469,58611,7,1,105634.0,
12848,33973,118438,6,3,,101442.0
13233,58611,194516,7,0,,194017.0
16214,134244,58611,8,1,114707.0,


remove 36 ods, keep 112113 ods
