In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('network_nodes.csv')
data.head()

Unnamed: 0,id,label,x_fa2,y_fa2
0,8500010,Basel SBB,-1670.112915,-969.267761
1,8500016,Basel St. Johann,-1695.203003,-977.505127
2,8500020,Muttenz,-1680.089722,-971.303772
3,8500021,Pratteln,-1694.761597,-960.146729
4,8500023,Liestal,-1618.025269,-961.962708


In [3]:
stops = pd.read_csv('stops.csv')
stops.head()

Unnamed: 0,stop_id,stop_name,stop_lon,stop_lat
0,8500010,Basel SBB,2611363.0,1266310.0
1,8508319,Horw,2666022.0,1207746.0
2,8502273,Bremgarten,2668595.0,1244915.0
3,8502274,Zufikon,2669212.0,1245040.0
4,8502275,Widen Heinrüti,2669215.0,1246079.0


In [4]:
merged_data = pd.merge(data, stops, left_on='label', right_on='stop_name')
merged_data.drop(columns=['stop_id', 'stop_name'], inplace=True)
merged_data.rename(columns={'stop_lon': 'x_geo', 'stop_lat': 'y_geo'}, inplace=True)
merged_data.head()

Unnamed: 0,id,label,x_fa2,y_fa2,x_geo,y_geo
0,8500010,Basel SBB,-1670.112915,-969.267761,2611363.0,1266310.0
1,8500016,Basel St. Johann,-1695.203003,-977.505127,2610076.0,1268853.0
2,8500020,Muttenz,-1680.089722,-971.303772,2615758.0,1264783.0
3,8500021,Pratteln,-1694.761597,-960.146729,2618994.0,1263578.0
4,8500023,Liestal,-1618.025269,-961.962708,2622064.0,1259341.0


In [5]:
# Scale all coordinates to be in range [0, 10000]
MIN_SCALE = 0
MAX_SCALE = 10000

min_fa2 = merged_data[['x_fa2', 'y_fa2']].min().min()
max_fa2 = merged_data[['x_fa2', 'y_fa2']].max().max()

min_geo = merged_data[['x_geo', 'y_geo']].min().min()
max_geo = merged_data[['x_geo', 'y_geo']].max().max()

merged_data['x_fa2'] = (merged_data['x_fa2'] - min_fa2) / (max_fa2 - min_fa2) * (MAX_SCALE - MIN_SCALE) + MIN_SCALE
merged_data['y_fa2'] = (merged_data['y_fa2'] - min_fa2) / (max_fa2 - min_fa2) * (MAX_SCALE - MIN_SCALE) + MIN_SCALE

merged_data['x_geo'] = (merged_data['x_geo'] - min_geo) / (max_geo - min_geo) * (MAX_SCALE - MIN_SCALE) + MIN_SCALE
merged_data['y_geo'] = (merged_data['y_geo'] - min_geo) / (max_geo - min_geo) * (MAX_SCALE - MIN_SCALE) + MIN_SCALE

merged_data.head()

Unnamed: 0,id,label,x_fa2,y_fa2,x_geo,y_geo
0,8500010,Basel SBB,3224.670618,3971.930148,8747.787053,1083.724166
1,8500016,Basel St. Johann,3197.918906,3963.147252,8740.453774,1098.214087
2,8500020,Muttenz,3214.033084,3969.7593,8772.829603,1075.023376
3,8500021,Pratteln,3198.389545,3981.655233,8791.268213,1068.15733
4,8500023,Liestal,3280.207836,3979.718988,8808.760961,1044.015059


In [11]:
statistics = pd.read_csv('nodes_statistics.csv')
statistics = statistics[['weighted indegree', 'weighted outdegree', 'Weighted Degree', 'betweenesscentrality']]
statistics.rename(columns={
    'weighted indegree': 'indegree', 
    'weighted outdegree': 'outdegree', 
    'Weighted Degree': 'degree',
    'betweenesscentrality': 'betweenness_centrality',
    }, inplace=True)
statistics.head()

Unnamed: 0,indegree,outdegree,degree,betweenness_centrality
0,0,179568,179568,0.0
1,43662,43930,87592,0.0
2,20739,20603,41342,2740.819048
3,52469,52786,105255,9347.652381
4,10333,10333,20666,7780.25


In [12]:
merged_data = pd.concat([merged_data, statistics], axis=1)
merged_data.head()

Unnamed: 0,id,label,x_fa2,y_fa2,x_geo,y_geo,indegree,outdegree,degree,betweenness_centrality
0,8500010,Basel SBB,3224.670618,3971.930148,8747.787053,1083.724166,0.0,179568.0,179568.0,0.0
1,8500016,Basel St. Johann,3197.918906,3963.147252,8740.453774,1098.214087,43662.0,43930.0,87592.0,0.0
2,8500020,Muttenz,3214.033084,3969.7593,8772.829603,1075.023376,20739.0,20603.0,41342.0,2740.819048
3,8500021,Pratteln,3198.389545,3981.655233,8791.268213,1068.15733,52469.0,52786.0,105255.0,9347.652381
4,8500023,Liestal,3280.207836,3979.718988,8808.760961,1044.015059,10333.0,10333.0,20666.0,7780.25


In [14]:
# write to csv with header
merged_data.to_csv('network_nodes_with_coordinates_and_stats.csv', index=False)