# Bike Network Analysis

## Data preperation

In [141]:
# import modules
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

In [128]:
# read data
path_to_data = "./data/NYC-CitiBike-2016.csv"
df = pd.read_csv(path_to_data)
df.columns
df.columns = df.columns.str.replace(" ", "_")
df.head()

Unnamed: 0,tripduration,starttime,stoptime,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,839,1/1/2016 00:09:55,1/1/2016 00:23:54,532,S 5 Pl & S 4 St,40.710451,-73.960876,401,Allen St & Rivington St,40.720196,-73.989978,17109,Customer,,0
1,686,1/1/2016 00:21:17,1/1/2016 00:32:44,3143,5 Ave & E 78 St,40.776829,-73.963888,3132,E 59 St & Madison Ave,40.763505,-73.971092,23514,Subscriber,1960.0,1
2,315,1/1/2016 00:33:11,1/1/2016 00:38:26,3164,Columbus Ave & W 72 St,40.777057,-73.978985,3178,Riverside Dr & W 78 St,40.784145,-73.983625,14536,Subscriber,1971.0,1
3,739,1/1/2016 00:40:51,1/1/2016 00:53:11,223,W 13 St & 7 Ave,40.737815,-73.999947,276,Duane St & Greenwich St,40.717488,-74.010455,24062,Subscriber,1969.0,1
4,1253,1/1/2016 00:44:16,1/1/2016 01:05:09,484,W 44 St & 5 Ave,40.755003,-73.980144,151,Cleveland Pl & Spring St,40.722104,-73.997249,16380,Customer,,0


In [129]:
df_graph = df.groupby(["start_station_id", "end_station_id"]).size().reset_index()
df_graph.rename(columns={0:"weight"}, inplace=True)
df_graph.head()

Unnamed: 0,start_station_id,end_station_id,weight
0,72,72,9
1,72,116,2
2,72,127,6
3,72,128,3
4,72,146,1


## Create the network

In [131]:
# creating an empty graph object
graph_object = nx.Graph()

In [132]:
# iterating through the DataFrame to add edges
for _, edge in df_graph.iterrows():
    graph_object.add_edge(edge['start_station_id'], 
                          edge['end_station_id'],
                          weight=edge['weight'])

### Degree Centrality

In [154]:
# calculation the degree centrality
dict_degree_centrality = nx.degree_centrality(graph_object)
print(dict_degree_centrality)
pd.DataFrame(dict_degree_centrality)

{72: 0.39783281733746134, 116: 0.4256965944272446, 127: 0.4628482972136223, 128: 0.5123839009287926, 146: 0.3622291021671827, 147: 0.44891640866873067, 151: 0.5526315789473685, 152: 0.3746130030959752, 153: 0.4705882352941177, 167: 0.47678018575851394, 168: 0.48297213622291024, 173: 0.4628482972136223, 195: 0.4473684210526316, 212: 0.4256965944272446, 225: 0.4179566563467493, 229: 0.5170278637770899, 236: 0.5170278637770899, 237: 0.48606811145510836, 238: 0.38544891640866874, 247: 0.3885448916408669, 248: 0.2647058823529412, 251: 0.5170278637770899, 252: 0.38544891640866874, 254: 0.39009287925696595, 257: 0.39318885448916413, 260: 0.281733746130031, 267: 0.34674922600619196, 280: 0.39473684210526316, 281: 0.4303405572755418, 284: 0.49845201238390097, 285: 0.5263157894736843, 293: 0.4210526315789474, 303: 0.44272445820433437, 304: 0.4643962848297214, 309: 0.3715170278637771, 312: 0.48297213622291024, 315: 0.3823529411764706, 319: 0.3993808049535604, 327: 0.44582043343653255, 328: 0.3482

ValueError: If using all scalar values, you must pass an index

In [150]:
# sorting the stations according to their degree centrality and returning the top 10
top_stations = sorted(degree_centrality.items(), 
                      key=lambda x:x[1], 
                      reverse=True)[0:10]
pd.DataFrame(top_stations)

Unnamed: 0,0,1
0,519,0.591331
1,402,0.568111
2,497,0.566563
3,151,0.552632
4,426,0.532508
5,490,0.53096
6,285,0.526316
7,229,0.517028
8,236,0.517028
9,251,0.517028


### Page Rank

In [None]:
# calculation the Google page rank
page_rank = nx.pagerank(graph_object)
df_page_rank

{72: 0.001857156720241571,
 116: 0.0027543277296689203,
 127: 0.003298951685262413,
 128: 0.0034095777560417596,
 146: 0.001724209864708314,
 147: 0.002792091294474884,
 151: 0.004465288470764093,
 152: 0.0014992256109040577,
 153: 0.0024695423793692456,
 167: 0.002557538821291907,
 168: 0.0036550400542110562,
 173: 0.002829049702264442,
 195: 0.0022993932577171122,
 212: 0.0024963012006969255,
 225: 0.0022743833055646565,
 229: 0.003596624713281709,
 236: 0.00292027079489846,
 237: 0.002879588353955309,
 238: 0.0018705724981685668,
 247: 0.0016535035395046865,
 248: 0.0009484902611905367,
 251: 0.0031114679643739628,
 252: 0.0019273013859558118,
 254: 0.001672063373041601,
 257: 0.001937545130297382,
 260: 0.0011676690696677773,
 267: 0.0014122554248178387,
 280: 0.0018148594383293633,
 281: 0.003117862373358979,
 284: 0.004026457108260649,
 285: 0.004449223064225229,
 293: 0.0025131205014197757,
 303: 0.0021046934199379884,
 304: 0.0026209542724638527,
 309: 0.0018127585388221642,
 3

In [155]:
# https://www.kaggle.com/mmmarchetti/game-of-thrones-network-analysis

In [156]:
# https://campus.datacamp.com/courses/introduction-to-network-analysis-in-python/introduction-to-networks?ex=4