## This model constuct a networkx graph from data using the following nodes and edges

### Nodes:
- Station
- Bike

### Edges:
- TripFrom (from Station to bike)
- TripTo   (bike to Station)

In [1]:
import pandas as pd
import networkx as nx
from graphgen import fix_columns, create_graph

In [2]:
trips_filename = '../data//201508_trip_data.csv'
stations_filename = '../data/201508_station_data.csv'

trips_df = pd.read_csv(trips_filename)
stations_df = pd.read_csv(stations_filename)

# if columns have spaces in their names we need to replace them with underscore
fix_columns(trips_df)
fix_columns(stations_df)

In [3]:
station_mapper = {
    'nodes': [
        {
            'type'      : 'Station',
            'key'       : 'station_id',
            'attributes': ['station_id', 'name', 'lat', 'long', 'landmark']
        }
    ]
}

bike_mapper = {
    'nodes': [
        {
            'type'      : 'Bike',
            'key'       : 'Bike_#',
            'attributes': ['Bike_#']
        },
    ]
}

edges_mapper = {
    'edges': [
        {
            'type'      : 'TripFrom',
            'from'      : {
                'key'     :'Start_Terminal',
                'node_key': 'station_id'
            },
            'to'        : {
                'key'     : 'Bike_#',
                'node_key': 'Bike_#'
            },
            'attributes': ['Trip_ID', 'Start_Date']
        },
        {
            'type'      : 'TripTp',
            'from'        : {
                'key'     : 'Bike_#',
                'node_key': 'Bike_#'
            },
            'to'      : {
                'key'     :'End_Terminal',
                'node_key': 'station_id'
            },
            'attributes': ['Trip_ID', 'End_Date']
        }
    ]
}

In [4]:
# make sure that second set of nodes does have a unique IDs
# In this example we'll keep the stations as is, and replace the Bike # with the same number appended with 555
trips_df['Bike_#'] = trips_df['Bike_#'].apply(lambda x : 5550000 + x)

In [5]:
display(trips_df.head())

Unnamed: 0,Trip_ID,Duration,Start_Date,Start_Station,Start_Terminal,End_Date,End_Station,End_Terminal,Bike_#,Subscriber_Type,Zip_Code
0,913460,765,8/31/2015 23:26,Harry Bridges Plaza (Ferry Building),50,8/31/2015 23:39,San Francisco Caltrain (Townsend at 4th),70,5550288,Subscriber,2139
1,913459,1036,8/31/2015 23:11,San Antonio Shopping Center,31,8/31/2015 23:28,Mountain View City Hall,27,5550035,Subscriber,95032
2,913455,307,8/31/2015 23:13,Post at Kearny,47,8/31/2015 23:18,2nd at South Park,64,5550468,Subscriber,94107
3,913454,409,8/31/2015 23:10,San Jose City Hall,10,8/31/2015 23:17,San Salvador at 1st,8,5550068,Subscriber,95113
4,913453,789,8/31/2015 23:09,Embarcadero at Folsom,51,8/31/2015 23:22,Embarcadero at Sansome,60,5550487,Customer,9069


In [11]:

# construct a bidirectional multi-edge graph object
g = nx.MultiDiGraph()

%time g = create_graph(g, graph_mapper = station_mapper, \
                 data_provider = stations_df, update=False)

%time g = create_graph(g, graph_mapper = bike_mapper, \
                 data_provider = trips_df, update=False)

%time g = create_graph(g, graph_mapper = edges_mapper, \
                 data_provider = trips_df)

70
CPU times: user 2.41 ms, sys: 0 ns, total: 2.41 ms
Wall time: 2.42 ms
668
CPU times: user 654 ms, sys: 2.02 ms, total: 656 ms
Wall time: 657 ms
CPU times: user 3.59 s, sys: 89.9 ms, total: 3.68 s
Wall time: 3.68 s


In [12]:
# print(g.get_edge_data(50, 5550288))

{0: {'_type_': 'TripFrom', 'Trip_ID': 913460, 'Start_Date': '8/31/2015 23:26'}, 1: {'_type_': 'TripFrom', 'Trip_ID': 912762, 'Start_Date': '8/31/2015 16:47'}, 2: {'_type_': 'TripFrom', 'Trip_ID': 904150, 'Start_Date': '8/25/2015 9:49'}, 3: {'_type_': 'TripFrom', 'Trip_ID': 861625, 'Start_Date': '7/24/2015 22:04'}, 4: {'_type_': 'TripFrom', 'Trip_ID': 857373, 'Start_Date': '7/22/2015 13:37'}, 5: {'_type_': 'TripFrom', 'Trip_ID': 847865, 'Start_Date': '7/15/2015 15:47'}, 6: {'_type_': 'TripFrom', 'Trip_ID': 836035, 'Start_Date': '7/7/2015 10:55'}, 7: {'_type_': 'TripFrom', 'Trip_ID': 833282, 'Start_Date': '7/4/2015 15:58'}, 8: {'_type_': 'TripFrom', 'Trip_ID': 824284, 'Start_Date': '6/26/2015 17:23'}, 9: {'_type_': 'TripFrom', 'Trip_ID': 770041, 'Start_Date': '5/17/2015 10:38'}, 10: {'_type_': 'TripFrom', 'Trip_ID': 752024, 'Start_Date': '5/4/2015 13:59'}, 11: {'_type_': 'TripFrom', 'Trip_ID': 747377, 'Start_Date': '4/30/2015 9:26'}, 12: {'_type_': 'TripFrom', 'Trip_ID': 741039, 'Start_D