## This model constructs a networkx graph from the data using the following nodes and edges

### Nodes:
- Station

### Edges:
- Trip: bike trip between stations

In [15]:
import pandas as pd
import networkx as nx
from pprint import pprint
from graphgen import fix_columns, create_graph

In [3]:
# Input CSV files, given relative to the notebook's working directory.
trips_filename = '../data/201508_trip_data.csv'
stations_filename = '../data/201508_station_data.csv'

trips_df = pd.read_csv(trips_filename)
stations_df = pd.read_csv(stations_filename)

# Column names are left exactly as they appear in the CSVs: the edge mapping
# defined further down refers to the trip columns by their raw, spaced names
# (e.g. 'Start Terminal'). fix_columns replaces spaces with underscores, so
# only enable it if the mappers are updated to use the underscored names.
# fix_columns(trips_df)
# fix_columns(stations_df)


In [4]:
# Show the column index of both frames, trips first, one per line.
# (For a look at actual rows, use display(trips_df.head()) instead.)
print(trips_df.columns, stations_df.columns, sep='\n')

Index(['Trip ID', 'Duration', 'Start Date', 'Start Station', 'Start Terminal',
       'End Date', 'End Station', 'End Terminal', 'Bike #', 'Subscriber Type',
       'Zip Code'],
      dtype='object')
Index(['station_id', 'name', 'lat', 'long', 'dockcount', 'landmark',
       'installation'],
      dtype='object')


In [5]:

# Node mapping passed to create_graph together with the stations table.
# Each entry pairs an output 'name' with a 'raw' field; the raw values used
# here are all column names of stations_df.
station_mapper = {
    'nodes': [
        {
            'type': 'Station',
            'key': [dict(name='id', raw='station_id')],
            'attributes': [
                dict(name=attr_name, raw=raw_column)
                for attr_name, raw_column in (
                    ('id', 'station_id'),
                    ('name', 'name'),
                    ('lat', 'lat'),
                    ('long', 'long'),
                    ('landmark', 'landmark'),
                )
            ],
        },
    ]
}

# Edge mapping passed to create_graph together with the trips table.
# A 'Trip' edge runs from one 'Station' to another; both endpoint keys and
# all attributes take their 'raw' values from column names of trips_df.
edges_mapper = {
    'edges': [
        {
            'type': 'Trip',
            'from': {
                'type': 'Station',
                'key': [dict(name='id', raw='Start Terminal')],
            },
            'to': {
                'type': 'Station',
                'key': [dict(name='id', raw='End Terminal')],
            },
            'attributes': [
                dict(name='trip_id', raw='Trip ID'),
                dict(name='start_date', raw='Start Date'),
                dict(name='end_date', raw='End Date'),
            ],
        }
    ]
}

In [6]:
# %%time

# Start from an empty directed multigraph, so several parallel edges can
# exist between the same ordered pair of nodes.
g = nx.MultiDiGraph()

# Apply the node mapping to the stations table ...
g = create_graph(g, graph_mapper=station_mapper, data_provider=stations_df)

# ... then the edge mapping to the trips table, on the same graph object.
g = create_graph(g, graph_mapper=edges_mapper, data_provider=trips_df)

In [7]:
# Total number of nodes currently in the graph.
g.number_of_nodes()

70

In [8]:
# Total number of edges currently in the graph (parallel edges counted individually).
g.number_of_edges()

354152

In [10]:
# Attribute dict stored on the node with id 'Station_2'.
# Uses the `nodes` view: the older `g.node` alias was deprecated in
# NetworkX 2.x and removed in 2.4.
print(g.nodes['Station_2'])

{'_type_': 'Station', 'id': 2, 'name': 'San Jose Diridon Caltrain Station', 'lat': 37.329732, 'long': -121.90178200000001, 'landmark': 'San Jose'}


In [16]:
# Every parallel edge going from 'Station_2' to 'Station_16'; the result is
# a dict keyed by edge index, each value being that edge's attribute dict.
pprint(
    g.get_edge_data('Station_2', 'Station_16')
)

{0: {'_type_': 'Trip',
     'end_date': '8/31/2015 18:56',
     'start_date': '8/31/2015 18:43',
     'trip_id': 913217},
 1: {'_type_': 'Trip',
     'end_date': '8/28/2015 18:40',
     'start_date': '8/28/2015 18:25',
     'trip_id': 910533},
 2: {'_type_': 'Trip',
     'end_date': '8/26/2015 18:44',
     'start_date': '8/26/2015 18:31',
     'trip_id': 907018},
 3: {'_type_': 'Trip',
     'end_date': '8/26/2015 17:28',
     'start_date': '8/26/2015 17:14',
     'trip_id': 906737},
 4: {'_type_': 'Trip',
     'end_date': '8/25/2015 18:43',
     'start_date': '8/25/2015 18:27',
     'trip_id': 905083},
 5: {'_type_': 'Trip',
     'end_date': '8/17/2015 20:39',
     'start_date': '8/17/2015 20:15',
     'trip_id': 893902},
 6: {'_type_': 'Trip',
     'end_date': '8/12/2015 19:06',
     'start_date': '8/12/2015 18:49',
     'trip_id': 887403},
 7: {'_type_': 'Trip',
     'end_date': '8/11/2015 18:55',
     'start_date': '8/11/2015 18:42',
     'trip_id': 885595},
 8: {'_type_': 'Trip',
 