## This model constructs a networkx graph from data using the following nodes and edges

### Nodes:
- City: aggregate all stations within a city

### Edges:
- BikeTrip: bike trip between cities

In [None]:
import pandas as pd
import networkx as nx
from pprint import pprint
from graphgen import create_graph

In [None]:
# Load the trip and station CSV files into DataFrames, keeping each path
# next to the read that uses it.
trips_filename = '../data/201508_trip_data.csv'
trips_df = pd.read_csv(trips_filename)

stations_filename = '../data/201508_station_data.csv'
stations_df = pd.read_csv(stations_filename)

# Column names containing spaces could be normalised to underscores with
# fix_columns. The calls are left disabled: later cells address the trip
# columns by their original names (e.g. 'Start Terminal').
# fix_columns(trips_df)
# fix_columns(stations_df)

### We need to adjust the data to allow city-to-city navigation by combining all stations in a city.
### As a result, trips will connect cities instead of individual stations.

In [None]:
# preview the first rows of the stations table to inspect its columns
display(stations_df.head())

In [None]:
# The 'landmark' column is relabelled 'city_name' so that the column name
# reflects the information it is used for in the rest of the notebook.
column_renames = {'landmark': 'city_name'}
stations_df.rename(columns=column_renames, inplace=True)

In [None]:
# list the distinct values found in the 'city_name' column
display(stations_df['city_name'].unique())

In [None]:
# preview the first rows of the trips table to inspect its columns
display(trips_df.head())

In [None]:
# Sanity check: take the start terminal of the trip at index label 0 and show
# the 'city_name' of the station row(s) whose station_id matches it.
first_start_terminal = trips_df.loc[0, 'Start Terminal']
matching_stations = stations_df[stations_df.station_id == first_start_terminal]
display(matching_stations['city_name'])

In [None]:
# Build a station_id -> city_name lookup dictionary.
# set_index() returns a new frame here instead of being applied in place to a
# column selection of stations_df; the in-place form on a derived frame is the
# pattern that can raise pandas' SettingWithCopyWarning.
city_df = stations_df[['station_id', 'city_name']].set_index('station_id')
station_to_city_dict = city_df['city_name'].to_dict()

In [None]:
# For each trip endpoint, translate the terminal (station id) column into a
# new city column through the station -> city lookup dictionary.
for city_column, terminal_column in (
    ('Start City', 'Start Terminal'),
    ('End City', 'End Terminal'),
):
    trips_df[city_column] = trips_df[terminal_column].map(station_to_city_dict)

In [None]:
# show the first rows again; 'Start City' and 'End City' should now be present
trips_df.head()

In [None]:
# NOTE: kept for reference only (not executed) - renaming 'Start_Terminal' and 'End_Terminal' to 'Start_City' and 'End_City' respectively
# trips_df.rename(columns={'Start_Terminal':'Start_City', 'End_Terminal':'End_City'}, inplace = True)

# trips_df.head()

In [None]:
# Node mapping passed to create_graph: a single 'City' node description whose
# key and attribute 'name' are both read from the raw 'city_name' column.
city_node = {
    'type': 'City',
    'key': [{'name': 'name', 'raw': 'city_name'}],
    'attributes': [{'name': 'name', 'raw': 'city_name'}],
}

city_mapper = {'nodes': [city_node]}

# Edge mapping passed to create_graph: a single 'BikeTrip' edge description
# whose endpoints are 'City' nodes read from the 'Start City' and 'End City'
# columns; the remaining trip columns are carried as edge attributes.
edges_mapper = {
    'edges': [
        {
            'type': 'BikeTrip',
            'from': {
                'type': 'City',
                'key': [
                    {'name': 'name', 'raw': 'Start City'},
                ],
            },
            'to': {
                'type': 'City',
                # Use the same key field ('name') as the City node definition
                # and the 'from' endpoint; this entry previously said 'num',
                # which matches no key declared for City nodes.
                'key': [
                    {'name': 'name', 'raw': 'End City'},
                ],
            },
            'attributes': [
                {'name': 'trip_id', 'raw': 'Trip ID'},
                {'name': 'start_date', 'raw': 'Start Date'},
                {'name': 'end_date', 'raw': 'End Date'},
                {'name': 'duration', 'raw': 'Duration'},
            ],
        },
    ],
}

In [None]:
# construct a directed multigraph object (nx.MultiDiGraph can hold several
# parallel directed edges between the same pair of nodes)
g = nx.MultiDiGraph()

# first pass: run create_graph over the stations table with the node mapping
# NOTE(review): the exact effect of update=False is defined by graphgen - confirm
%time g = create_graph(g, graph_mapper = city_mapper, \
                 data_provider = stations_df, update=False)

# second pass: run create_graph over the trips table with the edge mapping,
# reusing the graph returned by the first pass
%time g = create_graph(g, graph_mapper = edges_mapper, \
                 data_provider = trips_df, update=False)




In [None]:
# Report how many nodes and edges the constructed graph holds.
node_count = g.number_of_nodes()
edge_count = g.number_of_edges()
print('nodes:', node_count, '- edges:', edge_count)

In [None]:
# per-node count of outgoing edges, then per-node count of incoming edges
display(g.out_degree())
display(g.in_degree())