In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from enum import Enum

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# Folder that receives the aggregated CSVs written via
# AnalysisConfig.get_export_path().
EXPORTS_DIR = '../data/exports'
# Folder holding the per-city matrix CSVs (displacement, elevation and
# trip counts); it sits directly under the exports folder.
MATRIX_DIR = '{}/adjacency_matrix'.format(EXPORTS_DIR)
# Folder holding the per-city zipcode shapefiles (*.shp).
SHAPEFILE_DIR = '../data/shapefiles/zipcodes'
# Folder holding the per-city station CSVs; currently the same location
# as EXPORTS_DIR.
STATIONS_DIR = '../data/exports'


class AnalysisConfig:
    """File locations used by the analysis of a single city.

    Attributes:
        shapefile_path: Path of the city's shapefile, stored as given.
        stations_path: Path of the city's stations CSV, stored as given.
        matrix_suffix: Suffix appended to every matrix and export file name.
        displacement_matrix_path: ``MATRIX_DIR/displacement_<suffix>.csv``.
        elevation_matrix_path: ``MATRIX_DIR/elevation_<suffix>.csv``.
        trip_count_classic_matrix_path:
            ``MATRIX_DIR/trip_count_classic_<suffix>.csv``.
        trip_count_electric_matrix_path:
            ``MATRIX_DIR/trip_count_electric_<suffix>.csv``.
        trip_count_matrix_path: ``MATRIX_DIR/trip_count_<suffix>.csv``.
    """

    def __init__(self, shapefile_path='', stations_path='', matrix_suffix=''):
        """Store the given paths and derive every matrix CSV path.

        Args:
            shapefile_path: Path of the shapefile.
            stations_path: Path of the stations CSV.
            matrix_suffix: Suffix identifying the city in file names.
        """
        def matrix_csv(kind):
            # All matrix files share the '<dir>/<kind>_<suffix>.csv' layout.
            return '{}/{}_{}.csv'.format(MATRIX_DIR, kind, matrix_suffix)

        self.shapefile_path = shapefile_path
        self.stations_path = stations_path
        self.matrix_suffix = matrix_suffix

        self.displacement_matrix_path = matrix_csv('displacement')
        self.elevation_matrix_path = matrix_csv('elevation')
        self.trip_count_classic_matrix_path = matrix_csv('trip_count_classic')
        self.trip_count_electric_matrix_path = matrix_csv(
            'trip_count_electric')
        self.trip_count_matrix_path = matrix_csv('trip_count')

    def get_export_path(self, filename):
        """Return ``EXPORTS_DIR/<filename>_<matrix_suffix>.csv``.

        Args:
            filename: Base name of the export, without suffix or extension.
        """
        basename = '{}_{}.csv'.format(filename, self.matrix_suffix)
        return '{}/{}'.format(EXPORTS_DIR, basename)


def get_trip_matrix(config, path=None):
    """Load a station matrix CSV with string labels on both axes.

    NOTE(review): a later cell of this notebook defines
    ``get_trip_matrix(path)`` under the same name; once that cell has run it
    replaces this function, and the notebook's callers pass a path as the
    first positional argument.

    Args:
        config: Object exposing ``trip_count_matrix_path``; only read when
            ``path`` is falsy.
        path: Optional CSV location passed straight to ``pd.read_csv``.

    Returns:
        DataFrame indexed by the ``station_id`` column, with the index and
        column labels cast to ``str``.
    """
    # Fall back to the configured trip-count matrix when no path is given.
    source = path or config.trip_count_matrix_path
    matrix = pd.read_csv(source)
    matrix = matrix.set_index('station_id')
    # Cast both axes to strings so row and column labels compare equal.
    matrix.index = matrix.index.astype('str')
    matrix.columns = matrix.columns.astype('str')
    return matrix

In [3]:
# San Francisco: shapefile, stations CSV and the 'sf' suffix used in the
# matrix and export file names.
config_sf = AnalysisConfig(
    shapefile_path='{}/san_francisco.shp'.format(SHAPEFILE_DIR),
    stations_path='{}/SF_ele_single station.csv'.format(STATIONS_DIR),
    matrix_suffix='sf',
)

# Washington, DC: same layout with the 'dc' suffix.
config_dc = AnalysisConfig(
    shapefile_path='{}/washington_dc.shp'.format(SHAPEFILE_DIR),
    stations_path='{}/DC_ele_single station.csv'.format(STATIONS_DIR),
    matrix_suffix='dc',
)

In [4]:
def get_trip_matrix(path):
    """Read a station matrix CSV and label both axes with strings.

    NOTE(review): this redefines the ``get_trip_matrix(config, path=None)``
    declared in an earlier cell of this notebook; after this cell runs, only
    this one-argument form is in effect.

    Args:
        path: CSV location passed straight to ``pd.read_csv``. The file must
            contain a ``station_id`` column.

    Returns:
        DataFrame indexed by ``station_id`` whose index and column labels are
        cast to ``str``.
    """
    frame = pd.read_csv(path)
    frame = frame.set_index('station_id')
    # String labels on both axes so a station id can address rows and columns.
    frame.index = frame.index.astype('str')
    frame.columns = frame.columns.astype('str')
    return frame


def get_trips_from_matrix(matrix, reverse=False):
    """Flatten a station-by-station matrix into one row per ordered pair.

    Every combination of column labels ``(id1, id2)`` yields one row, with
    ``id1`` varying slowest. Pairs where both ids are equal always get the
    value 0, whatever the matrix holds on its diagonal.

    Args:
        matrix: DataFrame whose column labels are station ids and whose index
            contains the same labels.
        reverse: When False, the value for ``(id1, id2)`` is read from column
            ``id1``, row ``id2``. When True, it is read from column ``id2``,
            row ``id1`` (the transposed cell).

    Returns:
        DataFrame with columns ``station_id1``, ``station_id2`` and ``value``.
        The columns are present even when ``matrix`` has no columns, so the
        result can always be merged on the two id columns.

    Raises:
        KeyError: If a column label is missing from the matrix index.
    """
    station_ids = list(matrix.columns)
    records = []
    for id1 in station_ids:
        for id2 in station_ids:
            # .at takes (row label, column label); it reads the same cell as
            # the chained matrix[col][row] lookup without building a Series.
            cell = matrix.at[id1, id2] if reverse else matrix.at[id2, id1]
            records.append({
                'station_id1': id1,
                'station_id2': id2,
                # Self-pairs are forced to zero.
                'value': 0 if id1 == id2 else cell,
            })
    # Pin the columns so an empty matrix still produces the expected schema.
    return pd.DataFrame(records,
                        columns=['station_id1', 'station_id2', 'value'])


class Analysis:
    """Builds and exports per-station-pair trip tables for one city.

    Construction loads the all / classic / electric trip-count matrices named
    by ``config``, flattens each into one row per ordered station pair, merges
    them into a single table and attaches elevation change and displacement.
    The pipeline runs twice: once as-is (``trips_df``) and once with
    ``reverse=True`` (``reverse_df``), which makes ``get_trips_from_matrix``
    read every value from the transposed matrix cell.

    Attributes:
        config: Configuration object supplying the CSV paths.
        trip_matrix_all: Matrix loaded from ``config.trip_count_matrix_path``.
        trip_matrix_classic: Matrix loaded from
            ``config.trip_count_classic_matrix_path``.
        trip_matrix_electric: Matrix loaded from
            ``config.trip_count_electric_matrix_path``.
        trips_df: Table with columns ``station_id1``, ``station_id2``,
            ``all``, ``classic``, ``electric``, ``elevation_change`` and
            ``displacement``.
        reverse_df: Same columns as ``trips_df``, built with ``reverse=True``.
    """

    # Columns identifying one ordered station pair in every flattened table.
    _PAIR_KEYS = ['station_id1', 'station_id2']

    def __init__(self, config):
        """Load the trip matrices and build both aggregated tables.

        Args:
            config: Object exposing the ``*_matrix_path`` attributes and
                ``get_export_path`` used by this class.
        """
        self.config = config
        self.trip_matrix_all = get_trip_matrix(
            self.config.trip_count_matrix_path)
        self.trip_matrix_classic = get_trip_matrix(
            self.config.trip_count_classic_matrix_path)
        self.trip_matrix_electric = get_trip_matrix(
            self.config.trip_count_electric_matrix_path)

        self.trips_df = self.add_variables(self.merge_trip_matrices())
        self.reverse_df = self.add_variables(
            self.merge_trip_matrices(reverse=True), reverse=True)

    def merge_trip_matrices(self, reverse=False):
        """Combine the three trip-count matrices into one pair table.

        Args:
            reverse: Forwarded to ``get_trips_from_matrix`` for every matrix.

        Returns:
            DataFrame with columns ``station_id1``, ``station_id2``, ``all``,
            ``classic`` and ``electric``, inner-joined on the two id columns.
        """
        labelled_matrices = [
            ('all', self.trip_matrix_all),
            ('classic', self.trip_matrix_classic),
            ('electric', self.trip_matrix_electric),
        ]
        merged_df = None
        # Each matrix is flattened exactly once; flattening is quadratic in
        # the number of stations, so no pass should be repeated.
        for label, matrix in labelled_matrices:
            # Name the value column before merging so the result never
            # depends on pandas' automatic '_x' / '_y' suffixes.
            pair_df = get_trips_from_matrix(matrix, reverse=reverse).rename(
                columns={'value': label})
            if merged_df is None:
                merged_df = pair_df
            else:
                merged_df = pd.merge(merged_df, pair_df, on=self._PAIR_KEYS)
        return merged_df

    def add_variables(self, trips_df, reverse=False):
        """Attach elevation change and displacement to a pair table.

        The displacement and elevation matrices are read from the paths in
        ``self.config`` on every call.

        Args:
            trips_df: Table containing ``station_id1`` and ``station_id2``.
            reverse: Forwarded to ``get_trips_from_matrix`` for both matrices.

        Returns:
            ``trips_df`` left-joined with the new columns
            ``elevation_change`` and ``displacement`` (in that order).
        """
        displacement_matrix = get_trip_matrix(
            self.config.displacement_matrix_path)
        displacement_df = get_trips_from_matrix(
            displacement_matrix,
            reverse=reverse).rename(columns={'value': 'displacement'})
        elevation_matrix = get_trip_matrix(self.config.elevation_matrix_path)
        elevation_df = get_trips_from_matrix(
            elevation_matrix,
            reverse=reverse).rename(columns={'value': 'elevation_change'})

        # Left joins keep every row of trips_df even if a pair is missing
        # from the elevation or displacement table.
        merged_df = pd.merge(trips_df,
                             elevation_df,
                             on=self._PAIR_KEYS,
                             how='left')
        merged_df = pd.merge(merged_df,
                             displacement_df,
                             on=self._PAIR_KEYS,
                             how='left')
        return merged_df

    def export(self):
        """Write both tables as CSV (without the index) to the export paths.

        ``trips_df`` goes to the ``aggregated_trips`` export path and
        ``reverse_df`` to the ``aggregated_reverse_trips`` export path, both
        obtained from ``config.get_export_path``.
        """
        self.trips_df.to_csv(self.config.get_export_path('aggregated_trips'),
                             index=False)
        self.reverse_df.to_csv(
            self.config.get_export_path('aggregated_reverse_trips'),
            index=False)


# Build the San Francisco pair tables (this reads the matrix CSVs) and write
# them to the export paths derived from config_sf.
analysis_sf = Analysis(config_sf)
analysis_sf.export()

# Same steps for Washington, DC, driven by config_dc.
analysis_dc = Analysis(config_dc)
analysis_dc.export()