In [1]:
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import os
import json
import shapely
import numpy as np
import pandas as pd
import sys

import dash
import dash_core_components as dcc
import dash_html_components as html

In [2]:
# The Mapbox access token is read from the environment so it never lands in
# the notebook itself.
mapbox_token = os.environ.get('MAPBOX_TOKEN')
# Reject an unset variable *and* an empty string: both would otherwise only
# fail later, when a Mapbox figure is rendered.
assert mapbox_token, 'empty token'

# Station codes used throughout the notebook, listed in the order
# BOS ... WAS. Later cells rely on this order when pairing codes with
# names and mile markers.
amtrak_stations = ['BOS', 'BBY', 'RTE', 'PVD', 'KIN', 'NLC', 'NHV', 'STM',
                   'NYP', 'NWK', 'TRE', 'PHL', 'WIL', 'BAL', 'BWI', 'NCR', 'WAS']


In [4]:
# Load the station point features.
geo_stations = gpd.read_file('./data/geojson/Amtrak_Project_Stations_Reordered.geojson')

# The frame comes back with a positional integer index, so station codes have
# to be matched against the STNCODE column rather than the index. Index by
# code (keeping the column), pick the rows in `amtrak_stations` order, then
# restore a clean 0..n-1 index. A code missing from the file raises KeyError
# here instead of silently dropping a station.
geo_stations = (
    geo_stations
    .set_index('STNCODE', drop=False)
    .loc[amtrak_stations]
    .reset_index(drop=True)
)
geo_stations

Unnamed: 0,STNCODE,OBJECTID,STNNAME,CITY2,STATE,STFIPS,urban,geometry
0,BOS,5,"Boston (South Station), Massachusetts",Boston,MA,25,YES,POINT (-71.05530 42.35231)
1,BBY,17,"Boston (Back Bay), Massachusetts",Boston,MA,25,YES,POINT (-71.07583 42.34732)
2,RTE,24,"Westwood, Route 128 Station, Massachusetts",Route 128,MA,25,YES,POINT (-71.14789 42.21024)
3,PVD,10,"Providence, Rhode Island",Providence,RI,44,YES,POINT (-71.41348 41.82949)
4,KIN,60,"West Kingston, Rhode Island",Kingston,RI,44,,POINT (-71.56060 41.48396)
5,NLC,63,"New London, Connecticut",New London,CT,9,YES,POINT (-72.09322 41.35427)
6,NHV,16,"New Haven, Connecticut",New Haven,CT,9,YES,POINT (-72.92667 41.29771)
7,STM,27,"Stamford, Connecticut",Stamford,CT,9,YES,POINT (-73.54216 41.04713)
8,NYP,1,"New York (Penn Station), New York",New York,NY,36,YES,POINT (-73.99446 40.75033)
9,NWK,14,"Newark (Penn Station), New Jersey",Newark,NJ,34,YES,POINT (-74.16475 40.73471)


In [None]:
# Station display names, in the row order of geo_stations.
location_names = list(geo_stations['STNNAME'])

# One label/value dict per station code: the label is the display name found
# at the same position as the code.
options = [
    {'label': location_names[position], 'value': code}
    for position, code in enumerate(amtrak_stations)
]

In [None]:
# Display the list of label/value dicts built from the station names and codes.
options

In [None]:
# Mile positions, one entry per station in `amtrak_stations` order.
# SB counts up from 0 at the first station; NB counts down to 0 at the last
# one (each NB value equals 457 minus the matching SB value).
SB = [0, 1, 11, 43, 70, 105, 156, 195, 231, 241, 289, 322, 347, 416, 427, 448, 457]
NB = [457, 456, 446, 414, 387, 352, 301, 262, 226, 216, 168, 135, 110, 41, 30, 9, 0]

# Every station starts at None, then receives its mile value by position.
mile_markers = {
    'Northbound Mile': dict.fromkeys(amtrak_stations),
    'Southbound Mile': dict.fromkeys(amtrak_stations),
}
mile_markers['Northbound Mile'].update(zip(amtrak_stations, NB))
mile_markers['Southbound Mile'].update(zip(amtrak_stations, SB))

# Rows are station codes, columns are the two direction labels.
mile_cols = pd.DataFrame.from_dict(mile_markers, orient='columns')
#mile_cols.to_csv('./data/geojson/mile_markers.csv', index=False)

In [None]:
# Register the Mapbox token with plotly express before building the figure.
px.set_mapbox_access_token(mapbox_token)

# One marker per station: latitude/longitude come from the point geometry's
# y/x coordinates, and hovering shows the station name.
station_mapbox = px.scatter_mapbox(
    geo_stations,
    lat=geo_stations.geometry.y,
    lon=geo_stations.geometry.x,
    hover_name="STNNAME",
    mapbox_style='outdoors',
    center={'lat': 40.58, 'lon': -74.00},
    zoom=5.5,
)

# Display configuration reused by the route map below: scroll-wheel zoom off.
config = {'scrollZoom': False}
station_mapbox.show(config=config)

In [None]:
# Load the route coordinates; the route map below reads the 'Latitude' and
# 'Longitude' columns of this frame.
geo_route = pd.read_csv("./data/geojson/NE_regional_lonlat.csv")

In [None]:
# Draw the route as a line through the CSV coordinates on a custom Mapbox
# style, using the same center and zoom as the station map.
route = px.line_mapbox(
    lat=geo_route['Latitude'],
    lon=geo_route['Longitude'],
    mapbox_style='mapbox://styles/elizabethchen/ckmyomzg920pz17o6n7hg2lnd',
    center={'lat': 40.58, 'lon': -74.00},
    zoom=5.5,
)

# `config` is the display configuration defined with the station map.
route.show(config=config)

In [None]:
import psycopg2
from psycopg2 import sql

In [None]:
# Build the connection string with psycopg2's own helper: values are escaped
# correctly (a password containing spaces or quotes no longer breaks the DSN)
# and an unset DB_PASS is left out instead of being sent as the text "None".
DSN = psycopg2.extensions.make_dsn(
    dbname='amtrakproject',
    user='appuser',
    password=os.environ.get('DB_PASS'),
)
# NOTE(review): the query cells below open their own connection and rebind
# `conn`/`cur`, so this connection is never closed explicitly — consider
# dropping it once nothing relies on this cell.
conn = psycopg2.connect(DSN)
cur = conn.cursor()

In [None]:
# Query: one row per (train_num, depart_origin_time) with the average and the
# count of depart_diff.
#   * departures are restricted to origin_year > 2016 and < 2020
#   * only trains with reg_operates_on_mon = 't' and reg_operates_on_thurs = 't'
#   * rows are sorted by the average depart_diff, largest first
command = """
    SELECT  d.train_num, ti.depart_origin_time, AVG(d.depart_diff), COUNT(d.depart_diff)
    FROM train_info ti
    INNER JOIN(
        SELECT d.train_num, d.depart_diff 
        FROM departures d
        WHERE d.origin_year > 2016 AND d.origin_year < 2020
    ) AS d
    ON ti.train_num = d.train_num
    WHERE reg_operates_on_mon = 't' AND reg_operates_on_thurs = 't'
    GROUP BY d.train_num, ti.depart_origin_time
    ORDER BY AVG(d.depart_diff) DESC;
    """

In [None]:
# Build the connection string with psycopg2's own helper: values are escaped
# correctly and an unset DB_PASS is omitted instead of being sent as "None".
DSN = psycopg2.extensions.make_dsn(
    dbname='amtrakproject',
    user='appuser',
    password=os.environ.get('DB_PASS'),
)
conn = psycopg2.connect(DSN)

try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cur:
        cur.execute(command)
        data = cur.fetchall()
except psycopg2.Error as error:
    # Report the database error, then re-raise: carrying on would leave `data`
    # unset (or still holding rows from an earlier query) for the cells below.
    print("\npsycopg2 ERROR:", error)
    raise
finally:
    # Release the connection on success and on failure alike; closing it also
    # discards any transaction left open by a failed statement.
    conn.close()

In [None]:
# Tabulate the fetched rows; the column names are applied positionally to each
# row tuple (presumably matching the SELECT list of the query above — confirm
# `data` came from that query).
df = pd.DataFrame(data, columns = ['train_num', 'depart_origin_time', 'avg_depart_diff', 'num_instances'])

In [None]:
# Display the resulting frame.
df

In [None]:
# Store train numbers as strings — presumably so the bar chart below treats
# them as discrete categories rather than a numeric axis (TODO confirm).
df['train_num'] = df['train_num'].astype(str)

In [None]:
# Bar chart with one bar per train_num, bar height = avg_depart_diff.
bar_plt = px.bar(df, x = 'train_num', y = 'avg_depart_diff')
bar_plt.show()

## Testing COVID Ridership Theory

In [None]:
# Query: one row per (train_num, depart_origin_time, origin_year, station_code)
# with the average and the count of depart_diff.
#   * no year filter: every departure row is included
#   * only trains with reg_operates_on_mon = 't' and reg_operates_on_thurs = 't'
#   * rows are sorted by the average depart_diff, largest first
command = """
    SELECT  d.train_num, d.station_code, d.origin_year, ti.depart_origin_time, AVG(d.depart_diff), COUNT(d.depart_diff)
    FROM train_info ti
    INNER JOIN(
        SELECT d.train_num, d.station_code, d.origin_year, d.depart_diff 
        FROM departures d
    ) AS d
    ON ti.train_num = d.train_num
    WHERE reg_operates_on_mon = 't' AND reg_operates_on_thurs = 't'
    GROUP BY d.train_num, ti.depart_origin_time, d.origin_year, d.station_code
    ORDER BY AVG(d.depart_diff) DESC;
    """

In [None]:
# Build the connection string with psycopg2's own helper: values are escaped
# correctly and an unset DB_PASS is omitted instead of being sent as "None".
DSN = psycopg2.extensions.make_dsn(
    dbname='amtrakproject',
    user='appuser',
    password=os.environ.get('DB_PASS'),
)
conn = psycopg2.connect(DSN)

try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cur:
        cur.execute(command)
        data = cur.fetchall()
except psycopg2.Error as error:
    # Report the database error, then re-raise: carrying on would leave `data`
    # holding the rows of the previous query, whose columns differ from the
    # ones the next cell expects.
    print("\npsycopg2 ERROR:", error)
    raise
finally:
    # Release the connection on success and on failure alike; closing it also
    # discards any transaction left open by a failed statement.
    conn.close()

In [None]:
# Tabulate the fetched rows; the column names are applied positionally to each
# row tuple (presumably matching the SELECT list of the yearly query above —
# confirm `data` came from that query).
data_yearly = pd.DataFrame(data, columns = ['train_num', 'station', 'year', 'depart_origin_time', 'avg_depart_diff', 'num_instances'])

In [None]:
# Preview the first rows of the yearly frame.
data_yearly.head()

In [None]:
# (rows, columns) of the yearly frame.
data_yearly.shape

In [None]:
# Store train numbers as strings; the stacked bar chart below uses this column
# for its color grouping.
data_yearly['train_num'] = data_yearly['train_num'].astype(str)

In [None]:
# Store years as strings; the stacked bar chart below uses this column as a
# categorical x axis (it sorts it with 'category ascending').
data_yearly['year'] = data_yearly['year'].astype(str)

In [None]:
# Keep only the rows whose station code is 'PVD'.
pvd = data_yearly.loc[data_yearly['station'] == 'PVD']

In [None]:
# (rows, columns) of the PVD subset.
pvd.shape

In [None]:
# Preview the first rows of the PVD subset.
pvd.head()

In [None]:
# Per-year bars for the PVD rows, one colored segment per train number.
pvd_yearly_bar_plt = px.bar(
    pvd,
    x='year',
    y='avg_depart_diff',
    color='train_num',
)

# Stack the per-train segments and sort the year categories ascending.
pvd_yearly_bar_plt.update_layout(
    barmode='stack',
    xaxis=dict(categoryorder='category ascending'),
)
pvd_yearly_bar_plt.show()

In [None]:
# Query: every distinct train_num in train_info, in ascending order.
command = """
    SELECT DISTINCT train_num
    FROM train_info
    ORDER BY train_num ASC;
    """

# Build the connection string with psycopg2's own helper: values are escaped
# correctly and an unset DB_PASS is omitted instead of being sent as "None".
DSN = psycopg2.extensions.make_dsn(
    dbname='amtrakproject',
    user='appuser',
    password=os.environ.get('DB_PASS'),
)
conn = psycopg2.connect(DSN)

try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cur:
        cur.execute(command)
        # fetchall() yields one tuple per row, here with a single element.
        train_nums_list = cur.fetchall()
except psycopg2.Error as error:
    # Report the database error, then re-raise: carrying on would leave
    # `train_nums_list` undefined for the cell that flattens it.
    print("\npsycopg2 ERROR:", error)
    raise
finally:
    # Release the connection on success and on failure alike; closing it also
    # discards any transaction left open by a failed statement.
    conn.close()

In [None]:
# Mapbox style URL kept under the name `old_style`; it is not referenced by the
# cells shown here — presumably an earlier map style (TODO confirm or remove).
old_style = 'mapbox://styles/elizabethchen/ckhwyiwxw09o11ap9y8rpvldf'

In [None]:
# Each fetched row is a tuple; keep only its first element, in row order.
train_nums = [row[0] for row in train_nums_list]

print(train_nums)