In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np
import psycopg2

import plotly.express as px
import plotly.graph_objects as go

import dash
import dash_core_components as dcc
import dash_html_components as html

In [2]:
# Read the Mapbox access token from the environment and register it with
# Plotly Express. The check is on truthiness so that both an unset variable
# and an empty string stop the notebook here with the 'empty token' message
# (the previous `is not None` test let an empty string through).
mapbox_token = os.environ.get('MAPBOX_TOKEN')
assert mapbox_token, 'empty token'
px.set_mapbox_access_token(mapbox_token)

In [3]:
# Load the two visualization tables: the route coordinates (`geo_route`)
# and the per-station information (`geo_info`), in that order.
geo_route, geo_info = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/visualization/NE_regional_lonlat.csv',
                     './data/visualization/geo_stations_info.csv')
)

# Station codes used for the per-station plots, listed from BOS through WAS.
amtrak_stations = [
    'BOS', 'BBY', 'RTE', 'PVD',
    'KIN', 'NLC', 'NHV', 'STM',
    'NYP', 'NWK', 'TRE', 'PHL',
    'WIL', 'BAL', 'BWI', 'NCR',
    'WAS',
]

# Full station display names, in the row order of `geo_info`.
location_names = geo_info['STNNAME'].tolist()

In [4]:
# Preview the first five rows of the route coordinate table.
geo_route.head(n=5)

Unnamed: 0,Longitude,Latitude,Group,Connecting Path
0,-71.075149,42.347551,0,BBY-RTE
1,-71.07541,42.347485,0,BBY-RTE
2,-71.075579,42.347441,0,BBY-RTE
3,-71.075729,42.347391,0,BBY-RTE
4,-71.075788,42.347366,0,BBY-RTE


In [5]:
# Preview the first five rows of the station information table.
geo_info.head(n=5)

Unnamed: 0,STNNAME,CITY2,STATE,LON,LAT,Northbound Mile,Southbound Mile,STNCODE
0,"Boston (South Station), Massachusetts",Boston,MA,-71.055304,42.352311,457,0,BOS
1,"Boston (Back Bay), Massachusetts",Boston,MA,-71.075828,42.347317,456,1,BBY
2,"Westwood, Route 128 Station, Massachusetts",Route 128,MA,-71.147894,42.210242,446,11,RTE
3,"Providence, Rhode Island",Providence,RI,-71.413478,41.82949,414,43,PVD
4,"West Kingston, Rhode Island",Kingston,RI,-71.560597,41.483959,387,70,KIN


In [6]:
# Map settings used by the route figure below: the Mapbox base-map style
# name, and the config dict handed to `Figure.show` (scroll-zoom disabled).
map_style = 'outdoors'
config = {'scrollZoom': False}

In [8]:
# Draw the route on a Mapbox base map, one line per `Group`, coloured by
# `Connecting Path`.
# Columns are referenced by name rather than by re-indexing `geo_route`:
# with `data_frame` supplied, names guarantee that Plotly Express labels the
# legend and hover fields from the column headers. A separately indexed Series
# is only recognised as that column when it is the frame's own cached object;
# otherwise generic labels such as "color" or "lat" can be used instead.
route = px.line_mapbox(geo_route,
                       lat='Latitude',
                       lon='Longitude',
                       line_group='Group',
                       color='Connecting Path',
                       color_discrete_sequence=px.colors.qualitative.T10,
                       hover_data={'Group': False},
                       mapbox_style=map_style,
                       zoom=6)
# Applied before the station markers are added, so only the route lines
# present at this point are updated.
route.update_traces(line=dict(width=3))

# Overlay one marker per station; hover shows "<station name> (<code>)".
route.add_trace(go.Scattermapbox(lat=geo_info.LAT.round(decimals=5),
                                 lon=geo_info.LON.round(decimals=5),
                                 name='Amtrak Stations',
                                 hoverinfo='text',
                                 customdata=geo_info.STNCODE,
                                 hovertext=geo_info.STNNAME,
                                 hovertemplate="%{hovertext} (%{customdata})<extra></extra>",
                                 mode='markers',
                                 marker={'size': 6, 'color': 'Navy'},
                                 fill='none'
                                 )
               )

route.update_layout(dict(paper_bgcolor="white", plot_bgcolor="white", margin=dict(t=35, l=80, b=0, r=0), height=500)) # l=0, r=0

# NOTE(review): this figure only contains mapbox traces, so the y-axis
# update presumably has no visible effect -- confirm before removing.
route.update_yaxes(automargin=True)
route.show(config=config)

In [None]:
# Open a connection and cursor to the `amtrakproject` database as `appuser`.
# Connection parameters are passed as keyword arguments so psycopg2 builds and
# escapes the connection string itself; the previous hand-formatted DSN broke
# on passwords containing spaces or quotes and sent the literal text "None"
# when DB_PASS was unset.
# NOTE(review): the query cells further down each open their own connection
# and rebind `conn`, so the connection opened here is never closed. Consider
# dropping this cell.
conn = psycopg2.connect(dbname='amtrakproject',
                        user='appuser',
                        password=os.environ.get('DB_PASS'))
cur = conn.cursor()

In [None]:
# SQL: for each train (and its depart_origin_time from train_info), the
# average and the count of `depart_diff` over departures whose origin_year is
# strictly between 2016 and 2020. Only rows where both reg_operates_on_mon and
# reg_operates_on_thurs equal 't' are kept; results are ordered by the average,
# largest first. The four selected columns are named later, when the rows are
# loaded into a DataFrame.
command = """
    SELECT  d.train_num, ti.depart_origin_time, AVG(d.depart_diff), COUNT(d.depart_diff)
    FROM train_info ti
    INNER JOIN(
        SELECT d.train_num, d.depart_diff 
        FROM departures d
        WHERE d.origin_year > 2016 AND d.origin_year < 2020
    ) AS d
    ON ti.train_num = d.train_num
    WHERE reg_operates_on_mon = 't' AND reg_operates_on_thurs = 't'
    GROUP BY d.train_num, ti.depart_origin_time
    ORDER BY AVG(d.depart_diff) DESC;
    """

In [None]:
# Run `command` against the `amtrakproject` database and collect every result
# row into `data`.
# Connection parameters are passed as keyword arguments so psycopg2 escapes
# them itself; the previous hand-formatted DSN broke on passwords containing
# spaces or quotes and sent the literal text "None" when DB_PASS was unset.
conn = psycopg2.connect(dbname='amtrakproject',
                        user='appuser',
                        password=os.environ.get('DB_PASS'))

try:
    cur = conn.cursor()
    cur.execute(command)
    data = cur.fetchall()
except Exception as error:
    # psycopg2.DatabaseError derives from Exception, so one clause covers both.
    # The error is reported rather than re-raised, as before; note that `data`
    # is NOT (re)assigned when this branch runs.
    tb = error.__traceback__
    line_num = tb.tb_lineno
    print ("\npsycopg2 ERROR:", error, "on line number:", line_num)
    print ("psycopg2 traceback:", tb, "-- type:", type(error))
finally:
    # Always release the connection. Previously it was closed only on success,
    # leaking it on failure, and the handler called rollback(), which itself
    # fails on an already-closed connection. Closing discards any uncommitted
    # transaction, so no explicit rollback is needed.
    conn.close()

In [None]:
# Wrap the fetched rows in a DataFrame, naming the four selected columns.
df = pd.DataFrame(
    data=data,
    columns=[
        'train_num',
        'depart_origin_time',
        'avg_depart_diff',
        'num_instances',
    ],
)

In [None]:
# Render the frame built from the query rows as this cell's output.
df

In [None]:
# Store train numbers as strings (they are used as the bar chart's x values).
df = df.astype({'train_num': str})

In [None]:
# Bar chart of avg_depart_diff, one bar per train number.
bar_plt = px.bar(
    data_frame=df,
    x='train_num',
    y='avg_depart_diff',
)
bar_plt.show()

## Testing COVID Ridership Theory

In [None]:
# SQL: the average and the count of `depart_diff` for every combination of
# train, station_code and origin_year (all years; no year filter here), joined
# to train_info for depart_origin_time. Only rows where both
# reg_operates_on_mon and reg_operates_on_thurs equal 't' are kept; results are
# ordered by the average, largest first.
command = """
    SELECT  d.train_num, d.station_code, d.origin_year, ti.depart_origin_time, AVG(d.depart_diff), COUNT(d.depart_diff)
    FROM train_info ti
    INNER JOIN(
        SELECT d.train_num, d.station_code, d.origin_year, d.depart_diff 
        FROM departures d
    ) AS d
    ON ti.train_num = d.train_num
    WHERE reg_operates_on_mon = 't' AND reg_operates_on_thurs = 't'
    GROUP BY d.train_num, ti.depart_origin_time, d.origin_year, d.station_code
    ORDER BY AVG(d.depart_diff) DESC;
    """

In [None]:
# Run the current `command` against the `amtrakproject` database and collect
# every result row into `data`.
# Connection parameters are passed as keyword arguments so psycopg2 escapes
# them itself; the previous hand-formatted DSN broke on passwords containing
# spaces or quotes and sent the literal text "None" when DB_PASS was unset.
conn = psycopg2.connect(dbname='amtrakproject',
                        user='appuser',
                        password=os.environ.get('DB_PASS'))

try:
    cur = conn.cursor()
    cur.execute(command)
    data = cur.fetchall()
except Exception as error:
    # psycopg2.DatabaseError derives from Exception, so one clause covers both.
    # The error is reported rather than re-raised, as before; note that `data`
    # keeps whatever value it had (if any) when this branch runs.
    tb = error.__traceback__
    line_num = tb.tb_lineno
    print ("\npsycopg2 ERROR:", error, "on line number:", line_num)
    print ("psycopg2 traceback:", tb, "-- type:", type(error))
finally:
    # Always release the connection. Previously it was closed only on success,
    # leaking it on failure, and the handler called rollback(), which itself
    # fails on an already-closed connection. Closing discards any uncommitted
    # transaction, so no explicit rollback is needed.
    conn.close()

In [None]:
# Wrap the fetched rows in a DataFrame, naming the six selected columns.
data_yearly = pd.DataFrame(
    data=data,
    columns=[
        'train_num',
        'station',
        'year',
        'depart_origin_time',
        'avg_depart_diff',
        'num_instances',
    ],
)

In [None]:
# Preview the first five rows of the per-train/station/year frame.
data_yearly.head(n=5)

In [None]:
# (rows, columns) of the per-train/station/year frame.
data_yearly.shape

In [None]:
# Store train numbers as strings (they are used as the colour grouping below).
data_yearly = data_yearly.astype({'train_num': str})

In [None]:
# Store years as strings (the bar charts below order the x axis by category).
data_yearly = data_yearly.astype({'year': str})

In [None]:
# Rows of `data_yearly` for station code PVD.
pvd = data_yearly[data_yearly['station'].eq('PVD')]

In [None]:
# (rows, columns) of the PVD subset.
pvd.shape

In [None]:
# Preview the first five rows of the PVD subset.
pvd.head(n=5)

In [None]:
# Stacked bars of avg_depart_diff per year at PVD, one colour per train number;
# years are sorted ascending as categories.
pvd_yearly_bar_plt = px.bar(
    data_frame=pvd,
    x='year',
    y='avg_depart_diff',
    color='train_num',
).update_layout(
    barmode='stack',
    xaxis_categoryorder='category ascending',
)
pvd_yearly_bar_plt.show()

In [None]:
# Rows of `data_yearly` for station code NYP.
nyp = data_yearly[data_yearly['station'].eq('NYP')]

In [None]:
# (rows, columns) of the NYP subset.
nyp.shape

In [None]:
# Preview the first five rows of the NYP subset.
nyp.head(n=5)

In [None]:
# Stacked bars of avg_depart_diff per year at NYP, one colour per train number;
# years are sorted ascending as categories.
nyp_yearly_bar_plt = px.bar(
    data_frame=nyp,
    x='year',
    y='avg_depart_diff',
    color='train_num',
).update_layout(
    barmode='stack',
    xaxis_categoryorder='category ascending',
)
nyp_yearly_bar_plt.show()

In [None]:
# Preview the first five rows of the per-train/station/year frame again.
data_yearly.head(n=5)

In [None]:
# `station_yearly` is otherwise only bound inside the per-station loop further
# down the notebook, so on a fresh kernel (Restart & Run All) this cell raised
# NameError. Build the subset explicitly here, for the first station in
# `amtrak_stations`, so the preview no longer depends on out-of-order
# execution. The loop below rebinds the name for each station.
# NOTE(review): the station originally being inspected here is not
# recoverable from the notebook -- adjust the index if another one was meant.
station_yearly = data_yearly.loc[data_yearly['station'] == amtrak_stations[0]]
station_yearly.head()

In [None]:
# Keep only rows whose train number is even (train_num is parsed back to int
# for the parity check).
data_even_trains = data_yearly[data_yearly['train_num'].astype(int).mod(2).eq(0)]

In [None]:
# Render the even-numbered-train subset as this cell's output.
data_even_trains

In [None]:
# For every station code: print a preview of its even-train rows, then show a
# stacked yearly bar chart of avg_depart_diff coloured by train number.
for station in amtrak_stations:
    station_yearly = data_even_trains[data_even_trains['station'].eq(station)]
    print(station_yearly.head())
    station_yearly_bar_plt = px.bar(
        station_yearly,
        x='year',
        y='avg_depart_diff',
        color='train_num',
        title=station,
    ).update_layout(
        barmode='stack',
        xaxis_categoryorder='category ascending',
    )
    station_yearly_bar_plt.show()

In [None]:
# SQL: every distinct train number in train_info, ascending.
command = """
    SELECT DISTINCT train_num
    FROM train_info
    ORDER BY train_num ASC;
    """

# Connection parameters are passed as keyword arguments so psycopg2 escapes
# them itself; the previous hand-formatted DSN broke on passwords containing
# spaces or quotes and sent the literal text "None" when DB_PASS was unset.
conn = psycopg2.connect(dbname='amtrakproject',
                        user='appuser',
                        password=os.environ.get('DB_PASS'))

try:
    cur = conn.cursor()
    cur.execute(command)
    # fetchall() returns one tuple per row, so each entry is a 1-tuple.
    train_nums_list = cur.fetchall()
except Exception as error:
    # psycopg2.DatabaseError derives from Exception, so one clause covers both.
    # The error is reported rather than re-raised, as before; note that
    # `train_nums_list` is NOT assigned when this branch runs.
    tb = error.__traceback__
    line_num = tb.tb_lineno
    print ("\npsycopg2 ERROR:", error, "on line number:", line_num)
    print ("psycopg2 traceback:", tb, "-- type:", type(error))
finally:
    # Always release the connection. Previously it was closed only on success,
    # leaking it on failure, and the handler called rollback(), which itself
    # fails on an already-closed connection. Closing discards any uncommitted
    # transaction, so no explicit rollback is needed.
    conn.close()