In [None]:
import pandas as pd
import numpy as np
from station import Station
import seaborn as sns
import matplotlib.pyplot as plt
import mpld3

%load_ext autoreload
%autoreload

In [None]:
# Load the two CSV inputs: daily station entries and the list of 'L' stops.
ride_df = pd.read_csv('data/CTA_-_Ridership_-__L__Station_Entries_-_Daily_Totals.csv')

# STATION_NAME values in the stops table that get renamed
# (presumably so they match the names used in the ridership file -- TODO confirm).
station_map_names = {
    'Lake': 'Lake/State',
    'Jackson': 'Jackson/State',
    'Washington': 'Washington/Dearborn',
    'Morgan': 'Morgan-Lake',
}
map_df = pd.read_csv(
    'data/CTA_-_System_Information_-_List_of__L__Stops.csv'
).replace({"STATION_NAME": station_map_names})

In [None]:
def _fit_and_summarize(name):
    """Build a Station for `name`, call run_prophet() on it, and return its `summary`."""
    fitted = Station(name, ride_df, map_df)
    fitted.run_prophet()
    return fitted.summary


# One summary record per distinct station name in the ridership data,
# collected into a single table that is also written to disk.
station_list = ride_df['stationname'].unique().tolist()
sumlist = [_fit_and_summarize(name) for name in station_list]
station_summary = pd.DataFrame(sumlist)
station_summary.to_csv('data/station_summary.csv')

In [None]:
# Reload the summary that the Prophet cell wrote to disk *before* deriving any
# columns. This reload used to sit in a later cell, where it replaced
# `station_summary` with the on-disk copy; that copy has no 'weekday-sat' /
# 'weekday-sun' columns, so the scatterplot of 'weekday-sat' further down
# failed on a top-to-bottom run. index_col=0 restores the index that to_csv
# saved instead of surfacing it as an extra 'Unnamed: 0' column.
station_summary = pd.read_csv('data/station_summary.csv', index_col=0)

# Drop every row whose 'Sat_mean' is exactly 0. A boolean filter is a no-op
# when no such row exists, so this cell can be re-run safely; the previous
# `.index[0]` lookup raised IndexError in that case and removed only the
# first matching row.
station_summary = station_summary[station_summary['Sat_mean'] != 0].copy()

# Differences between the 'Weekday_mean' column and the two other day-type means.
station_summary['weekday-sat'] = (
    station_summary['Weekday_mean'] - station_summary['Sat_mean']
)
station_summary['weekday-sun'] = (
    station_summary['Weekday_mean'] - station_summary['Sun/Hol_mean']
)

In [None]:
# Display the full summary row for the station with the largest daily_mean.
busiest_label = station_summary['daily_mean'].idxmax()
station_summary.loc[busiest_label]

In [None]:
# Display the whole summary row for Washington/Wabash (its daily_std is one of the columns).
is_washington_wabash = station_summary['station'].eq('Washington/Wabash')
station_summary.loc[is_washington_wabash]

In [None]:
# station_summary.columns
# cols = ['2023_mean', 'daily_mean', '5_yr_pct_diff']
# cols2 = ['daily_mean', 'Sat_mean', 'Sun/Hol_mean', 'Weekday_mean']
# station_summary.dropna(inplace=True)

### Plot stations with high standard deviations by day of the week & season

In [None]:
# Ten rows with the largest daily_std, in descending order.
top_by_std = station_summary.sort_values('daily_std', ascending=False).head(10)

# Keep the station names as a plain list (for looping) and a copy of the
# rows indexed by station name (for plotting).
high_stations = top_by_std['station'].tolist()
high_stds = top_by_std.set_index('station')

In [None]:
# For each high-variability station, build a Station and call its
# make_layered_hist with the 'daytype' grouping.
for station_name in high_stations:
    stat = Station(station_name, ride_df, map_df)
    stat.make_layered_hist('daytype')

In [None]:
# For each high-variability station, build a Station and call its
# make_layered_hist with the 'season' grouping.
for station_name in high_stations:
    stat = Station(station_name, ride_df, map_df)
    stat.make_layered_hist('season')

In [None]:
# Scatter daily_std against the weekday-minus-Saturday gap for all stations,
# then redraw the ten highest-daily_std stations in red on the same axes.
fig, ax = plt.subplots()
shared_axes_args = dict(x="daily_std", y="weekday-sat", ax=ax)
sns.scatterplot(data=station_summary, **shared_axes_args)
sns.scatterplot(data=high_stds, color='red', **shared_axes_args)

In [None]:
# Distribution of daily_std across stations: density-scaled histogram with a
# KDE overlay. `sns.distplot` is deprecated and scheduled for removal; this is
# the `histplot` equivalent given in seaborn's distplot migration guide.
sns.histplot(station_summary['daily_std'], kde=True, stat='density', kde_kws=dict(cut=3))