# 2.2 | Daily BART EDA
---
* [01 API Data Requests](01_API_pulls.ipynb)
* [01.1 Additional BART Data](01_v2_bart.ipynb.ipynb)
* [02 Initial EDA](02_EDA.ipynb)
* _[02.2 EDA for Daily Ridership](02_EDA.ipynb)_
* [03 First Model: Prophet](03_prophet.ipynb)
---

# PLOTLY NOTES 
* reinstall `ipywidgets` if needed
* confirm plotly is working ( via [Facebook Prophet Issue # 1753 on GitHub](https://github.com/facebook/prophet/issues/1753) )
```python

import plotly.offline as py
import plotly.graph_objs as go

py.init_notebook_mode()

trace0 = go.Scatter(
  x=[1, 2, 3, 4],
  y=[10, 15, 13, 17]
)
data = go.Data([trace0])

py.iplot(data)

```


In [1]:
##### BASIC IMPORTS 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio

In [2]:
pio.templates.default = "plotly_dark"

In [3]:
# import plotly.offline as py
# import plotly.graph_objs as go

# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode(connected=True)  # for plots to render in jupyter notebook

# py.init_notebook_mode()
# py.iplot(data)

In [4]:
# CUSTOM IMPORTS AND SETTINGS 
pd.options.display.max_columns = 90                     # view settings
pd.options.display.max_rows = 100

path = '../data/processed/'

In [5]:
# function to covert to datetimeinedex 
def date_index(df): 
    df['date'] = pd.to_datetime(df['ds'])
    df = df.set_index('date')
    df.rename(columns = {'ridership' : 'y'}, inplace = True)

    return(df)

In [6]:
# function to output HTML to embed in wordpress
def plot_out(filename, figname):
    import plotly as plt
    out_text = plt.offline.plot(figname, include_plotlyjs=False, output_type='div');

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(out_text)

In [7]:
# open file
filename = path + 'bart_daily.csv'
bart = pd.read_csv(filename)
bart = date_index(bart)
split_date = '2020-03-20'

In [8]:
# add columns: day name and COVID note
bart['day'] = bart.index.day_name()
bart['covid'] = 'Pre-COVID'
bart['covid'][split_date:] = 'Post-03/20'



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [9]:
bart.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4165 entries, 2011-01-01 to 2022-06-02
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   y       4165 non-null   float64
 1   ds      4165 non-null   object 
 2   day     4165 non-null   object 
 3   covid   4165 non-null   object 
dtypes: float64(1), object(3)
memory usage: 291.7+ KB


In [10]:
bart.describe()

Unnamed: 0,y
count,4165.0
mean,275329.766146
std,150595.66671
min,2795.0
25%,132103.0
50%,356107.0
75%,413293.0
max,567020.0


In [21]:
df = bart

fig = px.histogram(df, 
        x = 'day', 
        y = 'y', 
        color = 'covid',
        # marginal='rug', # or violin, rug, box
        histfunc = 'avg',
        hover_data = df.columns, 
        labels = {
                'y' : 'Number of Daily Exits', 
                'day': 'Day of Week' })
fig.update_layout(
        title={
                'text': 'BART Daily Ridership by Day of Week, Pre-COVID and After',
                'y': 0.9,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top'}, 
        legend = dict(
                orientation = 'h', 
                title = None, 
                y = 1.),        # location of legend above chart or within
        barmode = 'overlay', 
        ) 
fig.update_traces(opacity=0.80)
fig.update_xaxes(categoryorder = 'total ascending')
fig.show()

fig.layout.images = [dict(
        source='https://blog.giovannaguevara.net/wp-content/uploads/2020/02/siteLogo.png',
        xref="paper", yref="paper",
        x=0.1, y=1.05,
        sizex=0.4, sizey=0.4,
        xanchor="center", yanchor="bottom"
      )]

# output HTML for embed
fname = 'avg_rid.txt'
plot_out(fname, fig)

In [12]:
df = bart[:split_date]
fig = px.violin (df, 
    y = 'y', 
    x = 'day',
    labels = {
        'y' : 'Number of Daily Exits, Avg', 
        'day': 'Day of Week'})
fig.update_layout(
    title={
        'text': 'BART Daily Ridership by Day of Week, Jan 2011 - Feb 2022',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_xaxes(categoryorder = 'total ascending')
fig.show()

# output HTML for embed
fname = 'violin_pre.txt'
plot_out(fname, fig)

In [13]:
df = bart[split_date:]
fig = px.violin (df, 
    y = 'y', 
    x = 'day',
    labels = {
        'y' : 'Number of Daily Exits, Avg', 
        'day': 'Day of Week'})
fig.update_layout(
    title={
        'text': 'BART Daily Ridership by Day of Week, March 20, 2020 - June 3, 2022',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_xaxes(categoryorder = 'total ascending')
fig.show()

# output HTML for embed
fname = 'violin_post.txt'
plot_out(fname, fig)

In [14]:
# descriptives pre-covid
desc = bart[:split_date].groupby(['day']).describe()
desc

Unnamed: 0_level_0,y,y,y,y,y,y,y,y
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
day,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Friday,482.0,382163.790456,61549.743569,2795.0,371315.5,396844.0,412663.5,543279.0
Monday,480.0,364423.583333,81829.0835,3128.0,357426.5,396108.5,410660.75,442987.0
Saturday,477.0,187787.761006,30322.288116,58878.0,169076.0,188255.0,204332.0,410231.0
Sunday,482.0,136867.221992,31888.203586,3710.0,119528.5,134741.5,151014.75,330352.0
Thursday,481.0,402610.176715,67609.329953,37090.0,389873.0,420910.0,436796.0,518151.0
Tuesday,480.0,403285.045833,53902.211104,38393.0,387166.25,417657.0,433577.25,494521.0
Wednesday,480.0,405922.495833,54724.300342,41106.0,391637.5,418973.5,435832.0,567020.0


In [15]:
# descriptives pre-covid
desc_post = bart[split_date:].groupby(['day']).describe()
desc_post

Unnamed: 0_level_0,y,y,y,y,y,y,y,y
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
day,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Friday,115.0,71826.4,33513.676727,8004.0,45985.5,58836.0,104530.5,136081.0
Monday,115.0,65486.313043,29536.159294,13061.0,44569.5,53676.0,91046.0,126435.0
Saturday,115.0,43094.286957,23013.843255,11238.0,23385.0,34043.0,65138.5,92695.0
Sunday,115.0,30352.904348,15479.732387,7562.0,17453.0,26516.0,44024.5,67469.0
Thursday,115.0,74818.295652,35802.118975,13482.0,47018.5,59665.0,107798.5,145597.0
Tuesday,114.0,74750.105263,34050.551455,23553.0,47336.5,61238.5,104956.5,147629.0
Wednesday,115.0,75497.721739,35394.088185,24496.0,47397.5,59435.0,105802.0,152703.0


In [16]:
# Events for top-10 ridership dates
notes = [
    'SF Giants Parade (2012) & Halloween', 
    'Warriors Parade (2015), Oakland', 
    'Super Bowl L Village (2016)', 
    'Warriors Parade, Oakland (2017)', 
    'SF Giants Parade (2014) & Halloween', 
    'Warriors Parade, Oakland (2018)', 
    'Super Bowl L Village (2016)', 
    'Bay Bridge multi-day closure for (new) eastern span (2013)', 
    'Super Bowl L Village (2016)', 
    '??? Oakland A\'s AML wildcard Game 6??? (2016)'
]

In [17]:
# top 10 filter
filter_n = 10
top_n = bart.sort_values(by = ['y'], ascending = False).head(filter_n)[['day', 'y']]
top_n['notes'] = notes
top_n.reset_index(inplace = True)
top_n.head()

Unnamed: 0,date,day,y,notes
0,2012-10-31,Wednesday,567020.0,SF Giants Parade (2012) & Halloween
1,2015-06-19,Friday,543279.0,"Warriors Parade (2015), Oakland"
2,2016-02-05,Friday,523802.0,Super Bowl L Village (2016)
3,2017-06-15,Thursday,518151.0,"Warriors Parade, Oakland (2017)"
4,2014-10-31,Friday,505307.0,SF Giants Parade (2014) & Halloween


In [18]:
df = top_n
fig = px.bar(df, 
        x = 'y', 
        y = df.index, 
        orientation='h', 
        # hover_data = ['day', 'date'], 
        text = 'notes',
    labels = {
        'y': 'Total Exits', 
        'x': 'Date & Event'},
        )

fig.update_layout(
    hovermode = 'y',
    title={
        'text': 'Top 10 Ridership Events',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        )
fig.update_traces(hovertemplate=None)        
fig.update_xaxes(categoryorder = 'total ascending')
fig.show()

fname = 'top_10.txt'
plot_out(fname, fig)

In [19]:
fig = go.Figure(go.Scatter(
            y = bart.y,
            x = bart.ds.tolist(),
            orientation='h'))

fig.update_layout(
    title={
        'text': "BART Daily Ridership, Jan 2011 - May 2022",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

fname = 'all_daily.txt'
plot_out(fname, fig)