# 2 | Daily BART EDA
---
* [01 API Data Requests](01_API_pulls.ipynb)
* [01.1 Additional BART Data](01_v2_bart.ipynb.ipynb)
* [02 Initial EDA](02_EDA.ipynb)
* _[02.2 EDA for Daily Ridership](02_EDA.ipynb)_
* [03 First Model: Prophet](03_prophet.ipynb)
---

# PLOTLY NOTES 
* reinstall `ipywidgets` if needed
* confirm plotly is working ( via [Facebook Prophet Issue # 1753 on GitHub](https://github.com/facebook/prophet/issues/1753) )
```python

import plotly.offline as py
import plotly.graph_objs as go

py.init_notebook_mode()

trace0 = go.Scatter(
  x=[1, 2, 3, 4],
  y=[10, 15, 13, 17]
)
data = go.Data([trace0])

py.iplot(data)

```


In [None]:
# pip install ipywidgets

In [1]:
##### BASIC IMPORTS 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px

In [None]:
# import plotly.offline as py
# import plotly.graph_objs as go

# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode(connected=True)  # for plots to render in jupyter notebook

# py.init_notebook_mode()
# py.iplot(data)

In [2]:
# Notebook-wide settings: pandas display limits and the processed-data folder.
pd.set_option('display.max_columns', 90)    # widest frame shown without truncation
pd.set_option('display.max_rows', 100)      # longest frame shown without truncation

path = '../data/processed/'                 # prefix prepended to every CSV file name below

In [3]:
# Helper: convert a frame with a 'ds' date column to a DatetimeIndex.
def date_index(df):
    """Return a copy of ``df`` indexed by the parsed ``ds`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``ds`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        New frame whose index is named ``date`` and holds the parsed ``ds``
        values. The original ``ds`` column is kept, and a ``ridership``
        column, if present, is renamed to ``y``.

    Notes
    -----
    The input frame is not modified, so re-running a cell that calls this
    helper on the same raw frame gives the same result every time.
    """
    # assign() works on a copy, so the caller's frame never gains a 'date' column.
    indexed = df.assign(date=pd.to_datetime(df['ds'])).set_index('date')
    # rename() silently ignores 'ridership' when the column is absent.
    return indexed.rename(columns={'ridership': 'y'})

In [7]:
# Read the daily BART CSV and index it by date in one step.
# ('bart.csv' in the same folder was the previously used alternative input.)
filename = path + 'bart_daily.csv'
bart = date_index(pd.read_csv(filename))

In [30]:
# Add the weekday name and a two-level period label used to colour later plots.
bart['day'] = bart.index.day_name()
bart['covid'] = 'Pre-COVID'
# Single .loc write instead of chained indexing (bart['covid'][...] = ...):
# the chained form triggers SettingWithCopyWarning and is not guaranteed to
# write through to `bart`.
# NOTE(review): the cutoff here is 2022-03-01, while the violin/describe cells
# split the data at 2020-02-28 / 2020-03-20 -- confirm which boundary is
# intended for a column named 'covid'.
bart.loc['2022-03-01':, 'covid'] = 'Post-02/22'
bart.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,y,ds,day,covid
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2011-01-01,124162.0,2011-01-01,Saturday,Pre-COVID
2011-01-02,93666.0,2011-01-02,Sunday,Pre-COVID
2011-01-03,285891.0,2011-01-03,Monday,Pre-COVID
2011-01-04,322306.0,2011-01-04,Tuesday,Pre-COVID
2011-01-05,327006.0,2011-01-05,Wednesday,Pre-COVID


In [31]:
# Structural overview: index range, column dtypes and non-null counts.
bart.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4165 entries, 2011-01-01 to 2022-06-02
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   y       4165 non-null   float64
 1   ds      4165 non-null   object 
 2   day     4165 non-null   object 
 3   covid   4165 non-null   object 
dtypes: float64(1), object(3)
memory usage: 291.7+ KB


In [32]:
# Count of missing values per column.
bart.isnull().sum()

y        0
ds       0
day      0
covid    0
dtype: int64

In [33]:
# Summary statistics for the numeric column(s) of the full series.
bart.describe()

Unnamed: 0,y
count,4165.0
mean,275329.766146
std,150595.66671
min,2795.0
25%,132103.0
50%,356107.0
75%,413293.0
max,567020.0


In [89]:
# Average daily exits per weekday, one overlaid bar series per 'covid' label.
df = bart

fig = px.histogram(
    df,
    x='day',
    y='y',
    color='covid',
    histfunc='avg',                      # bar height = mean of 'y' within each weekday
    hover_data=df.columns,
    labels={'y': 'Number of Daily Exits', 'day': 'Day of Week'},
    title='BART Daily Ridership by Day of Week, Pre-COVID and After',
)

# One layout update: centred title, horizontal untitled legend, overlaid bars.
fig.update_layout(
    title=dict(y=0.9, x=0.5, xanchor='center', yanchor='top'),
    legend=dict(orientation='h', title=None, y=1.),
    barmode='overlay',
)
fig.update_traces(opacity=0.80)          # keep both overlaid series visible
fig.update_xaxes(categoryorder='total ascending')
fig.show()

In [91]:
# Distribution of daily exits per weekday for the period up to 2020-02-28.
# The title previously read "Feb 2022", which contradicted the slice below.
df = bart.loc[:'2020-02-28']
fig = px.violin(
    df,
    y='y',
    x='day',
    # NOTE(review): each point is a single day's exits, so the ", Avg" suffix
    # in the y label may be misleading -- confirm the intended wording.
    labels={
        'y': 'Number of Daily Exits, Avg',
        'day': 'Day of Week'},
    title='BART Daily Ridership by Day of Week, Jan 2011 - Feb 2020')
fig.update_layout(
    title={
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_xaxes(categoryorder='total ascending')
fig.show()

In [99]:
# Distribution of daily exits per weekday from 2020-03-20 to the end of the data.
# The series ends on 2022-06-02 (see the bart.info() output), so the title's
# end date is June 2 rather than June 3.
df = bart.loc['2020-03-20':]
fig = px.violin(
    df,
    y='y',
    x='day',
    # NOTE(review): each point is a single day's exits, so the ", Avg" suffix
    # in the y label may be misleading -- confirm the intended wording.
    labels={
        'y': 'Number of Daily Exits, Avg',
        'day': 'Day of Week'},
    title='BART Daily Ridership by Day of Week, March 20, 2020 - June 2, 2022')
fig.update_layout(
    title={
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_xaxes(categoryorder='total ascending')
fig.show()

In [46]:
# Per-weekday summary statistics over the full date range.
bart.groupby(['day']).describe()

Unnamed: 0_level_0,y,y,y,y,y,y,y,y
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
day,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Friday,596.0,322852.468121,134814.508194,2795.0,318734.5,386885.5,408559.0,543279.0
Monday,595.0,306645.791597,139728.960207,3128.0,169534.5,383693.0,407818.0,442987.0
Saturday,592.0,159680.076014,64227.690016,11238.0,149146.5,179299.0,200135.0,410231.0
Sunday,597.0,116349.38861,51321.60947,3710.0,101589.0,128698.0,147366.0,330352.0
Thursday,596.0,339361.743289,143851.358403,13482.0,344886.0,409817.5,432823.75,518151.0
Tuesday,594.0,340232.885522,139051.524578,23553.0,352444.75,405516.5,430596.0,494521.0
Wednesday,595.0,342058.884034,140383.761443,24496.0,348049.5,409304.0,432203.0,567020.0


In [52]:
# Same per-weekday summary, restricted to rows up to 2020-02-28.
bart[:'2020-02-28'].groupby(['day']).describe()

Unnamed: 0_level_0,y,y,y,y,y,y,y,y
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
day,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Friday,479.0,383393.590814,59018.41333,2795.0,371550.0,397045.0,412997.0,543279.0
Monday,477.0,365031.081761,81261.997097,3128.0,357923.0,396233.0,410720.0,442987.0
Saturday,474.0,188311.027426,29566.487187,107616.0,169639.75,188306.5,204416.0,410231.0
Sunday,479.0,137301.954071,31460.139298,3710.0,120016.5,134971.0,151050.5,330352.0
Thursday,478.0,403762.642259,65357.121513,37090.0,391573.5,421171.0,436834.25,518151.0
Tuesday,477.0,404272.209644,51405.359039,38393.0,387425.0,417803.0,433671.0,494521.0
Wednesday,477.0,406996.683438,52012.656176,41106.0,392505.0,419324.0,435937.0,567020.0


In [129]:
# Hand-written event annotations for the ten highest-ridership days.
# Order matters: the list is attached positionally to the rows of the
# descending top-10 table built in the next cell.
# The Bay Bridge entry previously contained a non-breaking space (shown as
# '\xa0' in the cell output); it is now a regular space.
notes = [
    'SF Giants Parade (2012) & Halloween',
    'Warriors Parade (2015), Oakland',
    'Super Bowl L Village (2016)',
    'Warriors Parade, Oakland (2017)',
    'SF Giants Parade (2014) & Halloween',
    'Warriors Parade, Oakland (2018)',
    'Super Bowl L Village',
    'Bay Bridge multi-day closure for (new) eastern span (2013)',
    'Super Bowl L Village',
    'Oakland A\'s AML wildcard Game 6??? (2016)'   # TODO: event still unconfirmed
]
notes

['SF Giants Parade (2012) & Halloween',
 'Warriors Parade (2015), Oakland',
 'Super Bowl L Village (2016)',
 'Warriors Parade, Oakland (2017)',
 'SF Giants Parade (2014) & Halloween',
 'Warriors Parade, Oakland (2018)',
 'Super Bowl L Village',
 'Bay Bridge multi-day closure\xa0for (new) eastern span (2013)',
 'Super Bowl L Village',
 "Oakland A's AML wildcard Game 6??? (2016)"]

In [130]:
# Highest-ridership days, annotated with `notes` and given a 0..n-1 row index.
filter_n = 10
top_n = (
    bart.sort_values(by=['y'], ascending=False)
        .head(filter_n)[['day', 'y']]
        .assign(notes=notes)      # list is matched to rows by position; lengths must agree
        .reset_index()            # 'date' becomes a regular column
)
top_n.head()

Unnamed: 0,date,day,y,notes
0,2012-10-31,Wednesday,567020.0,SF Giants Parade (2012) & Halloween
1,2015-06-19,Friday,543279.0,"Warriors Parade (2015), Oakland"
2,2016-02-05,Friday,523802.0,Super Bowl L Village (2016)
3,2017-06-15,Thursday,518151.0,"Warriors Parade, Oakland (2017)"
4,2014-10-31,Friday,505307.0,SF Giants Parade (2014) & Halloween


In [133]:
# Horizontal bar chart of the highest-ridership days; each bar carries its event note.
df = top_n
fig = px.bar(
    df,
    x='y',
    y=df.index,                 # row position 0..n-1 (descending ridership rank)
    orientation='h',
    # hover_data = ['day', 'date'],
    text='notes',
    # Set the displayed title here. The earlier value was a copy-pasted violin
    # title that update_layout() immediately overrode.
    title='Top 10 Ridership',
    # `labels` keys must be column names; the old 'x' key matched no column
    # and was ignored, so the axis title is set explicitly below instead.
    labels={'y': 'Total Exits'},
)

fig.update_layout(
    hovermode='y',
    title={
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
)
fig.update_yaxes(title_text='Date & Event')
fig.update_traces(hovertemplate=None)
fig.update_xaxes(categoryorder='total ascending')
fig.show()


# fig = px.bar(df, x='year', y='pop',
#              hover_data=['lifeExp', 'gdpPercap'], color='lifeExp',
#              labels={'pop':'population of Canada'}, height=400)



In [135]:
# Export the current `fig` as an HTML <div> fragment (plotly.js itself is not embedded).
# Uses the `plotly` package already imported in the imports cell; re-importing it
# as `plt` would rebind the matplotlib.pyplot alias defined there.
# NOTE: `fig` is whichever figure was created most recently, so run the cells in order.
out_text = plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')

txt_out = 'top_10.txt'

with open(txt_out, 'w', encoding='utf-8') as f:
    f.write(out_text)

In [134]:
# Line chart of daily ridership from 2018-01-01 onward.
# The subset gets its own name so the full `bart` frame is not overwritten;
# re-running this cell or any earlier cell therefore still sees every row.
bart_recent = bart.loc['2018-01-01':]

fig = go.Figure(go.Scatter(
            y = bart_recent.y,
            x = bart_recent.ds.tolist(),
            orientation='h'))

# Title matches the plotted window (the old text claimed "Jan 2011 - May 2022").
fig.update_layout(
    title={
        'text': "BART Daily Ridership, Jan 2018 - Jun 2022",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [None]:
# Export the current `fig` as an HTML <div> fragment (plotly.js itself is not embedded).
# Uses the `plotly` package already imported in the imports cell; re-importing it
# as `plt` would rebind the matplotlib.pyplot alias defined there.
out_text = plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')

with open('plotly_out.txt', 'w', encoding='utf-8') as f:
    f.write(out_text)

> <br>
>
> 2. Fuel Prices
> 
> <br>

In [None]:
# Read fuel_w.csv, index it by date, and preview the most recent rows.
filename = path + 'fuel_w.csv'
fuel_w = date_index(pd.read_csv(filename))
fuel_w.tail()

In [None]:
# Restrict to rows from 2010-01-01 onward, then plot the 'fuel_w' column.
# NOTE(review): `plot_traces` is not defined or imported in the visible part of
# this notebook -- confirm where it comes from, otherwise this cell raises
# NameError on a fresh kernel.
fuel2 = fuel_w.loc['2010-01-01':]
fuel_plot2 = plot_traces(fuel2, 'fuel_w', 'Weekly Average Gas Price ($), California: 2010 - 2022')

In [None]:
# Read fuel_m.csv, index it by date, and preview the most recent rows.
filename = path + 'fuel_m.csv'
fuel_m = date_index(pd.read_csv(filename))
fuel_m.tail()

In [None]:
# Restrict to rows from 2010-01-01 onward, then plot the 'fuel_m' column.
# NOTE(review): `plot_traces` is not defined or imported in the visible part of
# this notebook -- confirm where it comes from, otherwise this cell raises
# NameError on a fresh kernel.
fuel3 = fuel_m.loc['2010-01-01':]
fuel_plot3 = plot_traces(fuel3, 'fuel_m', 'Monthly Average Gas Price ($), California: 2010 - 2022')

> <br>
>
> 3. Manipulating 'REGISTERED VEHICLES' file: 
> 
> <br>

In [None]:
# Read vehs.csv, index it by date, and preview the most recent rows.
filename = path + 'vehs.csv'
vehs = date_index(pd.read_csv(filename))
vehs.tail()

In [None]:
# Restrict to rows from 2010-01-01 onward and plot that subset.
# The filtered frame `vehs2` was previously computed but the unfiltered `vehs`
# was plotted, contradicting the "2010 - 2021" title and the sibling fuel/debt cells.
# NOTE(review): `plot_traces` is not defined or imported in the visible part of
# this notebook -- confirm where it comes from.
vehs2 = vehs.loc['2010-01-01':]
cars_plot = plot_traces(vehs2, 'cars', 'Estimated Count of Registered Cars CA: 2010 - 2021')

> <br>
>
> 4. Manipulating 'CONSUMER DEBT' file: 
> 
> <br>

In [None]:
# Read debt.csv, copy its 'date' column into 'ds' (the column date_index() parses),
# index by date, and preview the most recent rows.
filename = path + 'debt.csv'
debt = date_index(
    pd.read_csv(filename).assign(ds=lambda frame: frame['date'])
)
debt.tail()

In [None]:
# Restrict to rows from 2010-01-01 onward, then plot the 'debt' column.
# NOTE(review): `plot_traces` is not defined or imported in the visible part of
# this notebook -- confirm where it comes from, otherwise this cell raises
# NameError on a fresh kernel.
debt2 = debt.loc['2010-01-01':]
debt_plot = plot_traces(debt2, 'debt', 'Consumer Debt ($) 2010 - 2022 (not adjusted, Federal Reserve)')