# 2.2 | Daily BART EDA
---
* [01 API Data Requests](01_API_pulls.ipynb)
* [01.1 Additional BART Data](01_v2_bart.ipynb.ipynb)
* [02 Initial EDA](02_EDA.ipynb)
* _[02.2 EDA for Daily Ridership](02_EDA.ipynb)_
* [03 First Model: Prophet](03_prophet.ipynb)
---

# PLOTLY NOTES 
* reinstall `ipywidgets` if needed
* confirm plotly is working ( via [Facebook Prophet Issue # 1753 on GitHub](https://github.com/facebook/prophet/issues/1753) )
```python

import plotly.offline as py
import plotly.graph_objs as go

py.init_notebook_mode()

trace0 = go.Scatter(
  x=[1, 2, 3, 4],
  y=[10, 15, 13, 17]
)
data = go.Data([trace0])

py.iplot(data)

```


In [1]:
##### BASIC IMPORTS 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio

import gcutsoms as gf

In [2]:
import chart_studio

# SET UP PLOTLY KEYS 
try:
    KEY = os.environ['PLOTAPI']
    USER = os.environ['PLOTID']
except KeyError:
    sys.exit('keys not found')


chart_studio.tools.set_credentials_file(username = USER, api_key = KEY)

In [3]:
pio.templates.default = "plotly_dark"

### https://towardsdatascience.com/how-to-create-a-plotly-visualization-and-embed-it-on-websites-517c1a78568b

In [4]:
# import plotly.offline as py
# import plotly.graph_objs as go

# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode(connected=True)  # for plots to render in jupyter notebook

# py.init_notebook_mode()
# py.iplot(data)

In [5]:
# CUSTOM IMPORTS AND SETTINGS 
pd.options.display.max_columns = 90                     # view settings
pd.options.display.max_rows = 100

path = '../data/processed/'

In [6]:
# function to covert to datetimeinedex 
# def date_index(df): 
#     df['d'] = pd.to_datetime(df['d'])
#     df = df.set_index('d')
#     df.rename(columns = {'ridership' : 'y'}, inplace = True)

#     return(df)

In [7]:
# function to output HTML to embed in wordpress
def plot_out(filename, figname):
    import plotly as plt
    out_text = plt.offline.plot(figname, include_plotlyjs=False, output_type='div');

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(out_text)

In [8]:
# open file
filename = path + '2022_daily_by_station.csv'
bart = pd.read_csv(filename)
split_date = '2020-03-20'
bart.head()

Unnamed: 0,d,origin,ridership
0,2022-01-01,12TH,801
1,2022-01-01,16TH,1325
2,2022-01-01,19TH,655
3,2022-01-01,24TH,1251
4,2022-01-01,ANTC,272


In [14]:
# current name of column holding date 
col_title = 'd'
# sets date as time index
bart = gf.dt_index(bart, col_title)
bart.head()

Unnamed: 0_level_0,d,origin,ridership
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-01,2022-01-01,12TH,801
2022-01-01,2022-01-01,16TH,1325
2022-01-01,2022-01-01,19TH,655
2022-01-01,2022-01-01,24TH,1251
2022-01-01,2022-01-01,ANTC,272


In [10]:
# # add columns: day name and COVID note
# bart['day'] = bart.index.day_name()
# bart['covid'] = 'Pre-COVID'
# bart['covid'][split_date:] = 'Post-03/20'

In [11]:
bart.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4600 entries, 2022-01-01 to 2022-04-02
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   d          4600 non-null   object
 1   origin     4600 non-null   object
 2   ridership  4600 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 143.8+ KB


In [12]:
bart.describe()

Unnamed: 0,ridership
count,4600.0
mean,1819.278043
std,1765.500359
min,98.0
25%,714.0
50%,1268.0
75%,2209.0
max,12691.0


In [13]:
import plotly.express as px

df = bart
fig = px.line(df, x=df.index, y='y', color='origin')
fig.show()

ValueError: Value of 'y' is not the name of a column in 'data_frame'. Expected one of ['d', 'origin', 'ridership'] but received: y

In [None]:
import chart_studio.plotly as py
py.plot(fig, filename = 'ridershipByStation', auto_open=True)

In [None]:
df = bart

fig = px.histogram(df, 
        x = 'day', 
        y = 'y', 
        color = 'covid',
        # marginal='rug', # or violin, rug, box
        histfunc = 'avg',
        hover_data = df.columns, 
        labels = {
                'y' : 'Number of Daily Exits', 
                'day': 'Day of Week' })
fig.update_layout(
        title={
                'text': 'BART Daily Ridership by Day of Week, Pre-COVID and After',
                'y': 0.9,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top'}, 
        legend = dict(
                orientation = 'h', 
                title = None, 
                y = 1.),        # location of legend above chart or within
        barmode = 'overlay', 
        ) 
fig.update_traces(opacity=0.80)
fig.update_xaxes(categoryorder = 'total ascending')
fig.show()

fig.layout.images = [dict(
        source='https://blog.giovannaguevara.net/wp-content/uploads/2020/02/siteLogo.png',
        xref="paper", yref="paper",
        x=0.1, y=1.05,
        sizex=0.4, sizey=0.4,
        xanchor="center", yanchor="bottom"
      )]

# output HTML for embed
fname = 'avg_rid.txt'
plot_out(fname, fig)

In [None]:
df = bart[:split_date]
fig = px.violin (df, 
    y = 'y', 
    x = 'day',
    labels = {
        'y' : 'Number of Daily Exits, Avg', 
        'day': 'Day of Week'})
fig.update_layout(
    title={
        'text': 'BART Daily Ridership by Day of Week, Jan 2011 - Feb 2022',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_xaxes(categoryorder = 'total ascending')
fig.show()

# output HTML for embed
fname = 'violin_pre.txt'
plot_out(fname, fig)

In [None]:
df = bart[split_date:]
fig = px.violin (df, 
    y = 'y', 
    x = 'day',
    labels = {
        'y' : 'Number of Daily Exits, Avg', 
        'day': 'Day of Week'})
fig.update_layout(
    title={
        'text': 'BART Daily Ridership by Day of Week, March 20, 2020 - June 3, 2022',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_xaxes(categoryorder = 'total ascending')
fig.show()

# output HTML for embed
fname = 'violin_post.txt'
plot_out(fname, fig)

In [None]:
# descriptives pre-covid
desc = bart[:split_date].groupby(['day']).describe()
desc

In [None]:
# descriptives pre-covid
desc_post = bart[split_date:].groupby(['day']).describe()
desc_post

In [None]:
# Events for top-10 ridership dates
notes = [
    'SF Giants Parade (2012) & Halloween', 
    'Warriors Parade (2015), Oakland', 
    'Super Bowl L Village (2016)', 
    'Warriors Parade, Oakland (2017)', 
    'SF Giants Parade (2014) & Halloween', 
    'Warriors Parade, Oakland (2018)', 
    'Super Bowl L Village (2016)', 
    'Bay Bridge multi-day closure for (new) eastern span (2013)', 
    'Super Bowl L Village (2016)', 
    '??? Oakland A\'s AML wildcard Game 6??? (2016)'
]

In [None]:
# top 10 filter
filter_n = 10
top_n = bart.sort_values(by = ['y'], ascending = False).head(filter_n)[['day', 'y']]
top_n['notes'] = notes
top_n.reset_index(inplace = True)
top_n.head()

In [None]:
df = top_n
fig = px.bar(df, 
        x = 'y', 
        y = df.index, 
        orientation='h', 
        # hover_data = ['day', 'date'], 
        text = 'notes',
    labels = {
        'y': 'Total Exits', 
        'x': 'Date & Event'},
        )

fig.update_layout(
    hovermode = 'y',
    title={
        'text': 'Top 10 Ridership Events',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        )
fig.update_traces(hovertemplate=None)        
fig.update_xaxes(categoryorder = 'total ascending')
fig.show()

fname = 'top_10.txt'
plot_out(fname, fig)

In [None]:
fig = go.Figure(go.Scatter(
            y = bart.y,
            x = bart.ds.tolist(),
            orientation='h'))

fig.update_layout(
    title={
        'text': "BART Daily Ridership, Jan 2011 - May 2022",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

fname = 'all_daily.txt'
plot_out(fname, fig)