# 2 | Importing Data for Initial EDA, Visualizations
---
* [01 API Data Requests](01_API_pulls.ipynb)
* [01.1 Additional BART Data](01_v2_bart.ipynb.ipynb)
* _[02 Initial EDA](02_EDA.ipynb)_
* [03 First Model: Prophet](03_prophet.ipynb)
---

### Data Discussion

* [BART](https://www.bart.gov) publishes monthly reports with daily ridership for that month, using faregate counts for on- and off-boarding.
* [EIA](https://www.eia.gov/opendata/qb.php?category=240839&sdid=PET.EMM_EPM0_PTE_SCA_DPG.M) publishes monthly and weekly fuel rates 
* [CA Energy](https://www.energy.ca.gov/data-reports/energy-almanac/zero-emission-vehicle-and-infrastructure-statistics/vehicle-population) publishes vehicle counts annually. DMV and CA data only provide annual counts. 
* [Fed Reserve](https://www.federalreserve.gov) publishes yearly consumer debt 

In [None]:
# pip install ipywidgets

In [58]:
##### BASIC IMPORTS 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import gcutsoms as gf

In [101]:
###### CUSTOM IMPORTS AND SETTINGS 

import plotly
import chart_studio.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# FOR PLOTLY TO RENDER WITHIN NOTEBOOK, OFFLINE: 
# init_notebook_mode(connected=True)  

# import chart_studio

# SET UP PLOTLY KEYS
# `os`, `sys` and the top-level `chart_studio` name are all used below, so they
# are imported here; `import chart_studio.plotly as py` alone binds only `py`.
import os
import sys

import chart_studio

# Credentials come from the environment so they never live in the notebook.
try:
    KEY = os.environ['PLOTAPI']
    USER = os.environ['PLOTID']
except KeyError:
    # Stop the cell early: nothing below can authenticate without both values.
    sys.exit('keys not found')

# API CONNECT CREDENTIALS
chart_studio.tools.set_credentials_file(username = USER, api_key = KEY)

# Pandas view settings: show up to 90 columns and 100 rows before truncating.
pd.set_option('display.max_columns', 90)
pd.set_option('display.max_rows', 100)

# Relative directory that the CSV filenames below are appended to.
path = '../data/processed/'

In [102]:
def date_index(df):
    """Return a copy of ``df`` indexed by the parsed ``ds`` column.

    The input frame is left untouched: the parsed dates are attached to a new
    frame instead of being written into the caller's object.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``ds`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        New frame whose index is named ``date`` and holds the parsed ``ds``
        values. The ``ds`` column is kept; a ``ridership`` column, if present,
        is renamed to ``y``.

    Raises
    ------
    KeyError
        If ``df`` has no ``ds`` column.
    """
    return (
        df.assign(date=pd.to_datetime(df['ds']))   # new frame; caller's df is not modified
          .set_index('date')
          .rename(columns={'ridership': 'y'})      # no-op when 'ridership' is absent
    )

In [122]:
# FUNCTION RETURNS PLOTLY TRACES
# TAKES 3 ARGUMENTS: (dataframe, y, and title for plot)

def plot_traces(df, y, title):
    """Render one column of ``df`` as a Plotly line chart and return the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index supplies the x-axis values.
    y : str
        Name of the column in ``df`` to plot; also used to build the trace name.
    title : str
        Title placed in the figure layout.

    Returns
    -------
    dict
        The figure specification (``data`` and ``layout``) that was rendered,
        so the value callers assign (e.g. ``bart_plot``) is usable rather
        than ``None``.
    """
    y_trace = go.Scatter(
        x=df.index,
        y=df[y],
        name=y + 'trace',
        line=dict(color='blue'),
        opacity=0.4,
    )

    fig = dict(data=[y_trace], layout=dict(title=title))

    # Render through plotly's offline module, not the chart_studio `py` client.
    plotly.offline.iplot(fig)

    # Keep the textual completion marker that the notebook output shows.
    print('done')
    return fig

In [121]:
import chart_studio.plotly as py
import plotly.graph_objects as go

# Sample bar chart sent through the chart_studio client (`py`).
# NOTE(review): looks like a connectivity check for the credentials — confirm.
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv"
)

# One bar per row: the School column on x, the Gap column on y.
data = [go.Bar(x=df['School'], y=df['Gap'])]

py.iplot(data, filename='jupyter-basic_bar')

> <br>
>
> 1. BART Ridership
> 
> <br>

In [105]:
# Read bart.csv from the processed-data directory and index it by date.
filename = f'{path}bart.csv'
bart = date_index(pd.read_csv(filename))

In [107]:
# Preview the first rows to check the `date` index and the renamed `y` column.
bart.head()

Unnamed: 0_level_0,ds,y
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-01,2000-1-01,1178324
2000-02-01,2000-2-01,1178420
2000-03-01,2000-3-01,1198004
2000-04-01,2000-4-01,1220648
2000-05-01,2000-5-01,1222712


In [109]:
# Keep 2011 onward so the plotted range matches the "2011 - 2022" title
# (the loaded frame starts in 2000).
bart2 = bart.loc['2011-01-01':]
bart_plot = plot_traces(bart2, 'y', 'BART Monthly Ridership, 2011 - 2022')

done


> <br>
>
> 2. Fuel Prices
> 
> <br>

In [110]:
# Weekly fuel prices: read the processed CSV, index by date, show the latest rows.
filename = f'{path}fuel_w.csv'
fuel_w = date_index(pd.read_csv(filename))

fuel_w.tail()

Unnamed: 0_level_0,ds,fuel_w
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-06-20,2022-06-20,6.307
2022-06-27,2022-06-27,6.23
2022-07-04,2022-07-04,6.138
2022-07-11,2022-07-11,5.994
2022-07-18,2022-07-18,5.794


In [112]:
# Restrict the weekly series to 2010 onward before plotting.
fuel2 = fuel_w.loc['2010-01-01':]
fuel_plot2 = plot_traces(
    df=fuel2,
    y='fuel_w',
    title='Weekly Average Gas Price ($), California: 2010 - 2022',
)

done


In [113]:
# Monthly fuel prices: read the processed CSV, index by date, show the latest rows.
filename = f'{path}fuel_m.csv'
fuel_m = date_index(pd.read_csv(filename))

fuel_m.tail()

Unnamed: 0_level_0,ds,fuel_m
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-02-01,2022-02-01,4.66
2022-03-01,2022-03-01,5.655
2022-04-01,2022-04-01,5.692
2022-05-01,2022-05-01,5.871
2022-06-01,2022-06-01,6.294


In [114]:
# Restrict the monthly series to 2010 onward before plotting.
fuel3 = fuel_m.loc['2010-01-01':]
fuel_plot3 = plot_traces(
    df=fuel3,
    y='fuel_m',
    title='Monthly Average Gas Price ($), California: 2010 - 2022',
)

done


> <br>
>
> 3. Manipulating 'REGISTERED VEHICLES' file: 
> 
> <br>

In [116]:
# Registered-vehicle counts: read the processed CSV, index by date, show the latest rows.
filename = f'{path}vehs.csv'
vehs = date_index(pd.read_csv(filename))

vehs.tail()

Unnamed: 0_level_0,ds,cars
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-01,2017-01-01,28418039
2018-01-01,2018-01-01,28681493
2019-01-01,2019-01-01,29029787
2020-01-01,2020-01-01,28665934
2021-01-01,2021-01-01,29942517


In [117]:
# Plot the 2010-onward slice (vehs2) so the chart matches its "2010 - 2021" title.
vehs2 = vehs.loc['2010-01-01':]
cars_plot = plot_traces(vehs2, 'cars', 'Estimated Count of Registered Cars CA: 2010 - 2021')

done


> <br>
>
> 4. Manipulating 'CONSUMER DEBT' file: 
> 
> <br>

In [118]:
# Consumer debt: this file carries its dates in a `date` column, while
# date_index() reads `ds`, so mirror `date` into `ds` before indexing.
filename = f'{path}debt.csv'
debt = pd.read_csv(filename)
debt = debt.assign(ds=debt['date'])
debt = date_index(debt)

debt.tail()

Unnamed: 0_level_0,debt,ds
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-11-01,4408.96983,2021-11-01
2021-12-01,4431.91715,2021-12-01
2022-01-01,4448.88285,2022-01-01
2022-02-01,4486.57969,2022-02-01
2022-03-01,4539.01445,2022-03-01


In [119]:
# Restrict the debt series to 2010 onward before plotting.
debt2 = debt.loc['2010-01-01':]
debt_plot = plot_traces(
    df=debt2,
    y='debt',
    title='Consumer Debt ($) 2010 - 2022 (not adjusted, Federal Reserve)',
)

done
