# Current App 12/31/21

In [None]:
# !pip install fsds

```python
import streamlit as st

from fsds.imports import *
import pandas as pd

import os,glob,sys,joblib,zipfile,json
import datetime as dt
import re

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_dark"

plt.rcParams['figure.figsize'] = (12,6)
pd.set_option('display.max_columns',0)
# fs.check_package_versions(['statsmodels'],fpath=True)



### FORECAST SPECIFIC FUNCTIONS
import statsmodels.api as sms
import statsmodels.tsa.api as tsa
from pmdarima import auto_arima
import project_functions as fn
# from fsds import pandemic as fn
import os,json,glob

# Load the JSON file that maps logical names (e.g. 'fpath_final_df_csv') to
# the data-file paths used by the rest of the app.
# The encoding is pinned because JSON text is UTF-8 and open() would otherwise
# fall back to the platform's default encoding.
with open("FILE_DIRECTORY.json", encoding="utf-8") as f:
    FPATHS = json.load(f)
```

```python
### Title and author information
st.markdown('# Planning for the Pandemic')
st.markdown("""
- James M. Irving, PhD.
    - james.irving.phd@gmail.com
    - [LinkedIn](https://www.linkedin.com/in/james-irving-4246b571/)
    - [GitHub Repo](https://github.com/jirvingphd/predicting-the-pandemic)

___""")

### Project goal
st.markdown("""## ***Goal***
- Covid-19 and the various strains that have since emerged have upended modern life and fundamentally changed how we function as a society.
- Part of what has made it difficult to tackle the pandemic is the differences between states, state laws/policies, and a lack of public understanding about the predictability of the surges in cases.
- The goal of this dashboard is to provide easy access to state-level coronavirus and hospital capacity statistics.
    - Furthermore, I wanted to provide on-demand timeseries forecasts into the near future for all/any of these statistics.
""")



### Data section: the button's return value is passed to load_data() below,
### where a truthy value triggers a fresh download.
st.markdown('## ***The Data***')
st.markdown('- This dashboard uses data from several APIs and kaggle datasets. To fetch the latest data, click the button below.')
WORKFLOW_BUTTON = st.button("Fetch new data.")

st.markdown('> Note: it can take up to 2 minutes to download the data.')

# NOTE: "John Hopkins" below is left as-is because it is quoted as the title
# of the Kaggle dataset.
st.markdown("""### Sources
- Coronavirus Data by State- # of Cases/Deaths by State
    - [Kaggle Dataset: "COVID-19 data from John Hopkins University"](https://www.kaggle.com/antgoldbloom/covid19-data-from-john-hopkins-university)
    - Repackaged version of the data from the [official Johns Hopkins Repository](https://github.com/CSSEGISandData/COVID-19)
- Hospital & ICU Occupancy Data:
    - [HealthData.gov API: "COVID-19 Reported Patient Impact and Hospital Capacity by State Timeseries API"](https://healthdata.gov/Hospital/COVID-19-Reported-Patient-Impact-and-Hospital-Capa/g62h-syeh)
""")
# RUN_FULL_WORKFLOW=False
```

```python
def load_data(WORKFLOW_BUTTON=False):
    """Load the combined state-level DataFrame and the per-state lookup.

    Parameters
    ----------
    WORKFLOW_BUTTON : bool, default False
        If truthy, run ``fn.data_acquisition.FULL_WORKFLOW`` (with
        ``merge_hospital_data=True``) to fetch fresh data. Otherwise read the
        previously saved files whose paths are stored in ``FPATHS``.

    Returns
    -------
    tuple
        ``(df_states, STATES)``: the combined DataFrame and the object used
        elsewhere in the app to look up a single state's data by key.
    """
    if WORKFLOW_BUTTON:
        # Fresh download requested: the workflow returns both objects.
        df_states, STATES = fn.data_acquisition.FULL_WORKFLOW(merge_hospital_data=True)
        return df_states, STATES

    # Otherwise reuse the files saved by a previous run. The first two CSV
    # columns form the index and the 'Date' column is parsed as dates.
    df_states = pd.read_csv(
        FPATHS['fpath_final_df_csv'],
        compression='gzip',
        parse_dates=['Date'],
        index_col=[0, 1],
    )
    # NOTE(review): joblib.load unpickles its input, so this path must only
    # ever point at a trusted, locally produced file.
    STATES = joblib.load(FPATHS['fpath_final_states'])
    return df_states, STATES

## Load the data (downloading fresh data only when the button was clicked),
## then offer every column except 'Deaths' and 'Cases' as a selectable statistic.
df, STATES = load_data(WORKFLOW_BUTTON=WORKFLOW_BUTTON)
options_stats = df.drop(columns=['Deaths', 'Cases']).columns.tolist()

st.markdown("___")
st.markdown("## ***Overview - Comparing All States***")

## Widgets controlling the map: look-back window (in days) and statistic
n_days = st.slider("PAST N # OF DAYS",value=30,min_value=7,max_value=180)
col = st.selectbox("Which statistic to map?", options_stats)

## Date window for the overview.
# The window is anchored on the latest date present in the data (second index
# level) rather than on the wall clock, so it stays meaningful when the saved
# data is older than today.
latest_date = pd.Timestamp(df.index.get_level_values(1).max())
end_date = latest_date
start_date = latest_date - pd.Timedelta(days=n_days)
# `today` is still defined because the forecasting section reads it.
today = dt.date.today()

## Build the map figure (named `state_map` to avoid shadowing the builtin `map`)
state_map = fn.app_functions.plot_map_columns(
    df, col=col, last_n_days=n_days, plot_map=False, return_map=True)

## Same call with return_map=False to get just the DataFrame
# NOTE(review): df_rank is not used anywhere in the visible code.
df_rank = fn.app_functions.plot_map_columns(
    df, col=col, last_n_days=n_days, plot_map=False, return_map=False)

## Show the map
st.plotly_chart(state_map)

### Compare the chosen statistic(s) across a user-selected set of states
st.markdown("___")
st.markdown('## ***Comparing Selected States***')

## Widgets: which statistics and which states to include
stat_to_compare = st.multiselect(
    "Which statistic to compare?",
    options_stats,
    default=["Cases-New"],
)
states_to_compare = st.multiselect(
    "Which states to compare?",
    list(STATES.keys()),
    default=["NY", 'MD', 'FL', 'CA', 'TX'],
)

## Assemble the comparison table and render it as a line chart
plot_df = fn.app_functions.get_states_to_plot(
    df,
    state_list=states_to_compare,
    plot_cols=stat_to_compare,
    agg_func='mean',
    rename_cols=True,
    fill_method='interpolate',
    remove_outliers=False,
    state_first=True,
    threshold_type=['0', '%'],
    diagnose=False,
)
comparison_fig = px.line(plot_df)
st.plotly_chart(comparison_fig)


st.markdown("___")

# ############################## PRIOR TO  09/21 ###########################
st.markdown('## ***Timeseries Forecasting by State/Statistic***')

## Default training window: the 365 days ending at the latest date in the data.
# Anchoring on the data (second index level) instead of the wall clock keeps
# the default sensible when the saved data is older than today.
latest_date = pd.Timestamp(df.index.get_level_values(1).max())
default_model_start = (latest_date - pd.Timedelta(days=365)).date()

## Widgets: state, statistic and start of the training data
state_name = st.selectbox('Select State', list(STATES.keys()))
col = st.selectbox("Select statistic",options_stats)
start_date = st.date_input('Start Date for Training Data',
                           value=default_model_start)

## Restrict the selected state's data to the chosen start date onward.
# The date widget value is cast to a Timestamp before it is used as a slice
# bound on the state's index.
df_state = STATES[state_name].loc[pd.Timestamp(start_date):].copy()

## Preview the series that was selected
ts = df_state[col].copy()
# Plot on a dedicated figure rather than matplotlib's implicit current axes,
# and close it after rendering so figures do not pile up across reruns.
fig, ax = plt.subplots()
ts.plot(ax=ax, title=f"{state_name}-{col}")
ax.set_ylabel(col)
st.pyplot(fig)
plt.close(fig)


st.markdown("""> **Click "`Run model`" below to start the modeling process for the selected state and statistic.**
-  [!] Warning: the gridsearch process may take several minutes. Try selecting a more recent start date to increase performance.""")


## The modeling function runs as the button's click callback.
# NOTE(review): the callback receives STATES, state_name and col only; the
# selected start_date / df_state is not passed, so confirm that the model
# really honours the "Start Date for Training Data" widget.
model_q = st.button('Run model.',
                    on_click=fn.modeling.make_timeseries_model,
                    args=(STATES, state_name, col))



# st.button('Hit me')
# st.checkbox('Check me out')
# st.radio('Radio', [1,2,3])
# st.multiselect('Multiselect', [1,2,3])
# st.slider('Slide me', min_value=0, max_value=10)
# st.select_slider('Slide to select', options=[1,'2'])
# st.text_input('Enter some text')
# st.number_input('Enter a number')
# st.text_area('Area for textual entry')
# st.date_input('Date input')
# st.time_input('Time entry')
# st.file_uploader('File uploader')
# st.color_picker('Pick a color')

```

# Test Workflow

In [9]:
import streamlit as st

from fsds.imports import *
import pandas as pd

import os,glob,sys,joblib,zipfile,json
import datetime as dt
import re

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_dark"

plt.rcParams['figure.figsize'] = (12,6)
pd.set_option('display.max_columns',0)
# fs.check_package_versions(['statsmodels'],fpath=True)



### FORECAST SPECIFIC FUNCTIONS
import statsmodels.api as sms
import statsmodels.tsa.api as tsa
from pmdarima import auto_arima
import project_functions as fn
# from fsds import pandemic as fn
import os,json,glob

# Load the JSON file that maps logical names (e.g. 'fpath_final_df_csv') to
# the data-file paths used by the rest of this workflow.
# The encoding is pinned because JSON text is UTF-8 and open() would otherwise
# fall back to the platform's default encoding.
with open("FILE_DIRECTORY.json", encoding="utf-8") as f:
    FPATHS = json.load(f)

In [10]:
def load_data(WORKFLOW_BUTTON=False):
    """Load the combined state-level DataFrame and the per-state lookup.

    Parameters
    ----------
    WORKFLOW_BUTTON : bool, default False
        If truthy, run ``fn.data_acquisition.FULL_WORKFLOW`` (with
        ``merge_hospital_data=True``) to fetch fresh data. Otherwise read the
        previously saved files whose paths are stored in ``FPATHS``.

    Returns
    -------
    tuple
        ``(df_states, STATES)``: the combined DataFrame and the object used
        elsewhere in the notebook to look up a single state's data by key.
    """
    if WORKFLOW_BUTTON:
        # Fresh download requested: the workflow returns both objects.
        df_states, STATES = fn.data_acquisition.FULL_WORKFLOW(merge_hospital_data=True)
        return df_states, STATES

    # Otherwise reuse the files saved by a previous run. The first two CSV
    # columns form the index and the 'Date' column is parsed as dates.
    df_states = pd.read_csv(
        FPATHS['fpath_final_df_csv'],
        compression='gzip',
        parse_dates=['Date'],
        index_col=[0, 1],
    )
    # NOTE(review): joblib.load unpickles its input, so this path must only
    # ever point at a trusted, locally produced file.
    STATES = joblib.load(FPATHS['fpath_final_states'])
    return df_states, STATES

In [11]:
## Force the full download workflow for this test run, then list every
## column except 'Deaths' and 'Cases' as a selectable statistic.
WORKFLOW_BUTTON = True
df, STATES = load_data(WORKFLOW_BUTTON=WORKFLOW_BUTTON)
options_stats = df.drop(columns=['Deaths', 'Cases']).columns.tolist()

[i] Retrieving kaggle dataset: antgoldbloom/covid19-data-from-john-hopkins-university
- Loading data from RAW_us_confirmed_cases.csv
- Loading data from RAW_us_deaths.csv
[i] Retrieving hospital data from https://healthdata.gov/resource/g62h-syeh.csv
[i] Workflow completed.
	Run time=0:00:50.880413 sec.
[i]The final files of note:
	./data/combined_us_states_full_data.csv
	./data/STATE_DICT.joblib
[i] Final joined data (DF) saved as ./data/FINAL_STATES.csv.gz
[i] Final joined data (DF) saved as ./data/FINAL_STATES.pickle


In [14]:
STATES.keys()

dict_keys(['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'PR', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY'])

In [15]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths,Cases,Hospitalized Currently,ICU-Covid Currently,Deaths-New,Cases-New
Unnamed: 0_level_1,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AK,2020-03-23,0,39,0.0,0.0,0.0,0.0
AK,2020-03-24,0,43,0.0,0.0,0.0,4.0
AK,2020-03-25,1,50,0.0,0.0,1.0,7.0
AK,2020-03-26,1,64,0.0,0.0,0.0,14.0
AK,2020-03-27,1,75,0.0,0.0,0.0,11.0
...,...,...,...,...,...,...,...
WY,2021-12-26,1526,114242,63.0,28.0,0.0,0.0
WY,2021-12-27,1526,114624,64.0,26.0,0.0,382.0
WY,2021-12-28,1526,114917,69.0,21.0,0.0,293.0
WY,2021-12-29,1526,115242,74.0,25.0,0.0,325.0


### NOTES:
- The way the start date is determined should change: instead of creating `today` from the system clock, use the latest date available in the state data (compare the two approaches below).

```python
## CURRENT WAY
# calc dates
# The window is anchored on the wall-clock date, regardless of how recent
# the loaded data actually is.
today = dt.date.today()
end_state = today
start_date = pd.Timestamp(today) - pd.Timedelta(f'{str(n_days)} days')


## PROPOSED NEW WAY
# The window is anchored on the newest date in the data: dropping index
# level 0 leaves the date level, and its max is the latest date available.
latest_date = df.droplevel(0).index.max()
end_date = latest_date
start_date = pd.Timestamp(latest_date) - pd.Timedelta(f'{str(n_days)} days')
```

In [17]:
df.loc["MD"].index[-1]

Timestamp('2021-12-30 00:00:00')

In [21]:
df.droplevel(0).index.max()

Timestamp('2021-12-30 00:00:00')

In [None]:

# Date window anchored on the wall-clock date: it ends today and starts
# `n_days` days earlier.
end_state = today = dt.date.today()
start_date = pd.Timestamp(today) - pd.Timedelta(f'{str(n_days)} days')