In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from datetime import date
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from pandas.plotting import register_matplotlib_converters
# Register pandas' date/time converters with matplotlib so that date-indexed
# pandas objects can be passed directly to plotting calls.
register_matplotlib_converters()

#%matplotlib inline
# Display limits for DataFrames rendered in the notebook.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 300

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not just the last one.
# NOTE(review): this is why later plotting cells end with a ';' statement --
# presumably to silence the echoed return values of the plt.* calls.
InteractiveShell.ast_node_interactivity = "all"

from IPython.core.display import display, HTML
# Inject CSS that widens the notebook's `.container` element to 95% width.
display(HTML("<style>.container { width:95% !important; }</style>"))

### JHU Data for ARIMA Daily Model

In [None]:
# Load the JHU CSSE global time series of cumulative confirmed cases: one row
# per country/province, four leading metadata columns, then one column per day.
df=pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')

us_rows = df[df['Country/Region'] == 'US']
if us_rows.empty:
    raise ValueError("No rows with Country/Region == 'US' found in the JHU confirmed-cases file")

# Select the per-day columns by position and collapse the US row(s) into one
# series. This avoids depending on the row label of the US entry, which
# changes whenever rows are added to or reordered in the upstream file.
us_counts = us_rows.iloc[:, 4:].sum(axis=0)

# Long format: one row per calendar day with the cumulative count.
us_df = pd.DataFrame({
    'date': pd.to_datetime(us_counts.index, format="%m/%d/%y"),
    'counts': us_counts.to_numpy(),
})

# Daily new cases = first difference of the cumulative series; the first day
# has no predecessor, so its difference is defined as 0.
us_df['daily_diff'] = us_df['counts'].diff()
us_df.loc[0, 'daily_diff'] = 0
us_df = us_df.set_index('date')

# Date-indexed, integer-valued daily series used for modelling and plotting.
us_daily = us_df[['daily_diff']].astype(int)

In [None]:
# Plot the observed daily new-case series. us_daily is indexed by date, so the
# dates become the x axis.
plt.figure(figsize=(10,6))
plt.title('Daily Case Counts in US', fontname='Arial', size=24)
plt.xticks(rotation=45)
# NOTE(review): the trailing ';' presumably suppresses the echoed return values
# of the plt.* calls (ast_node_interactivity is set to "all" above).
plt.plot(us_daily, color='#003366', linewidth=3);

In [None]:
# Fit an ARIMA model to the daily new-case series.
# The order tuple is (p, d, q): AR terms, degree of differencing, MA terms.
arima_order = (7, 1, 1)
model = ARIMA(us_daily, order=arima_order)
daily_fit = model.fit()

In [None]:
# Project the fitted model forward. Later cells index the result as
# forecast[0] (point forecasts) and forecast[2] (lower/upper bound pairs).
forecast_horizon = 21
forecast = daily_fit.forecast(steps=forecast_horizon)
forecast

In [None]:
# Sanity check: number of forecast steps returned (forecast[0] holds the
# point forecasts used by the plotting and projection cells below).
len(forecast[0])

In [None]:
# Expose the date index as a regular 'date' column. The guard keeps this cell
# re-runnable: an unconditional reset_index would insert a spurious 'index'
# column on a second execution.
if 'date' not in us_daily.columns:
    us_daily.reset_index(inplace=True)

# Build x-axis tick positions and labels. The projection plot uses integer day
# offsets on the x axis, so we label every 4th day from the first observation
# through the end of the forecast window.
tick_step = 4
s_date = us_daily.date.iloc[0]
e_date = us_daily.date.max() + timedelta(days=len(forecast[0]))

# date_range yields s_date, s_date + 4 days, ... up to and including e_date.
tick_dates = pd.date_range(start=s_date, end=e_date, freq=f'{tick_step}D')

# position[i] is the integer x coordinate (days since s_date) of label dates[i].
position = list(range(0, tick_step * len(tick_dates), tick_step))
dates = [f'{d.month}-{d.day}' for d in tick_dates]

In [None]:
# Plot observed daily cases followed by the forecast and its lower/upper
# bounds. The x axis is an integer day offset; `position` and `dates` (built in
# the previous cell) supply the tick locations and labels.
n_obs = us_daily.shape[0]
horizon = len(forecast[0])
x_obs = np.arange(n_obs)
x_proj = np.arange(n_obs, n_obs + horizon)

# forecast[2] holds one (lower, upper) pair per forecast step.
bounds = np.asarray(forecast[2], dtype=float)
lower, upper = bounds[:, 0], bounds[:, 1]

plt.figure(figsize=(12,8))
plt.title('Daily Case Projections in US', fontname='Arial', size=24)
plt.xticks(position, dates, rotation=45)
plt.xlabel('Date', fontname='Arial', size=16)
plt.ylabel('Number of Cases', fontname='Arial', size=16)
plt.plot(x_obs, us_daily['daily_diff'].values, color='#003366', linewidth=3, label='US Daily Cases')
plt.plot(x_proj, forecast[0], color='#7D98C1', linewidth=3, linestyle='-', label='Projected US Daily Cases')

# Draw the two bounds as separate lines so only one legend entry is created
# (passing the 2-column array with a single label duplicates the entry).
plt.plot(x_proj, lower, color='#C01933', linestyle=':', label='Projected Upper/Low ')
plt.plot(x_proj, upper, color='#C01933', linestyle=':', label='_nolegend_')

# Shade the interval with a single call. Filling one x value at a time only
# produces zero-width polygons rather than a continuous band.
plt.fill_between(x_proj, lower, upper, color='#D0D0CE', alpha=0.5)
plt.legend()

# NOTE(review): the trailing ';' presumably suppresses the echoed return values
# of the plt.* calls above (ast_node_interactivity is set to "all").
dummy=[];

In [None]:
# Build a table of cumulative case projections: the first row is the last
# observed cumulative count, and each following row adds one forecast day.

def _short_date(day):
    """Format a date as non-padded M/D/YY (e.g. 4/7/20).

    Built from the date's fields rather than strftime('%-m/%-d/%y'), because
    the '%-m' / '%-d' directives are a platform-specific extension.
    """
    return f"{day.month}/{day.day}/{day.strftime('%y')}"


# Anchor on the last date actually present in the data (us_df is indexed by
# date) rather than on "yesterday", so the labels stay correct when the
# upstream data lags behind the current date.
st_date = us_df.index[-1]
last_count = us_df['counts'].iloc[-1]

cols = ['date', 'counts', 'Lower', 'Med', 'Upper']
predictions = [[_short_date(st_date), last_count, last_count, last_count, last_count]]

# One row per forecast step. 'counts' is left blank because there is no
# observation yet. The loop variable is deliberately not named `date`, which
# would shadow the imported datetime.date class.
for step, (pred, (lower, upper)) in enumerate(zip(forecast[0], forecast[2]), start=1):
    day_label = _short_date(st_date + timedelta(days=step))
    predictions.append([day_label, '', lower, pred, upper])

projections = pd.DataFrame(predictions, columns=cols).set_index('date')
projections.index.name = None

# Convert the daily values into running totals on top of the last observed
# count. NOTE(review): cumulatively summing per-day bounds is a simple
# envelope, not a statistical interval for the cumulative total -- confirm
# this is the intended interpretation.
for daily_col, cumulative_col in (('Lower', 'Lower CI'), ('Med', 'Medium'), ('Upper', 'Upper CI')):
    projections[cumulative_col] = projections[daily_col].cumsum()

projections = projections.drop(columns=['Lower', 'Med', 'Upper'])
projections

#projections.to_csv('test.csv')