# Read In

In [None]:
# Load the jurisdiction-level vaccination CSV into `df`.
df = pd.read_csv(
    filepath_or_buffer='data/COVID-19_Vaccinations_in_the_United_States_Jurisdiction.csv'
)

In [None]:
# Preview the rows whose Location is 'NJ', restricted to the distribution and
# administration columns.
df.loc[df['Location'] == 'NJ',
       ['Date', 'Distributed', 'Dist_Per_100K',
        'Administered', 'Admin_Per_100K', 'Administered_Dose1_Pop_Pct']]

In [None]:
# Load the state-level cases-and-deaths CSV into `fatal_df`.
fatal_df = pd.read_csv(
    filepath_or_buffer='data/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv'
)

In [None]:
# Run the 'submission_date' column through pd.to_datetime so later cells can
# sort on it and compare it against date strings.
fatal_df['submission_date'] = fatal_df['submission_date'].pipe(pd.to_datetime)

In [None]:
# Show the column labels of fatal_df.
fatal_df.columns

In [None]:
# Preview the 'NJ' rows ordered by submission date, showing only the date and
# the new_case column.
(
    fatal_df
    .loc[fatal_df['state'] == 'NJ', ['submission_date', 'new_case']]
    .sort_values('submission_date')
)

In [5]:
# Postal abbreviation -> full name for the 50 states, in alphabetical order of
# the abbreviation.
# Codes left out of the mapping (they were commented out in the listing):
#   AS American Samoa, DC District of Columbia, GU Guam,
#   MP Northern Mariana Islands, NA National, PR Puerto Rico, VI Virgin Islands
states = {
    'AK': 'Alaska',
    'AL': 'Alabama',
    'AR': 'Arkansas',
    'AZ': 'Arizona',
    'CA': 'California',
    'CO': 'Colorado',
    'CT': 'Connecticut',
    'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia',
    'HI': 'Hawaii',
    'IA': 'Iowa',
    'ID': 'Idaho',
    'IL': 'Illinois',
    'IN': 'Indiana',
    'KS': 'Kansas',
    'KY': 'Kentucky',
    'LA': 'Louisiana',
    'MA': 'Massachusetts',
    'MD': 'Maryland',
    'ME': 'Maine',
    'MI': 'Michigan',
    'MN': 'Minnesota',
    'MO': 'Missouri',
    'MS': 'Mississippi',
    'MT': 'Montana',
    'NC': 'North Carolina',
    'ND': 'North Dakota',
    'NE': 'Nebraska',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
    'NM': 'New Mexico',
    'NV': 'Nevada',
    'NY': 'New York',
    'OH': 'Ohio',
    'OK': 'Oklahoma',
    'OR': 'Oregon',
    'PA': 'Pennsylvania',
    'RI': 'Rhode Island',
    'SC': 'South Carolina',
    'SD': 'South Dakota',
    'TN': 'Tennessee',
    'TX': 'Texas',
    'UT': 'Utah',
    'VA': 'Virginia',
    'VT': 'Vermont',
    'WA': 'Washington',
    'WI': 'Wisconsin',
    'WV': 'West Virginia',
    'WY': 'Wyoming',
}

# ARIMA

In [None]:
# Build the New Jersey modelling frame: vaccination columns from `df` joined
# with the new_death column from `fatal_df` on a shared Date index.

# .copy() yields an independent frame, so the column assignment below cannot
# trigger SettingWithCopyWarning or write into a temporary slice of `df`.
nj_test_df = df.loc[df['Location'] == 'NJ',
                    ['Date', 'Distributed', 'Dist_Per_100K',
                     'Administered', 'Admin_Per_100K', 'Administered_Dose1_Pop_Pct']].copy()
nj_test_df['Date'] = pd.to_datetime(nj_test_df['Date'])
nj_test_df = nj_test_df.set_index('Date')

nj_fatal_test_df = (
    fatal_df
    .loc[fatal_df['state'] == 'NJ', ['submission_date', 'new_death']]
    .sort_values(by='submission_date')
    .rename(columns={'submission_date': 'Date'})
)

# Exclude the 2021-07-05 record. A boolean filter is used instead of
# `.index[0]` so the cell does not raise IndexError when that date is absent
# (and it removes every matching row rather than only the first).
# NOTE(review): the reason this date is excluded is not recorded here - confirm.
nj_fatal_test_df = nj_fatal_test_df[nj_fatal_test_df['Date'] != '2021-07-05']

# Negative counts and the exact value 190 are overwritten with 3.
# NOTE(review): presumably these are reporting corrections / an outlier being
# replaced with a typical value - confirm the rationale for 190 and 3.
reported_deaths = nj_fatal_test_df['new_death']
nj_fatal_test_df = (
    nj_fatal_test_df
    .assign(new_death=np.where((reported_deaths < 0) | (reported_deaths == 190),
                               3,
                               reported_deaths))
    .set_index('Date')
)

# Outer join keeps every date present in either frame; gaps become 0.
test_df = pd.concat([nj_test_df, nj_fatal_test_df], join='outer', axis=1).fillna(0)

In [None]:
# Summary statistics for the columns of test_df.
test_df.describe()

In [None]:
# Additive seasonal decomposition of the new_death series, plotted directly.
# The trailing semicolon suppresses the figure repr in the cell output.
seasonal_decompose(x=test_df['new_death'], model='additive').plot();

In [None]:
# Hold out the final 10 rows of test_df; everything before them is used for
# fitting.
test_train = test_df.iloc[:-10]
test_test = test_df.iloc[-10:]

In [None]:
# Candidate (p, d, q) orders: every combination of 0, 1 and 2.
p = d = q = range(3)
pdq = list(itertools.product(p, d, q))
# Seasonal candidates reuse the same combinations with a period of 7.
seasonal_pdq = [(*combo, 7) for combo in pdq]

In [None]:
# Grid search: fit a SARIMAX model on test_train['new_death'] for every
# (order, seasonal_order) pair and record its AIC.
#
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row is quadratic.
parameter_records = []

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = SARIMAX(test_train['new_death'],
                          exog=test_train[['Admin_Per_100K', 'Administered_Dose1_Pop_Pct']],
                          order=param,
                          seasonal_order=param_seasonal,
                          enforce_stationarity=False,
                          enforce_invertibility=False)
            results = mod.fit()
        except Exception as exc:
            # Only model/fit failures are skipped; KeyboardInterrupt is no
            # longer swallowed, and the reason for the failure is reported.
            print(f'Oops! {param}, {param_seasonal} failed: {exc!r}')
            continue
        parameter_records.append({'order': param,
                                  'seasonal_order': param_seasonal,
                                  'aic': results.aic})
        print(f'Tested {param}, {param_seasonal}.')

# Passing `columns` keeps the expected schema even when every fit failed.
parameter_results = pd.DataFrame(parameter_records,
                                 columns=['order', 'seasonal_order', 'aic'])

In [None]:
# To refresh the cached grid-search results after re-running the search:
# with open("pickle/parameter_results.pickle", "wb") as pickle_out:
#     pickle.dump(parameter_results, pickle_out)

# Load the cached grid-search results, replacing any `parameter_results`
# already in the kernel. The context manager closes the file even if
# unpickling raises.
# NOTE: pickle.load can execute arbitrary code - only load files you created.
with open("pickle/parameter_results.pickle", 'rb') as infile:
    parameter_results = pickle.load(infile)

In [None]:
# One-row frame holding the parameter combination with the smallest AIC.
lowest_aic = parameter_results.sort_values(by='aic').iloc[:1]

In [None]:
# Refit SARIMAX on the training rows using the order / seasonal order taken
# from the lowest-AIC row.
sari = SARIMAX(
    endog=test_train['new_death'],
    exog=test_train[['Admin_Per_100K', 'Administered_Dose1_Pop_Pct']],
    order=lowest_aic['order'].iloc[0],
    seasonal_order=lowest_aic['seasonal_order'].iloc[0],
    enforce_invertibility=False,
    enforce_stationarity=False,
).fit()

In [None]:
# Non-dynamic predictions from the fitted daily model.
# NOTE(review): no start/end is given, so this presumably covers only the
# training sample; confirm whether the exog passed here is actually used.
sari_predict = sari.predict(
    exog=test_df[['Admin_Per_100K', 'Administered_Dose1_Pop_Pct']],
    dynamic=False,
)

In [None]:
# Overlay the model predictions on the observed new_death series.
ax = test_df['new_death'].plot(figsize=(12, 6), legend=True)
sari_predict.plot(ax=ax, legend=True)
ax.autoscale(tight=True, axis='x');

In [None]:
# Predictions that switch to dynamic mode at offset len(test_train) - 80,
# with the exogenous columns scaled by 0.02.
# NOTE(review): the purpose of the 0.02 scaling is not stated, and no start/end
# is given - confirm that the scaled exog actually influences this prediction.
sari_test_predict = sari.predict(
    exog=test_df[['Admin_Per_100K', 'Administered_Dose1_Pop_Pct']] * .02,
    dynamic=len(test_train) - 80,
)

In [None]:
# Observed new_death series with both prediction series drawn on the same axes.
ax = test_df['new_death'].plot(figsize=(12, 6), legend=True)
sari_predict.plot(ax=ax, legend=True)
sari_test_predict.plot(ax=ax, legend=True)
ax.autoscale(tight=True, axis='x');

In [None]:
# Weekly means of every column in test_df.
test_df_week = test_df.resample(rule='W').mean()

In [None]:
# Candidate (p, d, q) orders: every combination of 0, 1 and 2.
p = d = q = range(3)
pdq = list(itertools.product(p, d, q))
# Seasonal candidates for the weekly series use a period of 4.
seasonal_pdq = [(*combo, 4) for combo in pdq]

In [None]:
# Grid search on the weekly series: fit a SARIMAX model on
# test_df_week['new_death'] for every (order, seasonal_order) pair and record
# its AIC.
#
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row is quadratic.
arima_parameter_records = []

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = SARIMAX(test_df_week['new_death'],
                          exog=test_df_week[['Admin_Per_100K', 'Administered_Dose1_Pop_Pct']],
                          order=param,
                          seasonal_order=param_seasonal,
                          enforce_stationarity=False,
                          enforce_invertibility=False)
            results = mod.fit()
        except Exception as exc:
            # Only model/fit failures are skipped; KeyboardInterrupt is no
            # longer swallowed, and the reason for the failure is reported.
            print(f'Oops! {param}, {param_seasonal} failed: {exc!r}')
            continue
        arima_parameter_records.append({'order': param,
                                        'seasonal_order': param_seasonal,
                                        'aic': results.aic})
        print(f'Tested {param}, {param_seasonal}.')

# Passing `columns` keeps the expected schema even when every fit failed.
arima_parameter_results = pd.DataFrame(arima_parameter_records,
                                       columns=['order', 'seasonal_order', 'aic'])

In [None]:
# One-row frame holding the weekly parameter combination with the smallest AIC.
week_lowest_aic = arima_parameter_results.sort_values(by='aic').iloc[:1]

In [None]:
# Refit SARIMAX on the full weekly series using the order / seasonal order
# taken from the lowest-AIC row of the weekly search.
sari_week = SARIMAX(
    endog=test_df_week['new_death'],
    exog=test_df_week[['Admin_Per_100K', 'Administered_Dose1_Pop_Pct']],
    order=week_lowest_aic['order'].iloc[0],
    seasonal_order=week_lowest_aic['seasonal_order'].iloc[0],
    enforce_invertibility=False,
    enforce_stationarity=False,
).fit()

In [None]:
# Non-dynamic predictions from the weekly model.
sari_week_predict = sari_week.predict(
    exog=test_df_week[['Admin_Per_100K', 'Administered_Dose1_Pop_Pct']],
    dynamic=False,
)
# Same model, switching to dynamic prediction at offset len(test_df_week) - 4.
sari_week_test_predict = sari_week.predict(
    exog=test_df_week[['Admin_Per_100K', 'Administered_Dose1_Pop_Pct']],
    dynamic=len(test_df_week) - 4,
)

# Observed weekly new_death with both prediction series on the same axes.
ax = test_df_week['new_death'].plot(figsize=(12, 6), legend=True)
sari_week_predict.plot(ax=ax, legend=True)
sari_week_test_predict.plot(ax=ax, legend=True)
ax.autoscale(tight=True, axis='x');