# TEST OF STATSMODELS VARMAX

In [1]:
%matplotlib inline

In [2]:
# Install Dependencies
import statsmodels.api as sm
import pandas as pd
from patsy import dmatrices
from darts import TimeSeries

In [3]:
# import dataset
cases_deaths_df=pd.read_csv("Potential Data Sources/NYT- Cty Cases Deaths Thru Oct2/us-counties.csv",index_col='date', 
                  parse_dates=['date'])
cases_deaths_df.head()

Unnamed: 0_level_0,county,state,fips,cases,deaths
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-21,Snohomish,Washington,53061.0,1,0
2020-01-22,Snohomish,Washington,53061.0,1,0
2020-01-23,Snohomish,Washington,53061.0,1,0
2020-01-24,Cook,Illinois,17031.0,1,0
2020-01-24,Snohomish,Washington,53061.0,1,0


In [4]:
# ensure each county/ date combination only has one summed value
grouped=cases_deaths_df.groupby(['date', 'state','county','fips']).sum()
grouped=grouped.reset_index()
grouped=grouped.set_index('date')
grouped.head()

Unnamed: 0_level_0,state,county,fips,cases,deaths
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-21,Washington,Snohomish,53061.0,1,0
2020-01-22,Washington,Snohomish,53061.0,1,0
2020-01-23,Washington,Snohomish,53061.0,1,0
2020-01-24,Illinois,Cook,17031.0,1,0
2020-01-24,Washington,Snohomish,53061.0,1,0


In [5]:
# separate cases and deaths into different dataframes and drop the unneccessary columns
dropped_cases= grouped.drop(['state', 'county','deaths'], axis=1)
dropped_deaths=grouped.drop(['state', 'county','cases'], axis=1)
dropped_cases.head()


Unnamed: 0_level_0,fips,cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-21,53061.0,1
2020-01-22,53061.0,1
2020-01-23,53061.0,1
2020-01-24,17031.0,1
2020-01-24,53061.0,1


In [6]:
# pivot so only one instance of each day in the index with fips as column names and cases as values
pivoted_cases= dropped_cases.pivot(columns='fips', values='cases')
pivoted_cases.fillna(0,inplace=True)
pivoted_cases.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# pivot so only one instance of each day in the index with fips as column names and deaths as values
pivoted_deaths= dropped_deaths.pivot(columns='fips', values='deaths')
pivoted_deaths.fillna(0,inplace=True)
pivoted_deaths.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
# transform data- expect it to be exponential so use log scale, add 1 to count before taking log so as to not get infinite values.
import numpy as np
transformed_cases=np.log(pivoted_cases+1)
np.isfinite(transformed_cases)

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-21,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2020-01-22,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2020-01-23,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2020-01-24,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2020-01-25,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-09-29,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2020-09-30,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2020-10-01,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2020-10-02,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True


In [28]:
# fill all n/a values with 0
transformed_cases.fillna(0,inplace=True)
transformed_cases.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 257 entries, 2020-01-21 to 2020-10-03
Columns: 3210 entries, 1001.0 to 78030.0
dtypes: float64(3210)
memory usage: 6.3 MB


In [20]:
# transform data- expect it to be exponential so use log scale, add 1 to count before taking log so as to not get infinite values.
transformed_deaths=np.log(pivoted_deaths+1)
transformed_deaths.fillna(0,inplace=True)
transformed_deaths.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
# fill all n/a values with 0
transformed_deaths.fillna(0,inplace=True)
transformed_deaths.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 257 entries, 2020-01-21 to 2020-10-03
Columns: 3210 entries, 1001.0 to 78030.0
dtypes: float64(3210)
memory usage: 6.3 MB


In [51]:
# split test data for cases into september onward
import datetime 
Test_cases=transformed_cases.loc[datetime.date(year=2020,month=9,day=1):datetime.date(year=2020,month=10,day=3)]
Test_cases.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-09-01,7.282074,8.42705,6.639876,6.326149,7.173192,6.327937,6.770789,7.87093,6.948897,6.035481,...,4.624973,6.173786,6.489205,4.465908,4.584967,5.32301,5.762051,6.049733,3.637586,6.527958
2020-09-02,7.281386,8.430327,6.645091,6.33328,7.177019,6.331502,6.771936,7.876259,6.952729,6.042633,...,4.644391,6.180017,6.507278,4.477337,4.59512,5.370638,5.768321,6.049733,3.637586,6.529419
2020-09-03,7.290975,8.440096,6.648985,6.336826,7.198184,6.335054,6.774224,7.893199,6.953684,6.061457,...,4.644391,6.184149,6.517671,4.477337,4.59512,5.379897,5.774552,6.054439,3.713572,6.530878
2020-09-04,7.297091,8.445697,6.65544,6.347389,7.216709,6.335054,6.782192,7.918629,6.961296,6.09357,...,4.65396,6.190315,6.520621,4.442651,4.59512,5.379897,5.774552,6.054439,3.713572,6.530878
2020-09-05,7.308543,8.452548,6.65544,6.357842,7.227662,6.340359,6.787845,7.948385,6.966024,6.109248,...,4.672829,6.198479,6.529419,4.442651,4.59512,5.398163,5.774552,6.061457,3.78419,6.546785


In [52]:
# split test data for deaths into september onward
Test_deaths=transformed_deaths.loc[datetime.date(year=2020,month=9,day=1):datetime.date(year=2020,month=10,day=3)]
Test_deaths.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-09-01,3.218876,3.7612,2.079442,2.197225,2.484907,2.70805,3.637586,3.663562,3.713572,2.564949,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.791759,0.0,2.397895
2020-09-02,3.218876,3.7612,2.079442,2.197225,2.484907,2.70805,3.637586,3.663562,3.713572,2.564949,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.791759,0.0,2.397895
2020-09-03,3.218876,3.806662,2.079442,2.197225,2.484907,2.70805,3.637586,3.663562,3.713572,2.564949,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.791759,0.693147,2.397895
2020-09-04,3.218876,3.850148,2.079442,2.302585,2.564949,2.70805,3.663562,3.663562,3.73767,2.564949,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.791759,0.693147,2.397895
2020-09-05,3.218876,3.850148,2.079442,2.302585,2.564949,2.70805,3.663562,3.688879,3.73767,2.564949,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.791759,0.693147,2.397895


In [53]:
# split train data for cases into end of august
Train_cases= transformed_cases.loc[datetime.date(year=2020,month=1,day=1):datetime.date(year=2020,month=8,day=31)]
Train_cases.tail(20)

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-08-12,7.124478,8.236685,6.472346,6.142037,6.875232,6.216606,6.670766,7.568379,6.823286,5.739793,...,4.369448,5.587249,6.095825,4.276666,4.454347,4.983607,5.556828,5.723585,3.044522,5.752573
2020-08-13,7.131699,8.246434,6.480045,6.152733,6.88551,6.2186,6.673298,7.5807,6.824374,5.771441,...,4.382027,5.63479,6.100319,4.204693,4.442651,4.927254,5.583496,5.768321,3.135494,5.897154
2020-08-14,7.133296,8.258681,6.487684,6.169611,6.897705,6.22059,6.680855,7.58883,6.827629,5.78996,...,4.394449,5.690359,6.139885,4.219508,4.442651,4.962845,5.590987,5.78996,3.178054,5.955837
2020-08-15,7.141245,8.271293,6.498282,6.182085,6.904751,6.222576,6.694562,7.610853,6.830874,5.808142,...,4.394449,5.717028,6.171701,4.248495,4.454347,5.01728,5.602119,5.78996,3.178054,5.955837
2020-08-16,7.149917,8.281218,6.510258,6.182085,6.913737,6.224558,6.6995,7.619724,6.833032,5.820083,...,4.406719,5.752573,6.184149,4.26268,4.488636,5.030438,5.609472,5.786897,3.178054,5.976351
2020-08-17,7.150701,8.284252,6.511745,6.192362,6.916715,6.224558,6.703188,7.628518,6.834109,5.831882,...,4.406719,5.799093,6.222576,4.304065,4.49981,5.062595,5.624018,5.786897,3.178054,6.023448
2020-08-18,7.163947,8.288534,6.514713,6.196444,6.942157,6.240276,6.704414,7.653495,6.84375,5.852202,...,4.49981,5.83773,6.270988,4.343805,4.51086,5.105945,5.631212,5.817111,3.178054,6.084499
2020-08-19,7.16935,8.294799,6.527958,6.222576,6.956545,6.274762,6.704414,7.677864,6.848005,5.857933,...,4.51086,5.846439,6.297109,4.343805,4.51086,5.123964,5.641907,5.840642,3.258097,6.133398
2020-08-20,7.16935,8.303009,6.54103,6.230481,6.982863,6.285998,6.705639,7.703008,6.856462,5.877736,...,4.51086,5.863631,6.313548,4.343805,4.51086,5.147494,5.645447,5.886104,3.332205,6.182085
2020-08-21,7.180831,8.312871,6.54535,6.244167,6.999422,6.287859,6.712956,7.717796,6.860664,5.891644,...,4.532599,5.891644,6.361302,4.356709,4.51086,5.17615,5.652489,5.942799,3.496508,6.308098


In [76]:
# check for columns that are constant values throughout the train cases data and remove them (the model cant process) 
# repeat with test cases but still use train cases data to define the columns to drop/keep
train_cases_no_constants = Train_cases.loc[:,Train_cases.apply(pd.Series.nunique) != 1]
train_cases_fips_constant= Train_cases.loc[:,Train_cases.apply(pd.Series.nunique) == 1]
test_cases_no_constants = Test_cases.loc[:,Train_cases.apply(pd.Series.nunique) != 1]
test_cases_fips_constant= Test_cases.loc[:,Train_cases.apply(pd.Series.nunique) == 1]
train_cases_no_constants.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [77]:
test_cases_no_constants.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-09-01,7.282074,8.42705,6.639876,6.326149,7.173192,6.327937,6.770789,7.87093,6.948897,6.035481,...,4.624973,6.173786,6.489205,4.465908,4.584967,5.32301,5.762051,6.049733,3.637586,6.527958
2020-09-02,7.281386,8.430327,6.645091,6.33328,7.177019,6.331502,6.771936,7.876259,6.952729,6.042633,...,4.644391,6.180017,6.507278,4.477337,4.59512,5.370638,5.768321,6.049733,3.637586,6.529419
2020-09-03,7.290975,8.440096,6.648985,6.336826,7.198184,6.335054,6.774224,7.893199,6.953684,6.061457,...,4.644391,6.184149,6.517671,4.477337,4.59512,5.379897,5.774552,6.054439,3.713572,6.530878
2020-09-04,7.297091,8.445697,6.65544,6.347389,7.216709,6.335054,6.782192,7.918629,6.961296,6.09357,...,4.65396,6.190315,6.520621,4.442651,4.59512,5.379897,5.774552,6.054439,3.713572,6.530878
2020-09-05,7.308543,8.452548,6.65544,6.357842,7.227662,6.340359,6.787845,7.948385,6.966024,6.109248,...,4.672829,6.198479,6.529419,4.442651,4.59512,5.398163,5.774552,6.061457,3.78419,6.546785


In [78]:
train_cases_fips_constant.head()

fips,8061.0,30011.0,31009.0,31075.0,35011.0,48033.0,49009.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [79]:
test_cases_fips_constant.head()

fips,8061.0,30011.0,31009.0,31075.0,35011.0,48033.0,49009.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-09-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-09-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-09-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-09-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-09-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [80]:
# split train data for deaths into end of august
Train_deaths= transformed_deaths.loc[datetime.date(year=2020,month=1,day=1):datetime.date(year=2020,month=8,day=31)]
Train_deaths.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,72141.0,72143.0,72145.0,72147.0,72149.0,72151.0,72153.0,78010.0,78020.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [81]:
# check for columns that are constant values throughout the train deaths data and remove them (the model cant process)
train_deaths_no_constants = Train_deaths.loc[:,Train_deaths.apply(pd.Series.nunique) != 1]
train_deaths_fips_constant= Train_deaths.loc[:,Train_deaths.apply(pd.Series.nunique) == 1]
test_deaths_no_constants = Test_deaths.loc[:,Train_deaths.apply(pd.Series.nunique) != 1]
test_deaths_fips_constant= Test_deaths.loc[:,Train_deaths.apply(pd.Series.nunique) == 1]
train_deaths_no_constants.head()

fips,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,56031.0,56033.0,56035.0,56037.0,56039.0,56041.0,56043.0,69110.0,78010.0,78030.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [83]:
train_deaths_fips_constant.columns

Float64Index([ 2013.0,  2016.0,  2060.0,  2068.0,  2070.0,  2100.0,  2130.0,
               2150.0,  2158.0,  2164.0,
              ...
              72137.0, 72139.0, 72141.0, 72143.0, 72145.0, 72147.0, 72149.0,
              72151.0, 72153.0, 78020.0],
             dtype='float64', name='fips', length=701)

In [84]:
test_deaths_fips_constant.columns

Float64Index([ 2013.0,  2016.0,  2060.0,  2068.0,  2070.0,  2100.0,  2130.0,
               2150.0,  2158.0,  2164.0,
              ...
              72137.0, 72139.0, 72141.0, 72143.0, 72145.0, 72147.0, 72149.0,
              72151.0, 72153.0, 78020.0],
             dtype='float64', name='fips', length=701)

In [91]:
from statsmodels.tsa.api import VAR
cases_model=VAR(train_cases_no_constants)
results = cases_model.fit()



In [92]:
results.summary()

LinAlgError: 207-th leading minor of the array is not positive definite