In [58]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import warnings
import prophet
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SequentialFeatureSelector
# Install a blanket "ignore" filter: no category, message or module is given,
# so every warning raised after this cell runs is hidden.
# NOTE(review): this also hides library deprecation/future-behaviour warnings;
# consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [64]:
# Combined multivariate table (all regions in one file).
data = pd.read_csv('../data/regional_multivariate_data.csv')

# One multivariate CSV per region, read in the same order as the names on the
# left-hand side of the unpacking below.
enc, esc, midatl, mount, ne, pac, satl, wnc, wsc = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/regional_datasets/multivariate/enc_data_multi.csv',
        '../data/regional_datasets/multivariate/esc_data_multi.csv',
        '../data/regional_datasets/multivariate/mid_atlantic_data_multi.csv',
        '../data/regional_datasets/multivariate/mountain_data_multi.csv',
        '../data/regional_datasets/multivariate/new_england_data_multi.csv',
        '../data/regional_datasets/multivariate/pacific_data_multi.csv',
        '../data/regional_datasets/multivariate/south_atlantic_data_multi.csv',
        '../data/regional_datasets/multivariate/wnc_data_multi.csv',
        '../data/regional_datasets/multivariate/wsc_data_multi.csv',
    )
]

In [41]:
# Regional frames in the order every later loop iterates over them.
sets = [
    enc,     # East North Central
    esc,     # East South Central
    midatl,  # Mid-Atlantic
    mount,   # Mountain
    ne,      # New England
    pac,     # Pacific
    satl,    # South Atlantic
    wnc,     # West North Central
    wsc,     # West South Central
]

In [57]:
# Forward sequential feature selection, run separately on each regional frame.
# Result: forwarddict maps region name -> pandas Index of the selected columns.
forwarddict = dict()
for temp in sets:
    # Keep only rows that have no missing value in any column.
    tempdata = temp.dropna()
    # First distinct value of the region column (assumes one region per frame).
    region = tempdata.region.unique()[0]
    print(region)
    # Candidate predictors: everything except the identifiers and the target.
    xtemp = tempdata.drop(columns = ['region', 'date', 'new_confirmed'])
    feature_names = xtemp.columns
    ytemp = tempdata.new_confirmed
    model = LinearRegression()
    # `tol` is only honoured when n_features_to_select is "auto". State it
    # explicitly instead of relying on the library default: on scikit-learn
    # 1.1/1.2 the default was "warn", which ignores `tol` and keeps half of the
    # features, and the accompanying warning is silenced in this notebook.
    ffs = SequentialFeatureSelector(
        estimator = model,
        n_features_to_select = 'auto',
        direction = 'forward',
        tol = 0.001,
    ).fit(xtemp, ytemp)
    # Compute the boolean support mask once and reuse the selected names.
    selected_features = feature_names[ffs.get_support()]
    print("Number of Forward Selection Features: ", len(selected_features))
    forwarddict[region] = selected_features

East North Central
Number of Forward Selection Features:  4
East South Central
Number of Forward Selection Features:  6
Mid-Atlantic
Number of Forward Selection Features:  2
Mountain
Number of Forward Selection Features:  3
New England
Number of Forward Selection Features:  3
Pacific
Number of Forward Selection Features:  4
South Atlantic
Number of Forward Selection Features:  4
West North Central
Number of Forward Selection Features:  3
West South Central
Number of Forward Selection Features:  1


In [48]:
# Backward sequential feature selection, run separately on each regional frame.
# Result: backwarddict maps region name -> pandas Index of the retained columns.
backwarddict = dict()
for temp in sets:
    # Keep only rows that have no missing value in any column.
    tempdata = temp.dropna()
    # First distinct value of the region column (assumes one region per frame).
    region = tempdata.region.unique()[0]
    print(region)
    # Candidate predictors: everything except the identifiers and the target.
    xtemp = tempdata.drop(columns = ['region', 'date', 'new_confirmed'])
    feature_names = xtemp.columns
    ytemp = tempdata.new_confirmed
    model = LinearRegression()
    # `tol` is only honoured when n_features_to_select is "auto". State it
    # explicitly instead of relying on the library default: on scikit-learn
    # 1.1/1.2 the default was "warn", which ignores `tol` and keeps half of the
    # features, and the accompanying warning is silenced in this notebook.
    bfs = SequentialFeatureSelector(
        estimator = model,
        n_features_to_select = 'auto',
        direction = 'backward',
        tol = 0.001,
    ).fit(xtemp, ytemp)
    # Compute the boolean support mask once and reuse the retained names.
    retained_features = feature_names[bfs.get_support()]
    print(len(retained_features))
    backwarddict[region] = retained_features

East North Central
29
East South Central
23
Mid-Atlantic
31
Mountain
32
New England
16
Pacific
40
South Atlantic
25
West North Central
36
West South Central
25


In [52]:
# Refit an ordinary linear regression per region on the forward-selected
# columns and print the score obtained on the same rows used for fitting.
for temp in sets:
    data1 = temp.dropna()
    # Region label taken from the first remaining row.
    region = data1['region'].iloc[0]
    xtemp = data1.loc[:, forwarddict[region]]
    ytemp = data1['new_confirmed']
    # fit() returns the estimator itself, so construction and fitting chain.
    model = LinearRegression().fit(xtemp, ytemp)
    print(region)
    # NOTE: in-sample score — xtemp/ytemp are the training rows.
    print(model.score(xtemp, ytemp))

East North Central
0.7999337394969832
East South Central
0.7383243838277345
Mid-Atlantic
0.907508189743158
Mountain
0.7372811042995708
New England
0.6669444917700889
Pacific
0.8151644799301857
South Atlantic
0.6676180623571739
West North Central
0.7321372778065126
West South Central
0.7464990324965946


In [53]:
# Refit an ordinary linear regression per region on the backward-selected
# columns and print the score obtained on the same rows used for fitting.
for temp in sets:
    data1 = temp.dropna()
    # Region label taken from the first remaining row.
    region = data1['region'].iloc[0]
    print(region)
    xtemp = data1.loc[:, backwarddict[region]]
    ytemp = data1['new_confirmed']
    # fit() returns the estimator itself, so construction and fitting chain.
    model = LinearRegression().fit(xtemp, ytemp)
    # NOTE: in-sample score — xtemp/ytemp are the training rows.
    print(model.score(xtemp, ytemp))

East North Central
0.8252650163733761
East South Central
0.7597579225161926
Mid-Atlantic
0.945553030754114
Mountain
0.7136379915008402
New England
0.5693928726319077
Pacific
0.8469047830460603
South Atlantic
0.6447287908386374
West North Central
0.7917780175486865
West South Central
0.8086792133144673
