# Experiment batch 5-24

Load an initial set of candidate bin layouts (the ones considered most likely to work well), then try out a range of different models for performing regression on new COVID-19 cases.

# Imports

In [11]:
from src.CovidDataClass import CovidDataClass as cd
from src.cfg import *
import os
import numpy as np
import pandas as pd
import numpy as np

# sklearn modules
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, BayesianRidge, ARDRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, explained_variance_score

# Initialize data handler

In [12]:
# Shared data handler used by every experiment below.
# NOTE(review): presumably these flags make the handler read already-downloaded,
# already-cleaned data instead of rebuilding it; confirm in CovidDataClass.
data = cd(
  load_local=True,
  load_clean=True,
)

In [13]:
# TODO: consider changing bin layout to delta days:
# e.g. [(0, 7), (8, 999)] would turn to [7, 999]

# Fixed seed for the stochastic estimators (decision tree / random forests) so
# that re-running this cell fits the same models.
# NOTE(review): any shuffling done inside run_batch (e.g. fold assignment) is
# not controlled by this value; confirm in CovidDataClass.
RANDOM_STATE = 0

# Candidate bin layouts, keyed by layout id. Each layout is a list of
# (start, end) ranges; they appear to be inclusive day ranges, with 999 acting
# as an open-ended upper bound (TODO confirm against CovidDataClass).
# Consecutive ranges must not overlap: each one starts one past the previous end.
bins_dict = {
  1: [(0, 7), (8, 999)],
  2: [(0, 14), (15, 999)],
  3: [(0, 21), (22, 999)],
  4: [(0, 28), (29, 999)],
  5: [(0, 35), (36, 999)],
  6: [(0, 7), (8, 14), (15, 999)],
  7: [(0, 7), (8, 21), (22, 999)],
  8: [(0, 7), (8, 35), (36, 999)],
  9: [(0, 14), (15, 21), (22, 999)],
  10: [(0, 14), (15, 35), (36, 999)],
  # Last bin previously started at 35, which put day 35 in two bins at once.
  11: [(0, 21), (22, 35), (36, 999)],
}

# Guard against overlapping or gapped layouts before starting the long batch run.
for layout_id, layout in bins_dict.items():
  for (_, prev_end), (next_start, _) in zip(layout, layout[1:]):
    assert next_start == prev_end + 1, (
      f"bin layout {layout_id} is not contiguous: {layout}"
    )

# Regressors to evaluate, keyed by the display name passed to run_batch.
models_dict = {
  'OLS': LinearRegression(),
  'Ridge': Ridge(),
  'Lasso': Lasso(),
  'ElasticNet': ElasticNet(),
  'DecisionTree': DecisionTreeRegressor(random_state=RANDOM_STATE),
  'RandomForest (n=3)': RandomForestRegressor(n_estimators=3, random_state=RANDOM_STATE),
  'RandomForest (n=10)': RandomForestRegressor(n_estimators=10, random_state=RANDOM_STATE),
  'RandomForest (n=20)': RandomForestRegressor(n_estimators=20, random_state=RANDOM_STATE),
  'Bayesian Ridge': BayesianRidge(),
  'ARDRegression': ARDRegression(),
}

# Scoring functions reported for each fold, keyed by the label shown in the log.
metrics_dict = {
  'R^2': r2_score,
  'MSE': mean_squared_error,
  'Explained_var': explained_variance_score,
}

# Run every (bin layout, model) combination with K-fold cross-validation and
# write the results to text and JSON files under RESULTS_DIR.
# os.path.join keeps the paths correct whether or not RESULTS_DIR ends with a
# path separator.
results = data.run_batch(
  bins_dict=bins_dict,
  models_dict=models_dict,
  metrics_dict=metrics_dict,
  K=10,
  verbose=True,
  save_output=True,
  output_json=True,
  filename=os.path.join(RESULTS_DIR, "batch_524.txt"),
  json_file=os.path.join(RESULTS_DIR, "batch_524.json"),
)

bins: [(0, 7), (8, 999)]
csv saved
data shaped
bins: [(0, 7), (8, 999)]
time elapsed: 42.21660041809082
running model: OLS
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.0865798892788715', 'MSE: 2321.3350459181124', 'Explained_var: 0.08663745106443521']
fold: 1, scores: ['R^2: 0.1268254582126671', 'MSE: 1685.9813584503913', 'Explained_var: 0.12682587166605686']
fold: 2, scores: ['R^2: 0.08952769471014643', 'MSE: 2435.950970928047', 'Explained_var: 0.08961904160066392']
fold: 3, scores: ['R^2: 0.06511956263294338', 'MSE: 3317.5874451872387', 'Explained_var: 0.06514454058736074']
fold: 4, scores: ['R^2: -9.245532737531791e+19', 'MSE: 2.4816050617176282e+23', 'Explained_var: -9.220444738541247e+19']
fold: 5, scores: ['R^2: 0.0797585566098229', 'MSE: 2526.9166618895606', 'Explained_var: 0.07976497959889006']
fold: 6, scores: ['R^2: 0.09762871871890944', 'MSE: 2230.7806673185355', 'Explained_var: 0.09762874304112945']
fold: 7, scores: ['R^

fold: 7, scores: ['R^2: 0.1024749980647095', 'MSE: 2770.751263198413', 'Explained_var: 0.1024809908707991']
fold: 8, scores: ['R^2: 0.07797417999502554', 'MSE: 4246.151562325194', 'Explained_var: 0.07798902225981397']
fold: 9, scores: ['R^2: 0.14474366286201912', 'MSE: 2101.6630727363076', 'Explained_var: 0.1447452396278679']
running model: RandomForest (n=20)
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.09390222408963489', 'MSE: 3369.442032611429', 'Explained_var: 0.09390311269701679']
fold: 1, scores: ['R^2: 0.13985861703773095', 'MSE: 2212.4320437453475', 'Explained_var: 0.13986121932395779']
fold: 2, scores: ['R^2: 0.17277557836411295', 'MSE: 1605.7477703688048', 'Explained_var: 0.1728222208364225']
fold: 3, scores: ['R^2: 0.104846848737847', 'MSE: 2886.2080043982874', 'Explained_var: 0.10485897568729607']
fold: 4, scores: ['R^2: 0.1476490489266008', 'MSE: 1833.1718090934887', 'Explained_var: 0.14791534241354387']
fold: 5, score

fold: 4, scores: ['R^2: 0.08335253238175044', 'MSE: 2012.1928904385554', 'Explained_var: 0.08341453601686155']
fold: 5, scores: ['R^2: 0.055916956427000675', 'MSE: 2891.4217205825234', 'Explained_var: 0.05595206793075358']
fold: 6, scores: ['R^2: 0.060107794326895836', 'MSE: 2940.518985894214', 'Explained_var: 0.06058254773946237']
fold: 7, scores: ['R^2: 0.09974288569741085', 'MSE: 1462.1429828759824', 'Explained_var: 0.10125437139449289']
fold: 8, scores: ['R^2: 0.05201350473882682', 'MSE: 3393.900028942133', 'Explained_var: 0.05216020538629007']
fold: 9, scores: ['R^2: 0.062115172415951436', 'MSE: 2611.1493462858566', 'Explained_var: 0.06215145918658593']
running model: DecisionTree
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.10595532568152155', 'MSE: 2941.139934856417', 'Explained_var: 0.10597202005669526']
fold: 1, scores: ['R^2: 0.12814609729257276', 'MSE: 2643.7121805287316', 'Explained_var: 0.12814723573579956']
fold: 2, sc

fold: 2, scores: ['R^2: 0.09528842002749338', 'MSE: 2104.673913403776', 'Explained_var: 0.09542651665025759']
fold: 3, scores: ['R^2: 0.08219491073432195', 'MSE: 2982.929516893533', 'Explained_var: 0.08219496847123342']
fold: 4, scores: ['R^2: 0.1197330673423943', 'MSE: 1863.3227029983086', 'Explained_var: 0.1197350867545337']
fold: 5, scores: ['R^2: 0.08829508322538138', 'MSE: 2957.4246046475914', 'Explained_var: 0.08833552358266517']
fold: 6, scores: ['R^2: 0.06419473124875053', 'MSE: 3852.5492983760446', 'Explained_var: 0.06467503403858899']
fold: 7, scores: ['R^2: 0.09918347803825434', 'MSE: 2615.7782001624223', 'Explained_var: 0.09918601017251694']
fold: 8, scores: ['R^2: 0.1256538252628434', 'MSE: 1775.3974234549503', 'Explained_var: 0.12585933397481464']
fold: 9, scores: ['R^2: 0.1377778129242715', 'MSE: 1752.739216271742', 'Explained_var: 0.13794415994923437']
running model: Ridge
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0

fold: 0, scores: ['R^2: 0.16527973477901314', 'MSE: 2061.695610485319', 'Explained_var: 0.1653034590397332']
fold: 1, scores: ['R^2: 0.1728538403233283', 'MSE: 1739.950112679541', 'Explained_var: 0.1728939652437569']
fold: 2, scores: ['R^2: 0.19690678473933232', 'MSE: 1600.157829237101', 'Explained_var: 0.19695985626458523']
fold: 3, scores: ['R^2: 0.1033595529932152', 'MSE: 3361.4292074609175', 'Explained_var: 0.10336018995389207']
fold: 4, scores: ['R^2: 0.16435621996260597', 'MSE: 1903.5292920298557', 'Explained_var: 0.16441547302373127']
fold: 5, scores: ['R^2: 0.18535914471601744', 'MSE: 1859.0238263239532', 'Explained_var: 0.18554221512583235']
fold: 6, scores: ['R^2: 0.11992537671301906', 'MSE: 2545.269673563995', 'Explained_var: 0.12002678126478716']
fold: 7, scores: ['R^2: 0.10594266991448886', 'MSE: 3303.0107717129144', 'Explained_var: 0.10594650662029947']
fold: 8, scores: ['R^2: 0.14846383562554122', 'MSE: 2605.125382527072', 'Explained_var: 0.14849806466572468']
fold: 9, s

fold: 8, scores: ['R^2: 0.10098397515147395', 'MSE: 1786.966492408282', 'Explained_var: 0.10102713366588723']
fold: 9, scores: ['R^2: 0.0659156386921328', 'MSE: 2914.3066538087596', 'Explained_var: 0.06594619363947785']
running model: DecisionTree
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.16521144105220809', 'MSE: 2165.2024066295457', 'Explained_var: 0.1652486868881261']
fold: 1, scores: ['R^2: 0.08202029600241267', 'MSE: 4491.237347496372', 'Explained_var: 0.08203134975032056']
fold: 2, scores: ['R^2: 0.13984370151273884', 'MSE: 2522.6038749891727', 'Explained_var: 0.13997850986281468']
fold: 3, scores: ['R^2: 0.18661639041445677', 'MSE: 1658.1095620562874', 'Explained_var: 0.18664606120392013']
fold: 4, scores: ['R^2: 0.16424837110080992', 'MSE: 2267.308715817873', 'Explained_var: 0.1642514573494166']
fold: 5, scores: ['R^2: 0.18082857938098462', 'MSE: 1816.5297394915804', 'Explained_var: 0.18110551431433541']
fold: 6, scores: 

fold: 5, scores: ['R^2: -5.414119550677298e+16', 'MSE: 1.5743631376398487e+20', 'Explained_var: -5.407823065006861e+16']
fold: 6, scores: ['R^2: -1.5003582885216842e+18', 'MSE: 4.114132261615661e+21', 'Explained_var: -1.5003425978715028e+18']
fold: 7, scores: ['R^2: 0.06247371187485651', 'MSE: 4545.445461786905', 'Explained_var: 0.06257683036135886']
fold: 8, scores: ['R^2: 0.1250410763286195', 'MSE: 1818.2996331014658', 'Explained_var: 0.12526532899329723']
fold: 9, scores: ['R^2: 0.10089050168381353', 'MSE: 2665.039616146766', 'Explained_var: 0.10093923059081111']
running model: Ridge
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.1341288970490423', 'MSE: 1929.2939235438496', 'Explained_var: 0.13414449536509654']
fold: 1, scores: ['R^2: 0.07618481372028374', 'MSE: 3420.240264375214', 'Explained_var: 0.07620880099494554']
fold: 2, scores: ['R^2: 0.1283405954028053', 'MSE: 2056.007703415987', 'Explained_var: 0.12846609056403624']
fold

fold: 3, scores: ['R^2: 0.1622934892042145', 'MSE: 2189.2534189384314', 'Explained_var: 0.16236424403639116']
fold: 4, scores: ['R^2: 0.10922075174947354', 'MSE: 3835.022152061652', 'Explained_var: 0.10929872382077288']
fold: 5, scores: ['R^2: 0.17917363140485165', 'MSE: 1913.980792695065', 'Explained_var: 0.17923579695704206']
fold: 6, scores: ['R^2: 0.13410107913627922', 'MSE: 2946.3179554882345', 'Explained_var: 0.13423726512757594']
fold: 7, scores: ['R^2: 0.21390732185412265', 'MSE: 1581.5440778296613', 'Explained_var: 0.2139890662410796']
fold: 8, scores: ['R^2: 0.1853878135346968', 'MSE: 1850.9532859504939', 'Explained_var: 0.18539204353335947']
fold: 9, scores: ['R^2: 0.12597684573128431', 'MSE: 3021.5721980589697', 'Explained_var: 0.12597687706321836']
running model: Bayesian Ridge
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.09683489596759542', 'MSE: 2927.536497331066', 'Explained_var: 0.09685485835993701']
fold: 1, scores

fold: 0, scores: ['R^2: 0.14824862636180103', 'MSE: 2269.315319233179', 'Explained_var: 0.1482587585639814']
fold: 1, scores: ['R^2: 0.17865671290201013', 'MSE: 1735.3660769519615', 'Explained_var: 0.17880084220109482']
fold: 2, scores: ['R^2: 0.17293540021030485', 'MSE: 2232.5791297673854', 'Explained_var: 0.17298983810363677']
fold: 3, scores: ['R^2: 0.15362275387688873', 'MSE: 2015.2718564821905', 'Explained_var: 0.15363187394040556']
fold: 4, scores: ['R^2: 0.1666881377829451', 'MSE: 1681.4047372766602', 'Explained_var: 0.16685891932451902']
fold: 5, scores: ['R^2: 0.13165445919375007', 'MSE: 2775.3881540855577', 'Explained_var: 0.1317393744115012']
fold: 6, scores: ['R^2: 0.1648229122327428', 'MSE: 1965.3995670945249', 'Explained_var: 0.16482696113964534']
fold: 7, scores: ['R^2: 0.10979786670669234', 'MSE: 3036.9645579927105', 'Explained_var: 0.10979806276208481']
fold: 8, scores: ['R^2: 0.08674091645582283', 'MSE: 3597.8680874125375', 'Explained_var: 0.08674669747163721']
fold: 

fold: 8, scores: ['R^2: 0.11032452105724178', 'MSE: 2115.1108425194134', 'Explained_var: 0.11033534334485895']
fold: 9, scores: ['R^2: 0.11367432997571658', 'MSE: 2176.2645307451317', 'Explained_var: 0.11382123415562861']
running model: Ridge
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.11576758093976969', 'MSE: 2028.633107632416', 'Explained_var: 0.11581017094041979']
fold: 1, scores: ['R^2: 0.10520892140409055', 'MSE: 2419.316601731571', 'Explained_var: 0.10528760082614352']
fold: 2, scores: ['R^2: 0.12814098444380484', 'MSE: 1726.1784107468013', 'Explained_var: 0.12824246253730587']
fold: 3, scores: ['R^2: 0.05865642894247425', 'MSE: 4453.25895720372', 'Explained_var: 0.05870581469532399']
fold: 4, scores: ['R^2: 0.08070645355707495', 'MSE: 3131.11914738447', 'Explained_var: 0.08070653889834167']
fold: 5, scores: ['R^2: 0.07872481792826946', 'MSE: 3070.006678945908', 'Explained_var: 0.07874648582043375']
fold: 6, scores: ['R^2: 0

fold: 7, scores: ['R^2: 0.12478395507073903', 'MSE: 2944.3037957668403', 'Explained_var: 0.12481526051801317']
fold: 8, scores: ['R^2: 0.1661631133051482', 'MSE: 2236.264621221293', 'Explained_var: 0.1662103830227165']
fold: 9, scores: ['R^2: 0.11115968089994932', 'MSE: 2998.4000886018744', 'Explained_var: 0.11116459857323291']
running model: Bayesian Ridge
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.10213649665599789', 'MSE: 2372.0257356537895', 'Explained_var: 0.10214002253263321']
fold: 1, scores: ['R^2: 0.10711167698847357', 'MSE: 2126.273288141848', 'Explained_var: 0.10711188329598775']
fold: 2, scores: ['R^2: 0.12241997866475052', 'MSE: 1728.2203524711886', 'Explained_var: 0.12257443938236356']
fold: 3, scores: ['R^2: 0.0895837580975325', 'MSE: 2771.8092819737126', 'Explained_var: 0.08962228922765747']
fold: 4, scores: ['R^2: 0.1254197427601328', 'MSE: 1969.8614356381095', 'Explained_var: 0.12541990272304904']
fold: 5, scores

fold: 4, scores: ['R^2: 0.2041598327483538', 'MSE: 1698.069796649672', 'Explained_var: 0.2041610170481527']
fold: 5, scores: ['R^2: 0.12606641830094667', 'MSE: 2801.105557134048', 'Explained_var: 0.12608707537801156']
fold: 6, scores: ['R^2: 0.22683116665441616', 'MSE: 1617.6985931798192', 'Explained_var: 0.22703386150065896']
fold: 7, scores: ['R^2: 0.20306344400917142', 'MSE: 1759.9864467936302', 'Explained_var: 0.20319290542650603']
fold: 8, scores: ['R^2: 0.1794879511360803', 'MSE: 2071.0346349554598', 'Explained_var: 0.17960050995679566']
fold: 9, scores: ['R^2: 0.12928958150720637', 'MSE: 2880.2388948933026', 'Explained_var: 0.12929306142308272']
running model: RandomForest (n=3)
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.1155237277122112', 'MSE: 3427.1344964576533', 'Explained_var: 0.11552383988515835']
fold: 1, scores: ['R^2: 0.17992422845613398', 'MSE: 1911.6647933326417', 'Explained_var: 0.17994716900234164']
fold: 2, sc

fold: 1, scores: ['R^2: 0.10685419467591839', 'MSE: 2052.829576236726', 'Explained_var: 0.10693039158588069']
fold: 2, scores: ['R^2: 0.05396937381825029', 'MSE: 5134.612797358613', 'Explained_var: 0.053985574677624104']
fold: 3, scores: ['R^2: 0.12120888585078882', 'MSE: 2196.549112584923', 'Explained_var: 0.12194482019600328']
fold: 4, scores: ['R^2: 0.13021501889875364', 'MSE: 1729.5101281581065', 'Explained_var: 0.1302370872651354']
fold: 5, scores: ['R^2: 0.10277715565981793', 'MSE: 2126.0463350175232', 'Explained_var: 0.10277788128479926']
fold: 6, scores: ['R^2: 0.11011029026983099', 'MSE: 1975.3825072689713', 'Explained_var: 0.11011084873149923']
fold: 7, scores: ['R^2: 0.09358834692679796', 'MSE: 2690.599671452405', 'Explained_var: 0.09359314545937591']
fold: 8, scores: ['R^2: 0.09385213333268672', 'MSE: 2459.1209777525073', 'Explained_var: 0.09400802009026399']
fold: 9, scores: ['R^2: 0.1313495967177768', 'MSE: 1685.996333518392', 'Explained_var: 0.131403027312316']
running m

fold: 0, scores: ['R^2: 0.0626242657981887', 'MSE: 3836.111224717796', 'Explained_var: 0.06263128750034153']
fold: 1, scores: ['R^2: 0.10207034763315503', 'MSE: 2527.766643716091', 'Explained_var: 0.1020928651857892']
fold: 2, scores: ['R^2: 0.08033342135736354', 'MSE: 2984.8714397329154', 'Explained_var: 0.08042260770038079']
fold: 3, scores: ['R^2: 0.12390016392680303', 'MSE: 1839.4332681332903', 'Explained_var: 0.12392098144051955']
fold: 4, scores: ['R^2: 0.10441886378677212', 'MSE: 2161.0228549598587', 'Explained_var: 0.10443447834437136']
fold: 5, scores: ['R^2: 0.09947033870701716', 'MSE: 2296.93655836536', 'Explained_var: 0.09978922195784146']
fold: 6, scores: ['R^2: 0.10066687020872811', 'MSE: 2369.8700671321076', 'Explained_var: 0.10077896334767988']
fold: 7, scores: ['R^2: 0.10780003747991851', 'MSE: 2308.9013157789645', 'Explained_var: 0.10781363711742764']
fold: 8, scores: ['R^2: 0.11060667526736134', 'MSE: 1998.652418727574', 'Explained_var: 0.11083980124027515']
fold: 9,

fold: 8, scores: ['R^2: 0.14918377282480966', 'MSE: 2657.0435002587933', 'Explained_var: 0.14918548685079602']
fold: 9, scores: ['R^2: 0.18537256370895683', 'MSE: 1918.875846378557', 'Explained_var: 0.18541016068680838']
running model: RandomForest (n=3)
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.17827788315735338', 'MSE: 2042.9263230976542', 'Explained_var: 0.17836505111910383']
fold: 1, scores: ['R^2: 0.14947180328485454', 'MSE: 2463.6852601570977', 'Explained_var: 0.14951422720253416']
fold: 2, scores: ['R^2: 0.10775247324769377', 'MSE: 3537.037087312589', 'Explained_var: 0.10777779491559325']
fold: 3, scores: ['R^2: 0.16807285397130634', 'MSE: 2060.7513528413497', 'Explained_var: 0.16807294409491425']
fold: 4, scores: ['R^2: 0.13425047572333648', 'MSE: 3056.503568394624', 'Explained_var: 0.13425958366587465']
fold: 5, scores: ['R^2: 0.1932884533237219', 'MSE: 1797.2354019689787', 'Explained_var: 0.19328874128758455']
fold: 6, 

fold: 5, scores: ['R^2: 0.09292558641279836', 'MSE: 3079.139602927368', 'Explained_var: 0.09321974437068548']
fold: 6, scores: ['R^2: 0.12450961705574559', 'MSE: 1870.1385709883752', 'Explained_var: 0.12458627538085487']
fold: 7, scores: ['R^2: 0.14860600101669796', 'MSE: 1827.2935638014587', 'Explained_var: 0.14861628900673007']
fold: 8, scores: ['R^2: 0.10522163115498728', 'MSE: 2282.578062448481', 'Explained_var: 0.1052401163160076']
fold: 9, scores: ['R^2: 0.0756278708118725', 'MSE: 3259.590313332265', 'Explained_var: 0.0756594551854014']
running model: Lasso
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.059663252230690134', 'MSE: 3814.689074664157', 'Explained_var: 0.05969408647017893']
fold: 1, scores: ['R^2: 0.10102860301583061', 'MSE: 1906.8415545157327', 'Explained_var: 0.10122361908430189']
fold: 2, scores: ['R^2: 0.09939992124183317', 'MSE: 1858.3287632786958', 'Explained_var: 0.09964593557003631']
fold: 3, scores: ['R^2: 

fold: 3, scores: ['R^2: 0.10799883638284935', 'MSE: 2570.374923351964', 'Explained_var: 0.1080739945229301']
fold: 4, scores: ['R^2: 0.14028824573002274', 'MSE: 1846.393757925533', 'Explained_var: 0.14030683984399128']
fold: 5, scores: ['R^2: 0.11682059926998423', 'MSE: 2373.8331560317165', 'Explained_var: 0.11688459186708722']
fold: 6, scores: ['R^2: 0.1416875767384227', 'MSE: 1679.7666837243792', 'Explained_var: 0.14209318045117025']
fold: 7, scores: ['R^2: 0.11603995301032399', 'MSE: 2287.5584569494395', 'Explained_var: 0.1161108563886788']
fold: 8, scores: ['R^2: 0.06917723174292678', 'MSE: 3582.001090414927', 'Explained_var: 0.06921316156872637']
fold: 9, scores: ['R^2: 0.09066036179183845', 'MSE: 2726.9369341046126', 'Explained_var: 0.09075875336701855']
running model: ARDRegression
number of cross-validation folds: 10
num counties in validation set: 251
fold: 0, scores: ['R^2: 0.1183617398322302', 'MSE: 2231.9635626375484', 'Explained_var: 0.11836410798994401']
fold: 1, scores: 