In [1]:
# Packages
import numpy as np
import sklearn 
import pandas as pd
import matplotlib.pyplot as plt
import scipy
from scipy.integrate import odeint
from scipy.optimize import minimize
from utility_code.utility import utils

In [2]:
trainrd2 = pd.read_csv("train_round2.csv")
datestofind = trainrd2['Date'] == '09-01-2020'
start = datestofind[datestofind == True].index[0]
datestofind2 = trainrd2['Date'] == '09-26-2020'
end = datestofind[datestofind == True].index[49]

bestsub = pd.read_csv("team31-nov29-2.csv")

validation = trainrd2.iloc[start:end,:]
def MAPE(pred, valid):
    pred = pred.reset_index()
    valid = valid.reset_index()
    pred = pred.astype('int64')
    valid = valid.astype('int64')
    v = pred.subtract(valid)
    v = v.divide(valid)
    v = v.abs()
    v = v.sum(axis = 0)
    #v = v[0]+v[1]+v[2]
    n = len(pred)
    return v/n

In [3]:
#import csvs
train = pd.read_csv("ucla2020-cs145-covid19-prediction/train.csv")
test = pd.read_csv("ucla2020-cs145-covid19-prediction/test.csv")
graph = pd.read_csv("ucla2020-cs145-covid19-prediction/graph.csv")
supp = pd.read_csv("data-test/raw_data_test.csv", skiprows=2, thousands=',')
supp = supp[supp['Location'].isin(train['Province_State'])]
supp['Population'] = supp['Number of COVID-19 Cases'].divide(supp['COVID-19 Cases per 1,000,000 Population']) * 1e6

states = pd.Series.unique(train['Province_State'])
num_states = len(states)

In [4]:
#features to try
print(train.columns)

Index(['ID', 'Province_State', 'Date', 'Confirmed', 'Deaths', 'Recovered',
       'Active', 'Incident_Rate', 'People_Tested', 'People_Hospitalized',
       'Mortality_Rate', 'Testing_Rate', 'Hospitalization_Rate'],
      dtype='object')


In [None]:
## MANIPULATES MODEL AND MODEL PARAMS
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge
import random

import hyperparam_scan

#TRY DIFFERENT VALUES FOR PARAMETERS AND KEEP THE BEST

DEFAULT_MIN = 1e-25
DEFAULT_MAX = 999999

feature_list = ['Deaths','Mortality_Rate']
gaussian_parameters = {'alpha' : {'sd':0.2 , 'mn':DEFAULT_MIN , 'mx':DEFAULT_MAX,'normal':True, 'rnd':False},
                       'max_iter' : {'sd':100 , 'mn':1 , 'mx':DEFAULT_MAX, 'normal':True, 'rnd':True},
                       'tol' : {'sd':5e-4 , 'mn':DEFAULT_MIN , 'mx':DEFAULT_MAX, 'normal':True, 'rnd':False},
                       'random_state' : {'sd':0 , 'mn':0 , 'mx':DEFAULT_MAX, 'normal':False, 'rnd':True},
                       'window_size' : {'sd':0 , 'mn':1 , 'mx':14, 'normal':False, 'rnd':True,'ignore':True}
                      }
sd_scale = 200
initial_parameters = { 'alpha': 2.9347,
                         'max_iter': 2612,
                         'tol': 0.02269,
                         'random_state':595350,
                         'window_size':5 }
for f in feature_list:
    initial_parameters[f+'_error']=1
estimator = Ridge
hps = hyperparam_scan.hyperparam_scan(feature_list,gaussian_parameters,sd_scale,initial_parameters,estimator)


ITERATIONS = 500
parameters = {}

print("Starting using previous best setup...")

for it in range(ITERATIONS+1):
    
    reg , parameters = hps.get_reg()
        
###################################################

    ## MANIPULATES FEATURES FROM DF TO USE AND WINDOW SIZE
    #only look at the features in features list
    features = feature_list
    num_features = len(features)

    #stratify by state (into state dictionary)
    statesdata = {}
    for s in states:
        statesdata[s] = train.loc[train['Province_State'] == s,features]

    
    ###################################################
    WINDOW_SIZE = parameters['window_size']
    ###################################################
    
    state_feature_indices = utils.get_column_indices(statesdata['California'],features)

    #append the feature spaces from the W days prior (where W is the window length)
    new_features = []
    for day in range(WINDOW_SIZE):
        for f in features:
            new_features.append(f + "(-"+ str(WINDOW_SIZE-day) + " days)")
    all_new_features = new_features + features

    ## Set up dictionary of projections
    proj = {}

    ## Loop over states
    for s in states:

        a = statesdata[s]

        #fill the knn data using days from training set
        knndata = pd.DataFrame(columns = all_new_features)
        num_training_days = len(statesdata['California'])

        #fill the table
        for d in range(WINDOW_SIZE,num_training_days):
            knndata_row_index = knndata.shape[0]
            knn_row = utils.flatten_dataframe(a,slice(d-WINDOW_SIZE,d+1), state_feature_indices)
            utils.dataframe_append_row(knndata,knn_row,s,d)   

        # Actual recursive prediction
        days_to_predict = 26
        for d in range(days_to_predict):
            #x = knndata.drop(columns=features)
            #y = knndata.drop(columns=features)
            x = knndata.drop(feature_list, axis = 1)
            y = knndata[feature_list]
            toguess = 1
            trainx = x.head(len(x))
            trainy = y.head(len(y))
            # testy = y.tail(toguess)

            reg.fit(trainx, trainy)

            #rmv = [i for i in range(num_features)]
            #ftrs = knndata.drop(columns=knndata.columns[rmv]).tail(1)
            ftrs = knndata.drop(columns=knndata.columns[[0,1]]).tail(1)
            #ftrs.drop(columns=knndata.columns[[0,1]])

            ftrs.columns = knndata.columns[0:num_features*WINDOW_SIZE]

            new = reg.predict(ftrs)
            ftrs = np.append(ftrs, new)
            #print(np.amax(ftrs))
            ftrs = ftrs.astype('int64')
            #if d==0: print(ftrs)
            knndata = knndata.append(dict(zip(knndata.columns, ftrs)), ignore_index=True)

            # append to knndata
            #if d == 0: 
                #print(knndata)
        done = knndata.tail(days_to_predict)
        done = done[feature_list]
        #print(done)
        proj[s] = done

    ## Get ordering of states in test    
    order = test.loc[0:49,'Province_State']

    # format submission
    list_dict = {f:[] for f in feature_list}
    
    fid = 0
    for i in range(days_to_predict):
        for j in order:
            projection = proj[j].iloc[i]
            #print(j, 'day', i)
            for f in feature_list:
                list_dict[f].append(int(projection[f]))
            #print(fid)
            fid+=1

    for f in feature_list:
        test[f] = list_dict[f]

###################################################
    hps.evaluate_test(parameters,test,validation)
        
best_parameters = hps.best_parameters
print("Found",len(best_parameters) - 1,"good solutions!")
###################################################

Starting using previous best setup...
(*) SETUP:[('Deaths_error', 0.0002075651001322734), ('Mortality_Rate_error', 0.0)]
(*) ITERATION 1: [('Deaths_error', 0.00018527001433175545)]
(-) ITERATION 2: []
(-) ITERATION 3: []
(-) ITERATION 4: []
(-) ITERATION 5: []
(-) ITERATION 6: []
(-) ITERATION 7: []
(-) ITERATION 8: []
(-) ITERATION 9: []
(-) ITERATION 10: []
(-) ITERATION 11: []
(-) ITERATION 12: []
(-) ITERATION 13: []
(-) ITERATION 14: []
(-) ITERATION 15: []
(*) ITERATION 16: [('Deaths_error', 0.00017521004333507492)]
(-) ITERATION 17: []
(-) ITERATION 18: []
(-) ITERATION 19: []
(-) ITERATION 20: []
(-) ITERATION 21: []
(-) ITERATION 22: []
(-) ITERATION 23: []
(-) ITERATION 24: []
(-) ITERATION 25: []
(-) ITERATION 26: []
(-) ITERATION 27: []
(-) ITERATION 28: []
(-) ITERATION 29: []
(-) ITERATION 30: []
(-) ITERATION 31: []
(-) ITERATION 32: []
(-) ITERATION 33: []
(-) ITERATION 34: []
(-) ITERATION 35: []
(-) ITERATION 36: []
(-) ITERATION 37: []
(-) ITERATION 38: []
(-) ITERAT

  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 51: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 52: []
(-) ITERATION 53: []
(-) ITERATION 54: []
(-) ITERATION 55: []
(-) ITERATION 56: []
(-) ITERATION 57: []
(-) ITERATION 58: []
(-) ITERATION 59: []
(-) ITERATION 60: []
(-) ITERATION 61: []
(-) ITERATION 62: []
(-) ITERATION 63: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 64: []
(-) ITERATION 65: []
(-) ITERATION 66: []
(-) ITERATION 67: []
(-) ITERATION 68: []
(-) ITERATION 69: []
(-) ITERATION 70: []
(-) ITERATION 71: []
(-) ITERATION 72: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 73: []
(-) ITERATION 74: []
(-) ITERATION 75: []
(-) ITERATION 76: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 77: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 78: []
(-) ITERATION 79: []
(-) ITERATION 80: []
(-) ITERATION 81: []
(-) ITERATION 82: []
(-) ITERATION 83: []
(-) ITERATION 84: []
(-) ITERATION 85: []
(-) ITERATION 86: []
(-) ITERATION 87: []
(-) ITERATION 88: []
(-) ITERATION 89: []
(-) ITERATION 90: []
(-) ITERATION 91: []
(-) ITERATION 92: []
(-) ITERATION 93: []
(-) ITERATION 94: []
(-) ITERATION 95: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 96: []
(-) ITERATION 97: []
(-) ITERATION 98: []
(-) ITERATION 99: []
(-) ITERATION 100: []
(-) ITERATION 101: []
(-) ITERATION 102: []
(-) ITERATION 103: []
(-) ITERATION 104: []
(-) ITERATION 105: []
(-) ITERATION 106: []
(-) ITERATION 107: []
(-) ITERATION 108: []
(-) ITERATION 109: []
(-) ITERATION 110: []
(-) ITERATION 111: []
(-) ITERATION 112: []
(-) ITERATION 113: []
(-) ITERATION 114: []
(-) ITERATION 115: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 116: []
(-) ITERATION 117: []
(-) ITERATION 118: []
(-) ITERATION 119: []
(-) ITERATION 120: []
(-) ITERATION 121: []
(-) ITERATION 122: []
(-) ITERATION 123: []
(-) ITERATION 124: []
(-) ITERATION 125: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 126: []
(-) ITERATION 127: []
(-) ITERATION 128: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 129: []
(-) ITERATION 130: []
(-) ITERATION 131: []
(-) ITERATION 132: []
(-) ITERATION 133: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 134: []
(-) ITERATION 135: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 136: []
(-) ITERATION 137: []
(-) ITERATION 138: []
(-) ITERATION 139: []
(-) ITERATION 140: []
(-) ITERATION 141: []
(-) ITERATION 142: []
(-) ITERATION 143: []
(-) ITERATION 144: []
(-) ITERATION 145: []
(-) ITERATION 146: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 147: []
(-) ITERATION 148: []
(-) ITERATION 149: []
(-) ITERATION 150: []
(-) ITERATION 151: []
(-) ITERATION 152: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 153: []
(-) ITERATION 154: []
(-) ITERATION 155: []
(-) ITERATION 156: []
(-) ITERATION 157: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 158: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 159: []
(-) ITERATION 160: []
(-) ITERATION 161: []
(-) ITERATION 162: []
(-) ITERATION 163: []
(-) ITERATION 164: []
(-) ITERATION 165: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 166: []
(-) ITERATION 167: []
(-) ITERATION 168: []
(-) ITERATION 169: []
(-) ITERATION 170: []


  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,
  return linalg.solve(A, Xy, sym_pos=True,


(-) ITERATION 171: []
(-) ITERATION 172: []
(-) ITERATION 173: []
(-) ITERATION 174: []
(-) ITERATION 175: []


In [None]:
hps.print_best()

In [None]:
print(MAPE(bestsub[feature_list], validation[feature_list]))
print("Error of current run")
print(MAPE(test[feature_list], validation[feature_list]))