In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import RegressorChain
from sktime.transformations.panel.rocket import Rocket

In [8]:
from variable_assignation import *
from load_functions import *

# Variables

In [25]:
# Variant whose dataset is selected for training and evaluation below.
target_variant = 'delta'
# Number of random convolutional kernels handed to Rocket.
num_kernels = 50_000

# Load data

In [14]:
# Per-variant inputs, keyed by variant name.
df_data_death = {}
df_data_prop = {}
variant_names = ('alpha', 'gamma', 'kappa', 'delta')
for variant in variant_names:
    # Each variant has its own folder holding the two stored objects.
    input_files_folder = f'../input_files/{variant}'
    df_data_death[variant] = load_obj(f'{input_files_folder}/df_prop_deaths_{variant}')
    df_data_prop[variant] = load_obj(f'{input_files_folder}/df_prop_{variant}')
# NOTE: `variant` stays bound to 'delta' (the last entry) after this loop.

# Flatten the per-group variable lists of `variables_vars` into one list,
# keeping the iteration order of the mapping.
all_variables = []
for var_list in variables_vars.values():
    all_variables.extend(var_list)

In [19]:
# Display the flattened list of variable names built in the previous cell.
all_variables

['variantalpha_transmissibility',
 'variantalpha_transmissibility_factor',
 'variantalpha_imports_factor',
 'variantalpha_cross_protection_prob',
 'variantgamma_transmissibility',
 'variantgamma_transmissibility_factor',
 'variantgamma_imports_factor',
 'variantgamma_cross_protection_prob',
 'variantgamma_severity_factor',
 'variantkappa_transmissibility',
 'variantkappa_transmissibility_factor',
 'variantkappa_cross_protection_prob',
 'variantkappa_introduction_day',
 'variantkappa_severity_factor',
 'variantdelta_transmissibility',
 'variantdelta_transmissibility_factor',
 'variantdelta_imports_factor',
 'variantdelta_cross_protection_prob',
 'variantdelta_severity_factor']

In [20]:
# Positions of the two delta variables inside `all_variables`.
delta_names = ['variantdelta_cross_protection_prob', 'variantdelta_severity_factor']
np.flatnonzero(np.isin(all_variables, delta_names))

array([17, 18])

In [3]:
## ~0.7 variantgamma_transmissibility + variantgamma_imports_factor + variantgamma_cross_protection_prob
## ~0.5 variable_alpha + variantgamma_transmissibility_factor - GradientBoostingRegressor
## ~0.6 variable_alpha + variantgamma_severity_factor - GradientBoostingRegressor

In [22]:
# Pick the dataset of the configured target variant; the split below reads it.
df_data = df_data_prop[target_variant]

In [23]:
# Sequential hold-out split: the first `n_train` rows train, the rest test.
n_train = 1900
train_rows = df_data[:n_train]
test_rows = df_data[n_train:]

# Features keep the single-column DataFrame form ('dim_0'); targets are
# turned into a NumPy array built from the per-row 'y' entries.
X_train = train_rows[['dim_0']]
X_test = test_rows[['dim_0']]
y_train = np.array(train_rows['y'].tolist())
y_test = np.array(test_rows['y'].tolist())

In [26]:
# Fit the ROCKET transformer on the training split only.
# ROCKET draws its convolutional kernels at random, so the seed is fixed to
# keep the extracted features (and every downstream score) reproducible
# across kernel restarts; 0 matches the seed used for ExtraTreesRegressor.
rocket = Rocket(num_kernels=num_kernels, n_jobs=-1, normalise=True, random_state=0)
rocket.fit(X_train, y_train)

In [27]:
# Map both splits into ROCKET feature space using the transformer fitted on
# the training split.
X_train_transform = rocket.transform(X_train, y=y_train)
X_test_transform = rocket.transform(X_test, y=y_test)

# ExtraTreesRegressor

In [32]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.multioutput import MultiOutputRegressor

In [34]:
# Model: an ExtraTreesRegressor wrapped in MultiOutputRegressor, with a fixed
# seed on the tree ensemble.
base_regressor = ExtraTreesRegressor(random_state=0)
model = MultiOutputRegressor(estimator=base_regressor, n_jobs=-1)

# Search space; the "estimator__" prefix routes each setting to the wrapped
# ExtraTreesRegressor.
param_grid = {
    "estimator__n_estimators": [50, 100, 200],
    "estimator__max_depth": [None, 5, 10],
}

# Exhaustive 5-fold cross-validated search over the grid, on training data.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_transform, y_train)

# Keep the winning configuration and the model refitted with it.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Held-out evaluation: mean squared error of the best model's predictions.
y_pred = best_model.predict(X_test_transform)
mse = mean_squared_error(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Best Hyperparameters:", best_params)

KeyboardInterrupt: 

In [None]:
# Report the best model's score on the test split, then print predictions
# next to their targets for up to the first 20 test rows.
print('Score: ',best_model.score(X_test_transform, y_test))
print('####################')
y_pred = best_model.predict(X_test_transform)
# Slicing caps the preview at the rows that exist, so a test split shorter
# than 20 rows no longer raises IndexError.
for pred_row, target_row in zip(y_pred[:20], y_test[:20]):
    print('Pred:  ',pred_row)
    print('Target:',target_row)
    print('####################')

Score:  0.5905781685054887
####################
Pred:   [0.63857121 1.50929053 3.03167114 0.61350647 1.71301636]
Target: [0.74283918 1.6411377  2.77062988 0.65468506 2.14733887]
####################
Pred:   [0.85367839 1.91837158 5.09622803 0.66458069 1.84740479]
Target: [0.88994612 1.9661377  5.14562988 0.71718506 1.77233887]
####################
Pred:   [0.61723727 1.36557324 5.83043701 0.67650818 1.73389771]
Target: [0.66928571 1.4786377  6.33312988 0.68593506 1.20983887]
####################
Pred:   [0.80651918 1.77498437 8.90007813 0.70185547 1.72097412]
Target: [0.81639265 1.8036377  8.70812988 0.62343506 1.58483887]
####################
Pred:   [0.85637038 1.86368652 6.4511377  0.69439758 1.62377808]
Target: [0.92672285 2.0473877  5.73937988 0.76406006 1.67858887]
####################
Pred:   [0.63604835 1.4153833  5.46242798 0.66777466 1.95791626]
Target: [0.50379041 1.1130127  9.00500488 0.58437256 1.81921387]
####################
Pred:   [0.64304671 1.53025684 1.81230347 0.65

# Regressor chain

In [250]:
from sklearn import linear_model

# Base estimator used by the regressor chain below: LassoLars with a very
# small regularisation strength.
# Author's note: ~0.6 - 3 parameters
reg = linear_model.LassoLars(alpha=1e-6, normalize=True)

# Alternative base estimators, kept commented out for reference.

# from sklearn import linear_model
# reg = linear_model.LassoLarsIC(criterion='bic', normalize=True)

# from sklearn.linear_model import LassoLarsCV
# reg = LassoLarsCV(cv=3, normalize=True)

# from sklearn.linear_model import ElasticNet
# reg = ElasticNet(alpha=0.1, l1_ratio=0.1, normalize=False, random_state=0)

# from sklearn import linear_model
# reg = linear_model.TweedieRegressor(alpha=0.1, max_iter=1500, link='identity')
# # 0.37 - 4 parameters alpha=0.1, max_iter=1500, link='identity'

# from sklearn.linear_model import RANSACRegressor
# reg = RANSACRegressor(random_state=0, min_samples=0.99)

# scaler = preprocessing.StandardScaler().fit(X_train_twe)
# X_train_scaled = scaler.transform(X_train_twe)

In [251]:
# Chain the base estimator across the targets in their natural column order
# (0, 1, ..., n-1), one position per entry of `target_variables`.
chain_order = list(range(len(target_variables)))
classifier = RegressorChain(reg, order=chain_order)
classifier.fit(X_train_transform, y_train)

RegressorChain(base_estimator=LassoLars(alpha=1e-06, normalize=True),
               order=[0, 1, 2, 3])

In [252]:
# Name the stored model after `target_variant`, the variant whose dataset
# (df_data_prop[target_variant]) the chain was trained on. The stale loop
# variable `variant` is always 'delta' after data loading, so using it would
# mislabel (and overwrite) the file whenever another variant is targeted.
save_obj(classifier, 'classifier_'+target_variant)

In [253]:
# Report the regressor chain's score on the test split, then print
# predictions next to their targets for up to the first 20 test rows.
print('Score: ',classifier.score(X_test_transform, y_test))
print('####################')
y_pred = classifier.predict(X_test_transform)
# Slicing caps the preview at the rows that exist, so a test split shorter
# than 20 rows no longer raises IndexError.
for pred_row, target_row in zip(y_pred[:20], y_test[:20]):
    print('Pred:  ',pred_row)
    print('Target:',target_row)
    print('####################')

Score:  0.5178806677380583
####################
Pred:   [0.58693722 1.25781369 5.86610679 0.95      ]
Target: [0.51880832 1.14619141 5.76721191 0.95      ]
####################
Pred:   [0.57679875 1.36200497 6.4843561  0.95      ]
Target: [0.65459933 1.44619141 8.14221191 0.95      ]
####################
Pred:   [0.5955695  1.37695718 2.4861957  0.95      ]
Target: [0.6319675  1.39619141 2.20471191 0.95      ]
####################
Pred:   [0.58231059 1.28185269 5.15751188 0.95      ]
Target: [0.62630954 1.38369141 4.87658691 0.95      ]
####################
Pred:   [0.52882933 1.20588755 2.81344491 0.95      ]
Target: [0.58104587 1.28369141 2.50158691 0.95      ]
####################
Pred:   [0.5384082  1.24869962 7.76051062 0.95      ]
Target: [0.51315036 1.13369141 8.43908691 0.95      ]
####################
Pred:   [0.54667477 1.26729816 2.03836521 0.95      ]
Target: [0.52446628 1.15869141 1.90783691 0.95      ]
####################
Pred:   [0.52919498 1.16797619 0.86351488 0.95   