In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import RegressorChain
from sktime.transformations.panel.rocket import Rocket

In [3]:
from variable_assignation import *
from load_functions import *

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNet, LassoLars
from sklearn.preprocessing import StandardScaler

# Variables

In [51]:
target_variant = 'delta'
num_kernels = 10000

# Load data

In [52]:
df_data_death, df_data_prop = {}, {}
# for variant in ['alpha', 'gamma', 'kappa', 'delta']:
for variant in ['delta']:
    input_files_folder = f'../input_files/{variant}'
    df_data_death[variant] = load_obj(f'{input_files_folder}/df_prop_deaths_{variant}')
    df_data_prop[variant] = load_obj(f'{input_files_folder}/df_prop_{variant}')

all_variables = []
for var_list in variables_vars.values():
    all_variables += var_list

In [53]:
## ~0.7 variantgamma_transmissibility + variantgamma_imports_factor + variantgamma_cross_protection_prob
## ~0.5 variable_alpha + variantgamma_transmissibility_factor - GradientBoostingRegressor
## ~0.6 variable_alpha + variantgamma_severity_factor - GradientBoostingRegressor

In [54]:
df_data = df_data_prop[target_variant]

In [55]:
n_train = 1900
X_train = df_data[:n_train][['dim_0']]
y_train = np.array(df_data[:n_train]['y'].tolist())

X_test = df_data[n_train:][['dim_0']]
y_test = np.array(df_data[n_train:]['y'].tolist())

In [56]:
rocket = Rocket(num_kernels=num_kernels, n_jobs=-1)
rocket.fit(X_train, y_train)

Rocket(n_jobs=-1)

In [57]:
X_train_transform = rocket.transform(X_train, y_train)
X_test_transform = rocket.transform(X_test, y_test)

# ExtraTreesRegressor

In [63]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.multioutput import MultiOutputRegressor

In [68]:
%%time
# Define the ML model with MultiOutputRegressor and ExtraTreesRegressor
model = MultiOutputRegressor(RandomForestRegressor(random_state=0), n_jobs=-1)

model.fit(X_train_transform, y_train)

# Predict on the test set using the best model
y_pred = model.predict(X_test_transform)

# Evaluate the model using mean squared error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 0.2815653256961773
CPU times: user 344 ms, sys: 568 ms, total: 912 ms
Wall time: 33min 37s


In [69]:
print('Score: ',model.score(X_test_transform, y_test))
print('####################')
y_pred = model.predict(X_test_transform)
for n in range(20):
    print('Pred:  ',y_pred[n])
    print('Target:',y_test[n])
    print('####################')

Score:  0.45237648000150477
####################
Pred:   [0.58817718 1.49913843 5.82011597 0.82890457 1.19574219]
Target: [0.6370954  1.63137207 4.87194824 0.78807069 1.46350098]
####################
Pred:   [0.57225146 1.45437842 9.31377808 0.82049422 1.28113525]
Target: [0.42230536 1.08137207 9.62194824 0.75797397 1.21350098]
####################
Pred:   [0.50414582 1.28542798 3.82031494 0.9498308  1.24553711]
Target: [0.52970038 1.35637207 2.49694824 0.94215048 1.33850098]
####################
Pred:   [0.70582507 1.79058472 6.52554199 0.78719669 1.28973877]
Target: [0.74449042 1.90637207 7.24694824 0.87754645 1.08850098]
####################
Pred:   [0.64188518 1.64499316 6.25102539 0.87900405 1.23856079]
Target: [0.79818793 2.04387207 8.43444824 0.87972291 1.40100098]
####################
Pred:   [0.66607947 1.70150513 2.63279175 0.95636002 1.25497559]
Target: [0.58339789 1.49387207 3.68444824 0.94983387 1.15100098]
####################
Pred:   [0.46201635 1.18203442 5.89992432 0.9

In [65]:
%%time
# Define the ML model with MultiOutputRegressor and ExtraTreesRegressor
model = MultiOutputRegressor(ExtraTreesRegressor(random_state=0), n_jobs=-1)

# Define the hyperparameters grid for tuning
param_grid = {
    "estimator__n_estimators": [300],
    "estimator__max_depth": [15],
}

# Perform grid search for hyperparameter tuning
grid_search = GridSearchCV(model, param_grid, n_jobs=-1)
grid_search.fit(X_train_transform, y_train)

# Get the best model and its hyperparameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Predict on the test set using the best model
y_pred = best_model.predict(X_test_transform)

# Evaluate the model using mean squared error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)
print("Best Hyperparameters:", best_params)

KeyboardInterrupt: 

In [66]:
print('Score: ',best_model.score(X_test_transform, y_test))
print('####################')
y_pred = best_model.predict(X_test_transform)
for n in range(20):
    print('Pred:  ',y_pred[n])
    print('Target:',y_test[n])
    print('####################')

Score:  0.38799216256813496
####################
Pred:   [0.63551303 1.62452057 5.18472913 0.91506165 1.14763224]
Target: [0.6370954  1.63137207 4.87194824 0.78807069 1.46350098]
####################
Pred:   [0.52098488 1.334238   9.68792463 0.85784709 1.22603772]
Target: [0.42230536 1.08137207 9.62194824 0.75797397 1.21350098]
####################
Pred:   [0.56114678 1.4411608  2.64117828 0.91599778 1.26608042]
Target: [0.52970038 1.35637207 2.49694824 0.94215048 1.33850098]
####################
Pred:   [0.69268629 1.77424942 6.52689878 0.7194257  1.28423508]
Target: [0.74449042 1.90637207 7.24694824 0.87754645 1.08850098]
####################
Pred:   [0.63669389 1.63063589 6.99225436 0.84342272 1.27297002]
Target: [0.79818793 2.04387207 8.43444824 0.87972291 1.40100098]
####################
Pred:   [0.65375689 1.67372512 3.99266853 0.95113289 1.26324566]
Target: [0.58339789 1.49387207 3.68444824 0.94983387 1.15100098]
####################
Pred:   [0.45887914 1.17427996 6.13316159 0.9

# Regressor chain

In [47]:
# from sklearn import linear_model
# reg = linear_model.LassoLars(alpha=0.000001, normalize=True)

# classifier = RegressorChain(reg)
# classifier.fit(X_train_transform, y_train)

# # ~0.6 - 3 parameters

# # from sklearn import linear_model
# # reg = linear_model.LassoLarsIC(criterion='bic', normalize=True)

# # from sklearn.linear_model import LassoLarsCV
# # reg = LassoLarsCV(cv=3, normalize=True)

# # from sklearn.linear_model import ElasticNet
# # reg = ElasticNet(alpha=0.1, l1_ratio=0.1, normalize=False, random_state=0)

# # from sklearn import linear_model
# # reg = linear_model.TweedieRegressor(alpha=0.1, max_iter=1500, link='identity')
# # # 0.37 - 4 parameters alpha=0.1, max_iter=1500, link='identity'

# # from sklearn.linear_model import RANSACRegressor
# # reg = RANSACRegressor(random_state=0, min_samples=0.99)

# # scaler = preprocessing.StandardScaler().fit(X_train_twe)
# # X_train_scaled = scaler.transform(X_train_twe)

In [61]:
# Standardize the transformed features
scaler = StandardScaler()
X_train_transform = scaler.fit_transform(X_train_transform)
X_test_transform = scaler.transform(X_test_transform)

# Create the ElasticNet regressor
regressor = LassoLars(normalize=False)

# Define the parameter grid for grid search
param_grid = {
    'alpha': [0.0001, 0.001, 0.01],
}

# Perform grid search
grid_search = GridSearchCV(regressor, param_grid)
grid_search.fit(X_train_transform, y_train)

# Print the best hyperparameters and the corresponding score
print("Best Hyperparameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

TypeError: __init__() got an unexpected keyword argument 'cv'

In [59]:
# Get the best model and its hyperparameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

In [62]:
# Predict on the test set using the best model
print('Score: ',best_model.score(X_test_transform, y_test))

y_pred = best_model.predict(X_test_transform)
for n in range(10):
    print('Pred:  ',y_pred[n])
    print('Target:',y_test[n])
    print('####################')

Score:  0.38799216256813496
Pred:   [0.63551303 1.62452057 5.18472913 0.91506165 1.14763224]
Target: [0.6370954  1.63137207 4.87194824 0.78807069 1.46350098]
####################
Pred:   [0.52098488 1.334238   9.68792463 0.85784709 1.22603772]
Target: [0.42230536 1.08137207 9.62194824 0.75797397 1.21350098]
####################
Pred:   [0.56114678 1.4411608  2.64117828 0.91599778 1.26608042]
Target: [0.52970038 1.35637207 2.49694824 0.94215048 1.33850098]
####################
Pred:   [0.69268629 1.77424942 6.52689878 0.7194257  1.28423508]
Target: [0.74449042 1.90637207 7.24694824 0.87754645 1.08850098]
####################
Pred:   [0.63669389 1.63063589 6.99225436 0.84342272 1.27297002]
Target: [0.79818793 2.04387207 8.43444824 0.87972291 1.40100098]
####################
Pred:   [0.65375689 1.67372512 3.99266853 0.95113289 1.26324566]
Target: [0.58339789 1.49387207 3.68444824 0.94983387 1.15100098]
####################
Pred:   [0.45887914 1.17427996 6.13316159 0.9191061  1.25364973]
T