In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from train import ShallowModel
from scipy.stats import uniform, randint, loguniform

# Experiment configuration handed to ShallowModel below.
# NOTE(review): `type` shadows the Python builtin of the same name for the rest
# of the kernel session; consider renaming it (e.g. `model_type`) once it is
# confirmed that no other cell reads `type`.
type = "pxp"
data = "LOOE" # "LOOE" selects the leave-one-out-encoded variant of the data

# Build the trainer used by every later cell; the recorded output of this cell
# shows the train/val/test split shapes being printed at this point.
ShallowTrainer = ShallowModel(type, data)

Train Shape: (141635, 7) | Val Shape: (35409, 7) | Test Shape: (44262, 7)


## Parameter grids for shallow models

In [2]:
# Hyperparameter search spaces for the shallow models.
# Each dict maps a hyperparameter name to its candidate values; the dicts are
# passed unchanged to ShallowTrainer.tune in the tuning cells.


def _tree_search_space():
    """Return a fresh copy of the tree-growth grid shared by DT, RF and GBDT.

    A new dict (with its own list objects) is built on every call so the three
    model grids never share mutable state.
    """
    return {
        'max_depth': range(1, 50, 5),          # 1, 6, 11, ..., 46
        'min_samples_leaf': range(1, 4),       # 1, 2, 3
        'min_samples_split': range(2, 10, 2),  # 2, 4, 6, 8
        'max_features': ['sqrt', 'log2'],
    }


# KNN: odd neighbourhood sizes 1..49, two weighting schemes, two metrics.
knn_params = dict(
    n_neighbors=range(1, 50, 2),
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Ridge: candidate regularisation strengths.
# NOTE(review): alpha=0 presumably disables the penalty entirely - confirm this
# is intended for the underlying Ridge implementation.
ridge_params = dict(alpha=[0, 0.001, 0.01, 0.1, 1, 10, 100])

# Decision tree: the shared tree-growth grid only.
dt_params = _tree_search_space()

# Random forest: ensemble size (50, 100, 150) followed by the tree-growth grid.
rf_params = dict(n_estimators=range(50, 200, 50), **_tree_search_space())

# GBDT: learning rate and ensemble size followed by the tree-growth grid.
# (The variable keeps its `dbdt_params` spelling because the GBDT tuning cell
# refers to it by that name.)
dbdt_params = dict(
    learning_rate=[0.01, 0.1, 1],
    n_estimators=range(50, 200, 50),
    **_tree_search_space(),
)

In [3]:
# Tune KNN over knn_params, then show the entry stored under "KNN" in
# ShallowTrainer.models as the cell output.
# The third argument (30) is presumably the number of configurations to try -
# confirm against ShallowModel.tune.
ShallowTrainer.tune("KNN", knn_params, 30)
ShallowTrainer.models["KNN"]

Training KNN with {'n_neighbors': 13, 'weights': 'distance', 'metric': 'manhattan'}...
Training KNN with {'n_neighbors': 45, 'weights': 'uniform', 'metric': 'euclidean'}...
Training KNN with {'n_neighbors': 5, 'weights': 'uniform', 'metric': 'manhattan'}...
Training KNN with {'n_neighbors': 27, 'weights': 'uniform', 'metric': 'manhattan'}...
Training KNN with {'n_neighbors': 11, 'weights': 'uniform', 'metric': 'euclidean'}...
Training KNN with {'n_neighbors': 49, 'weights': 'uniform', 'metric': 'manhattan'}...
Training KNN with {'n_neighbors': 29, 'weights': 'distance', 'metric': 'manhattan'}...
Training KNN with {'n_neighbors': 19, 'weights': 'uniform', 'metric': 'euclidean'}...
Training KNN with {'n_neighbors': 15, 'weights': 'uniform', 'metric': 'euclidean'}...
Training KNN with {'n_neighbors': 43, 'weights': 'uniform', 'metric': 'euclidean'}...
Training KNN with {'n_neighbors': 21, 'weights': 'distance', 'metric': 'manhattan'}...
Training KNN with {'n_neighbors': 31, 'weights': 'un

In [4]:
# Tune Ridge over ridge_params, then show the entry stored under "Ridge" in
# ShallowTrainer.models as the cell output.
# The grid holds only 7 alpha values, and the recorded output shows 7 training
# runs even though 30 is passed as the third argument.
# NOTE(review): the recorded best alpha (100) is the largest value in the grid;
# consider extending the grid upward before trusting this optimum.
ShallowTrainer.tune("Ridge", ridge_params, 30)
ShallowTrainer.models["Ridge"]

Training Ridge with {'alpha': 100.0}...
Training Ridge with {'alpha': 0.001}...
Training Ridge with {'alpha': 0.1}...
Training Ridge with {'alpha': 0.0}...
Training Ridge with {'alpha': 10.0}...
Training Ridge with {'alpha': 0.01}...
Training Ridge with {'alpha': 1.0}...
Best parameters: {'alpha': 100.0}, MSE: 2.525546562428131


In [5]:
# Tune the decision tree ("DT") over dt_params, then show the entry stored
# under "DT" in ShallowTrainer.models as the cell output.
# The third argument (30) is presumably the number of configurations to try -
# confirm against ShallowModel.tune.
ShallowTrainer.tune("DT", dt_params, 30)
ShallowTrainer.models["DT"]

Training DT with {'max_depth': 21, 'min_samples_leaf': 2, 'min_samples_split': 8, 'max_features': 'log2'}...
Training DT with {'max_depth': 46, 'min_samples_leaf': 2, 'min_samples_split': 4, 'max_features': 'log2'}...
Training DT with {'max_depth': 26, 'min_samples_leaf': 1, 'min_samples_split': 6, 'max_features': 'sqrt'}...
Training DT with {'max_depth': 26, 'min_samples_leaf': 3, 'min_samples_split': 8, 'max_features': 'sqrt'}...
Training DT with {'max_depth': 1, 'min_samples_leaf': 3, 'min_samples_split': 6, 'max_features': 'sqrt'}...
Training DT with {'max_depth': 11, 'min_samples_leaf': 1, 'min_samples_split': 6, 'max_features': 'log2'}...
Training DT with {'max_depth': 6, 'min_samples_leaf': 1, 'min_samples_split': 8, 'max_features': 'sqrt'}...
Training DT with {'max_depth': 31, 'min_samples_leaf': 1, 'min_samples_split': 4, 'max_features': 'log2'}...
Training DT with {'max_depth': 11, 'min_samples_leaf': 1, 'min_samples_split': 2, 'max_features': 'log2'}...
Training DT with {'ma

In [6]:
# Tune the random forest ("RF") over rf_params, then show the entry stored
# under "RF" in ShallowTrainer.models as the cell output.
# The third argument (30) is presumably the number of configurations to try -
# confirm against ShallowModel.tune.
ShallowTrainer.tune("RF", rf_params, 30)
ShallowTrainer.models["RF"]

Training RF with {'n_estimators': 100, 'max_depth': 26, 'min_samples_leaf': 1, 'min_samples_split': 2, 'max_features': 'log2'}...
Training RF with {'n_estimators': 50, 'max_depth': 1, 'min_samples_leaf': 2, 'min_samples_split': 2, 'max_features': 'sqrt'}...
Training RF with {'n_estimators': 50, 'max_depth': 41, 'min_samples_leaf': 1, 'min_samples_split': 4, 'max_features': 'log2'}...
Training RF with {'n_estimators': 150, 'max_depth': 21, 'min_samples_leaf': 2, 'min_samples_split': 4, 'max_features': 'sqrt'}...
Training RF with {'n_estimators': 150, 'max_depth': 26, 'min_samples_leaf': 1, 'min_samples_split': 2, 'max_features': 'log2'}...
Training RF with {'n_estimators': 150, 'max_depth': 36, 'min_samples_leaf': 2, 'min_samples_split': 2, 'max_features': 'sqrt'}...
Training RF with {'n_estimators': 50, 'max_depth': 1, 'min_samples_leaf': 2, 'min_samples_split': 2, 'max_features': 'log2'}...
Training RF with {'n_estimators': 150, 'max_depth': 16, 'min_samples_leaf': 1, 'min_samples_spl

In [7]:
# Tune GBDT over dbdt_params (the grid variable is spelled `dbdt_params`), then
# show the entry stored under "GBDT" in ShallowTrainer.models as the cell output.
# The third argument (30) is presumably the number of configurations to try -
# confirm against ShallowModel.tune.
ShallowTrainer.tune("GBDT", dbdt_params, 30)
ShallowTrainer.models["GBDT"]

Training GBDT with {'learning_rate': 0.01, 'n_estimators': 50, 'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 8, 'max_features': 'sqrt'}...
Training GBDT with {'learning_rate': 0.1, 'n_estimators': 100, 'max_depth': 36, 'min_samples_leaf': 1, 'min_samples_split': 4, 'max_features': 'log2'}...
Training GBDT with {'learning_rate': 1.0, 'n_estimators': 50, 'max_depth': 31, 'min_samples_leaf': 2, 'min_samples_split': 6, 'max_features': 'log2'}...
Training GBDT with {'learning_rate': 0.01, 'n_estimators': 50, 'max_depth': 1, 'min_samples_leaf': 2, 'min_samples_split': 4, 'max_features': 'log2'}...
Training GBDT with {'learning_rate': 1.0, 'n_estimators': 150, 'max_depth': 1, 'min_samples_leaf': 3, 'min_samples_split': 2, 'max_features': 'sqrt'}...
Training GBDT with {'learning_rate': 0.1, 'n_estimators': 100, 'max_depth': 11, 'min_samples_leaf': 1, 'min_samples_split': 6, 'max_features': 'log2'}...
Training GBDT with {'learning_rate': 1.0, 'n_estimators': 100, 'max_depth': 26, 

In [8]:
# Final evaluation: run the trainer in "test" mode with iter=10. The recorded
# output shows each of the five models being trained 10 times, followed by a
# summary table of MSE and time (mean / std) per model.
ShallowTrainer.train("test", iter=10)

Training KNN in test mode for 10 times...
Training Ridge in test mode for 10 times...
Training DT in test mode for 10 times...
Training RF in test mode for 10 times...
Training GBDT in test mode for 10 times...


Unnamed: 0,MSE mean,MSE std,Time mean (s),Time std (s)
KNN,1.151529,0.017826,0.025692,0.000666
Ridge,2.514394,0.000197,0.003847,0.002202
DT,0.847804,0.070424,0.253882,0.018717
RF,0.302121,0.004075,16.510942,0.060273
GBDT,0.219009,0.007187,32.847151,0.388366
