## Extra Trees Hyperparameter Tuning

In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
from sklearn.metrics import mean_absolute_error, make_scorer
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, PowerTransformer
import optuna

In [3]:
df = pd.read_csv('spot_30k_clean.csv', index_col=0)
df.head()

Unnamed: 0,y,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,key_B,key_C,key_C#,key_D,key_D#,key_E,key_F,key_F#,key_G,key_G#
0,66,0.748,0.916,-2.634,0.0583,0.102,0.0,0.0653,0.518,122.036,...,0,0,0,0,0,0,0,1,0,0
1,67,0.726,0.815,-4.969,0.0373,0.0724,0.00421,0.357,0.693,99.972,...,1,0,0,0,0,0,0,0,0,0
2,70,0.675,0.931,-3.432,0.0742,0.0794,2.3e-05,0.11,0.613,124.008,...,0,0,1,0,0,0,0,0,0,0
3,60,0.718,0.93,-3.778,0.102,0.0287,9e-06,0.204,0.277,121.956,...,0,0,0,0,0,0,0,0,1,0
4,69,0.65,0.833,-4.672,0.0359,0.0803,0.0,0.0833,0.725,123.976,...,0,0,1,0,0,0,0,0,0,0


In [4]:
df_num = df[['duration', 'loudness', 'energy', 'tempo', 'instrumentalness', 
             'speechiness', 'danceability', 'valence', 'acousticness', 'liveness']]

X_train, X_test, y_train, y_test = train_test_split(df_num, df.y, test_size=0.2, random_state=42)

In [9]:
#More robust hyperparameter tuning, without max_features
def objective(trial):

    n_estimators = trial.suggest_int('n_estimators', 100, 2000)
    max_depth = trial.suggest_int('max_depth', 2, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 32)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 32)

    model = ExtraTreesRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42)

    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mae = make_scorer(mean_absolute_error)
    scores = cross_val_score(model, X_train, y_train, scoring=mae, cv=kf)
    return np.min([np.mean(scores), np.median([scores])])

In [10]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100, show_progress_bar=True)

[I 2024-04-03 18:28:53,786] A new study created in memory with name: no-name-2dcdf1d3-adf3-41ba-a60e-330435d577d0


  0%|          | 0/100 [00:00<?, ?it/s]

[I 2024-04-03 18:30:28,944] Trial 0 finished with value: 18.493383618147874 and parameters: {'n_estimators': 723, 'max_depth': 19, 'min_samples_split': 9, 'min_samples_leaf': 5}. Best is trial 0 with value: 18.493383618147874.
[I 2024-04-03 18:33:00,408] Trial 1 finished with value: 18.902204932140446 and parameters: {'n_estimators': 1386, 'max_depth': 50, 'min_samples_split': 24, 'min_samples_leaf': 10}. Best is trial 0 with value: 18.493383618147874.
[I 2024-04-03 18:33:53,913] Trial 2 finished with value: 19.181882218429713 and parameters: {'n_estimators': 556, 'max_depth': 21, 'min_samples_split': 19, 'min_samples_leaf': 15}. Best is trial 0 with value: 18.493383618147874.
[I 2024-04-03 18:35:27,255] Trial 3 finished with value: 18.53995182115078 and parameters: {'n_estimators': 714, 'max_depth': 40, 'min_samples_split': 6, 'min_samples_leaf': 7}. Best is trial 0 with value: 18.493383618147874.
[I 2024-04-03 18:37:54,675] Trial 4 finished with value: 19.4866095704635 and parameters

In [11]:
print("Best trial:", study.best_trial)
print("Best hyperparameters:", study.best_params)

Best trial: FrozenTrial(number=83, state=1, values=[16.082216266678287], datetime_start=datetime.datetime(2024, 4, 3, 21, 47, 10, 264030), datetime_complete=datetime.datetime(2024, 4, 3, 22, 25, 36, 642100), params={'n_estimators': 1458, 'max_depth': 46, 'min_samples_split': 3, 'min_samples_leaf': 1}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=2000, log=False, low=100, step=1), 'max_depth': IntDistribution(high=50, log=False, low=2, step=1), 'min_samples_split': IntDistribution(high=32, log=False, low=2, step=1), 'min_samples_leaf': IntDistribution(high=32, log=False, low=1, step=1)}, trial_id=83, value=None)
Best hyperparameters: {'n_estimators': 1458, 'max_depth': 46, 'min_samples_split': 3, 'min_samples_leaf': 1}


In [5]:
et = ExtraTreesRegressor(random_state=42)

param_grid = {'n_estimators': [100, 1458], 'max_depth': [46, None], 'min_samples_split': [2, 3]}

gs = GridSearchCV(estimator=et, param_grid=param_grid, scoring='neg_mean_absolute_error', 
                  cv=5, verbose=3, n_jobs=-1)

gs = gs.fit(X_train, y_train)

Fitting 5 folds for each of 8 candidates, totalling 40 fits


In [6]:
print(-gs.best_score_)
print(gs.best_estimator_)
print(gs.best_params_)

15.891349867180603
ExtraTreesRegressor(max_depth=46, n_estimators=1458, random_state=42)
{'max_depth': 46, 'min_samples_split': 2, 'n_estimators': 1458}


In [11]:
#Best model
et = ExtraTreesRegressor(n_estimators=1458, max_depth=46, random_state=42, n_jobs=-1)

kf = KFold(n_splits=5, shuffle=True, random_state=42)
mae = make_scorer(mean_absolute_error)
scores = cross_val_score(et, X_train, y_train, scoring=mae, cv=kf, n_jobs=-1)
np.mean(scores)

15.95612030265151

## Feature Scaling

In [12]:
def scale_and_cv(model):
    
    scalers = [StandardScaler(), MinMaxScaler(), RobustScaler(), PowerTransformer()]
    
    for i in range(4):
        scalers[i].fit(X_train)
        X_train_scaled = scalers[i].transform(X_train)
    
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        mae = make_scorer(mean_absolute_error)
        scores = cross_val_score(model, X_train_scaled, y_train, scoring=mae, cv=kf, n_jobs=-1)
        print(scalers[i], np.min([np.mean(scores), np.median([scores])]))

In [13]:
scale_and_cv(et)

StandardScaler() 15.939587547497377
MinMaxScaler() 15.939128305259553
RobustScaler() 15.933172708598457
PowerTransformer() 15.933076582300538
