In [24]:
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

# Shared random seed: passed as `random_state` to the DataFrame shuffles,
# the ExtraTreesRegressor, and the KFold splitter below so runs are repeatable.
seed = 42

In [37]:
def _read_shuffled(pickle_path):
    """Read a pickled frame and return all of its rows in seeded random order."""
    # NOTE(review): unpickling can execute arbitrary code -- only load these
    # files if they come from a trusted source.
    frame = pd.read_pickle(pickle_path)
    return frame.sample(frac=1, random_state=seed)


# Each source (BP, SC) is shuffled on its own before the two are stacked.
BP_train = _read_shuffled("dataset/BP_train_solution_modified.pkl")
BP_test = _read_shuffled("dataset/BP_test_solution.pkl")
SC_train = _read_shuffled("dataset/SC_train_solution_modified.pkl")
SC_test = _read_shuffled("dataset/SC_test_solution.pkl")

# Stack BP on top of SC; ignore_index=True renumbers the rows from 0.
# The stacked frames are not reshuffled, so all BP rows precede all SC rows.
train = pd.concat([BP_train, SC_train], ignore_index=True)
test = pd.concat([BP_test, SC_test], ignore_index=True)

# 'score' is the regression target; every other column is used as a feature.
X_train, Y_train = train.drop(columns=['score']), train['score']
X_test, Y_test = test.drop(columns=['score']), test['score']

In [None]:
# Hyperparameter grid for the ExtraTrees search: 4 * 4 * 3 = 48 combinations,
# each fitted once per CV fold.
params = {
    "n_estimators": [10, 20, 50, 100],
    "min_samples_leaf": [5, 10, 20, 30],
    "max_depth": [5, 10, 20],
}

et = ExtraTreesRegressor(random_state=seed)

# `train` is the BP rows stacked on top of the SC rows and is never reshuffled
# after the concat. GridSearchCV's default splitter for a regressor is an
# unshuffled 5-fold KFold, which would cut that frame into contiguous,
# source-segregated folds. Use an explicit shuffled, seeded splitter instead
# (same settings as the KFold used for cross_val_score later in the notebook).
search_cv = KFold(n_splits=5, shuffle=True, random_state=seed)

# scoring="r2" matches the regressor's default score but makes the metric
# explicit; n_jobs=-1 runs the 48 * 5 fits on all available cores.
gs = GridSearchCV(et, params, scoring="r2", cv=search_cv, n_jobs=-1)
gs.fit(X_train, Y_train)

# Report the selected configuration and its mean cross-validated score
# (printing sorted(gs.cv_results_) only lists the result dict's key names).
print("Best parameters:", gs.best_params_)
print("Best mean CV R^2:", gs.best_score_)

In [None]:
# 5-fold cross-validated R^2 of `et` on the training data, using shuffled,
# seeded folds.
# NOTE(review): `et` is scored as it was constructed (only random_state set),
# not with the parameters chosen by the grid search -- confirm that a
# default-parameter baseline is what is intended here.
kf = KFold(n_splits=5, shuffle=True, random_state=seed)
scores = cross_val_score(
    estimator=et,
    X=X_train,
    y=Y_train,
    scoring='r2',
    cv=kf,
)
print("Cross-validation scores:", scores)