In [5]:
# Code from https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingRegressor.html
# and https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingClassifier.html

import pandas as pd
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV, HalvingRandomSearchCV, GridSearchCV, RandomizedSearchCV
from sklearn.decomposition import PCA, KernelPCA, FastICA
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from enum import Enum

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import RidgeCV, LogisticRegression
from sklearn.svm import LinearSVR, LinearSVC
from sklearn.ensemble import StackingRegressor, StackingClassifier

# Learning task selector
class ML_TASK(Enum):
    """Kind of learning problem the notebook is configured for.

    The visible cells build a model only for CLASSIFICATION and REGRESSION;
    CLUSTERING is declared here but has no model branch in those cells.
    """
    CLASSIFICATION, REGRESSION, CLUSTERING = 1, 2, 3

class HYPERPARAM_SEARCH(Enum):
    """Hyper-parameter search strategy used to tune the pipeline.

    Each member selects one of the scikit-learn search classes imported at
    the top of the notebook (randomized, exhaustive grid, or their
    successive-halving variants).
    """
    RANDOM, GRID, HALVING_RANDOM, HALVING_GRID = 1, 2, 3, 4

class DIM_REDUCTION_METHOD(Enum):
    """Decomposition applied to the features before the model is fitted.

    PCA, KERNEL_PCA and ICA select sklearn's PCA, KernelPCA and FastICA
    respectively.
    """
    PCA, KERNEL_PCA, ICA = 1, 2, 3

# Params
# Every switch below is a member of one of the Enum classes defined above
# (they are no longer True/False flags).
algo_name = 'Stacking'  # Becomes part of the output CSV filename
ml_task = ML_TASK.REGRESSION  # CLASSIFICATION maps each distinct target value to a class index
hyperparam_search_method = HYPERPARAM_SEARCH.GRID  # Which scikit-learn search class tunes the pipeline
dim_reduction_method = DIM_REDUCTION_METHOD.PCA  # Which decomposition runs before the model
kfold_num = 10  # Number of cross-validation folds handed to the search as `cv` (must be >= 2)

# Read
X_train = pd.read_csv('../pc_X_train.csv')
# The target is taken from the last column of the label file.
y_train = pd.read_csv('../pc_y_train.csv').iloc[:, -1]

X_test = pd.read_csv('../pc_X_test.csv')
# First column of the test file is kept to label the rows of the submission.
ids = X_test.iloc[:, 0]

# If classification -> map every distinct target value to an integer class index
if ml_task == ML_TASK.CLASSIFICATION:
    # Sort the distinct values so the value <-> index mapping is deterministic
    # and preserves the natural ordering of the targets (a bare set has
    # arbitrary iteration order). `possible_numbers[idx]` inverts the mapping.
    possible_numbers = sorted(set(y_train))
    mapping = {val: idx for idx, val in enumerate(possible_numbers)}
    y_train = [mapping[val] for val in y_train]


In [6]:
# Normalize Trainset
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X_train)

# PCA
dim_reduction = None
match dim_reduction_method:
    case DIM_REDUCTION_METHOD.PCA:
        dim_reduction = PCA()
    case DIM_REDUCTION_METHOD.KERNEL_PCA:
        dim_reduction = KernelPCA()
    case DIM_REDUCTION_METHOD.ICA:
        dim_reduction = FastICA()

# If classification
model = None
scoring_method = None

if ml_task == ML_TASK.CLASSIFICATION:
    estimators = [('rf', RandomForestClassifier(n_estimators=20, random_state=42)),
                  ('svr', LinearSVC(dual="auto", random_state=42))]
    model = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
    scoring_method = 'f1_weighted'
    # raise ValueError("Logistic Regression is a regression algorithm. Please use ML_TASK.REGRESSION")
elif ml_task == ML_TASK.REGRESSION:
    estimators = [
     ('lr', RidgeCV()),
     ('svr', LinearSVR(dual="auto", random_state=42))]
    model = StackingRegressor(estimators=estimators,final_estimator=RandomForestRegressor(n_estimators=20,random_state=42))
    scoring_method = 'neg_root_mean_squared_error'
    # raise ValueError("OneVsRest is a classification algorithm. Please use ML_TASK.CLASSIFICATION")

# Pipeline
steps = []
steps.append(('dim_reduction', dim_reduction))
steps.append(('model', model))

pipe = Pipeline(steps=steps)

# parameters = {'dim_reduction__
# n_components': [1,2,4,8,16], 
#               'model__criterion': ['gini', 'entropy', 'log_loss'], 
#               'model__splitter': ['best', 'random'],
#               'model__min_samples_split': [1,2,4,8,16,32],
#               'model__max_depth': [1,2,3,4,5,6,7,8,16,32,64]}

parameters = {'dim_reduction__n_components': [3,8,16,32,40,50,64]}

# Grid Search
grid_params_list = [pipe, parameters] 
grid_params_dict = {'cv': kfold_num, 'scoring': scoring_method, 'verbose':2, 'n_jobs':6}

grid = None
match hyperparam_search_method:
    case HYPERPARAM_SEARCH.GRID:
        grid = GridSearchCV(*grid_params_list, **grid_params_dict)
    case HYPERPARAM_SEARCH.RANDOM:
        grid = RandomizedSearchCV(*grid_params_list, **grid_params_dict)
    case HYPERPARAM_SEARCH.HALVING_GRID:
        grid = HalvingGridSearchCV(*grid_params_list, **grid_params_dict)
    case HYPERPARAM_SEARCH.HALVING_RANDOM:
        grid = HalvingRandomSearchCV(*grid_params_list, **grid_params_dict)

grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)
print("Best Score:", grid.best_score_)

model = grid.best_estimator_

# Normalize Testset
X_normalized_test = scaler.fit_transform(X_test)
X_reduced_test = dim_reduction.fit_transform(X_normalized_test)

# Predict
predictions = model.predict(X_reduced_test)

# If classification -> Transform back to Regression Task
if ml_task == ML_TASK.CLASSIFICATION:
    predictions = [possible_numbers[val] for val in predictions]

# Save 
output_filename = f'bork_GYCAOB_{algo_name}_C.csv' if ml_task == ML_TASK.CLASSIFICATION else f'bork_GYCAOB_{algo_name}_R.csv'
results_df = pd.DataFrame({'id': ids,'score': predictions})
results_df.to_csv(output_filename, index=False)

Fitting 10 folds for each of 7 candidates, totalling 70 fits
[CV] END ......................dim_reduction__n_components=3; total time=   0.2s
[CV] END ......................dim_reduction__n_components=3; total time=   0.2s
[CV] END ......................dim_reduction__n_components=3; total time=   0.2s
[CV] END ......................dim_reduction__n_components=3; total time=   0.2s




[CV] END ......................dim_reduction__n_components=3; total time=   0.2s
[CV] END ......................dim_reduction__n_components=3; total time=   0.3s
[CV] END ......................dim_reduction__n_components=3; total time=   0.2s
[CV] END ......................dim_reduction__n_components=3; total time=   0.2s
[CV] END ......................dim_reduction__n_components=3; total time=   0.2s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s




[CV] END ......................dim_reduction__n_components=3; total time=   0.3s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s
[CV] END ......................dim_reduction__n_components=8; total time=   0.3s




[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s
[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s
[CV] END ......................dim_reduction__n_components=8; total time=   0.2s




[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END .....................dim_reduction__n_components=16; total time=   0.3s




[CV] END .....................dim_reduction__n_components=32; total time=   0.4s
[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END .....................dim_reduction__n_components=16; total time=   0.3s
[CV] END .....................dim_reduction__n_components=32; total time=   0.4s




[CV] END .....................dim_reduction__n_components=32; total time=   0.5s
[CV] END .....................dim_reduction__n_components=32; total time=   0.4s
[CV] END .....................dim_reduction__n_components=32; total time=   0.4s
[CV] END .....................dim_reduction__n_components=32; total time=   0.4s




[CV] END .....................dim_reduction__n_components=32; total time=   0.4s
[CV] END .....................dim_reduction__n_components=40; total time=   0.5s
[CV] END .....................dim_reduction__n_components=32; total time=   0.4s




[CV] END .....................dim_reduction__n_components=32; total time=   0.4s
[CV] END .....................dim_reduction__n_components=40; total time=   0.5s
[CV] END .....................dim_reduction__n_components=32; total time=   0.4s




[CV] END .....................dim_reduction__n_components=40; total time=   0.5s
[CV] END .....................dim_reduction__n_components=40; total time=   0.6s
[CV] END .....................dim_reduction__n_components=40; total time=   0.5s




[CV] END .....................dim_reduction__n_components=40; total time=   0.5s
[CV] END .....................dim_reduction__n_components=40; total time=   0.5s
[CV] END .....................dim_reduction__n_components=50; total time=   0.5s




[CV] END .....................dim_reduction__n_components=40; total time=   0.5s
[CV] END .....................dim_reduction__n_components=50; total time=   0.5s
[CV] END .....................dim_reduction__n_components=40; total time=   0.5s
[CV] END .....................dim_reduction__n_components=40; total time=   0.5s




[CV] END .....................dim_reduction__n_components=50; total time=   0.5s
[CV] END .....................dim_reduction__n_components=50; total time=   0.6s




[CV] END .....................dim_reduction__n_components=50; total time=   0.5s
[CV] END .....................dim_reduction__n_components=50; total time=   0.5s
[CV] END .....................dim_reduction__n_components=50; total time=   0.5s




[CV] END .....................dim_reduction__n_components=64; total time=   0.6s
[CV] END .....................dim_reduction__n_components=50; total time=   0.5s
[CV] END .....................dim_reduction__n_components=64; total time=   0.6s




[CV] END .....................dim_reduction__n_components=50; total time=   0.5s
[CV] END .....................dim_reduction__n_components=50; total time=   0.5s
[CV] END .....................dim_reduction__n_components=64; total time=   0.6s




[CV] END .....................dim_reduction__n_components=64; total time=   0.7s
[CV] END .....................dim_reduction__n_components=64; total time=   0.7s
[CV] END .....................dim_reduction__n_components=64; total time=   0.8s




[CV] END .....................dim_reduction__n_components=64; total time=   0.6s
[CV] END .....................dim_reduction__n_components=64; total time=   0.6s
[CV] END .....................dim_reduction__n_components=64; total time=   0.6s




[CV] END .....................dim_reduction__n_components=64; total time=   0.6s




Best Parameters: {'dim_reduction__n_components': 64}
Best Score: -1.2077053252887784


