In [1]:
import os
import sys

sys.path.append('../')

In [2]:
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge

from xgboost import XGBRegressor

from config.regressors import VotingRegressor, StackingRegressor, NNRegressor

from config.models import ConvNN

from config.loss_functions import RMSELoss

import pyriemann
import pyriemann.regression

from config.transformers import TimeDomainTransformer, TimeWindowTransformer, LabelWindowExtractor
from config.validation import RMSE, NMSE, cross_validate_pipeline, cross_validate_NN

# Models

### Baseline models

In [3]:
def _time_domain_pipeline(regressor):
    """Build a time-domain pipeline that ends in `regressor`.

    Steps, in order: `TimeDomainTransformer(sigma_mpr=0.3)`, `StandardScaler()`,
    then the given regressor. Every call creates fresh transformer and scaler
    objects, so the returned pipelines do not share any step instance.

    Parameters
    ----------
    regressor : estimator
        Final estimator placed in the 'regressor' step.

    Returns
    -------
    sklearn.pipeline.Pipeline
    """
    return Pipeline(
        [
            ('feature_extraction', TimeDomainTransformer(sigma_mpr=0.3)),
            ('scaler', StandardScaler()),
            ('regressor', regressor),
        ]
    )


baseline_guided_kr = _time_domain_pipeline(
    KernelRidge(alpha=0.01, gamma=0.01, kernel='laplacian')
)

baseline_guided_knn = _time_domain_pipeline(
    KNeighborsRegressor(n_neighbors=5)
)

# Fixed random_state: the forest is randomized, so without a seed every
# re-run of the notebook would fit a slightly different model.
baseline_guided_rf = _time_domain_pipeline(
    RandomForestRegressor(n_estimators=50, max_depth=10, random_state=42)
)

timedomain_xgboost = _time_domain_pipeline(
    XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        objective='reg:squarederror',
        n_jobs=-1,
        verbosity=1,
    )
)

### Riemannian models

In [4]:
# Riemannian geometry of covariance matrices
def _riemannian_pipeline(regressor):
    """Build a covariance / tangent-space pipeline that ends in `regressor`.

    Steps, in order: `pyriemann.estimation.Covariances()`,
    `pyriemann.tangentspace.TangentSpace(metric='riemann', tsupdate=True)`,
    `StandardScaler()`, then the given regressor. Every call creates fresh
    step objects, so the returned pipelines do not share any step instance.

    Parameters
    ----------
    regressor : estimator
        Final estimator placed in the 'regressor' step.

    Returns
    -------
    sklearn.pipeline.Pipeline
    """
    return Pipeline(
        [
            ('feature_extraction', pyriemann.estimation.Covariances()),
            ('transformation', pyriemann.tangentspace.TangentSpace(
                metric='riemann',
                tsupdate=True)),
            ('scaler', StandardScaler()),
            ('regressor', regressor),
        ]
    )


riem1 = _riemannian_pipeline(
    KernelRidge(alpha=0.01, gamma=0.01, kernel='laplacian')
)

riem2 = _riemannian_pipeline(
    KNeighborsRegressor(n_neighbors=5)
)

# Fixed random_state: the forest is randomized, so without a seed every
# re-run of the notebook would fit a slightly different model.
riem3 = _riemannian_pipeline(
    RandomForestRegressor(n_estimators=50, max_depth=10, random_state=42)
)

riem4 = _riemannian_pipeline(
    XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        objective='reg:squarederror',
        n_jobs=-1,
        verbosity=1,
    )
)

### Ensemble models

In [5]:
# Base pipelines combined by both ensembles. Declared once so the two
# ensembles cannot drift apart; each ensemble receives its own list copy
# (the pipeline objects themselves are the same instances, as before).
ensemble_members = [
    baseline_guided_kr,
    baseline_guided_knn,
    baseline_guided_rf,
    riem1,
    riem2,
    riem3,
]

voting_estimator = VotingRegressor(
    estimators=list(ensemble_members)
)

stacking_estimator = StackingRegressor(
    estimators=list(ensemble_members),
    # Fixed random_state so the final random forest is reproducible
    # across notebook re-runs.
    end_estimator=RandomForestRegressor(
        n_estimators=50,
        max_depth=10,
        random_state=42)
)

# Final generalization evaluation

In [None]:
# Root folder of the datasets. Set the PROJECT_DATA_PATH environment variable
# to use another location (for example
# C:\Users\gianm\Documents\Uni\Big Data\F422\project\data) instead of editing
# this cell. Joining with '' guarantees a trailing separator, which the
# `PATH + f'{DATASET}/...'` concatenations in the cells below rely on.
PATH = os.path.join(
    os.environ.get('PROJECT_DATA_PATH', '/Users/marco/PROJECTS/data/'),
    ''
)

# Stacked ensemble evaluated in this section. baseline_guided_knn, riem1 and
# riem2 are left out of this configuration.
model = StackingRegressor(
    estimators=[
        baseline_guided_kr,
        baseline_guided_rf,
        riem3,
    ],
    # Fixed random_state so the final random forest is reproducible
    # across notebook re-runs.
    end_estimator=RandomForestRegressor(
        n_estimators=50,
        max_depth=10,
        random_state=42)
)

# Keep in sync with the estimator list above.
model_name = 'stacking_kr_rf_riem3'

step = 250 # step used for testing

# Metric name -> scoring function, as passed to the cross-validation helper.
metric_fns = {'RMSE': RMSE, 'NMSE': NMSE}

In [7]:
# Dataset used for the cross-validation run.
DATASET = 'guided'

# Inputs: load the array and window it (size 500, stride `step`).
tw_extractor = TimeWindowTransformer(size=500, step=step)
X = np.load(PATH + f'{DATASET}/{DATASET}_dataset_X.npy')
X_windows = tw_extractor.transform(X)

# Targets: extracted with the same size/step settings as the input windows.
label_extractor = LabelWindowExtractor(size=500, step=step)
Y = np.load(PATH + f'{DATASET}/{DATASET}_dataset_Y.npy')
Y_labels = label_extractor.transform(Y)

#### 5-fold cross-validation guided

In [8]:
# 5-fold cross-validation of `model` on the windowed data, scored with every
# metric in `metric_fns`; verbose=2 prints the per-fold scores shown below.
results = cross_validate_pipeline(
    model,
    X_windows,
    Y_labels,
    metric_fns,
    n_folds=5,
    verbose=2,
)


Fold 1
RMSE: train=0.7502, val=5.4499
NMSE: train=0.0027, val=0.1532

Fold 2
RMSE: train=0.7367, val=4.2818
NMSE: train=0.0027, val=0.0932

Fold 3
RMSE: train=0.7129, val=4.5268
NMSE: train=0.0025, val=0.0963

Fold 4
RMSE: train=0.7206, val=3.5814
NMSE: train=0.0025, val=0.0650

Fold 5
RMSE: train=0.7262, val=4.3402
NMSE: train=0.0026, val=0.0896

Average Scores across folds:
RMSE: train=0.7293, val=4.4360
NMSE: train=0.0026, val=0.0994


In [9]:
# Average validation RMSE turned into a file-name tag: always exactly two
# decimals, with '.' replaced by '-' (4.436 -> '4-44', 4.4 -> '4-40').
# Fixed-point formatting keeps trailing zeros, which str(np.round(...))
# would drop.
expected_rmse = f"{float(results['avg_val_RMSE']):.2f}".replace('.', '-')

expected_rmse

'4-44'

# Prediction generation


In [10]:
step_prediction = 50 # step for prediction

# NOTE: this cell rebinds tw_extractor, label_extractor, X, Y, X_windows and
# Y_labels, which the cross-validation cells defined with `step`. Re-run the
# cross-validation data-loading cell before re-running the cross-validation.

# preparing the training data
tw_extractor = TimeWindowTransformer(size = 500, step = step_prediction)
label_extractor = LabelWindowExtractor(size = 500, step = step_prediction)

DATASET = 'guided'

X = np.load(PATH + f'{DATASET}/{DATASET}_dataset_X.npy')
Y = np.load(PATH + f'{DATASET}/{DATASET}_dataset_Y.npy')
X_windows = tw_extractor.transform(X)
Y_labels = label_extractor.transform(Y)

# stacking the sessions: merge the two leading axes into a single one
X_train = X_windows.reshape(-1, *X_windows.shape[2:])
Y_train = Y_labels.reshape(-1, *Y_labels.shape[2:])

# training on all the stacked windows
model.fit(X_train, Y_train)

# predicting: the test set is reshaped to the same trailing (per-window)
# dimensions as the training windows
X_test = np.load(PATH + f'{DATASET}/{DATASET}_testset_X.npy')
X_test = X_test.reshape(-1, *X_windows.shape[2:])
Y_pred = model.predict(X_test)

# saving
FOLDER_PATH = f'{DATASET}/'
# np.save does not create missing folders, so make sure the target exists
# instead of failing after the (expensive) fit and predict above.
os.makedirs(FOLDER_PATH, exist_ok=True)
file_name = FOLDER_PATH + model_name + '_steps_' + str(step_prediction) + '_rmse_' + expected_rmse

np.save(file_name, Y_pred)