In [86]:
import os

import numpy as np

import pyriemann
import pyriemann.regression

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge

from config.regressors import VotingRegressor, StackingRegressor, NNRegressor

from config.models import ConvNN

from config.loss_functions import RMSELoss

from config.transformers import TimeDomainTransformer, TimeWindowTransformer, LabelWindowExtractor
from config.validation import RMSE, NMSE, cross_validate_pipeline

# Models

### Baseline models

In [87]:
def _time_domain_pipeline(regressor):
    """Build a baseline pipeline ending in `regressor`.

    Steps, in order: TimeDomainTransformer(sigma_mpr=0.3) for feature
    extraction, StandardScaler, then the supplied regressor. Every call
    creates its own transformer and scaler instances, so the returned
    pipelines share no steps.
    """
    return Pipeline(
        steps=[
            ('feature_extraction', TimeDomainTransformer(sigma_mpr=0.3)),
            ('scaler', StandardScaler()),
            ('regressor', regressor),
        ]
    )


# Kernel ridge regression with a Laplacian kernel.
baseline_guided_kr = _time_domain_pipeline(
    KernelRidge(kernel='laplacian', alpha=0.01, gamma=0.01)
)

# k-nearest-neighbours regression (k = 5).
baseline_guided_knn = _time_domain_pipeline(
    KNeighborsRegressor(n_neighbors=5)
)

# Random forest: 50 trees, depth capped at 10.
baseline_guided_rf = _time_domain_pipeline(
    RandomForestRegressor(n_estimators=50, max_depth=10)
)

### Riemannian models

In [88]:
# Riemannian geometry of covariance matrices
def _tangent_space_pipeline(regressor):
    """Build a covariance / tangent-space pipeline ending in `regressor`.

    Steps, in order: pyriemann Covariances estimation, TangentSpace
    projection (metric='riemann', tsupdate=True), StandardScaler, then the
    supplied regressor. Each call instantiates fresh steps.
    """
    return Pipeline(
        steps=[
            ('feature_extraction', pyriemann.estimation.Covariances()),
            ('transformation', pyriemann.tangentspace.TangentSpace(
                metric='riemann',
                tsupdate=True,
            )),
            ('scaler', StandardScaler()),
            ('regressor', regressor),
        ]
    )


# Same three regressors (and hyper-parameters) as the baseline pipelines.
riem1 = _tangent_space_pipeline(
    KernelRidge(kernel='laplacian', alpha=0.01, gamma=0.01)
)

riem2 = _tangent_space_pipeline(
    KNeighborsRegressor(n_neighbors=5)
)

riem3 = _tangent_space_pipeline(
    RandomForestRegressor(n_estimators=50, max_depth=10)
)

### Ensemble models

In [89]:
# Base learners used by both ensembles: the three time-domain baselines
# followed by the three tangent-space pipelines.
_ensemble_members = (
    baseline_guided_kr,
    baseline_guided_knn,
    baseline_guided_rf,
    riem1,
    riem2,
    riem3,
)

# NOTE(review): both ensembles receive the very same pipeline objects. Whether
# the project's VotingRegressor / StackingRegressor clone them before fitting
# is not visible from this notebook - confirm before fitting both.
# Each ensemble gets its own list object, built from the shared tuple.
voting_estimator = VotingRegressor(estimators=list(_ensemble_members))

# Regressor passed as the stacking ensemble's `end_estimator`.
_stacking_end_estimator = RandomForestRegressor(n_estimators=50, max_depth=10)

stacking_estimator = StackingRegressor(
    estimators=list(_ensemble_members),
    end_estimator=_stacking_end_estimator,
)

# Final generalization evaluation

In [90]:
# Root folder that contains the `guided/` and `freemoves/` dataset directories.
# Set the F422_DATA_PATH environment variable to point at your local copy
# instead of editing this cell, e.g.
#   F422_DATA_PATH=C:\Users\gianm\Documents\Uni\Big Data\F422\project\data
# When the variable is unset or empty the previous hard-coded location is used,
# so existing setups keep working unchanged.
_DEFAULT_DATA_PATH = '/Users/marco/PROJECTS/data/'

# os.path.join(<dir>, '') appends a trailing separator only when it is missing;
# later cells build file names by plain concatenation (PATH + 'guided/...').
PATH = os.path.join(os.environ.get('F422_DATA_PATH') or _DEFAULT_DATA_PATH, '')

# Pipeline that is cross-validated below and then fitted for the submission.
model = baseline_guided_kr
# `step` argument given to every TimeWindowTransformer / LabelWindowExtractor.
step = 100

# Metric functions handed to cross_validate_pipeline, keyed by display name.
metric_fns = {'RMSE': RMSE, 'NMSE': NMSE}

In [91]:
# One signal-window extractor and one label extractor, configured with the
# same size/step, are shared by both datasets.
_window_kwargs = dict(size=500, step=step)
tw_extractor = TimeWindowTransformer(**_window_kwargs)
label_extractor = LabelWindowExtractor(**_window_kwargs)


def _load_and_window(name):
    """Load `<name>/<name>_dataset_{X,Y}.npy` under PATH and window both arrays.

    Returns a 4-tuple: raw X, raw Y, X after `tw_extractor.transform`, and
    Y after `label_extractor.transform`.
    """
    signals = np.load(PATH + f'{name}/{name}_dataset_X.npy')
    targets = np.load(PATH + f'{name}/{name}_dataset_Y.npy')
    signal_windows = tw_extractor.transform(signals)
    target_labels = label_extractor.transform(targets)
    return signals, targets, signal_windows, target_labels


# guided
X_guided, Y_guided, X_guided_windows, Y_guided_labels = _load_and_window('guided')

# freemoves
X_freemoves, Y_freemoves, X_freemoves_windows, Y_freemoves_labels = _load_and_window('freemoves')

# # stacked
# X_stacked_windows = np.concatenate([X_guided_windows, X_freemoves_windows], axis=1)
# Y_stacked_labels = np.concatenate([Y_guided_labels, Y_freemoves_labels], axis=1)

#### RMSE guided

In [92]:
# 5-fold cross-validation of `model` on the windowed guided dataset, scored
# with every metric in `metric_fns`.
results_guided = cross_validate_pipeline(
    model,
    X_guided_windows,
    Y_guided_labels,
    metric_fns,
    n_folds=5,
    verbose=1,
)


Average Scores across folds:
RMSE: train=0.1576, val=4.5220
NMSE: train=0.0001, val=0.1032


#### RMSE freemoves

In [93]:
# 5-fold cross-validation of `model` on the windowed freemoves dataset, scored
# with every metric in `metric_fns`.
results_freemoves = cross_validate_pipeline(
    model,
    X_freemoves_windows,
    Y_freemoves_labels,
    metric_fns,
    n_folds=5,
    verbose=1,
)


Average Scores across folds:
RMSE: train=0.5261, val=10.5424
NMSE: train=0.0018, val=0.7883


#### RMSE total

In [94]:
# Pool the two validation RMSEs into a single figure: square each average
# RMSE, weight it by the first dimension of its raw dataset, take the
# weighted mean and finally the square root.
# NOTE(review): the weights are X_*.shape[0] of the raw arrays, not the number
# of extracted windows - confirm this is the intended weighting.
_n_guided = X_guided.shape[0]
_n_freemoves = X_freemoves.shape[0]

_weighted_squared_rmse = (
    results_guided['avg_val_RMSE']**2 * _n_guided
    + results_freemoves['avg_val_RMSE']**2 * _n_freemoves
)
results_combined = (_weighted_squared_rmse / (_n_guided + _n_freemoves))**0.5

results_combined  # author's expectation: the value most correlated with the score

np.float64(8.111433178886406)

# Submission


### Guided training and predictions

In [95]:
DATASET = 'guided'

# --- training -------------------------------------------------------------
X = np.load(f'{PATH}{DATASET}/{DATASET}_dataset_X.npy')
Y = np.load(f'{PATH}{DATASET}/{DATASET}_dataset_Y.npy')

# Fresh extractors with the same size/step as in the evaluation section.
tw_extractor = TimeWindowTransformer(step=step, size=500)
label_extractor = LabelWindowExtractor(step=step, size=500)

X_windows = tw_extractor.transform(X)
Y_labels = label_extractor.transform(Y)

# Merge the two leading axes into a single sample axis; trailing axes are kept.
X_train = X_windows.reshape((-1,) + X_windows.shape[2:])
Y_train = Y_labels.reshape((-1,) + Y_labels.shape[2:])

model.fit(X_train, Y_train)

# --- predicting -----------------------------------------------------------
# The test array is reshaped to the per-sample shape of the training windows.
# NOTE(review): this presumes the stored test set is already windowed - confirm.
X_test = np.load(f'{PATH}{DATASET}/{DATASET}_testset_X.npy')
X_test = X_test.reshape((-1,) + X_windows.shape[2:])

Y_guided_pred = model.predict(X_test)

### Freemoves training and predictions

In [96]:
DATASET = 'freemoves'

# --- training -------------------------------------------------------------
X = np.load(f'{PATH}{DATASET}/{DATASET}_dataset_X.npy')
Y = np.load(f'{PATH}{DATASET}/{DATASET}_dataset_Y.npy')

# Fresh extractors with the same size/step as in the evaluation section.
tw_extractor = TimeWindowTransformer(step=step, size=500)
label_extractor = LabelWindowExtractor(step=step, size=500)

X_windows = tw_extractor.transform(X)
Y_labels = label_extractor.transform(Y)

# Merge the two leading axes into a single sample axis; trailing axes are kept.
X_train = X_windows.reshape((-1,) + X_windows.shape[2:])
Y_train = Y_labels.reshape((-1,) + Y_labels.shape[2:])

# Calling fit again on the same `model` object refits it on this dataset.
model.fit(X_train, Y_train)

# --- predicting -----------------------------------------------------------
# The test array is reshaped to the per-sample shape of the training windows.
# NOTE(review): this presumes the stored test set is already windowed - confirm.
X_test = np.load(f'{PATH}{DATASET}/{DATASET}_testset_X.npy')
X_test = X_test.reshape((-1,) + X_windows.shape[2:])

Y_freemoves_pred = model.predict(X_test)

### CSV generation

In [97]:
# Submission export - currently disabled (every statement is commented out).
# If re-enabled, it stacks the guided predictions on top of the freemoves
# predictions and writes them to `fname` as a CSV without index or header.
# NOTE(review): the file name mentions 'stacked' and 'step_250', while this
# notebook sets model = baseline_guided_kr and step = 100 - update the name
# before re-enabling so the file matches the model that produced it.

# import pandas as pd

# fname = 'stacked_step_250.csv'

# Y_pred = np.vstack([Y_guided_pred, Y_freemoves_pred])
# Y_pred_df = pd.DataFrame(Y_pred)
# Y_pred_df.to_csv(fname, index=False, header=None)