In [1]:
import numpy as np

from prep import TimeWindowTransformer, LabelWindowExtractor

from validation import RMSE, NMSE, cross_validate_NN

### Loading, windowing

In [2]:
import os

# Machine-specific data roots used with this notebook. Each entry ends with a
# path separator because file names are appended by plain string concatenation.
_CANDIDATE_DATA_DIRS = [
    '/Users/marco/PROJECTS/data/',
    r'C:\Users\gianm\Documents\Uni\Big Data\F422\project\data\\',
]
# Pick the first root that exists on this machine so nobody has to comment
# lines in and out. If none exists, fall back to the last entry (the value that
# used to be hard-coded) so a missing folder still surfaces as the np.load error.
PATH = next(
    (root for root in _CANDIDATE_DATA_DIRS if os.path.isdir(root)),
    _CANDIDATE_DATA_DIRS[-1],
)
DATASET = 'freemoves' # change this to guided/freemoves if needed

# Window length and hop, defined once so the signal extractor and the label
# extractor cannot drift apart.
WINDOW_SIZE = 500
WINDOW_STEP = 100

# Raw arrays are stored as <PATH><DATASET>/<DATASET>_dataset_{X,Y}.npy
X = np.load(PATH + f'{DATASET}/{DATASET}_dataset_X.npy')
Y = np.load(PATH + f'{DATASET}/{DATASET}_dataset_Y.npy')

tw_extractor = TimeWindowTransformer(size = WINDOW_SIZE, step = WINDOW_STEP)
label_extractor = LabelWindowExtractor(size = WINDOW_SIZE, step = WINDOW_STEP)

X_windows = tw_extractor.transform(X)
Y_labels = label_extractor.transform(Y)

# Entries 0-3 along the first axis are used for cross-validation;
# entry 4 is held out as the test set.
train_val_idx = [0,1,2,3]
test_idx = 4

X_train_val_folds = X_windows[train_val_idx]
Y_train_val_folds = Y_labels[train_val_idx]

X_test = X_windows[test_idx]
Y_test = Y_labels[test_idx]

In [3]:
from prep import TimeDomainTransformer

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge

from validation import RMSE, NMSE, cross_validate_pipeline

# Baseline 1: time-domain features -> standardisation -> kernel ridge
# regression with a Laplacian kernel.
baseline1 = Pipeline(steps=[
    ('feature_extraction', TimeDomainTransformer(sigma_mpr=0.3)),
    ('scaler', StandardScaler()),
    ('regressor', KernelRidge(kernel='laplacian', alpha=0.001)),
])

# Baseline 2: time-domain features -> standardisation -> k-nearest-neighbours
# regression with scikit-learn's default settings.
baseline2 = Pipeline(steps=[
    ('feature_extraction', TimeDomainTransformer(sigma_mpr=0.3)),
    ('scaler', StandardScaler()),
    ('regressor', KNeighborsRegressor()),
])

# Baseline 3: time-domain features -> standardisation -> random forest.
# The forest is seeded so that re-running the notebook reproduces the same
# scores; an unseeded forest draws different bootstrap samples on every run.
# NOTE(review): this pipeline builds TimeDomainTransformer without an explicit
# sigma_mpr, whereas the other two baselines pass sigma_mpr=0.3 — confirm that
# the difference is intentional.
baseline3 = Pipeline(
    [
        ('feature_extraction', TimeDomainTransformer()),
        ('scaler', StandardScaler()),
        ('regressor', RandomForestRegressor(random_state=0))
    ])

In [4]:
# Evaluation
# Metrics reported for every model, keyed by the label used in the printout.
metric_fns = {'RMSE': RMSE, 'NMSE': NMSE}
models = {
    'Time domain features + Kernel Ridge': baseline1,
    'Time domain features + KNN': baseline2,
    'Time domain features + Random Forests': baseline3
}

# Keep each model's cross-validation output under its name. Previously every
# iteration overwrote `result`, so only the last model's output was still
# available after the loop.
time_domain_results = {}
for model_name, model in models.items():
    print(f'\nModel: {model_name}')
    result = cross_validate_pipeline(model, X_train_val_folds, Y_train_val_folds, metric_fns, verbose=1)
    time_domain_results[model_name] = result


Model: Time domain features + Kernel Ridge

Average Scores across folds:
RMSE: train=0.0570, val=11.0242
NMSE: train=0.0000, val=0.8166

Model: Time domain features + KNN

Average Scores across folds:
RMSE: train=4.0959, val=12.2073
NMSE: train=0.1024, val=0.9949

Model: Time domain features + Random Forests

Average Scores across folds:
RMSE: train=2.4229, val=10.8465
NMSE: train=0.0358, val=0.7863


In [6]:
# Test Wavelet
import pywt
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class WaveletFeatureExtractor(BaseEstimator, TransformerMixin):
    """Summarise each channel of a window by statistics of its wavelet coefficients.

    Every channel of every sample is decomposed with ``pywt.wavedec`` and each
    returned coefficient array is reduced to one number per statistic. Within a
    sample the features are ordered channel by channel, then coefficient array
    by coefficient array (in the order ``pywt.wavedec`` returns them), then
    statistic by statistic.

    Parameters
    ----------
    wavelet : str, default='db4'
        Wavelet passed to ``pywt.wavedec``.
    level : int, default=3
        Decomposition level passed to ``pywt.wavedec``.
    stat_funcs : sequence of callables or None, default=None
        Functions that map one coefficient array to a scalar. ``None`` or an
        empty sequence selects mean, standard deviation, maximum and minimum.
    """

    def __init__(self, wavelet='db4', level=3, stat_funcs=None):
        # Store constructor arguments untouched, as scikit-learn expects of an
        # estimator: get_params / set_params / clone then round-trip exactly.
        # The default statistics are filled in when transform() runs.
        self.wavelet = wavelet
        self.level = level
        self.stat_funcs = stat_funcs

    def _resolve_stat_funcs(self):
        """Return the statistics to compute, substituting the defaults when none are set."""
        funcs = self.stat_funcs
        if funcs is None or len(funcs) == 0:
            return [np.mean, np.std, np.max, np.min]
        return list(funcs)

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer has no fitted state."""
        return self

    def transform(self, X):
        """Compute the wavelet statistics for every sample in ``X``.

        Parameters
        ----------
        X : array-like
            Either 3-D ``(n_samples, n_channels, window_size)`` or 4-D, in
            which case the first two axes are merged into the sample axis.

        Returns
        -------
        numpy.ndarray
            One row of features per sample.

        Raises
        ------
        ValueError
            If ``X`` is neither 3-D nor 4-D.
        """
        X = np.asarray(X)
        if X.ndim == 4:
            X = X.reshape(-1, X.shape[2], X.shape[3])  # (n_samples, n_channels, window_size)
        if X.ndim != 3:
            raise ValueError(
                'WaveletFeatureExtractor expects a 3-D or 4-D array, '
                f'got an array with {X.ndim} dimension(s)'
            )

        # Resolved once per call so the default list is not rebuilt per channel.
        stat_funcs = self._resolve_stat_funcs()

        features = [
            [
                stat(coeff)
                for channel in sample
                for coeff in pywt.wavedec(channel, self.wavelet, level=self.level)
                for stat in stat_funcs
            ]
            for sample in X
        ]
        return np.array(features)

In [7]:
# Wavelet statistics (db4, level 3) -> standardisation -> kernel ridge
# regression with a Laplacian kernel.
baseline_wavelet = Pipeline(
    steps=[
        ('wavelet', WaveletFeatureExtractor(level=3, wavelet='db4')),
        ('scaler', StandardScaler()),
        ('regressor', KernelRidge(kernel='laplacian', alpha=0.001)),
    ],
)
# Wavelet statistics (db4, level 3) -> standardisation -> k-nearest-neighbours
# regression with scikit-learn's default settings.
baseline_wavelet_kn = Pipeline(steps=[
    ('wavelet', WaveletFeatureExtractor(level=3, wavelet='db4')),
    ('scaler', StandardScaler()),
    ('regressor', KNeighborsRegressor()),
])
# Wavelet statistics -> standardisation -> random forest.
# The forest is seeded so that re-running the notebook reproduces the same
# scores; an unseeded forest draws different bootstrap samples on every run.
# NOTE(review): this pipeline uses wavelet='sym5' with level=4, while the other
# wavelet pipelines use 'db4' with level=3 — confirm that the difference is
# intentional.
baseline_wavelet_rf = Pipeline([
    ('wavelet', WaveletFeatureExtractor(wavelet='sym5', level=4)),
    ('scaler', StandardScaler()),
    ('regressor', RandomForestRegressor(random_state=0))
])

In [8]:
# Evaluation
# Metrics reported for every model, keyed by the label used in the printout.
metric_fns = {'RMSE': RMSE, 'NMSE': NMSE}
models = {
    'Wavelet + Kernel Ridge': baseline_wavelet,
    'Wavelet + KNN': baseline_wavelet_kn,
    'Wavelet + Random Forests': baseline_wavelet_rf
}

# Keep each model's cross-validation output under its name. Previously every
# iteration overwrote `result`, so only the last model's output was still
# available after the loop.
wavelet_results = {}
for model_name, model in models.items():
    print(f'\nModel: {model_name}')
    result = cross_validate_pipeline(model, X_train_val_folds, Y_train_val_folds, metric_fns, verbose=1)
    wavelet_results[model_name] = result


Model: Wavelet + Kernel Ridge

Average Scores across folds:
RMSE: train=0.0972, val=11.8307
NMSE: train=0.0001, val=0.9406

Model: Wavelet + KNN

Average Scores across folds:
RMSE: train=6.0292, val=12.8150
NMSE: train=0.2217, val=1.0976

Model: Wavelet + Random Forests

Average Scores across folds:
RMSE: train=2.7606, val=11.4190
NMSE: train=0.0465, val=0.8750
