In [8]:
from dataset import *
from preprocessing import *

import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Relative locations used by the notebook.
PLOTS_DIR = './plots/'  # not referenced in this cell
DATA_DIR = './data/'

# Read the dataset from DATA_DIR. load_dataset hands back two values:
# X (the samples) and y (their target classes, used for stratification below).
X, y = load_dataset(DATA_DIR)

In [3]:
# First cut: 70 % of the samples become the training set, the remainder is
# held out. The data is imbalanced, so stratify on y to keep the ratio
# between the target classes the same on both sides.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=42, stratify=y
)

# Second cut: divide the held-out part evenly into test and validation set,
# again stratified so the class ratio is preserved.
X_test, X_val, y_test, y_val = train_test_split(
    X_test, y_test, train_size=0.5, random_state=42, stratify=y_test
)

# Collect the splits in a fixed order (train, test, validation); `data` and
# `labels` are index-aligned.
data = [X_train, X_test, X_val]
labels = [y_train, y_test, y_val]

In [4]:
# Imported here so the scaler factory can be reached through its module.
# NOTE(review): assumes `scaler` is the one exported by `preprocessing`
# (pulled in by the star import at the top) -- confirm.
import preprocessing as _preprocessing

# Band-pass filter settings handed to butter_bandpass.
# NOTE(review): cut-offs and sampling rate are presumably in Hz -- confirm.
LOWCUT = 0.5
HIGHCUT = 40
SAMPLINGRATE = 173.61

# define the scaler from the training split only.
# The variable keeps the name `scaler` because the preprocessing loop reads
# it, but that name shadows the star-imported factory function. Calling the
# factory through the module keeps this cell re-runnable: the original
# `scaler = scaler(X_train)` called the already-built scaler object on a
# second execution.
scaler = _preprocessing.scaler(X_train)
# define the filter coefficients
b, a = butter_bandpass(LOWCUT, HIGHCUT, SAMPLINGRATE)

In [5]:
# Two independent, initially empty accumulators: one for the preprocessed
# samples of each split, one for the matching labels.
processed_data, processed_labels = [], []

In [6]:
# Start from empty accumulators on every execution so that re-running this
# cell replaces the results instead of appending a second copy of each split.
processed_data = []
processed_labels = []

# apply the preprocessing pipeline to the different sets, in the order of
# `data` / `labels` (train, test, validation).
# The loop variables are deliberately NOT called X / y: those names hold the
# full dataset, and rebinding them here would make a re-run of the split cell
# operate on the validation set only.
for split_X, split_y in zip(data, labels):
    # 1. band-pass filter with the precomputed coefficients b, a
    filtered_X = butter_bandpass_filter(split_X, b, a)
    # 2. standardize with the scaler built from the training split
    standardized_X = standardize_data(scaler, filtered_X)
    # 3. cut into windows; labels are passed along so they stay aligned
    # NOTE(review): 241 is presumably the window length in samples -- confirm
    # against sliding_window's signature.
    prepared_X, prepared_y = sliding_window(standardized_X, split_y, 241)
    processed_data.append(prepared_X)
    processed_labels.append(prepared_y)