In [2]:
%load_ext autoreload
%autoreload 2

import os.path
import numpy as np

# Load in data, this assumed you have a folder in env named data
data_dir = "data"
X_test = np.load(os.path.join(data_dir, "X_test.npy"))
y_test = np.load(os.path.join(data_dir, "y_test.npy"))
person_test = np.load(os.path.join(data_dir, "person_test.npy")).squeeze(axis=1)
X_train_valid = np.load(os.path.join(data_dir, "X_train_valid.npy"))
y_train_valid = np.load(os.path.join(data_dir, "y_train_valid.npy"))
person_train_valid = np.load(os.path.join(data_dir, "person_train_valid.npy")).squeeze(axis=1)

# Predefine some useful variables and fix data a bit
n_class = len(set(y_train_valid))
n_trials = 5
min_y = min(y_train_valid)
y_train_valid = y_train_valid - min_y
y_test = y_test - min_y

In [3]:
# Validate data loaded in correctly and print shapes
print ('Training/Valid data shape: {}'.format(X_train_valid.shape))
print ('Test data shape: {}'. format (X_test.shape))
print ('Training/Valid target shape: {}'.format(y_train_valid.shape))
print ('Test target shape: {}'.format(y_test.shape))
print ('Person train/valid shape: {}'.format(person_train_valid.shape))
print ('Person test shape: {}'.format(person_test.shape))

Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115,)
Person test shape: (443,)


In [4]:
import random
import torch

# Define random seed so that we can reproduce results
seed = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# This is for macbook M1, if you have intel I think you use cuda not mps
# Research what works for your device and change the torch.device
device = torch.device("cuda")
device

device(type='cuda')

In [5]:
# Import the models and functions
from models import *
from trainer import *
import matplotlib.pyplot as plt

In [10]:
# Make data loader for all subjects

# Convert data to tensors
X_tensor_full = torch.FloatTensor(X_test[:,:,:400])
y_tensor_full = torch.LongTensor(y_test)

# Combine X and y into a TensorDataset
dataset_full = TensorDataset(X_tensor_full, y_tensor_full)

# Prepare dataloaders
test_dataloader_full = DataLoader(dataset_full, batch_size=256, shuffle=False)

# Ensemble

Test the effect of ensembling models using max voting

## The Goal of this Notebook is Maximum Accuracy

In [None]:
ensemble_models = []

for i in range(5):
    cnn = CNN(input_size=X_train_valid[:,:,:400].shape[1:], N=n_class).to(device)
    fit(cnn, X_train_valid[:,:,:400], y_train_valid, device, epochs=400)
    ensemble_models.append(cnn)

In [11]:
# CNN
y_pred_CNN = []

print("Now training the CNN model\n")
for i in range(5):
    cnn = CNN(input_size=X_train_valid[:,:,:300].shape[1:], N=n_class).to(device)
    fit(cnn, X_train_valid[:,:,:300], y_train_valid, device, epochs=100)
    _, y_pred = evaluate(cnn, test_dataloader_full, device)
    y_pred_CNN.append(y_pred)

# Lets try max voting since thats used for classification tasks
def max_voting(*args):
    """
    Perform max voting ensemble for multiple classifiers.
    
    Args:
    *args: Arrays of predicted class labels from each classifier.
    
    Returns:
    Ensemble predictions based on max voting.
    """
    # Concatenate predicted labels along axis 1
    concatenated_predictions = np.concatenate(args, axis=0)
    
    # Find the most frequent prediction for each sample
    ensemble_predictions = np.array([np.argmax(np.bincount(sample_predictions)) for sample_predictions in concatenated_predictions.T])

    return ensemble_predictions

# Now lets try it out
# ensemble_predictions = max_voting(y_pred_CNN, y_pred_LSTM, y_pred_CRNN, y_pred_CRNN_2)
ensemble_predictions = max_voting(y_pred_CNN)

# Get accuracy of ensemble
correct = np.sum(ensemble_predictions == y_test)
total = len(y_test)
accuracy_ensemble = correct/total

print("Accuracy of ensemble: " + str(round(accuracy_ensemble, 5)))

Now training the CNN model



  0%|          | 0/100 [00:00<?, ?it/s]

Best valid accuracy: 0.65485


  0%|          | 0/100 [00:00<?, ?it/s]

Best valid accuracy: 0.66903


  0%|          | 0/100 [00:00<?, ?it/s]

Best valid accuracy: 0.70213


  0%|          | 0/100 [00:00<?, ?it/s]

Best valid accuracy: 0.69976


  0%|          | 0/100 [00:00<?, ?it/s]

Best valid accuracy: 0.7234
Accuracy of ensemble: 0.68849
