In [1]:
import os.path
import numpy as np

# Load the saved .npy arrays; assumes a folder named "data" exists relative to the working directory
data_dir = "data"


def _load_npy(filename):
    """Return the array stored in `filename` inside `data_dir`."""
    return np.load(os.path.join(data_dir, filename))


# Test split (the person array is stored with a trailing singleton axis, dropped here)
X_test = _load_npy("X_test.npy")
y_test = _load_npy("y_test.npy")
person_test = _load_npy("person_test.npy").squeeze(axis=1)

# Train/validation split
X_train_valid = _load_npy("X_train_valid.npy")
y_train_valid = _load_npy("y_train_valid.npy")
person_train_valid = _load_npy("person_train_valid.npy").squeeze(axis=1)

# Shared settings: number of distinct labels and number of repeated runs per model
n_class = len(np.unique(y_train_valid))
n_trials = 5

# Shift the labels of both splits by the smallest training label so they start at 0
min_y = y_train_valid.min()
y_train_valid, y_test = y_train_valid - min_y, y_test - min_y

In [3]:
# Sanity check: report the shape of every loaded array, one line per array
for template, array in (
    ('Training/Valid data shape: {}', X_train_valid),
    ('Test data shape: {}', X_test),
    ('Training/Valid target shape: {}', y_train_valid),
    ('Test target shape: {}', y_test),
    ('Person train/valid shape: {}', person_train_valid),
    ('Person test shape: {}', person_test),
):
    print(template.format(array.shape))

Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115,)
Person test shape: (443,)


In [4]:
import random
import torch

# Define random seed so that we can reproduce results
seed = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Pick the best available device instead of hard-coding one, so the notebook
# also runs on machines without an Apple-silicon GPU:
#   "mps"  -> Apple-silicon GPU (e.g. MacBook M1)
#   "cuda" -> NVIDIA GPU
#   "cpu"  -> fallback that is always available
# getattr guards against torch builds that do not ship torch.backends.mps.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='mps')

In [5]:
# Import the models and functions
from models import *
from trainer import *

In [6]:
# Positions of the trials whose person id equals 1, in each split
s1_indices_train = np.flatnonzero(person_train_valid == 1).tolist()
s1_indices_test = np.flatnonzero(person_test == 1).tolist()

# Slice inputs and targets down to those trials
X_train_valid_s1, y_train_valid_s1 = X_train_valid[s1_indices_train], y_train_valid[s1_indices_train]
X_test_s1, y_test_s1 = X_test[s1_indices_test], y_test[s1_indices_test]

# Test-set dataloader for the single subject:
# float inputs / integer targets as tensors, paired sample-by-sample
X_tensor, y_tensor = torch.FloatTensor(X_test_s1), torch.LongTensor(y_test_s1)
dataset = TensorDataset(X_tensor, y_tensor)

# Fixed order (no shuffling) so predictions line up with y_test_s1
test_dataloader = DataLoader(dataset, batch_size=256, shuffle=False)

# Train for the single subject data
Epoch counts are currently kept low, purely for testing purposes.

## CNN

In [27]:
# Accuracy of each independent run (one entry per trial)
valid_accs, test_accs = [], []

for trial in range(n_trials):
    # Fresh CNN per trial, sized from one sample of the subject-1 data
    cnn = CNN(input_size=X_train_valid_s1.shape[1:], N=n_class).to(device)
    # fit's return value is recorded as this run's validation accuracy
    valid_accs.append(fit(cnn, X_train_valid_s1, y_train_valid_s1, device, epochs=150))
    # evaluate returns a pair; only the first element (accuracy) is kept here
    test_acc, _ = evaluate(cnn, test_dataloader, device)
    test_accs.append(test_acc)

# Cell output: (mean validation accuracy, mean test accuracy) over all trials
round(np.mean(valid_accs), 5), round(np.mean(test_accs), 5)

  0%|          | 0/150 [00:00<?, ?it/s]

100%|██████████| 150/150 [00:06<00:00, 23.31it/s]


Best valid accuracy: 0.54167


100%|██████████| 150/150 [00:06<00:00, 24.22it/s]


Best valid accuracy: 0.70833


100%|██████████| 150/150 [00:06<00:00, 24.22it/s]


Best valid accuracy: 0.625


100%|██████████| 150/150 [00:06<00:00, 24.28it/s]


Best valid accuracy: 0.45833


100%|██████████| 150/150 [00:06<00:00, 24.09it/s]

Best valid accuracy: 0.58333





(0.58333, 0.5)

## RNN (LSTM)

In [38]:
# Accuracy of each independent run (one entry per trial)
valid_accs, test_accs = [], []

for trial in range(n_trials):
    # Fresh LSTM (default constructor arguments) per trial
    rnn = LSTM().to(device)
    # fit's return value is recorded as this run's validation accuracy
    valid_accs.append(fit(rnn, X_train_valid_s1, y_train_valid_s1, device, epochs=7))
    # evaluate returns a pair; only the first element (accuracy) is kept here
    test_acc, _ = evaluate(rnn, test_dataloader, device)
    test_accs.append(test_acc)

# Cell output: (mean validation accuracy, mean test accuracy) over all trials
round(np.mean(valid_accs), 5), round(np.mean(test_accs), 5)

  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:00<00:00, 10.12it/s]


Best valid accuracy: 0.35417


100%|██████████| 7/7 [00:00<00:00, 13.17it/s]


Best valid accuracy: 0.25


100%|██████████| 7/7 [00:00<00:00, 13.26it/s]


Best valid accuracy: 0.45833


100%|██████████| 7/7 [00:00<00:00, 13.51it/s]


Best valid accuracy: 0.25


100%|██████████| 7/7 [00:00<00:00, 13.27it/s]

Best valid accuracy: 0.25





(0.3125, 0.232)

## Another RNN (GRU)

The GRU takes a very long time to train, and I'm not entirely sure why.

I even cut out a fully connected (fc) layer and increased the learning rate (lr) to speed it up.

In [39]:
# Accuracy of each independent run (one entry per trial)
valid_accs, test_accs = [], []

for trial in range(n_trials):
    # Fresh GRU (default constructor arguments) per trial
    gru = GRU().to(device)
    # fit's return value is recorded as this run's validation accuracy
    valid_accs.append(fit(gru, X_train_valid_s1, y_train_valid_s1, device, epochs=7))
    # evaluate returns a pair; only the first element (accuracy) is kept here
    test_acc, _ = evaluate(gru, test_dataloader, device)
    test_accs.append(test_acc)

# Cell output: (mean validation accuracy, mean test accuracy) over all trials
round(np.mean(valid_accs), 5), round(np.mean(test_accs), 5)

  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:00<00:00, 10.05it/s]


Best valid accuracy: 0.29167


100%|██████████| 7/7 [00:00<00:00, 15.18it/s]


Best valid accuracy: 0.35417


100%|██████████| 7/7 [00:00<00:00, 15.69it/s]


Best valid accuracy: 0.41667


100%|██████████| 7/7 [00:00<00:00, 15.77it/s]


Best valid accuracy: 0.33333


100%|██████████| 7/7 [00:00<00:00, 15.55it/s]

Best valid accuracy: 0.375





(0.35417, 0.228)

## CNN + RNN (LSTM)

In [47]:
# Accuracy of each independent run (one entry per trial)
valid_accs, test_accs = [], []

for trial in range(n_trials):
    # Fresh CNN+RNN hybrid per trial, with one output per class
    crnn = CNN_RNN(output_size=n_class).to(device)
    # fit's return value is recorded as this run's validation accuracy
    valid_accs.append(fit(crnn, X_train_valid_s1, y_train_valid_s1, device, epochs=40))
    # evaluate returns a pair; only the first element (accuracy) is kept here
    test_acc, _ = evaluate(crnn, test_dataloader, device)
    test_accs.append(test_acc)

# Cell output: (mean validation accuracy, mean test accuracy) over all trials
round(np.mean(valid_accs), 5), round(np.mean(test_accs), 5)

  0%|          | 0/40 [00:00<?, ?it/s]

100%|██████████| 40/40 [00:03<00:00, 10.46it/s]


Best valid accuracy: 0.35417


100%|██████████| 40/40 [00:03<00:00, 11.10it/s]


Best valid accuracy: 0.375


100%|██████████| 40/40 [00:03<00:00, 11.09it/s]


Best valid accuracy: 0.39583


100%|██████████| 40/40 [00:03<00:00, 11.12it/s]


Best valid accuracy: 0.375


100%|██████████| 40/40 [00:03<00:00, 11.11it/s]

Best valid accuracy: 0.3125





(0.3625, 0.304)

# Train for all subjects

In [7]:
# Test-set dataloader covering all subjects

# Float inputs / integer targets as tensors, paired sample-by-sample
X_tensor_full, y_tensor_full = torch.FloatTensor(X_test), torch.LongTensor(y_test)
dataset_full = TensorDataset(X_tensor_full, y_tensor_full)

# Fixed order (no shuffling) so predictions line up with y_test
test_dataloader_full = DataLoader(dataset_full, batch_size=256, shuffle=False)

## CNN 

In [8]:
# Accuracy of each independent run (one entry per trial)
valid_accs, test_accs = [], []

for trial in range(n_trials):
    # Fresh CNN per trial, sized from one sample of the full training data
    cnn = CNN(input_size=X_train_valid.shape[1:], N=n_class).to(device)
    # fit's return value is recorded as this run's validation accuracy
    valid_accs.append(fit(cnn, X_train_valid, y_train_valid, device, epochs=5))
    # evaluate returns a pair; only the first element (accuracy) is kept here
    test_acc, _ = evaluate(cnn, test_dataloader_full, device)
    test_accs.append(test_acc)

# Cell output: (mean validation accuracy, mean test accuracy) over all trials
round(np.mean(valid_accs), 5), round(np.mean(test_accs), 5)

  0%|          | 0/5 [00:00<?, ?it/s]

Best valid accuracy: 0.47281


  0%|          | 0/5 [00:00<?, ?it/s]

KeyboardInterrupt: 

## LSTM

In [9]:
# Accuracy of each independent run (one entry per trial)
valid_accs, test_accs = [], []

for trial in range(n_trials):
    # Fresh LSTM (default constructor arguments) per trial
    rnn = LSTM().to(device)
    # fit's return value is recorded as this run's validation accuracy
    valid_accs.append(fit(rnn, X_train_valid, y_train_valid, device, epochs=5))
    # evaluate returns a pair; only the first element (accuracy) is kept here
    test_acc, _ = evaluate(rnn, test_dataloader_full, device)
    test_accs.append(test_acc)

# Cell output: (mean validation accuracy, mean test accuracy) over all trials
round(np.mean(valid_accs), 5), round(np.mean(test_accs), 5)

  0%|          | 0/5 [00:00<?, ?it/s]

Best valid accuracy: 0.26005


KeyboardInterrupt: 

## GRU

In [None]:
# Accuracy of each independent run (one entry per trial)
valid_accs, test_accs = [], []

for trial in range(n_trials):
    # Fresh GRU (default constructor arguments) per trial
    gru = GRU().to(device)
    # fit's return value is recorded as this run's validation accuracy
    valid_accs.append(fit(gru, X_train_valid, y_train_valid, device, epochs=7))
    # evaluate returns a pair; only the first element (accuracy) is kept here
    test_acc, _ = evaluate(gru, test_dataloader_full, device)
    test_accs.append(test_acc)

# Cell output: (mean validation accuracy, mean test accuracy) over all trials
round(np.mean(valid_accs), 5), round(np.mean(test_accs), 5)

## CRNN

In [10]:
# Accuracy of each independent run (one entry per trial)
valid_accs, test_accs = [], []

for trial in range(n_trials):
    # Fresh CNN+RNN hybrid per trial, with one output per class
    crnn = CNN_RNN(output_size=n_class).to(device)
    # fit's return value is recorded as this run's validation accuracy
    valid_accs.append(fit(crnn, X_train_valid, y_train_valid, device, epochs=5))
    # evaluate returns a pair; only the first element (accuracy) is kept here
    test_acc, _ = evaluate(crnn, test_dataloader_full, device)
    test_accs.append(test_acc)

# Cell output: (mean validation accuracy, mean test accuracy) over all trials
round(np.mean(valid_accs), 5), round(np.mean(test_accs), 5)

  0%|          | 0/5 [00:00<?, ?it/s]

Best valid accuracy: 0.26241


  0%|          | 0/5 [00:00<?, ?it/s]

Best valid accuracy: 0.2695


  0%|          | 0/5 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Ensemble

Test the effect of ensembling models using max voting

## Let's train a CNN, an LSTM, and a CRNN, saving the predicted labels (y_pred)

In [22]:
# For each architecture: train n_trials fresh models on the full training data
# and keep the second value returned by evaluate (the test-set predictions) of every run.

# CNN
y_pred_CNN = []
print("Now training the CNN model\n")
for trial in range(n_trials):
    cnn = CNN(input_size=X_train_valid.shape[1:], N=n_class).to(device)
    fit(cnn, X_train_valid, y_train_valid, device, epochs=1)
    _, y_pred = evaluate(cnn, test_dataloader_full, device)
    y_pred_CNN.append(y_pred)

# RNN (LSTM)
y_pred_LSTM = []
print("Now training the LSTM model\n")
for trial in range(n_trials):
    rnn = LSTM().to(device)
    fit(rnn, X_train_valid, y_train_valid, device, epochs=1)
    _, y_pred = evaluate(rnn, test_dataloader_full, device)
    y_pred_LSTM.append(y_pred)

# CRNN
y_pred_CRNN = []
print("Now training the CRNN model\n")
for trial in range(n_trials):
    crnn = CNN_RNN(output_size=n_class).to(device)
    fit(crnn, X_train_valid, y_train_valid, device, epochs=1)
    _, y_pred = evaluate(crnn, test_dataloader_full, device)
    y_pred_CRNN.append(y_pred)

# Let's try max voting, since that's what is used for classification tasks
def max_voting(*args):
    """
    Perform max voting (majority vote) ensemble for multiple classifiers.

    Args:
    *args: Predicted class labels from each classifier. Each argument may be
        either a single 1-D array of shape (n_samples,) or a sequence / 2-D
        array of several runs with shape (n_runs, n_samples). Labels must be
        non-negative integers (a requirement of np.bincount).

    Returns:
    1-D array of shape (n_samples,) with the most frequent label per sample.
    Ties go to the smallest label, because np.argmax returns the first maximum.

    Raises:
    ValueError: if no predictions are passed (raised by np.concatenate).
    """
    # Promote every argument to 2-D (n_runs, n_samples). Without this, plain
    # 1-D prediction vectors would be joined end-to-end into one long vector
    # and the per-sample vote below would fail.
    stacked_runs = [np.atleast_2d(np.asarray(predictions)) for predictions in args]

    # Stack all runs of all classifiers along axis 0 -> (total_runs, n_samples)
    concatenated_predictions = np.concatenate(stacked_runs, axis=0)

    # After transposing, each row holds every vote cast for one sample;
    # pick the most frequent label among them
    ensemble_predictions = np.array([np.argmax(np.bincount(sample_predictions)) for sample_predictions in concatenated_predictions.T])

    return ensemble_predictions

# Combine every run of every model into a single majority-vote prediction per test sample
ensemble_predictions = max_voting(y_pred_CNN, y_pred_LSTM, y_pred_CRNN)

# Ensemble accuracy: fraction of test samples whose voted label equals the true label
is_correct = ensemble_predictions == y_test
correct = np.sum(is_correct)
total = len(y_test)
accuracy_ensemble = correct/total

message = "Accuracy of ensemble: " + str(round(accuracy_ensemble, 5))
print(message)

Now training the CNN model



  0%|          | 0/1 [00:00<?, ?it/s]

Best valid accuracy: 0.37589


  0%|          | 0/1 [00:00<?, ?it/s]

Best valid accuracy: 0.32388


  0%|          | 0/1 [00:00<?, ?it/s]

Best valid accuracy: 0.38534


  0%|          | 0/1 [00:00<?, ?it/s]

Best valid accuracy: 0.33097


  0%|          | 0/1 [00:00<?, ?it/s]

Best valid accuracy: 0.41135
Now training the LSTM model



  0%|          | 0/1 [00:00<?, ?it/s]

Best valid accuracy: 0.26714
