In [1]:
import torch
from utils import prepare_data
from train import get_dataloaders
import pandas as pd
import numpy as np
from params import PARAMS
from sklearn.model_selection import train_test_split
from models import GCNAuto, GCRAM
from main import init_model_params

from train import train_model_2
from main import model_predict, print_classification_report

RANDOM_SEED = 0
SEQ_LEN = 400
N_EPOCHS = 5000
BATCH_SIZE = 64

# Load dataset

In [2]:
from utils import load_data



X_train, y_train, X_test, y_test = load_data('dataset/train/cross_subject_data_0.pickle')
print('Original shapes:')
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

X_train = X_train[np.isin(y_train, [0, 1])]
y_train = y_train[np.isin(y_train, [0, 1])]
X_test = X_test[np.isin(y_test, [0, 1])]
y_test = y_test[np.isin(y_test, [0, 1])]

print('Shape with 2 classe (left, right arm):')
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

X_train = np.vstack([X_train, X_test])
y_train = np.hstack([y_train, y_test])

X_train, y_train = prepare_data(X_train, y_train, SEQ_LEN)

X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=PARAMS['TEST_SIZE'], shuffle=True, random_state=RANDOM_SEED)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=PARAMS['VALID_SIZE'], shuffle=True, random_state=RANDOM_SEED)
dataloaders = get_dataloaders(X_train, y_train, X_valid, y_valid, X_test, y_test, PARAMS['BATCH_SIZE'], random_seed=RANDOM_SEED, device=PARAMS['DEVICE'])

Original shapes:
X_train shape: (5606348, 64)
y_train shape: (5606348,)
X_test shape: (590400, 64)
y_test shape: (590400,)
Shape with 2 classe (left, right arm):
X_train shape: (2802428, 64)
y_train shape: (2802428,)
X_test shape: (295200, 64)
y_test shape: (295200,)


In [3]:
model = GCNAuto(in_features=SEQ_LEN, 
        n_nodes=PARAMS['N_CHANNELS'], 
        num_classes=PARAMS['N_CLASSES'], 
        hidden_sizes=PARAMS['GCNAUTO_HIDDEN_SIZES'], 
        dropout_p=0.4, 
        device=PARAMS['DEVICE'])

model = GCRAM(graph_type='n', 
        seq_len=SEQ_LEN, 
        cnn_in_channels=PARAMS['GCRAM_CNN_IN_CHANNELS'], 
        cnn_n_kernels=PARAMS['GCRAM_CNN_N_KERNELS'], 
        cnn_kernel_size=PARAMS['GCRAM_CNN_KERNEL_SIZE'], 
        cnn_stride=PARAMS['GCRAM_CNN_STRIDE'], 
        maxpool_kernel_size=PARAMS['GCRAM_MAXPOOL_KERNEL_SIZE'], 
        maxpool_stride=PARAMS['GCRAM_MAXPOOL_STRIDE'], 
        lstm_hidden_size=PARAMS['GCRAM_LSTM_HIDDEN_SIZE'], 
        is_bidirectional=PARAMS['GCRAM_LSTM_IS_BIDIRECTIONAL'], 
        lstm_n_layers=PARAMS['GCRAM_LSTM_N_LAYERS'], 
        attn_embed_dim=PARAMS['GCRAM_ATTN_EMBED_DIM'], 
        n_classes=PARAMS['N_CLASSES'], 
        lstm_dropout_p=PARAMS['GCRAM_LSTM_DROPOUT_P'], 
        dropout1_p=PARAMS['GCRAM_DROPOUT1_P'], 
        dropout2_p=PARAMS['GCRAM_DROPOUT2_P'], 
        device=PARAMS['DEVICE'])

model = init_model_params(model, random_seed=RANDOM_SEED)
# model.init_node_embeddings()

model = model.to(PARAMS['DEVICE'])

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=PARAMS['LR'])
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, PARAMS['SCHEDULER_STEPSIZE'], PARAMS['SCHEDULER_GAMMA'])

# best_model, history = train_model(dataloaders, dataset_sizes, model, criterion, optimizer, scheduler, PARAMS['N_EPOCHS'], random_seed=random_seed)
best_model, history = train_model_2(model, optimizer, criterion, dataloaders['train'], dataloaders['val'], N_EPOCHS, RANDOM_SEED, PARAMS['DEVICE'])
best_model = best_model.to(PARAMS['DEVICE'])

y_preds, y_test = model_predict(best_model, test_loader=dataloaders['test'])

cr, cm, auroc = print_classification_report(y_test, y_preds, PARAMS['N_CLASSES'], PARAMS['LABEL_MAP'])

train() called: model=GCRAM, opt=Adam(lr=0.001000), epochs=5000, device=cuda

Epoch   1/5000, train loss: 0.7233, train acc: 0.4978, val loss: 0.6299, val acc: 0.5484
Epoch  10/5000, train loss: 0.6867, train acc: 0.5483, val loss: 0.6294, val acc: 0.5609
Epoch  20/5000, train loss: 0.6851, train acc: 0.5438, val loss: 0.6203, val acc: 0.5844
Epoch  30/5000, train loss: 0.6862, train acc: 0.5563, val loss: 0.6250, val acc: 0.5672
Epoch  40/5000, train loss: 0.6829, train acc: 0.5574, val loss: 0.6328, val acc: 0.5328
Epoch  50/5000, train loss: 0.6834, train acc: 0.5625, val loss: 0.6233, val acc: 0.5797
Epoch  60/5000, train loss: 0.6830, train acc: 0.5555, val loss: 0.6290, val acc: 0.5344
Epoch  70/5000, train loss: 0.6812, train acc: 0.5599, val loss: 0.6262, val acc: 0.5797
Epoch  80/5000, train loss: 0.6821, train acc: 0.5679, val loss: 0.6217, val acc: 0.6031
Epoch  90/5000, train loss: 0.6820, train acc: 0.5654, val loss: 0.6219, val acc: 0.5719
Epoch 100/5000, train loss: 0.68

KeyboardInterrupt: 

In [4]:
y_preds, y_test = model_predict(model, test_loader=dataloaders['test'])

cr, cm, auroc = print_classification_report(y_test, y_preds, PARAMS['N_CLASSES'], PARAMS['LABEL_MAP'])
print(cr)

{'0': {'precision': 0.5942028985507246, 'recall': 0.643979057591623, 'f1-score': 0.6180904522613065, 'support': 382}, '1': {'precision': 0.615819209039548, 'recall': 0.5647668393782384, 'f1-score': 0.5891891891891892, 'support': 386}, 'accuracy': 0.6041666666666666, 'macro avg': {'precision': 0.6050110537951363, 'recall': 0.6043729484849307, 'f1-score': 0.6036398207252478, 'support': 768}, 'weighted avg': {'precision': 0.6050673462703676, 'recall': 0.6041666666666666, 'f1-score': 0.6035645570193309, 'support': 768}}
