In [1]:
import os

import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelBinarizer, StandardScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from script.dataset import get_data, cols_emg, cols_emg_cal

In [2]:
X, y = get_data()

In [3]:
# extract center 100 data points from each set & one-hot encode labels

window_size = 100
data = list()
labels = list()
people = list()

for i, count in X.value_counts('id').items():
    start = int(count / 2 - window_size / 2)
    idx_range = X[X['id'] == i].index[start:start + window_size]
    data.append(np.array(X.iloc[idx_range][cols_emg + cols_emg_cal]))
    labels.append(y[i])
    people.append(i.split('_')[1])
    
data = np.array(data)
data_dim = data.shape
print(f'data shape: {data_dim}')

scaler = StandardScaler()
data = data.reshape(-1, 8)  # reshape to 2D for scaling
data = scaler.fit_transform(data)
data = data.reshape(*data_dim)  # reshape back to original shape

encoder = LabelBinarizer()
labels = encoder.fit_transform(labels)
print(f'labels shape: {labels.shape}, classes: {encoder.classes_}')

people = pd.Series(people)

data shape: (312, 100, 16)
labels shape: (312, 4), classes: ['crimp_20' 'crimp_45' 'jug' 'sloper_30']


In [4]:
# train/test split

test_person = 'p1'
split = people == test_person
data_train, data_test, labels_train, labels_test = data[~split], data[split], labels[~split], labels[split]
print(f'training data shape: {data_train.shape}')

training data shape: (232, 100, 16)


In [5]:
# pytorch device & tensors

# use gpu > apple silicon > cpu
device = torch.device("cuda:0" if torch.cuda.is_available() else (torch.device("mps") if torch.backends.mps.is_available() else "cpu"))

def dataset_to_tensors(data, labels):   
    data = torch.tensor(data, dtype=torch.float32, device=device)
    # change dimensions to (batch_size, num_features, seq_length)
    data = data.transpose(1, 2)
    labels = torch.tensor(labels, dtype=torch.float32, device=device)
    return (data, labels)

data_train, labels_train = dataset_to_tensors(data_train, labels_train)
data_test, labels_test = dataset_to_tensors(data_test, labels_test)

In [6]:
# network

class CNN(nn.Module):
    def __init__(self, seq_length, kernel_size = 5, num_features = 32, in_channels = 16):
        super(CNN, self).__init__()

        conv_diff = kernel_size - 1
        lin_in = int(((seq_length - conv_diff) / 2 - conv_diff) / 2 * num_features)
        conv1_feats = int(in_channels + (num_features - in_channels) / 2)
        
        self.stack = nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=conv1_feats, kernel_size=kernel_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            
            nn.Conv1d(in_channels=conv1_feats, out_channels=num_features, kernel_size=kernel_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),

            nn.Dropout(0.5),
            nn.Flatten(),
            nn.Linear(lin_in, 4),
            nn.Softmax(dim=1)
        )
    
    def forward(self, x):
        i = torch.nn.Identity()
        c = i(x)
        c = self.stack(c)
        return c

model = CNN(window_size)
print(f'number of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')
del model

number of parameters: 8636


In [7]:
# training

train = True

# model save
model_dir = 'models'
os.makedirs('models', exist_ok=True)
model_path = os.path.join('models', 'best_model.pth')

def train_epoch(model, optimizer, criterion, dataloader):
    model.train()
    running_loss = 0
    
    for inputs, targets in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
    # epoch loss
    return running_loss / len(dataloader.dataset)

def train(model, optimizer, criterion, data, labels, num_epochs = 20, batch_size=8):
    # data loading
    dataset = TensorDataset(data_train, labels_train)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    best_loss = float('inf')
    for epoch in range(num_epochs):
        epoch_loss = train_epoch(model, optimizer, criterion, dataloader)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}')
        
        # save best model
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), model_path)
    print('Training complete')

if not train:
    print('Skipping training')
else:
    kf = KFold(n_splits=5)
    best_loss = float('inf')
    for fold, (idx_train, idx_val) in enumerate(kf.split(data_train)):
        d_train, d_val = data_train[idx_train], data_train[idx_val]
        l_train, l_val = labels_train[idx_train], labels_train[idx_val]
        
        model = CNN(window_size).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        train(model, optimizer, criterion, d_train, l_train)
        
        model.eval()
        with torch.no_grad():
            outputs = model(d_val)
            val_loss = criterion(outputs, l_val).item()
            print(f'Validation Loss: {val_loss:.4f}')
            # save best model
            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(model.state_dict(), model_path)
    print(f'Best validation loss: {best_loss}')

Epoch 1/20, Loss: 1.3583
Epoch 2/20, Loss: 1.2189
Epoch 3/20, Loss: 1.1130
Epoch 4/20, Loss: 1.0452
Epoch 5/20, Loss: 1.0225
Epoch 6/20, Loss: 0.9867
Epoch 7/20, Loss: 0.9574
Epoch 8/20, Loss: 0.9720
Epoch 9/20, Loss: 0.9565
Epoch 10/20, Loss: 0.9436
Epoch 11/20, Loss: 0.9746
Epoch 12/20, Loss: 0.9278
Epoch 13/20, Loss: 0.9702
Epoch 14/20, Loss: 0.9280
Epoch 15/20, Loss: 0.9196
Epoch 16/20, Loss: 0.9087
Epoch 17/20, Loss: 0.9119
Epoch 18/20, Loss: 0.9003
Epoch 19/20, Loss: 0.9052
Epoch 20/20, Loss: 0.8805
Training complete
Validation Loss: 0.8112
Epoch 1/20, Loss: 1.3426
Epoch 2/20, Loss: 1.1859
Epoch 3/20, Loss: 1.0869
Epoch 4/20, Loss: 1.0328
Epoch 5/20, Loss: 0.9959
Epoch 6/20, Loss: 0.9783
Epoch 7/20, Loss: 0.9937
Epoch 8/20, Loss: 0.9743
Epoch 9/20, Loss: 0.9605
Epoch 10/20, Loss: 0.9586
Epoch 11/20, Loss: 0.9477
Epoch 12/20, Loss: 0.9434
Epoch 13/20, Loss: 0.9342
Epoch 14/20, Loss: 0.9170
Epoch 15/20, Loss: 0.9195
Epoch 16/20, Loss: 0.9088
Epoch 17/20, Loss: 0.9133
Epoch 18/20, L

In [8]:
# load best model
model.load_state_dict(torch.load(model_path))
model.eval()

with torch.no_grad():
    outputs = model(data_test)
    _, predicted = torch.max(outputs, 1)
    _, true_labels = torch.max(labels_test, 1)
    accuracy = (predicted == true_labels).sum().item() / true_labels.size(0)
    print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.6000
