In [1]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer, StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim

from script.dataset import get_data, cols_emg, cols_emg_cal

In [2]:
X, y = get_data()

In [3]:
# extract center 100 data points from each set & one-hot encode labels

window_size = 100
data = list()
labels = list()

for i, count in X.value_counts('id').items():
    start = int(count / 2 - window_size / 2)
    idx_range = X[X['id'] == i].index[start:start + window_size]
    data.append(np.array(X.iloc[idx_range][cols_emg + cols_emg_cal]))
    labels.append(y[i])
    
data = np.array(data)
data_dim = data.shape
print(f'data shape: {data_dim}')

scaler = StandardScaler()
data = data.reshape(-1, 8)  # reshape to 2D for scaling
data = scaler.fit_transform(data)
data = data.reshape(*data_dim)  # reshape back to original shape

encoder = LabelBinarizer()
labels = encoder.fit_transform(labels)
print(f'labels shape: {labels.shape}, classes: {encoder.classes_}')

data shape: (312, 100, 16)
labels shape: (312, 4), classes: ['crimp_20' 'crimp_45' 'jug' 'sloper_30']


In [4]:
# pytorch device & tensors

# use gpu > apple silicon > cpu
device = torch.device("cuda:0" if torch.cuda.is_available() else (torch.device("mps") if torch.backends.mps.is_available() else "cpu"))

data = torch.tensor(data, dtype=torch.float32, device=device)
# change dimensions to (batch_size, num_features, seq_length)
data = data.transpose(1, 2)

labels = torch.tensor(labels, dtype=torch.float32, device=device)

In [5]:
class CNN(nn.Module):
    def __init__(self, seq_length, kernel_size = 5):
        super(CNN, self).__init__()

        conv_diff = 2 * (kernel_size - 1)
        lin_in = int(((seq_length - conv_diff) / 2 - conv_diff) / 2 * 64)
        self.stack = nn.Sequential(
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=kernel_size),
            nn.ReLU(),
            nn.Conv1d(in_channels=32, out_channels=32, kernel_size=kernel_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            
            nn.Conv1d(in_channels=32, out_channels=64, kernel_size=kernel_size),
            nn.ReLU(),
            nn.Conv1d(in_channels=64, out_channels=64, kernel_size=kernel_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),

            nn.Flatten(),
            nn.Linear(lin_in, 4),
            nn.Softmax(dim=1)
        )
    
    def forward(self, x):
        i = torch.nn.Identity()
        c = i(x)
        c = self.stack(c)
        return c

In [6]:
# Instantiate the model, define the loss function and the optimizer
model = CNN(window_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [7]:
from torch.utils.data import DataLoader, TensorDataset

# Create a DataLoader
dataset = TensorDataset(data, labels)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Training loop
num_epochs = 20
best_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
    
    epoch_loss = running_loss / len(dataloader.dataset)
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}')
    
    # Early stopping
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save(model.state_dict(), 'best_model.pth')

print('Training complete')

Epoch 1/20, Loss: 1.2872
Epoch 2/20, Loss: 1.2181
Epoch 3/20, Loss: 1.1717
Epoch 4/20, Loss: 1.1553
Epoch 5/20, Loss: 1.0508
Epoch 6/20, Loss: 1.0631
Epoch 7/20, Loss: 1.0070
Epoch 8/20, Loss: 1.0292
Epoch 9/20, Loss: 1.0099
Epoch 10/20, Loss: 1.0030
Epoch 11/20, Loss: 0.9679
Epoch 12/20, Loss: 0.9575
Epoch 13/20, Loss: 0.9857
Epoch 14/20, Loss: 0.9465
Epoch 15/20, Loss: 0.9283
Epoch 16/20, Loss: 0.9486
Epoch 17/20, Loss: 0.9922
Epoch 18/20, Loss: 0.9593
Epoch 19/20, Loss: 0.9370
Epoch 20/20, Loss: 0.9534
Training complete


In [8]:
# Load the best model
model.load_state_dict(torch.load('best_model.pth'))
model.eval()

with torch.no_grad():
    outputs = model(data)
    _, predicted = torch.max(outputs, 1)
    _, true_labels = torch.max(labels, 1)
    accuracy = (predicted == true_labels).sum().item() / labels.size(0)
    print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.7724
