In [None]:
# !pip install -U scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.8.0-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting threadpoolctl>=3.2.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.8.0-cp311-cp311-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------


[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: C:\Users\sulta\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
import matplotlib.pyplot as plt
import numpy as np
import time

In [5]:
# Load data
data = pd.read_csv('EMG-data/EMG-data.csv')
data.head()

Unnamed: 0,time,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8,class,label
0,1,1e-05,-2e-05,-1e-05,-3e-05,0.0,-1e-05,0.0,-1e-05,0,1
1,5,1e-05,-2e-05,-1e-05,-3e-05,0.0,-1e-05,0.0,-1e-05,0,1
2,6,-1e-05,1e-05,2e-05,0.0,1e-05,-2e-05,-1e-05,1e-05,0,1
3,7,-1e-05,1e-05,2e-05,0.0,1e-05,-2e-05,-1e-05,1e-05,0,1
4,8,-1e-05,1e-05,2e-05,0.0,1e-05,-2e-05,-1e-05,1e-05,0,1


In [6]:
X = data.iloc[:, 1:9]  
y = data['class']  

In [7]:
# Data normalization
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [8]:
# Split data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [9]:
# Convert data for use with the model
class EMGDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32).unsqueeze(-1)
        self.labels = torch.tensor(labels.values, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [10]:
# Create datasets
train_dataset = EMGDataset(X_train, y_train)
val_dataset = EMGDataset(X_val, y_val)
test_dataset = EMGDataset(X_test, y_test)

In [11]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [12]:
# Define the CNN-LSTM model architecture with adjustable hyperparameters
class CNN_LSTM_Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2, dropout_rate=0.5):
        super(CNN_LSTM_Model, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.lstm = nn.LSTM(input_size=128, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 128)
        self.fc2 = nn.Linear(128, output_size)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # [batch_size, sequence_length=1, num_channels] -> [batch_size, num_channels, sequence_length=1]
        x = x.permute(0, 2, 1)
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.permute(0, 2, 1)
        h0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
        c0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = F.relu(self.fc1(out[:, -1, :]))
        out = self.dropout(out)
        out = self.fc2(out)
        return out

In [13]:
# Hyperparameters
hidden_size = 128
num_layers = 3
dropout_rate = 0.4
learning_rate = 0.0005

In [14]:
# Define the device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
# Create the model, define the loss function, and optimizer
model = CNN_LSTM_Model(input_size=1, hidden_size=hidden_size, output_size=len(y.unique()), num_layers=num_layers,
                       dropout_rate=dropout_rate).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [16]:
# Logging metrics for visualization
train_losses = []
val_accuracies = []
val_f1_scores = []
training_times = []
testing_times = []

In [17]:
# Train the model with validation data
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20):
    best_val_accuracy = 0.0
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        start_train_time = time.time()

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        end_train_time = time.time()
        train_loss = running_loss / len(train_loader)
        train_losses.append(train_loss)
        training_times.append(end_train_time - start_train_time)
        # Validation
        model.eval()
        val_loss = 0.0
        all_labels = []
        all_preds = []
        start_test_time = time.time()

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds.cpu().numpy())

        end_test_time = time.time()
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = accuracy_score(all_labels, all_preds)
        val_f1 = f1_score(all_labels, all_preds, average='macro')
        val_accuracies.append(val_accuracy)
        val_f1_scores.append(val_f1)
        testing_times.append(end_test_time - start_test_time)

        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}, Val F1: {val_f1:.4f}')

        # Save the model with the best validation accuracy
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), 'best_model.pth')

In [18]:
# Evaluate the model
def evaluate_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    test_accuracy = accuracy_score(all_labels, all_preds)
    test_f1 = f1_score(all_labels, all_preds, average='macro')
    test_recall = recall_score(all_labels, all_preds, average='macro')
    test_precision = precision_score(all_labels, all_preds, average='macro')

    print(f'Test Accuracy: {test_accuracy:.4f}, Test F1: {test_f1:.4f}, Test Recall: {test_recall:.4f}, Test Precision: {test_precision:.4f}')

In [19]:
# Train and evaluate the model
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20)
evaluate_model(model, test_loader)

Epoch [1/20], Loss: 0.9325, Val Loss: 0.8960, Val Accuracy: 0.6454, Val F1: 0.1412
Epoch [2/20], Loss: 0.8912, Val Loss: 0.8707, Val Accuracy: 0.6488, Val F1: 0.1766
Epoch [3/20], Loss: 0.8558, Val Loss: 0.8246, Val Accuracy: 0.6575, Val F1: 0.3133
Epoch [4/20], Loss: 0.7892, Val Loss: 0.7493, Val Accuracy: 0.6826, Val F1: 0.4106
Epoch [5/20], Loss: 0.7020, Val Loss: 0.6605, Val Accuracy: 0.7211, Val F1: 0.5028
Epoch [6/20], Loss: 0.6195, Val Loss: 0.5949, Val Accuracy: 0.7543, Val F1: 0.5905
Epoch [7/20], Loss: 0.5482, Val Loss: 0.5341, Val Accuracy: 0.7867, Val F1: 0.6618
Epoch [8/20], Loss: 0.4910, Val Loss: 0.4858, Val Accuracy: 0.8133, Val F1: 0.7070
Epoch [9/20], Loss: 0.4450, Val Loss: 0.4406, Val Accuracy: 0.8369, Val F1: 0.7528
Epoch [10/20], Loss: 0.4070, Val Loss: 0.4320, Val Accuracy: 0.8415, Val F1: 0.7484
Epoch [11/20], Loss: 0.3760, Val Loss: 0.3875, Val Accuracy: 0.8642, Val F1: 0.7978
Epoch [12/20], Loss: 0.3502, Val Loss: 0.3640, Val Accuracy: 0.8762, Val F1: 0.8146
E

In [20]:
# Add a sample for comparison with true labels
# Select a subset of the data (0.1% of the test set)
subset_size = int(len(X_test) * 0.1)
subset_indices = np.random.choice(len(X_test), subset_size, replace=False)
X_subset = X_test[subset_indices]
y_subset_true = y_test.iloc[subset_indices]

In [21]:
# Model predictions on the subset
X_subset_tensor = torch.tensor(X_subset, dtype=torch.float32).unsqueeze(-1).to(device)
model.eval()
with torch.no_grad():
    y_subset_pred = model(X_subset_tensor)
    _, predicted_classes = torch.max(y_subset_pred, 1)

In [22]:
# Compare predicted classes with true labels
for i in range(len(X_subset)):
    print(f'True class: {y_subset_true.iloc[i]}, Predicted class: {predicted_classes[i].item()}')

True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 5, Predicted class: 5
True class: 0, Predicted class: 0
True class: 0, Predicted class: 5
True class: 0, Predicted class: 0
True class: 1, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 4, Predicted class: 4
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 3, Predicted class: 3
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 3, Predicted class: 3
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 0, Predicted class: 0
True class: 1,

In [24]:
# Plot training loss
axs[0, 0].plot(train_losses, color=color, label='Train Loss')
axs[0, 0].set_title('Training Loss', color='black', fontweight='bold')
axs[0, 0].set_xlabel('Epoch', color='black')
axs[0, 0].set_ylabel('Loss', color='black')

Text(4.444444444444452, 0.5, 'Loss')

In [25]:
# Plot validation accuracy
axs[0, 1].plot(val_accuracies, color=color, label='Val Accuracy')
axs[0, 1].set_title('Validation Accuracy', color='black', fontweight='bold')
axs[0, 1].set_xlabel('Epoch', color='black')
axs[0, 1].set_ylabel('Accuracy', color='black')

Text(596.2626262626262, 0.5, 'Accuracy')

In [26]:
# Plot training and testing time
axs[1, 0].plot(training_times, color=color, label='Training Time', marker='o')
axs[1, 0].plot(testing_times, color='#F59E0B', label='Testing Time', marker='o')
axs[1, 0].set_title('Training and Testing Time', color='black', fontweight='bold')
axs[1, 0].set_xlabel('Epoch', color='black')
axs[1, 0].set_ylabel('Time (s)', color='black')
axs[1, 0].legend()

<matplotlib.legend.Legend at 0x1c9856b1450>

In [27]:
# Plot F1-Score
axs[1, 1].plot(val_f1_scores, color=color, label='Val F1-Score')
axs[1, 1].set_title('Validation F1-Score', color='black', fontweight='bold')
axs[1, 1].set_xlabel('Epoch', color='black')
axs[1, 1].set_ylabel('F1-Score', color='black')

Text(596.2626262626262, 0.5, 'F1-Score')