In [1]:
from torch.utils.data import Dataset, DataLoader
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import torch

In [2]:
DATA_DIR = '../data/data_normalized'
data_set = os.listdir(DATA_DIR)

In [3]:
data_set[1]

'CA1_17419014_resampled.csv'

In [4]:
test_file = os.path.join(DATA_DIR, data_set[10])
test = pd.read_csv(test_file)

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [6]:
device

device(type='cuda')

# Dataset Class

In [7]:
class IeegDataset(Dataset):
    def __init__(self, data_dir, seq_length=3035):
        self.data_dir = data_dir
        self.signals = os.listdir(self.data_dir)
        self.seq_length = seq_length
        self.data = []
        self.labels = []
        
        self.classes = [f.split('_')[0] for f in self.signals]

        self.label_encoder = LabelEncoder()
        self.label_encoder.fit(self.classes)

        for file in self.signals:
            file_path = os.path.join(data_dir, file)
            df = pd.read_csv(file_path)

            for column in df.columns: 
                signal_data = df[column].values[:self.seq_length]
                if len(signal_data) < self.seq_length:
                    # Padding if necesary 
                    signal_data = np.pad(signal_data, (0, self.seq_length - len(signal_data)), 'constant')
    
                class_label = self.label_encoder.transform([file.split('_')[0]])
                self.data.append(signal_data)
                self.labels.append(class_label)
        
        self.data = torch.tensor(self.data, dtype=torch.float32)
        self.labels = torch.tensor(self.labels, dtype=torch.long)

    def __len__(self):
        return(len(self.data))
    def __getitem__(self, index) -> torch.tensor:
        return self.data[index] , self.labels[index]
    def get_class_mapping(self):
        return {i: class_name for i, class_name in enumerate(self.label_encoder.classes_)}

In [8]:
dataset = IeegDataset('../data/data_normalized')

  self.data = torch.tensor(self.data, dtype=torch.float32)


In [9]:
dataset.__len__()

2625

In [10]:
# Create data loaders
batch_size = 128

# Split dataset into train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [12]:
import torch.nn as nn
import torch.optim as optim

class SimpleNN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define model, loss function, and optimizer
input_size = 3035  # sequence length
num_classes = len(dataset.label_encoder.classes_)
model = SimpleNN(input_size, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
print(

In [34]:
import mlflow
import mlflow.pytorch
mlflow.set_tracking_uri("http://mlflow:5000")

print('tracking uri:', mlflow.get_tracking_uri())


tracking uri: http://mlflow:5000


In [46]:


# Define the experiment name
experiment_name = "IEEG_Classification_Baseline"

# Create a new experiment or set the existing one
mlflow.set_experiment(experiment_name)
# Training and evaluation function
def train_and_evaluate(model, train_loader, test_loader, num_epochs=200):
    # Start a new run
    mlflow.pytorch.autolog()
    with mlflow.start_run(run_name="ANN-Baseline") as run:
        # Log parameters
        # mlflow.log_param("epochs", num_epochs)
        # mlflow.log_param("batch_size", batch_size)
        # mlflow.log_param("learning_rate", 0.001)

        # Training loop
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)  # Move to GPU

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels.squeeze())  # Squeeze to remove extra dimension
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            
            avg_loss = running_loss / len(train_loader)
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
            
            # Log loss
            # mlflow.log_metric("loss", avg_loss, step=epoch)

        # Testing loop
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)  # Move to GPU
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels.squeeze()).sum().item()  # Squeeze to remove extra dimension

        accuracy = 100 * correct / total
        print(f'Accuracy of the model on the test data: {accuracy:.2f}%')
        
        # # Log accuracy
        # mlflow.log_metric("accuracy", accuracy)
        
        # # Log the model
        # mlflow.pytorch.log_model(model, "model")



In [47]:
# Train and evaluate the model while tracking with MLflow
train_and_evaluate(model, train_loader, test_loader, num_epochs=200)



Epoch [1/200], Loss: 0.0196
Epoch [2/200], Loss: 0.0295
Epoch [3/200], Loss: 0.0153
Epoch [4/200], Loss: 0.0235
Epoch [5/200], Loss: 0.0203
Epoch [6/200], Loss: 0.0194
Epoch [7/200], Loss: 0.0231
Epoch [8/200], Loss: 0.0206
Epoch [9/200], Loss: 0.0196
Epoch [10/200], Loss: 0.0254
Epoch [11/200], Loss: 0.0155
Epoch [12/200], Loss: 0.0108
Epoch [13/200], Loss: 0.0180
Epoch [14/200], Loss: 0.0113
Epoch [15/200], Loss: 0.0203
Epoch [16/200], Loss: 0.0172
Epoch [17/200], Loss: 0.0236
Epoch [18/200], Loss: 0.0218
Epoch [19/200], Loss: 0.0175
Epoch [20/200], Loss: 0.0141
Epoch [21/200], Loss: 0.0190
Epoch [22/200], Loss: 0.0367
Epoch [23/200], Loss: 0.0273
Epoch [24/200], Loss: 0.0311
Epoch [25/200], Loss: 0.0208
Epoch [26/200], Loss: 0.0290
Epoch [27/200], Loss: 0.0364
Epoch [28/200], Loss: 0.0316
Epoch [29/200], Loss: 0.0218
Epoch [30/200], Loss: 0.0306
Epoch [31/200], Loss: 0.0249
Epoch [32/200], Loss: 0.0210
Epoch [33/200], Loss: 0.0220
Epoch [34/200], Loss: 0.0247
Epoch [35/200], Loss: 0

In [42]:
mlflow.end_run()

In [54]:
import mlflow
import mlflow.pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Define the experiment name
experiment_name = "IEEG_Classification_Baseline"

# Create a new experiment or set the existing one
mlflow.set_experiment(experiment_name)

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the model, loss function, and optimizer
input_size = 3035  # sequence length
num_classes = len(dataset.label_encoder.classes_)
model = SimpleNN(input_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and evaluation function
def train_and_evaluate(model, train_loader, test_loader, num_epochs=200):
    
    with mlflow.start_run(run_name="ANN-Baseline") as run:
        # Log parameters
        mlflow.log_param("epochs", num_epochs)
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("learning_rate", 0.001)
        mlflow.log_param("model", "SimpleNN")
        mlflow.log_param("input_size", input_size)
        mlflow.log_param("num_classes", num_classes)
        mlflow.log_dict(dataset.get_class_mapping(), "class_mapping.json")

        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            y_true_train = []
            y_pred_train = []
            
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels.squeeze())
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                
                _, predicted = torch.max(outputs, 1)
                y_true_train.extend(labels.squeeze().cpu().numpy())
                y_pred_train.extend(predicted.cpu().numpy())
            
            avg_loss = running_loss / len(train_loader)
            train_accuracy = accuracy_score(y_true_train, y_pred_train)
            precision, recall, f1, _ = precision_recall_fscore_support(y_true_train, y_pred_train, average='weighted')
            
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {train_accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')
            
            mlflow.log_metric("train_loss", avg_loss, step=epoch)
            mlflow.log_metric("train_accuracy", train_accuracy, step=epoch)
            mlflow.log_metric("train_precision", precision, step=epoch)
            mlflow.log_metric("train_recall", recall, step=epoch)
            mlflow.log_metric("train_f1", f1, step=epoch)

        model.eval()
        y_true_test = []
        y_pred_test = []
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                y_true_test.extend(labels.squeeze().cpu().numpy())
                y_pred_test.extend(predicted.cpu().numpy())
                
        test_accuracy = accuracy_score(y_true_test, y_pred_test)
        precision, recall, f1, _ = precision_recall_fscore_support(y_true_test, y_pred_test, average='weighted')
        
        print(f'Accuracy of the model on the test data: {test_accuracy:.2f}%')
        print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')
        
        mlflow.log_metric("test_accuracy", test_accuracy)
        mlflow.log_metric("test_precision", precision)
        mlflow.log_metric("test_recall", recall)
        mlflow.log_metric("test_f1", f1)
        
        # Confusion matrix
        cm = confusion_matrix(y_true_test, y_pred_test)
        cm_df = pd.DataFrame(cm, index=dataset.label_encoder.classes_, columns=dataset.label_encoder.classes_)
        
        plt.figure(figsize=(10, 7))
        sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.title('Confusion Matrix')
        plt.savefig("confusion_matrix.png")
        mlflow.log_artifact("confusion_matrix.png")
        plt.close()
        
        # Log the model
        mlflow.pytorch.log_model(model, "model")

# Train and evaluate the model while tracking with MLflow
train_and_evaluate(model, train_loader, test_loader, num_epochs=200)


Epoch [1/200], Loss: 1.1699, Accuracy: 0.5286, Precision: 0.5349, Recall: 0.5286, F1 Score: 0.4853
Epoch [2/200], Loss: 0.7733, Accuracy: 0.7110, Precision: 0.7181, Recall: 0.7110, F1 Score: 0.6799
Epoch [3/200], Loss: 0.5675, Accuracy: 0.8057, Precision: 0.8043, Recall: 0.8057, F1 Score: 0.7986
Epoch [4/200], Loss: 0.4368, Accuracy: 0.8538, Precision: 0.8523, Recall: 0.8538, F1 Score: 0.8519
Epoch [5/200], Loss: 0.3408, Accuracy: 0.8824, Precision: 0.8819, Recall: 0.8824, F1 Score: 0.8813
Epoch [6/200], Loss: 0.3002, Accuracy: 0.9038, Precision: 0.9033, Recall: 0.9038, F1 Score: 0.9031
Epoch [7/200], Loss: 0.2571, Accuracy: 0.9100, Precision: 0.9095, Recall: 0.9100, F1 Score: 0.9094
Epoch [8/200], Loss: 0.2026, Accuracy: 0.9295, Precision: 0.9291, Recall: 0.9295, F1 Score: 0.9292
Epoch [9/200], Loss: 0.1869, Accuracy: 0.9424, Precision: 0.9423, Recall: 0.9424, F1 Score: 0.9423
Epoch [10/200], Loss: 0.1748, Accuracy: 0.9433, Precision: 0.9431, Recall: 0.9433, F1 Score: 0.9429
Epoch [11

