In [1]:
import mlflow
import mlflow.pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader, Subset
from tqdm.notebook import tqdm
import os
from sklearn.preprocessing import LabelEncoder
import torch
import math 
import sys

# Add the src directory to the Python path
sys.path.append(os.path.abspath(os.path.join('..', 'src')))
from ieeg_dataset import IeegDataset


In [2]:
# Object-store credentials must be supplied by the environment (shell export,
# .env loader, secrets manager) and never written into the notebook itself.
# NOTE(review): access keys were previously hard-coded in this cell; treat them
# as leaked and rotate them.
_required_env = ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
_missing_env = [name for name in _required_env if not os.environ.get(name)]
if _missing_env:
    # Fail here with a clear message instead of later, deep inside an artifact upload.
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
        + ". Export them before starting the notebook kernel."
    )

# Local S3-compatible artifact store and local MLflow tracking server.
os.environ['MLFLOW_S3_ENDPOINT_URL'] = 'http://localhost:9000'
os.environ['MLFLOW_S3_IGNORE_TLS'] = 'true'
mlflow.set_tracking_uri("http://localhost:5000")

print('tracking uri:', mlflow.get_tracking_uri())

tracking uri: http://localhost:5000


# Transformer

In [3]:
# Configuration
# --- Experiment tracking ---
EXPERIMENT_NAME = "IEEG_Classification_Architecture Search"
RUN_NAME = "Transformer"

# --- Data ---
DATA_DIR = '../data/data_normalized_exp2'
SEQ_LENGTH = 500
INPUT_SIZE = SEQ_LENGTH  # alias kept for code that refers to the input size by this name

# --- Optimisation ---
BATCH_SIZE = 64
NUM_EPOCHS = 50
LEARNING_RATE = 1e-4

# --- Transformer architecture ---
MODEL_DIM = 256
NUM_HEADS = 4
NUM_LAYERS = 4

# --- Hardware: use the GPU when one is visible, otherwise fall back to the CPU ---
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a sequence, then apply dropout.

    The table is precomputed once for ``seq_length`` positions and stored as a
    buffer named ``pe`` of shape ``(1, seq_length, d_model)``, so it follows the
    module across devices and is saved in the state dict.

    Args:
        d_model: Size of the last (feature) dimension of the inputs. May be odd.
        seq_length: Number of positions to precompute.
        dropout: Dropout probability applied after the encoding is added.
    """

    def __init__(self, d_model, seq_length=5000, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term  # one column per sine frequency

        pe = torch.zeros(seq_length, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd (cosine) column than even
        # (sine) columns, so drop the surplus frequency to keep the shapes aligned.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encoding for its first ``x.size(1)`` positions, after dropout."""
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)

class TransformerModel(nn.Module):
    """Transformer-encoder sequence classifier with mean pooling and an MLP head.

    Args:
        input_dim: Number of features per time step fed to the embedding layer.
        model_dim: Width of the embedding and of every encoder layer.
        num_classes: Number of output logits.
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        seq_length: Number of positions the positional encoding is built for.
        dropout: Dropout probability used inside the encoder layers.
    """

    def __init__(self, input_dim, model_dim, num_classes, num_heads, num_layers, seq_length, dropout=0.1):
        super().__init__()
        self.embedding = nn.Linear(input_dim, model_dim)
        self.positional_encoding = PositionalEncoding(model_dim, seq_length=seq_length)
        encoder_layers = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.fc1 = nn.Linear(model_dim, model_dim // 2)
        self.fc2 = nn.Linear(model_dim // 2, model_dim // 4)
        self.fc3 = nn.Linear(model_dim // 4, num_classes)

    def forward(self, x):
        """Return class logits of shape ``[batch_size, num_classes]``.

        ``x`` is either ``[batch_size, seq_length]`` (one value per time step,
        expanded to a trailing feature axis of size 1) or already
        ``[batch_size, seq_length, input_dim]``.
        """
        if x.dim() == 2:
            # Univariate input: add the feature axis the embedding layer expects.
            x = x.unsqueeze(-1)
        x = self.embedding(x)
        x = self.positional_encoding(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)  # Average pooling over sequence length
        # Without non-linearities the three linear layers collapse into a single
        # affine map; ReLU between them makes the head an actual MLP. ReLU has no
        # parameters, so the module's state-dict keys are unchanged.
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

In [5]:
# Data Loaders and Partitioning
def create_data_loaders(dataset, batch_size, test_size=0.6, random_state=42):
    """Split ``dataset`` into stratified train/test subsets and wrap them in loaders.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs, with
            ``label`` exposing ``.item()``.
        batch_size: Batch size used for both loaders.
        test_size: Fraction of samples assigned to the test subset.
            NOTE(review): the default keeps the previous hard-coded 0.6, i.e. only
            40% of the data is used for training - confirm this is intended.
        random_state: Seed for the stratified split, so the partition is repeatable.

    Returns:
        ``(train_loader, test_loader)``; the training loader shuffles every epoch,
        the test loader keeps a fixed order.
    """
    # Stratification needs every label up front, which means reading each item once.
    labels = np.array([dataset[i][1].item() for i in range(len(dataset))])
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
    # Only the labels drive the split, so a dummy feature matrix is enough.
    train_index, test_index = next(splitter.split(np.zeros(len(labels)), labels))

    train_loader = DataLoader(Subset(dataset, train_index), batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(Subset(dataset, test_index), batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

In [6]:
# Plotting Functions
def plot_pie_chart(counts, class_names, title):
    """Draw a pie chart of ``counts`` labelled with ``class_names`` and return the figure.

    Wedges start at 90 degrees and run clockwise; each wedge shows its share as
    a percentage with one decimal place.
    """
    figure, axes = plt.subplots()
    axes.pie(
        counts,
        labels=class_names,
        autopct='%1.1f%%',
        startangle=90,
        counterclock=False,
    )
    # Equal aspect ratio so the pie is rendered as a circle.
    axes.axis('equal')
    axes.set_title(title)
    return figure

In [7]:
# Training function
def train_model(model, train_loader, optimizer, criterion, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs and log per-epoch metrics to MLflow.

    For every epoch the loss, accuracy and weighted precision/recall/F1 over the
    training batches are logged as ``train_*`` metrics with ``step=epoch``.

    Args:
        model: Network to optimise; called as ``model(inputs)``.
        train_loader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Loss called as ``criterion(outputs, labels)`` with flattened labels.
        num_epochs: Number of passes over ``train_loader``.
        device: Device each batch is moved to before the forward pass.
    """
    num_batches = len(train_loader)
    for epoch in range(num_epochs):
        # Re-assert training mode each epoch in case the model was switched to eval in between.
        model.train()
        running_loss = 0.0
        num_correct = 0
        y_true_train = []
        y_pred_train = []
        epoch_metrics = None
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
        for batch_idx, (inputs, labels) in enumerate(progress_bar, start=1):
            inputs = inputs.to(device)
            # reshape(-1) rather than squeeze(): squeeze() turns a batch of one
            # label into a 0-d tensor, which breaks both the loss and the
            # bookkeeping below on a final batch of size 1.
            labels = labels.to(device).reshape(-1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Clip gradients
            optimizer.step()
            running_loss += loss.item()

            predicted = outputs.argmax(dim=1)
            num_correct += (predicted == labels).sum().item()
            y_true_train.extend(labels.cpu().tolist())
            y_pred_train.extend(predicted.cpu().tolist())

            # Running averages over the batches seen so far; dividing by the total
            # batch count mid-epoch would understate the loss.
            postfix = {
                'loss': f'{running_loss / batch_idx:.4f}',
                'accuracy': f'{num_correct / len(y_true_train):.4f}',
            }
            if batch_idx == num_batches:
                # The weighted metrics need a pass over every prediction, so they
                # are computed once per epoch instead of once per batch.
                precision, recall, f1, _ = precision_recall_fscore_support(
                    y_true_train, y_pred_train, average='weighted', zero_division=0
                )
                epoch_metrics = {
                    "train_loss": running_loss / num_batches,
                    "train_accuracy": num_correct / len(y_true_train),
                    "train_precision": precision,
                    "train_recall": recall,
                    "train_f1": f1,
                }
                postfix.update({
                    'precision': f'{precision:.4f}',
                    'recall': f'{recall:.4f}',
                    'f1': f'{f1:.4f}',
                })
            progress_bar.set_postfix(postfix)

        if epoch_metrics is None:
            # Empty loader: nothing was trained, so there is nothing to log.
            continue
        for metric_name, metric_value in epoch_metrics.items():
            mlflow.log_metric(metric_name, metric_value, step=epoch)

In [8]:
# Evaluation function
def evaluate_model(model, test_loader, dataset, device):
    """Evaluate ``model`` on ``test_loader``, print the results and log them to MLflow.

    Accuracy and weighted precision/recall/F1 are logged as ``test_*`` metrics.

    Args:
        model: Trained network; called as ``model(inputs)``.
        test_loader: Iterable of ``(inputs, labels)`` batches.
        dataset: Unused by this function; kept so existing calls keep working.
        device: Device each input batch is moved to before the forward pass.
    """
    model.eval()
    y_true_test = []
    y_pred_test = []
    progress_bar = tqdm(test_loader, desc="Evaluating", unit="batch")
    with torch.no_grad():
        for inputs, labels in progress_bar:
            inputs = inputs.to(device)
            predicted = model(inputs).argmax(dim=1)
            # reshape(-1) rather than squeeze(): squeeze() collapses a batch of one
            # label to a 0-d tensor, which cannot be used to extend a list.
            y_true_test.extend(labels.reshape(-1).tolist())
            y_pred_test.extend(predicted.cpu().tolist())

    test_accuracy = accuracy_score(y_true_test, y_pred_test)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true_test, y_pred_test, average='weighted', zero_division=0)

    # accuracy_score returns a fraction in [0, 1]; scale it before printing it as a percentage.
    print(f'Accuracy of the model on the test data: {test_accuracy * 100:.2f}%')
    print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')

    # The logged metric stays a fraction, consistent with the training metrics.
    mlflow.log_metric("test_accuracy", test_accuracy)
    mlflow.log_metric("test_precision", precision)
    mlflow.log_metric("test_recall", recall)
    mlflow.log_metric("test_f1", f1)


In [9]:
# Build the dataset from the configured data directory and sequence length.
# NOTE(review): IeegDataset lives in ../src/ieeg_dataset.py; presumably SEQ_LENGTH
# is the number of samples per window - confirm against that module.
dataset = IeegDataset(DATA_DIR, SEQ_LENGTH)




  self.data = torch.tensor(self.data, dtype=torch.float32)


In [10]:
# Stratified train/test loaders over the full dataset.
train_loader, test_loader = create_data_loaders(dataset, BATCH_SIZE)

# One output logit per class known to the dataset's label encoder.
NUM_CLASSES = len(dataset.label_encoder.classes_)

# input_dim=1: the model receives a single value per time step.
model = TransformerModel(
    input_dim=1,
    model_dim=MODEL_DIM,
    num_classes=NUM_CLASSES,
    num_heads=NUM_HEADS,
    num_layers=NUM_LAYERS,
    seq_length=SEQ_LENGTH,
)
model = model.to(DEVICE)
model

TransformerModel(
  (embedding): Linear(in_features=1, out_features=256, bias=True)
  (positional_encoding): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=256, bias=True)
        (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fc1): Linear(in_features=256, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_feat

In [11]:
# Cross-entropy over the model's raw class logits.
criterion = nn.CrossEntropyLoss()
# Adam over every model parameter at the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [12]:
# Make EXPERIMENT_NAME the active MLflow experiment for the runs started below.
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='mlflow-artifacts:/4', creation_time=1716522047198, experiment_id='4', last_update_time=1716522047198, lifecycle_stage='active', name='IEEG_Classification_Architecture Search', tags={}>

In [13]:
# NOTE(review): the config cell defines RUN_NAME = "Transformer", which is not used
# here; the literal run name is kept so existing runs stay comparable - confirm
# which name is intended.
with mlflow.start_run(run_name="TransformerModel_Experiment") as run:
    # Log parameters in one call, including the architecture hyperparameters, so
    # runs of this architecture search can be told apart in the tracking UI.
    mlflow.log_params({
        "epochs": NUM_EPOCHS,
        "batch_size": BATCH_SIZE,
        "learning_rate": LEARNING_RATE,
        "model": "TransformerModel",
        "input_size": SEQ_LENGTH,
        "num_classes": NUM_CLASSES,
        "model_dim": MODEL_DIM,
        "num_heads": NUM_HEADS,
        "num_layers": NUM_LAYERS,
    })
    # Store the dataset's class mapping next to the run as a JSON artifact.
    mlflow.log_dict(dataset.get_class_mapping(), "class_mapping.json")

    # Train and Evaluate the Model
    train_model(model, train_loader, optimizer, criterion, NUM_EPOCHS, DEVICE)
    evaluate_model(model, test_loader, dataset, DEVICE)

    # Log the model
    mlflow.pytorch.log_model(model, "model")

Epoch 1/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 2/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 3/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 4/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 5/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 6/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 7/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 8/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 9/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 10/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 11/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 12/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 13/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 14/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 15/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 16/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 17/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 18/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 19/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 20/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 21/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 22/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 23/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 24/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 25/50:   0%|          | 0/820 [00:00<?, ?batch/s]

Epoch 26/50:   0%|          | 0/820 [00:00<?, ?batch/s]

KeyboardInterrupt: 

: 