In [1]:
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import random
import numpy as np
import torch

train_df = pd.read_pickle("train_df.pkl")
val_df = pd.read_pickle("val_df.pkl")


In [2]:
def set_seed(seed=42):
    random.seed(seed)                           # Python random
    np.random.seed(seed)                        # NumPy
    torch.manual_seed(seed)                     # PyTorch CPU
    torch.cuda.manual_seed(seed)                # PyTorch GPU
    torch.backends.cudnn.deterministic = True   
    torch.backends.cudnn.benchmark = False      

set_seed(42)
print("Seed set for reproducibility.")


Seed set for reproducibility.


# Model-1


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# STDFT function to compute the Short-Time Fourier Transform (STFT) for a batch of signals
# It convert into time -frequency representation
# n_fft: size of the FFT window(300 hz is the sampling rate, so 256 is a good choice to capture more than 1 heart beat)
# hop_length: number of samples between successive frames (128 is a good choice since every window will overlap by 50%)
def compute_stft_batch(x, n_fft=256, hop_length=128):
    stft = torch.stft(
        x, n_fft=n_fft, hop_length=hop_length,
        return_complex=True
    )
    return torch.abs(stft)  

# 2. Model
class ECGModel(nn.Module):
    def __init__(self,hidden_size=128, dropout_rate=0.0):
        super(ECGModel, self).__init__()
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate

        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        )

        # RNN
        self.rnn = nn.GRU(
            input_size=32 * 32, # out_channels * frequency_bins // 2 // 2 (due to max pooling) ()
            hidden_size= hidden_size,
            batch_first=True
        )

        # Fully connected
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, 4)  # 4 classes: Normal, AF, Other, Noisy

    def forward(self, x:torch.Tensor)-> torch.Tensor:
        # x: (batch_size, signal_length)
        #print("Input:", x.shape)
        x = compute_stft_batch(x)  # STFT → (batch, freq, time)
        #print("After STFT:", x.shape)
        x = torch.log1p(x)  # logarithmic scaling
        #print("After log1p:", x.shape)
        

        x = x.unsqueeze(1)  # CNN input shape: (batch, channel, freq, time)
        #print("After Unsqueeze:", x.shape)

        x = self.conv1(x)
        x = self.conv2(x)
        #print("After conv2:", x.shape)

        # Flatten the output for RNN input
        b, c, f, t = x.shape  # batch, channel, freq, time
        x = x.view(b, c * f, t)  # (batch, features, time)
        #print("After view:", x.shape)
        x = x.permute(0, 2, 1)   # (batch, time, features)
        #print("After permute:", x.shape)

        # RNN
        output, h_n = self.rnn(x)
        #print("After RNN output:", output.shape)
        x = self.dropout(h_n[-1])
        x = self.fc(x)  # use the last hidden state for classification
        #print("Final output:", x.shape)
        return x


## Signal Padding and Trimming
In our dataset the signal lengths are not fixed, for example:

Min: ~2700

Max: ~18286

But a very large part: 9000

Therefore we have to process this length difference when giving the signal to your model. Otherwise, Tensor sizes do not match and we cannot process in batch on GPU

1. We will padding very short signals with 0 → complete to 9000

2. We will cut very long signals and reduce to 9000

In [4]:
def pad_or_trim(signal, target_length=9000):
    current_length = len(signal)

    if current_length < target_length:
        # Pad with zeros at the end
        padding = target_length - current_length
        signal = np.pad(signal, (0, padding), 'constant')
    elif current_length > target_length:
        # Trim from center
        start = (current_length - target_length) // 2
        signal = signal[start : start + target_length]

    return signal


In [5]:
from torch.utils.data import Dataset
import torch


class ECGDataset(Dataset):
    def __init__(self, df, target_length=9000):
        self.df = df
        self.target_length = target_length

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # Signal processing
        # Pad or trim the signal to the target length
        signal = pad_or_trim(row['signal'], self.target_length)
        signal = torch.tensor(signal, dtype=torch.float32)
        label = int(row['label']) 

        return signal, label


## Data Processing
We use PyTorch's DataLoader to handle the batching and shuffling of ECG data.
This allows efficient training by automatically grouping samples into mini-batches,
converting signals and labels into tensors, and optionally shuffling the training data each epoch.


In [6]:
from torch.utils.data import DataLoader

train_dataset = ECGDataset(train_df)
val_dataset = ECGDataset(val_df)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,generator=torch.Generator().manual_seed(42))
val_loader = DataLoader(val_dataset, batch_size=32)


## Base Model Parameter Choice
This is the baseline configuration. We use ECGModel() with default parameters (e.g., hidden size, dropout), and Adam optimizer with a learning rate of 0.001. These choices are standard and can be further tuned later to improve model performance. Also for the error function,we use CrossEntropyLoss because:

1. We are doing a multi-class (4-class) task and it is a classic multi-class classification problem.

2. It can calculate the loss by taking the outputs (logits) of the model directly. The higher the probability that the model gives to the correct class, the lower the loss.

3. We corrected the class imbalance with class_weights in later stage


In [7]:
import torch.optim as optim
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ECGModel().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
import torch.utils
import torch.utils.data


def train_one_epoch(model:nn.Module, 
                    dataloader:torch.utils.data.DataLoader, 
                    optimizer:torch.optim.Optimizer, 
                    loss_fn:nn.Module):
    """
    Trains the model for one full epoch.

    Args:
        model: The neural network model (ECGModel).
        dataloader: The training DataLoader providing batches.
        optimizer: The optimizer (e.g., Adam).
        loss_fn: The loss function (e.g., CrossEntropyLoss).

    Returns:
        A tuple of (average_loss, accuracy) for the epoch.
    """
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for signals, labels in dataloader:
        signals = signals.to(device)
        labels = labels.to(device)

        outputs = model(signals)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * signals.size(0)

        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    avg_loss = total_loss / total
    accuracy = correct / total
    return avg_loss, accuracy


In [9]:
def evaluate(model:nn.Module, dataloader:torch.utils.data.DataLoader, loss_fn:nn.Module):
    """
    Evaluates the model on validation data.

    Args:
        model: Trained model to evaluate.
        dataloader: DataLoader for validation data.
        loss_fn: Same loss function used during training.

    Returns:
        A tuple of (average_loss, accuracy) for validation set.
    """
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for signals, labels in dataloader:
            signals = signals.to(device)
            labels = labels.to(device)

            outputs = model(signals)
            loss = loss_fn(outputs, labels)

            total_loss += loss.item() * signals.size(0)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    avg_loss = total_loss / total
    accuracy = correct / total
    return avg_loss, accuracy


In [10]:
from sklearn.metrics import f1_score, confusion_matrix, classification_report
import numpy as np

def evaluate_with_metrics(model:nn.Module, dataloader, loss_fn, device):
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0

    with torch.no_grad():
        for signals, labels in dataloader:
            signals = signals.to(device)
            labels = labels.to(device)

            outputs = model(signals)
            loss = loss_fn(outputs, labels)
            total_loss += loss.item()

            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_loss = total_loss / len(dataloader)
    acc = 100 * np.mean(np.array(all_preds) == np.array(all_labels))

    f1 = f1_score(all_labels, all_preds, average='macro')
    cm = confusion_matrix(all_labels, all_preds, labels=[0, 1, 2, 3])
    report = classification_report(
    all_labels, all_preds,
    labels=[0, 1, 2, 3],
    target_names=['Normal', 'AF', 'Other', 'Noisy'],
    zero_division=0  # uyarı vermesin
)

    return avg_loss, acc, f1, cm, report


## First Base Model Metrics

In [11]:
num_epochs = 5

for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, loss_fn)
    val_loss, val_acc = evaluate(model, val_loader, loss_fn)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.4f}")
    print("-" * 40)


Epoch 1/5
Train Loss: 0.9944 | Train Acc: 0.5889
Val   Loss: 1.0063 | Val   Acc: 0.5782
----------------------------------------
Epoch 2/5
Train Loss: 0.9892 | Train Acc: 0.5874
Val   Loss: 1.0086 | Val   Acc: 0.5868
----------------------------------------
Epoch 3/5
Train Loss: 0.9754 | Train Acc: 0.5901
Val   Loss: 0.9281 | Val   Acc: 0.6052
----------------------------------------
Epoch 4/5
Train Loss: 0.9541 | Train Acc: 0.5971
Val   Loss: 0.9905 | Val   Acc: 0.5868
----------------------------------------
Epoch 5/5
Train Loss: 0.9062 | Train Acc: 0.6116
Val   Loss: 0.8805 | Val   Acc: 0.6246
----------------------------------------


In [12]:
val_loss, val_acc, val_f1, val_cm, val_report = evaluate_with_metrics(model, val_loader, loss_fn, device)

print(f"Validation F1 Score: {val_f1:.4f}")
print("Confusion Matrix:\n", val_cm)
print("Classification Report:\n", val_report)


Validation F1 Score: 0.3092
Confusion Matrix:
 [[520   0  24   0]
 [ 57   0  22   5]
 [207   0  55   2]
 [ 13   0  18   4]]
Classification Report:
               precision    recall  f1-score   support

      Normal       0.65      0.96      0.78       544
          AF       0.00      0.00      0.00        84
       Other       0.46      0.21      0.29       264
       Noisy       0.36      0.11      0.17        35

    accuracy                           0.62       927
   macro avg       0.37      0.32      0.31       927
weighted avg       0.53      0.62      0.54       927



## Class Weight loss
Form the previous observation, the model tries to maximize overall accuracy by predicting only the largest class. We wanted standardize the class weight to prevent the model from excessive punishment small classes.

original_weights.max() → normalizes to the heaviest class

0.5 + (w * 0.5) → all classes get a minimum weight of 0.5, which prevents over-penalization

So the model does not over-weight classes with few samples like "Noisy"## Class Weight loss

In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch

# Compute class weights based on the training labels
original_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.array([0, 1, 2, 3]),
    y=train_df['label'].values
)

# Normalize and scale the weights
scaled_weights = original_weights / original_weights.max()  # normalize to max=1
scaled_weights = 0.5 + (scaled_weights * 0.5)  # shrink range to [0.5, 1.0] for balance

weights_tensor = torch.tensor(scaled_weights, dtype=torch.float32).to(device)

# Weighted loss function
loss_fn_weighted = nn.CrossEntropyLoss(weight=weights_tensor)

print("Original weights: ", original_weights)
print("Scaled weights: ", scaled_weights)


Original weights:  [0.42436975 2.82365591 0.87475017 6.83854167]
Scaled weights:  [0.5310278  0.70645161 0.56395736 1.        ]


## Class Weighted Base Model Metrics

In [51]:
num_epochs = 10

for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, loss_fn_weighted)
    val_loss, val_acc = evaluate(model, val_loader, loss_fn_weighted)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.4f}")
    print("-" * 40)


Epoch 1/10
Train Loss: 0.9060 | Train Acc: 0.6409
Val   Loss: 0.8320 | Val   Acc: 0.6375
----------------------------------------
Epoch 2/10
Train Loss: 0.8490 | Train Acc: 0.6636
Val   Loss: 0.8250 | Val   Acc: 0.6494
----------------------------------------
Epoch 3/10
Train Loss: 0.8202 | Train Acc: 0.6712
Val   Loss: 0.8126 | Val   Acc: 0.6710
----------------------------------------
Epoch 4/10
Train Loss: 0.7914 | Train Acc: 0.6883
Val   Loss: 0.7730 | Val   Acc: 0.6915
----------------------------------------
Epoch 5/10
Train Loss: 0.7638 | Train Acc: 0.7013
Val   Loss: 0.8404 | Val   Acc: 0.6516
----------------------------------------
Epoch 6/10
Train Loss: 0.7207 | Train Acc: 0.7211
Val   Loss: 0.7940 | Val   Acc: 0.6602
----------------------------------------
Epoch 7/10
Train Loss: 0.6961 | Train Acc: 0.7289
Val   Loss: 0.7463 | Val   Acc: 0.6990
----------------------------------------
Epoch 8/10
Train Loss: 0.6410 | Train Acc: 0.7511
Val   Loss: 0.7427 | Val   Acc: 0.7012
-

In [52]:
val_loss, val_acc, val_f1, val_cm, val_report = evaluate_with_metrics(model, val_loader, loss_fn_weighted, device)

print(f"Validation F1 Score: {val_f1:.4f}")
print("Confusion Matrix:\n", val_cm)
print("Classification Report:\n", val_report)


Validation F1 Score: 0.5583
Confusion Matrix:
 [[461   4  71   8]
 [ 18  25  36   5]
 [105  10 142   7]
 [  7   0  11  17]]
Classification Report:
               precision    recall  f1-score   support

      Normal       0.78      0.85      0.81       544
          AF       0.64      0.30      0.41        84
       Other       0.55      0.54      0.54       264
       Noisy       0.46      0.49      0.47        35

    accuracy                           0.70       927
   macro avg       0.61      0.54      0.56       927
weighted avg       0.69      0.70      0.69       927



## Hyper Parameter Tuning
In this section, we aim to improve the performance of our baseline ECG classification model by tuning key hyperparameters. These include:

- Learning Rate (lr): Controls how much the model weights are updated during training.

- Hidden Size (hidden_size): Determines the capacity of the recurrent layer (GRU), affecting the model’s ability to capture temporal patterns.

- Dropout Rate (dropout): Helps prevent overfitting by randomly zeroing some neuron activations during training.

- Optimizer Type: Different optimizers like Adam or SGD may converge differently depending on the dataset.


In [53]:
def run_experiments(param_grid, train_df, val_df, num_epochs=10):
    results = []

    for lr in param_grid['lr']:
        for hidden_size in param_grid['hidden_size']:
            for dropout in param_grid['dropout']:
                for opt in param_grid['optimizer']:
                    print(f"\n Training with lr={lr}, hidden_size={hidden_size}, dropout={dropout}, optimizer={opt}")

                    # Dataset & DataLoader
                    train_dataset = ECGDataset(train_df)
                    val_dataset = ECGDataset(val_df)
                    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,generator=torch.Generator().manual_seed(42))
                    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

                    # Model
                    model = ECGModel(hidden_size=hidden_size, dropout_rate=dropout).to(device)

                    # Optimizer
                    if opt == 'adam':
                        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
                    elif opt == 'sgd':
                        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
                    else:
                        raise ValueError(f"Unsupported optimizer: {opt}")

                    # Loss
                    loss_fn = nn.CrossEntropyLoss(weight=weights_tensor.to(device))

                    # Train
                    for epoch in range(num_epochs):
                        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, loss_fn)
                        val_loss, val_acc = evaluate(model, val_loader, loss_fn)
                        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
                        print(f"Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.4f}")

                    # Eval
                    _, _, val_f1, _, _ = evaluate_with_metrics(model, val_loader, loss_fn, device)

                    results.append({
                        'lr': lr,
                        'hidden_size': hidden_size,
                        'dropout': dropout,
                        'optimizer': opt,
                        'val_f1': val_f1
                    })

    return pd.DataFrame(results)


In [54]:
param_grid = {
    'lr': [0.001, 0.0005],
    'hidden_size': [128, 256],
    'dropout': [0.2, 0.3],
    'optimizer': ['adam', 'sgd']
}


In [55]:
torch.use_deterministic_algorithms(True)

In [None]:
# It takes approximately 30-40 minutes to run all combinations :)
experiment_results = run_experiments(param_grid, train_df, val_df, num_epochs=6)



 Training with lr=0.001, hidden_size=128, dropout=0.2, optimizer=adam
Train Loss: 1.0921 | Train Acc: 0.5727
Val   Loss: 1.0089 | Val   Acc: 0.5707
Train Loss: 1.0553 | Train Acc: 0.5933
Val   Loss: 0.9607 | Val   Acc: 0.5987
Train Loss: 0.9784 | Train Acc: 0.6114
Val   Loss: 0.8814 | Val   Acc: 0.6203
Train Loss: 0.9233 | Train Acc: 0.6339
Val   Loss: 0.9268 | Val   Acc: 0.6127
Train Loss: 0.8638 | Train Acc: 0.6662
Val   Loss: 0.8754 | Val   Acc: 0.6235
Train Loss: 0.8444 | Train Acc: 0.6645
Val   Loss: 0.7807 | Val   Acc: 0.6742

 Training with lr=0.001, hidden_size=128, dropout=0.2, optimizer=sgd
Train Loss: 1.1006 | Train Acc: 0.5689
Val   Loss: 1.0144 | Val   Acc: 0.5868
Train Loss: 1.0778 | Train Acc: 0.5832
Val   Loss: 1.0004 | Val   Acc: 0.5868
Train Loss: 1.0721 | Train Acc: 0.5836
Val   Loss: 0.9951 | Val   Acc: 0.5868
Train Loss: 1.0676 | Train Acc: 0.5863
Val   Loss: 0.9960 | Val   Acc: 0.5868
Train Loss: 1.0627 | Train Acc: 0.5861
Val   Loss: 0.9880 | Val   Acc: 0.5868
T

In [65]:
top_list = experiment_results.sort_values(by="val_f1", ascending=False)
top_list

Unnamed: 0,lr,hidden_size,dropout,optimizer,val_f1
14,0.0005,256,0.3,adam,0.526329
12,0.0005,256,0.2,adam,0.49558
0,0.001,128,0.2,adam,0.49443
6,0.001,256,0.3,adam,0.426548
2,0.001,128,0.3,adam,0.395061
8,0.0005,128,0.2,adam,0.370311
10,0.0005,128,0.3,adam,0.323067
7,0.001,256,0.3,sgd,0.270581
5,0.001,256,0.2,sgd,0.219435
15,0.0005,256,0.3,sgd,0.202186


In [62]:
base_model = ECGModel(hidden_size=256, dropout_rate=0.3).to(device)
base_optimizer = torch.optim.Adam(base_model.parameters(), lr=0.0005)
base_loss_fn = nn.CrossEntropyLoss(weight=weights_tensor.to(device))
train_dataset = ECGDataset(train_df)
val_dataset = ECGDataset(val_df)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,generator=torch.Generator().manual_seed(42))
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [63]:
num_epochs = 20

for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(base_model, train_loader, base_optimizer, base_loss_fn)
    val_loss, val_acc = evaluate(base_model, val_loader, base_loss_fn)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.4f}")
    print("-" * 40)

Epoch 1/20
Train Loss: 1.0899 | Train Acc: 0.5746
Val   Loss: 1.0089 | Val   Acc: 0.5674
----------------------------------------
Epoch 2/20
Train Loss: 1.0294 | Train Acc: 0.6043
Val   Loss: 0.9663 | Val   Acc: 0.5965
----------------------------------------
Epoch 3/20
Train Loss: 0.9697 | Train Acc: 0.6200
Val   Loss: 0.8879 | Val   Acc: 0.6224
----------------------------------------
Epoch 4/20
Train Loss: 0.9270 | Train Acc: 0.6379
Val   Loss: 0.8835 | Val   Acc: 0.6311
----------------------------------------
Epoch 5/20
Train Loss: 0.8864 | Train Acc: 0.6571
Val   Loss: 0.8628 | Val   Acc: 0.6300
----------------------------------------
Epoch 6/20
Train Loss: 0.8864 | Train Acc: 0.6487
Val   Loss: 0.8325 | Val   Acc: 0.6289
----------------------------------------
Epoch 7/20
Train Loss: 0.8531 | Train Acc: 0.6643
Val   Loss: 0.8062 | Val   Acc: 0.6602
----------------------------------------
Epoch 8/20
Train Loss: 0.8335 | Train Acc: 0.6666
Val   Loss: 0.8234 | Val   Acc: 0.6526
-

In [64]:
val_loss, val_acc, val_f1, val_cm, val_report = evaluate_with_metrics(base_model, val_loader, base_loss_fn, device)

print(f"Validation F1 Score: {val_f1:.4f}")
print("Confusion Matrix:\n", val_cm)
print("Classification Report:\n", val_report)


Validation F1 Score: 0.6198
Confusion Matrix:
 [[437  14  87   6]
 [ 14  44  21   5]
 [ 88  38 129   9]
 [  3   0   4  28]]
Classification Report:
               precision    recall  f1-score   support

      Normal       0.81      0.80      0.80       544
          AF       0.46      0.52      0.49        84
       Other       0.54      0.49      0.51       264
       Noisy       0.58      0.80      0.67        35

    accuracy                           0.69       927
   macro avg       0.60      0.65      0.62       927
weighted avg       0.69      0.69      0.69       927

