In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder

# Run on the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [23]:
class ECGCNN(nn.Module):
    """1-D convolutional classifier for fixed-length, single-channel signals.

    The network stacks three Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(2)
    stages (32, 64 and 128 channels) and finishes with a two-layer fully
    connected head with dropout in between.

    Args:
        num_classes: Number of output logits.
        input_length: Number of samples in each input signal. The default of
            216 yields a flattened feature size of 128 * 27, so checkpoints
            saved with the default configuration keep loading unchanged.

    Raises:
        ValueError: If ``input_length`` is too short to survive the three
            pooling stages (fewer than 8 samples).
    """

    def __init__(self, num_classes=5, input_length=216):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm1d(64)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm1d(128)
        self.pool3 = nn.MaxPool1d(kernel_size=2, stride=2)

        # The convolutions keep the length (kernel 5, padding 2, stride 1);
        # each pooling stage halves it, rounding down.
        pooled_length = input_length // 2 // 2 // 2
        if pooled_length < 1:
            raise ValueError(
                f"input_length={input_length} is too short: at least 8 samples "
                "are needed for three stride-2 pooling stages"
            )

        self.fc1 = nn.Linear(128 * pooled_length, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of signals ``(batch, input_length)`` to logits ``(batch, num_classes)``."""
        # Conv1d expects (batch, channels, length); insert the single channel.
        x = x.unsqueeze(1)
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        # Collapse channels and time into one feature vector per sample.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [24]:
# Load the clean ECG table: the 'label' column is the target and every other
# column is treated as a numeric feature.
data_path = '/kaggle/input/ecg-datasets/ecg_clean.csv'
df = pd.read_csv(data_path)
print(f"Dataset loaded: {df.shape}")

# Encode labels
# Test for "not numeric" instead of `dtype == 'object'`: string columns are not
# guaranteed to be reported as object dtype (pandas has dedicated string
# dtypes), and silently skipping the encoding would make the torch.long
# conversion below fail.
if not pd.api.types.is_numeric_dtype(df['label']):
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['label'])
    print(f"Label encoding: {dict(zip(le.classes_, range(len(le.classes_))))}")

# Ensure numeric
# Unparseable feature values become NaN and the affected rows are dropped.
feature_cols = [col for col in df.columns if col != 'label']
for col in feature_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')
df = df.dropna()

X = torch.tensor(df.drop('label', axis=1).values, dtype=torch.float32)
y = torch.tensor(df['label'].values, dtype=torch.long)
# Standardise each feature column; the epsilon guards against zero variance.
# NOTE(review): the statistics are computed over all rows, i.e. before the
# train/validation split performed in a later cell.
X = (X - X.mean(dim=0)) / (X.std(dim=0) + 1e-8)

print(f"X shape: {X.shape}, y shape: {y.shape}")


Dataset loaded: (100033, 217)
Label encoding: {'A': 0, 'L': 1, 'N': 2, 'R': 3, 'V': 4}
X shape: torch.Size([100033, 216]), y shape: torch.Size([100033])


In [25]:
def add_noise(signal, snr_db):
    """Return ``signal`` with additive white Gaussian noise at ``snr_db`` decibels.

    The noise power is the mean squared value of ``signal`` divided by
    ``10 ** (snr_db / 10)``, so a larger ``snr_db`` produces weaker noise.
    The input tensor is not modified.
    """
    snr_linear = 10 ** (snr_db / 10)
    power = (signal ** 2).mean()
    noise_std = (power / snr_linear).sqrt()
    return signal + torch.randn_like(signal) * noise_std

class NoisyDataset(torch.utils.data.Dataset):
    """Dataset wrapper that adds Gaussian noise to samples on the fly.

    Indexing returns ``(X[idx], y[idx])``. With probability ``add_noise_prob``
    the sample is first passed through ``add_noise`` at an SNR drawn uniformly
    from ``snr_levels``; because the draw happens on every access, the same
    index can yield different samples across epochs.

    Args:
        X: Indexable collection of signals (one per sample).
        y: Indexable collection of labels, same length as ``X``.
        add_noise_prob: Probability that a returned sample is corrupted.
        snr_levels: Candidate SNR values in dB. Defaults to
            ``[0, 3, 6, 9, 12, 15, 18, 20]``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length or ``snr_levels`` is empty.
    """

    DEFAULT_SNR_LEVELS = (0, 3, 6, 9, 12, 15, 18, 20)

    def __init__(self, X, y, add_noise_prob=0.7, snr_levels=None):
        if len(X) != len(y):
            raise ValueError(f"X and y must have the same length, got {len(X)} and {len(y)}")
        self.X = X
        self.y = y
        self.add_noise_prob = add_noise_prob
        self.snr_levels = list(self.DEFAULT_SNR_LEVELS if snr_levels is None else snr_levels)
        if not self.snr_levels:
            raise ValueError("snr_levels must contain at least one value")

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]

        if torch.rand(1).item() < self.add_noise_prob:
            # Draw the SNR from torch's RNG rather than NumPy's so that a single
            # torch.manual_seed call (and DataLoader's per-worker torch seeding)
            # governs all of the augmentation randomness.
            pick = torch.randint(len(self.snr_levels), (1,)).item()
            x = add_noise(x, self.snr_levels[pick])

        return x, y

# Wrap the tensors in the noise-augmenting dataset and hold out 20% for validation.
dataset = NoisyDataset(X, y, add_noise_prob=0.7)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Use a dedicated, seeded generator so the train/validation membership is the
# same on every run; the global RNG state is left untouched.
split_generator = torch.Generator().manual_seed(42)
# NOTE(review): both subsets index into the same NoisyDataset, so validation
# samples are randomly noise-augmented as well and validation accuracy can vary
# between passes over identical weights.
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

Training samples: 80026
Validation samples: 20007


In [26]:
# Seed the global RNGs used by this cell (weight initialisation, DataLoader
# shuffling, and the torch/NumPy draws inside the augmenting dataset) so that
# re-running it starts from the same state. GPU kernels may still introduce
# small run-to-run differences.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

model = ECGCNN(num_classes=5).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

best_val_acc = 0.0
epochs = 30
# Only the weights with the best validation accuracy so far are kept on disk.
checkpoint_path = '/kaggle/working/cnn_noise_aug_best.pth'

print("\n" + "="*60)
print("TRAINING CNN WITH NOISE AUGMENTATION")
print("="*60 + "\n")

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    train_correct, train_total = 0, 0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        predicted = outputs.argmax(dim=1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    train_acc = train_correct / train_total

    # ---- validation pass (no gradients, BatchNorm/Dropout in eval mode) ----
    model.eval()
    val_correct, val_total = 0, 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_acc = val_correct / val_total

    print(f"Epoch {epoch+1}/{epochs} - Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    # Checkpoint only on a strict improvement in validation accuracy.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint_path)
        print(f"✅ Saved! Val Acc: {val_acc:.4f}\n")

print(f"\n🎯 Best Validation Accuracy: {best_val_acc:.4f}")


TRAINING CNN WITH NOISE AUGMENTATION



Epoch 1/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.74it/s]


Epoch 1/30 - Train Acc: 0.9640 | Val Acc: 0.9861
✅ Saved! Val Acc: 0.9861



Epoch 2/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.86it/s]


Epoch 2/30 - Train Acc: 0.9789 | Val Acc: 0.9872
✅ Saved! Val Acc: 0.9872



Epoch 3/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.79it/s]


Epoch 3/30 - Train Acc: 0.9823 | Val Acc: 0.9886
✅ Saved! Val Acc: 0.9886



Epoch 4/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.74it/s]


Epoch 4/30 - Train Acc: 0.9847 | Val Acc: 0.9885


Epoch 5/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.76it/s]


Epoch 5/30 - Train Acc: 0.9859 | Val Acc: 0.9899
✅ Saved! Val Acc: 0.9899



Epoch 6/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.65it/s]


Epoch 6/30 - Train Acc: 0.9871 | Val Acc: 0.9911
✅ Saved! Val Acc: 0.9911



Epoch 7/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.66it/s]


Epoch 7/30 - Train Acc: 0.9875 | Val Acc: 0.9896


Epoch 8/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.64it/s]


Epoch 8/30 - Train Acc: 0.9883 | Val Acc: 0.9910


Epoch 9/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.66it/s]


Epoch 9/30 - Train Acc: 0.9889 | Val Acc: 0.9919
✅ Saved! Val Acc: 0.9919



Epoch 10/30: 100%|██████████| 1251/1251 [00:41<00:00, 30.51it/s]


Epoch 10/30 - Train Acc: 0.9896 | Val Acc: 0.9907


Epoch 11/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.64it/s]


Epoch 11/30 - Train Acc: 0.9899 | Val Acc: 0.9915


Epoch 12/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.63it/s]


Epoch 12/30 - Train Acc: 0.9902 | Val Acc: 0.9919


Epoch 13/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.66it/s]


Epoch 13/30 - Train Acc: 0.9907 | Val Acc: 0.9917


Epoch 14/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.65it/s]


Epoch 14/30 - Train Acc: 0.9914 | Val Acc: 0.9922
✅ Saved! Val Acc: 0.9922



Epoch 15/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.62it/s]


Epoch 15/30 - Train Acc: 0.9908 | Val Acc: 0.9920


Epoch 16/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.75it/s]


Epoch 16/30 - Train Acc: 0.9919 | Val Acc: 0.9921


Epoch 17/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.71it/s]


Epoch 17/30 - Train Acc: 0.9920 | Val Acc: 0.9920


Epoch 18/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.65it/s]


Epoch 18/30 - Train Acc: 0.9925 | Val Acc: 0.9922


Epoch 19/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.72it/s]


Epoch 19/30 - Train Acc: 0.9922 | Val Acc: 0.9927
✅ Saved! Val Acc: 0.9927



Epoch 20/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.64it/s]


Epoch 20/30 - Train Acc: 0.9921 | Val Acc: 0.9917


Epoch 23/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.75it/s]


Epoch 23/30 - Train Acc: 0.9932 | Val Acc: 0.9919


Epoch 24/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.66it/s]


Epoch 24/30 - Train Acc: 0.9936 | Val Acc: 0.9923


Epoch 25/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.68it/s]


Epoch 25/30 - Train Acc: 0.9934 | Val Acc: 0.9924


Epoch 26/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.78it/s]


Epoch 26/30 - Train Acc: 0.9939 | Val Acc: 0.9930
✅ Saved! Val Acc: 0.9930



Epoch 27/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.71it/s]


Epoch 27/30 - Train Acc: 0.9939 | Val Acc: 0.9922


Epoch 29/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.73it/s]


Epoch 29/30 - Train Acc: 0.9940 | Val Acc: 0.9926


Epoch 30/30: 100%|██████████| 1251/1251 [00:40<00:00, 30.70it/s]


Epoch 30/30 - Train Acc: 0.9937 | Val Acc: 0.9922

🎯 Best Validation Accuracy: 0.9932


In [28]:
def evaluate_dataset(model, csv_path, label_encoder=None):
    """Compute the classification accuracy of ``model`` on a CSV file.

    The CSV must contain a ``label`` column; every other column is treated as
    a numeric feature. Rows with missing or non-numeric feature values are
    dropped, and each feature column is standardised with statistics computed
    from the file being evaluated.

    Args:
        model: Module mapping a ``(batch, n_features)`` float tensor to
            per-class scores. It is switched to eval mode and left in it.
        csv_path: Path of the CSV file to evaluate.
        label_encoder: Optional already-fitted encoder exposing ``transform``,
            used to map non-numeric labels to class indices. Pass the encoder
            fitted on the training data to guarantee the same mapping; when
            omitted, a fresh ``LabelEncoder`` is fitted on this file, which
            only matches the training mapping if the file contains the same
            set of classes.

    Returns:
        Fraction of rows whose arg-max prediction equals the label.

    Raises:
        ValueError: If no rows remain after dropping invalid ones.
    """
    df = pd.read_csv(csv_path)

    # "Not numeric" is a more reliable test for string labels than
    # `dtype == 'object'`, which pandas' dedicated string dtypes do not satisfy.
    if not pd.api.types.is_numeric_dtype(df['label']):
        if label_encoder is None:
            df['label'] = LabelEncoder().fit_transform(df['label'])
        else:
            df['label'] = label_encoder.transform(df['label'])

    feature_cols = [col for col in df.columns if col != 'label']
    for col in feature_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df = df.dropna()

    if len(df) == 0:
        raise ValueError(f"No valid rows left in {csv_path!r} after dropping missing/non-numeric values")

    X = torch.tensor(df.drop('label', axis=1).values, dtype=torch.float32)
    y = torch.tensor(df['label'].values, dtype=torch.long)
    # NOTE(review): standardisation uses this file's own per-column statistics,
    # not the ones computed on the training data.
    X = (X - X.mean(dim=0)) / (X.std(dim=0) + 1e-8)

    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=64, shuffle=False)

    # Run on whatever device the model lives on instead of relying on a global;
    # a parameter-free model is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [29]:
# Rebuild the network and restore the best checkpoint written during training.
model = ECGCNN(num_classes=5).to(device)
# map_location lets a checkpoint saved from one device (e.g. a GPU) load in a
# session that only has another (e.g. CPU-only).
model.load_state_dict(
    torch.load('/kaggle/working/cnn_noise_aug_best.pth', map_location=device)
)

# Reference accuracies per SNR level (dB).
# NOTE(review): these are hard-coded; presumably they come from a separately
# trained baseline CNN that is not part of this notebook — confirm the source.
cnn_baseline = {0: 0.7123, 3: 0.7678, 6: 0.8493, 9: 0.9038, 
                12: 0.9429, 15: 0.9686, 18: 0.9834, 20: 0.9891}

snr_levels = [0, 3, 6, 9, 12, 15, 18, 20]
results = {}
# Accuracy difference times 100, i.e. percentage points, keyed by SNR level.
improvements = {}

print("\n" + "="*70)
print("COMPARISON: CNN Baseline vs CNN+Noise Augmentation")
print("="*70)
print(f"{'SNR (dB)':<10} {'CNN Baseline':<15} {'CNN+NoiseAug':<15} {'Improvement':<15}")
print("-"*70)

for snr in snr_levels:
    path = f'/kaggle/input/ecg-datasets/ecg_noisy_{snr}db.csv'
    acc = evaluate_dataset(model, path)
    results[snr] = acc

    baseline = cnn_baseline[snr]
    improvements[snr] = (acc - baseline) * 100

    print(f"{snr:<10} {baseline:<15.4f} {acc:<15.4f} {improvements[snr]:+.2f}%")

print("-"*70)
avg_baseline = np.mean(list(cnn_baseline.values()))
avg_results = np.mean(list(results.values()))
avg_improvement = (avg_results - avg_baseline) * 100

print(f"{'AVERAGE':<10} {avg_baseline:<15.4f} {avg_results:<15.4f} {avg_improvement:+.2f}%")
print("="*70)

# Save results
results_df = pd.DataFrame({
    'SNR_dB': snr_levels,
    'CNN_Baseline': [cnn_baseline[snr] for snr in snr_levels],
    'CNN_NoiseAug': [results[snr] for snr in snr_levels],
    'Improvement_%': [improvements[snr] for snr in snr_levels]
})
results_df.to_csv('/kaggle/working/noise_aug_results.csv', index=False)
print("\n✅ Results saved!")


COMPARISON: CNN Baseline vs CNN+Noise Augmentation
SNR (dB)   CNN Baseline    CNN+NoiseAug    Improvement    
----------------------------------------------------------------------
0          0.7123          0.8854          +17.31%
3          0.7678          0.9509          +18.31%
6          0.8493          0.9818          +13.25%
9          0.9038          0.9921          +8.83%
12         0.9429          0.9957          +5.28%
15         0.9686          0.9967          +2.81%
18         0.9834          0.9974          +1.40%
20         0.9891          0.9976          +0.85%
----------------------------------------------------------------------
AVERAGE    0.8897          0.9747          +8.50%

✅ Results saved!
