In [2]:
import os
import json
import pandas as pd

# Directory containing the JSON files
ekg_dir = 'ekg_files'

# List to store all DataFrames
dfs = []
for filename in os.listdir(ekg_dir):
    if filename.endswith('.json'):
        with open(os.path.join(ekg_dir, filename), 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Take the first lead (usually "I")
        if 'leads' in data and len(data['leads']) > 0:
            lead = data['leads'][0]
            signal = lead.get('signal', [])
            # Remove None values from the signal
            clean_signal = [x for x in signal if x is not None]
            df_tmp = pd.DataFrame({'Signal': clean_signal})
            dfs.append([filename.replace('_','/').replace('.json',''), df_tmp])

In [3]:
min_length = float('inf')
min_kg = None
for kg, signal_df in dfs:
    if len(signal_df) < min_length:
        min_length = len(signal_df)
        min_kg = kg
        
print(f"Shortest signal is from {min_kg} with length {min_length}")

Shortest signal is from 39879/2021 with length 761


In [4]:
df = pd.read_csv('DANE_mpsi.csv', sep='\t', encoding='utf-8')

# Extract labels from Dane MPSI (df['zgon']) and join with signals from dfs using filename and KG

# Prepare a mapping from KG to label
kg_to_label = dict(zip(df['KG'], df['zgon']))

# Prepare dataset: list of (signal, label) tuples
signal_label_dataset = []
for kg, signal_df in dfs:
    if kg in kg_to_label:
        label = kg_to_label[kg]
        signal = signal_df['Signal'].values
        signal_label_dataset.append((signal, label))

In [5]:
import torch
from torch.utils.data import Dataset

class EKGDataset(Dataset):
    def __init__(self, data):
        self.data = data  # list of (signal, label) tuples

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        signal, label = self.data[idx]
        
        # Normalize signal per sample: zero mean, unit variance
        mean = signal.mean()
        std = signal.std()
        normalized_signal = (signal - mean) / std
        
        # Convert to float32 tensor and add channel dimension
        signal_tensor = torch.tensor(signal, dtype=torch.float32).unsqueeze(-1)
        label_tensor = torch.tensor(label, dtype=torch.float32)
        return signal_tensor, label_tensor
    
signal_label_dataset = []
for kg, signal_df in dfs:
    if kg in kg_to_label:
        label = kg_to_label[kg]
        signal = signal_df['Signal'].values[-756:]  # take last 756 elements
        signal_label_dataset.append((signal, label))

ekg_dataset = EKGDataset(signal_label_dataset)

In [6]:
sample_x, sample_y = ekg_dataset.__getitem__(0)
print(sample_x[:5], sample_x.shape, sample_y)

tensor([[-0.0078],
        [ 0.0000],
        [ 0.0000],
        [ 0.0078],
        [ 0.0156]]) torch.Size([756, 1]) tensor(0.)


In [7]:
from torch.utils.data import DataLoader, random_split

import torch.nn as nn

# Define a simple RNN model
class SimpleRNN(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, output_size=1):
        super(SimpleRNN, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = out[:, -1, :]  # Take the last output
        out = self.fc(out)
        return out


model = SimpleRNN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

criterion = nn.BCEWithLogitsLoss()

# Split dataset into train and validation sets
train_size = int(0.8 * len(ekg_dataset))
val_size = len(ekg_dataset) - train_size
train_dataset, val_dataset = random_split(ekg_dataset, [train_size, val_size])

# Create DataLoader for training
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for signals, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(signals)
        labels = labels.unsqueeze(1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}")
    if (epoch + 1) % 5 == 0:
        model.eval()
        val_loader = DataLoader(val_dataset, batch_size=32)
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for signals, labels in val_loader:
                outputs = model(signals)
                labels = labels.unsqueeze(1)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                preds = (torch.sigmoid(outputs) > 0.5).float()
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        avg_val_loss = val_loss / len(val_loader)
        accuracy = correct / total
        print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy: {accuracy:.4f}")

Epoch 1/30, Loss: 0.6799
Epoch 2/30, Loss: 0.6752
Epoch 3/30, Loss: 0.6712
Epoch 4/30, Loss: 0.6678
Epoch 5/30, Loss: 0.6651
Validation Loss: 0.6904, Accuracy: 0.5556
Epoch 6/30, Loss: 0.6630
Epoch 7/30, Loss: 0.6617
Epoch 8/30, Loss: 0.6613
Epoch 9/30, Loss: 0.6618
Epoch 10/30, Loss: 0.6626
Validation Loss: 0.7079, Accuracy: 0.5556
Epoch 11/30, Loss: 0.6631
Epoch 12/30, Loss: 0.6630
Epoch 13/30, Loss: 0.6625
Epoch 14/30, Loss: 0.6619
Epoch 15/30, Loss: 0.6613
Validation Loss: 0.6994, Accuracy: 0.5556
Epoch 16/30, Loss: 0.6610
Epoch 17/30, Loss: 0.6608
Epoch 18/30, Loss: 0.6607
Epoch 19/30, Loss: 0.6607
Epoch 20/30, Loss: 0.6607
Validation Loss: 0.6933, Accuracy: 0.5556
Epoch 21/30, Loss: 0.6608
Epoch 22/30, Loss: 0.6608
Epoch 23/30, Loss: 0.6607
Epoch 24/30, Loss: 0.6607
Epoch 25/30, Loss: 0.6605
Validation Loss: 0.6928, Accuracy: 0.5556
Epoch 26/30, Loss: 0.6604
Epoch 27/30, Loss: 0.6602
Epoch 28/30, Loss: 0.6600
Epoch 29/30, Loss: 0.6597
Epoch 30/30, Loss: 0.6595
Validation Loss: 0.

In [9]:
import torch.nn as nn

# Define a simple GRU model
class SimpleGRU(nn.Module):
    def __init__(self, input_size=1, hidden_size=128, output_size=1):
        super(SimpleGRU, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc_out = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.gru(x)
        out = out[:, -1, :]  # Take the last output
        out = self.fc(out)
        out = self.relu(out)
        out = self.fc_out(out)
        return out

gru_model = SimpleGRU()
optimizer = torch.optim.Adam(gru_model.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()

num_epochs = 30
for epoch in range(num_epochs):
    gru_model.train()
    total_loss = 0
    for signals, labels in train_loader:
        optimizer.zero_grad()
        outputs = gru_model(signals)
        labels = labels.unsqueeze(1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}")
    # Calculate train accuracy
    gru_model.eval()
    train_correct = 0
    train_total = 0
    with torch.no_grad():
        for signals, labels in train_loader:
            outputs = gru_model(signals)
            labels = labels.unsqueeze(1)
            preds = (torch.sigmoid(outputs) > 0.5).float()
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)
    train_accuracy = train_correct / train_total
    print(f"Train Accuracy: {train_accuracy:.4f}")

    if (epoch + 1) % 5 == 0:
        gru_model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for signals, labels in val_loader:
                outputs = gru_model(signals)
                labels = labels.unsqueeze(1)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                preds = (torch.sigmoid(outputs) > 0.5).float()
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        avg_val_loss = val_loss / len(val_loader)
        accuracy = correct / total
        print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy: {accuracy:.4f}")

Epoch 1/30, Loss: 0.6923
Train Accuracy: 0.6250
Epoch 2/30, Loss: 0.6878
Train Accuracy: 0.6250
Epoch 3/30, Loss: 0.6838
Train Accuracy: 0.6250
Epoch 4/30, Loss: 0.6803
Train Accuracy: 0.6250
Epoch 5/30, Loss: 0.6773
Train Accuracy: 0.6250
Validation Loss: 0.6872, Accuracy: 0.5556
Epoch 6/30, Loss: 0.6746
Train Accuracy: 0.6250
Epoch 7/30, Loss: 0.6720
Train Accuracy: 0.6250
Epoch 8/30, Loss: 0.6694
Train Accuracy: 0.6250
Epoch 9/30, Loss: 0.6669
Train Accuracy: 0.6250
Epoch 10/30, Loss: 0.6646
Train Accuracy: 0.6250
Validation Loss: 0.6909, Accuracy: 0.5556
Epoch 11/30, Loss: 0.6627
Train Accuracy: 0.6250
Epoch 12/30, Loss: 0.6614
Train Accuracy: 0.6250
Epoch 13/30, Loss: 0.6614
Train Accuracy: 0.6250
Epoch 14/30, Loss: 0.6626
Train Accuracy: 0.6250
Epoch 15/30, Loss: 0.6635
Train Accuracy: 0.6250
Validation Loss: 0.7093, Accuracy: 0.5556
Epoch 16/30, Loss: 0.6632
Train Accuracy: 0.6250
Epoch 17/30, Loss: 0.6622
Train Accuracy: 0.6250
Epoch 18/30, Loss: 0.6613
Train Accuracy: 0.6250
E