In [1]:
import pandas as pd
import numpy as np
import wfdb
import ast

In [2]:
import os

In [3]:
def load_raw_data(df, sampling_rate, path):
    if sampling_rate == 100:
        data = [wfdb.rdsamp(os.path.join(path,f)) for f in df.filename_lr]
    else:
        data = [wfdb.rdsamp(os.path.join(path,f)) for f in df.filename_hr]
    data = np.array([signal for signal, meta in data])
    return data

In [4]:
path='/kaggle/input/ecg-data/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3/'
sampling_rate=100

# load and convert annotation data
Y = pd.read_csv(os.path.join(path,'ptbxl_database.csv'), index_col='ecg_id')
Y.scp_codes = Y.scp_codes.apply(lambda x: ast.literal_eval(x))


In [5]:
Y.scp_codes 

ecg_id
1                 {'NORM': 100.0, 'LVOLT': 0.0, 'SR': 0.0}
2                             {'NORM': 80.0, 'SBRAD': 0.0}
3                               {'NORM': 100.0, 'SR': 0.0}
4                               {'NORM': 100.0, 'SR': 0.0}
5                               {'NORM': 100.0, 'SR': 0.0}
                               ...                        
21833    {'NDT': 100.0, 'PVC': 100.0, 'VCLVH': 0.0, 'ST...
21834             {'NORM': 100.0, 'ABQRS': 0.0, 'SR': 0.0}
21835                           {'ISCAS': 50.0, 'SR': 0.0}
21836                           {'NORM': 100.0, 'SR': 0.0}
21837                           {'NORM': 100.0, 'SR': 0.0}
Name: scp_codes, Length: 21799, dtype: object

In [6]:
Y.scp_codes.shape

(21799,)

In [7]:
X = load_raw_data(Y, sampling_rate, path)


In [8]:
agg_df = pd.read_csv(os.path.join(path,'scp_statements.csv'), index_col=0)
agg_df = agg_df[agg_df.diagnostic == 1]


In [9]:
def aggregate_diagnostic(y_dic):
    tmp = []
    for key in y_dic.keys():
        if key in agg_df.index:
            tmp.append(agg_df.loc[key].diagnostic_class)
    return list(set(tmp))

In [10]:
Y['diagnostic_superclass'] = Y.scp_codes.apply(aggregate_diagnostic)

In [11]:
test_fold = 10

X_train = X[np.where(Y.strat_fold != test_fold)]
y_train = Y[(Y.strat_fold != test_fold)].diagnostic_superclass

X_test = X[np.where(Y.strat_fold == test_fold)]
y_test = Y[Y.strat_fold == test_fold].diagnostic_superclass

In [12]:
X_train.shape

(19601, 1000, 12)

In [13]:
y_train.head

<bound method NDFrame.head of ecg_id
1        [NORM]
2        [NORM]
3        [NORM]
4        [NORM]
5        [NORM]
          ...  
21833    [STTC]
21834    [NORM]
21835    [STTC]
21836    [NORM]
21837    [NORM]
Name: diagnostic_superclass, Length: 19601, dtype: object>

In [14]:
y_train.info()

<class 'pandas.core.series.Series'>
Index: 19601 entries, 1 to 21837
Series name: diagnostic_superclass
Non-Null Count  Dtype 
--------------  ----- 
19601 non-null  object
dtypes: object(1)
memory usage: 306.3+ KB


In [15]:
np.unique(y_train).shape[0]


22

In [16]:
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
y_train_encoded = pd.DataFrame(mlb.fit_transform(y_train), columns=mlb.classes_)

In [17]:
y_train_encoded.shape

(19601, 5)

In [18]:
from sklearn.preprocessing import StandardScaler

In [19]:
standard_scaler=StandardScaler()
num_sample,time_stamps,lead_ecg=X_train.shape

In [20]:
X_train_new=X_train.reshape(num_sample,-1)
standard_scaler.fit_transform(X_train_new)

array([[-0.63699445, -0.25904753,  0.31693273, ...,  0.40698747,
        -0.14975585, -0.42330522],
       [ 0.02834186,  0.68852979,  0.65826793, ..., -0.91570464,
        -0.79910897, -0.50410628],
       [-0.150163  , -0.37688098, -0.23407982, ..., -0.00946991,
        -0.0934344 , -0.01606787],
       ...,
       [ 0.21225596,  0.12882091, -0.06341222, ..., -0.54323924,
        -0.38166768,  0.07119727],
       [-0.30162168, -0.26886698,  0.00485482, ...,  0.60641777,
         1.20527184, -0.06454851],
       [-0.25834777, -0.11175572,  0.12188404, ...,  0.29847393,
         0.1219123 , -0.3683605 ]])

In [21]:
# X_train=X_train_new.reshape(num_sample,time_stamps,lead_ecg)

In [22]:
from sklearn.decomposition import PCA
pca=PCA(0.95)
X_pca=pca.fit_transform(X_train_new)

In [23]:
X_pca

array([[-0.16782571, -0.18966425, -0.06024255, ..., -0.01662815,
        -0.05555115,  0.15073412],
       [-0.12314312, -0.88779329, -1.3212945 , ...,  0.46643418,
        -0.18896158, -0.3539256 ],
       [-0.33106224, -1.75605188, -0.30401826, ..., -0.03894012,
        -0.11327651, -0.17865432],
       ...,
       [-0.09619951,  0.00643173,  0.06552987, ...,  0.01140194,
         0.02635737, -0.06611371],
       [-0.09021061, -0.1596813 , -0.21892681, ..., -0.47067189,
        -0.10061179, -0.08389196],
       [-0.00641155, -1.09588717, -0.48185639, ..., -0.1285563 ,
         0.24503757,  0.13995697]])

In [26]:
# Assuming x_pca and y_train are Pandas DataFrames
y_train_array = y_train_encoded.values  # Convert to NumPy array


In [28]:
import torch

In [29]:
x_tensor = torch.tensor(X_pca, dtype=torch.float32)  # for features
y_tensor = torch.tensor(y_train_array, dtype=torch.float32)  # for labels


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

def conv1d_block(in_channels, out_channels, kernel_size=3, stride=1, dropout=0.0, activation="relu", use_bn=True):
    layers = []
    if dropout > 0:
        layers.append(nn.Dropout(dropout))
    layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding=(kernel_size - 1) // 2))
    if use_bn:
        layers.append(nn.BatchNorm1d(out_channels))
    if activation == "relu":
        layers.append(nn.ReLU(inplace=True))
    elif activation == "elu":
        layers.append(nn.ELU(inplace=True))
    return nn.Sequential(*layers)

class CustomConv1DModel(nn.Module):
    def __init__(self, input_size=1537, num_classes=5, filters=[128, 128, 128], dropout=0.5):
        super(CustomConv1DModel, self).__init__()
        self.layers = nn.Sequential(
            conv1d_block(1, filters[0], kernel_size=3, stride=2, dropout=dropout),
            conv1d_block(filters[0], filters[1], kernel_size=3, stride=2, dropout=dropout),
            conv1d_block(filters[1], filters[2], kernel_size=3, stride=2, dropout=dropout),
        )
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(filters[-1], num_classes)  # Output 5 classes

    def forward(self, x):
        x = self.layers(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = torch.sigmoid(self.fc(x))  
        x = torch.round(x)  
        return x

x_train = x_tensor 
y_train = y_tensor  
x_train = x_train.unsqueeze(1)  
model = CustomConv1DModel(input_size=1537, num_classes=5)
criterion = nn.BCELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10  
for epoch in range(num_epochs):
    model.train()  
    
    optimizer.zero_grad()
    
    output = model(x_train)
    
    loss = criterion(output, y_train)
    
    loss.backward()
    

    optimizer.step()
    
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

model.eval()  
with torch.no_grad(): 
    predictions = model(x_train)
    print(predictions)
