In [None]:
import os
import random

import utility_new as pc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences

from keras.utils import plot_model
from keras import layers
from keras.layers import Input, Dense, Dropout, Activation, BatchNormalization, Add
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPool1D, ZeroPadding1D, LSTM, Bidirectional
from keras.models import Sequential, Model
from keras.utils import plot_model
from keras.layers import Concatenate

from sklearn.metrics import confusion_matrix

import scipy
from scipy import optimize
from scipy.io import loadmat
from scipy.signal import butter, filtfilt

import ecg_plot
import heartpy as hp

import pywt
import heartpy as hp

from itables import init_notebook_mode

init_notebook_mode(all_interactive=True)

%load_ext autoreload
%autoreload
%reload_ext autoreload



In [None]:
# Diagnosis codes treated as the positive class.
# NOTE(review): these look like SNOMED CT codes (atrial fibrillation / flutter) — confirm.
positive_classes = ['164889003', '164890007']

# Every diagnosis code considered by this notebook (same order as before).
all_available_classes = [
    '270492004', '164889003', '164890007',
    '426627000', '713427006', '713426002',
    '445118002', '39732003', '164909002',
    '251146004', '698252002', '10370003',
    '284470004', '427172004', '164947007',
    '111975006', '164917005', '47665007',
    '59118001', '427393009', '426177001',
    '426783006', '427084000', '63593006',
    '164934002', '59931005', '17338001',
]

# Local folders holding the three ECG collections (machine-specific absolute paths).
path_C = "C:/Users/Admin/Downloads/archive/CPSC_Extra/"
path_G = "C:/Users/Admin/Downloads/archive/G12ECG/WFDB/"
path_P = "C:/Users/Admin/Downloads/archive/PTB_XL/WFDB/"
dataset_paths = [path_C, path_G, path_P]

In [None]:
## Load the key data for one dataset to check the signal counts of the positive and negative classes
positive_classes = ['164889003', '164890007']
(
    gender,
    age,
    labels,
    ecg_filenames,
    class_counts,
    samples_with_positive_class,
) = pc.import_key_data_with_simple(path_P, positive_classes)

In [None]:
## Functions for wavelet denoising and heart-rate (HRV) extraction
# Filter with sym5
def sym5_wavelet_filter(signal, wavelet='sym5', level=9, threshold=0.1):
    """Denoise a 1-D signal by soft-thresholding its wavelet detail coefficients.

    The signal is decomposed with ``pywt.wavedec``, every detail band is shrunk
    with a soft threshold, and the result is rebuilt with ``pywt.waverec``.

    Parameters
    ----------
    signal : array-like
        Samples of a single lead.
    wavelet : str, optional
        Wavelet name passed to PyWavelets (default ``'sym5'``).
    level : int, optional
        Decomposition depth (default ``9``).
    threshold : float, optional
        Soft-threshold value applied to each detail band (default ``0.1``).

    Returns
    -------
    numpy.ndarray
        The denoised signal, with the same number of samples as ``signal``.
    """
    n_samples = len(signal)

    coeffs = pywt.wavedec(signal, wavelet, level=level)

    # coeffs[0] is the approximation band and is kept untouched; only the
    # detail bands (coeffs[1:]) are shrunk.
    denoised_coeffs = [coeffs[0]]
    denoised_coeffs.extend(
        pywt.threshold(detail, threshold, mode='soft') for detail in coeffs[1:]
    )

    reconstructed_signal = pywt.waverec(denoised_coeffs, wavelet)

    # waverec can hand back one extra trailing sample when the input length is
    # odd; trim it so the output stays aligned with the input.
    return reconstructed_signal[:n_samples]

# Try to calculate the heart rate across all leads, across all files
def heartrate_coll(all_ecg_filenames, sample_rate=500, n_leads=12):
    """Estimate the heart rate of every lead of every recording.

    Each file is loaded with ``pc.load_challenge_data``; every lead is denoised
    with ``sym5_wavelet_filter`` and passed to ``hp.process`` to obtain its
    ``'bpm'`` value. A lead whose processing raises is recorded as NaN, and a
    file that cannot be loaded is reported and skipped.

    Parameters
    ----------
    all_ecg_filenames : sequence
        Recording paths accepted by ``pc.load_challenge_data``.
    sample_rate : float, optional
        Sampling frequency handed to HeartPy (default ``500``).
    n_leads : int, optional
        Number of leads read from each recording (default ``12``).

    Returns
    -------
    pandas.DataFrame
        One row per successfully loaded file with the columns ``Index``
        (position of the file in ``all_ecg_filenames``), ``Lead_0`` ..
        ``Lead_{n_leads - 1}``, ``Median_HeartRate`` (NaN-ignoring median over
        the leads) and ``Lead_Name`` (the value returned by ``get_lead_name``).
    """
    lead_names = [f'Lead_{i}' for i in range(n_leads)]
    columns = ['Index'] + lead_names

    # Create a list to store rows
    rows = []

    for index, filename in enumerate(all_ecg_filenames):
        # Only the load can abort a whole file; per-lead failures are handled below.
        try:
            data, _header = pc.load_challenge_data(filename)
        except Exception as e:
            print(f"Error processing index {index}: {str(e)}")
            continue

        # First cell of the row is the file position, followed by one bpm per lead
        heart_rates = [index]

        for lead_num in range(n_leads):
            try:
                # Apply sym5 wavelet filter, then let HeartPy detect the beats
                filtered_ecg = sym5_wavelet_filter(data[lead_num])
                _working_data, measures = hp.process(filtered_ecg, sample_rate=sample_rate)
                heart_rates.append(measures['bpm'])
            except Exception:
                # Best effort: a lead that cannot be processed counts as "no heart rate"
                heart_rates.append(np.nan)

        rows.append(heart_rates)

        # Print progress
        print(f"Processed index {index}")

    if not rows:
        # Nothing could be loaded: return an empty frame that still exposes the
        # full column set instead of failing inside the row-wise apply calls.
        return pd.DataFrame(columns=columns + ['Median_HeartRate', 'Lead_Name'])

    # Create a DataFrame from the list of rows
    heart_rates_df = pd.DataFrame(rows, columns=columns)

    # Median over the lead columns only (the Index column is excluded)
    heart_rates_df['Median_HeartRate'] = heart_rates_df[lead_names].apply(
        lambda lead_values: np.nanmedian(lead_values), axis=1
    )

    # Name of the lead whose heart rate is closest to the per-file median
    heart_rates_df['Lead_Name'] = heart_rates_df.apply(
        lambda row: get_lead_name(row, lead_names), axis=1
    )

    return heart_rates_df

def get_lead_name(row, lead_names):
    """Return the lead whose heart rate is closest to the row's median heart rate.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a value for every name in ``lead_names`` and for
        ``'Median_HeartRate'``.
    lead_names : list of str
        Candidate lead columns, in priority order (ties go to the earliest).

    Returns
    -------
    str or float
        The name of the closest lead, or ``numpy.nan`` when no lead has a
        usable (non-NaN) heart rate. Previously the first lead name was
        returned in that case, which made failed recordings look valid.
    """
    median_rate = row['Median_HeartRate']

    # Leads whose processing failed carry NaN and can never be "closest".
    usable_leads = [lead for lead in lead_names if not np.isnan(row[lead])]

    if not usable_leads or np.isnan(median_rate):
        return np.nan

    return min(usable_leads, key=lambda lead: abs(row[lead] - median_rate))

In [None]:
# NOTE(review): `all_ecg_filenames` is not defined in any cell shown in this notebook
# (the loading cell produces `ecg_filenames`) — confirm its origin so that
# Restart Kernel -> Run All works.
heart_rates_df = heartrate_coll(all_ecg_filenames)
heart_rates_df.to_csv('heart_rates_with_all_lead_new.csv', index=False)
print(heart_rates_df)

# Keep only the recordings that yielded both a representative lead and a median
# heart rate. (The previous version ended the mask with a dangling `&`, which is
# a SyntaxError.)
heart_rates_df_selected = heart_rates_df[
    heart_rates_df['Lead_Name'].notna()
    & heart_rates_df['Median_HeartRate'].notna()
]

len(heart_rates_df_selected)

In [None]:
def calculate_metrics(file_path, lead, sample_rate=500.0):
    """Compute HeartPy heart-rate / HRV measures for one lead of one recording.

    Parameters
    ----------
    file_path : str
        Recording path accepted by ``pc.load_challenge_data``.
    lead : int
        Index of the lead to analyse within the loaded signal matrix.
    sample_rate : float, optional
        Sampling frequency handed to HeartPy (default ``500.0``).

    Returns
    -------
    dict
        Maps the output column names to the corresponding HeartPy measure.

    Notes
    -----
    Any exception raised by the loader or by ``hp.process`` propagates to the
    caller.
    NOTE(review): unlike ``heartrate_coll`` the lead is analysed without the
    wavelet denoising step — confirm this is intended.
    """
    # First element of the loader's result is used as the signal matrix
    # (presumably leads x samples — TODO confirm against utility_new).
    mat_data = pc.load_challenge_data(file_path)[0]

    # Use HeartPy to process the lead data and calculate metrics
    _working_data, measures = hp.process(mat_data[lead], sample_rate=sample_rate)

    heart_rate_parameters = {
        'HeartRate': measures['bpm'],          # Beats per minute
        'InterBeatInterval': measures['ibi'],  # Mean interval between successive beats
        'HRV_SDNN': measures['sdnn'],          # Standard deviation of the beat-to-beat intervals
        'HRV_SDSD': measures['sdsd'],          # Standard deviation of successive interval differences
        'HRV_RMSSD': measures['rmssd'],        # Root mean square of successive interval differences
        'PNN20': measures['pnn20'],            # Proportion of successive differences above 20 ms
        'PNN50': measures['pnn50'],            # Proportion of successive differences above 50 ms
        'HR_MAD': measures['hr_mad'],          # Median absolute deviation of the beat-to-beat intervals
        # Poincare ratio: previously this column was filled with measures['s']
        # (the ellipse area), so the name did not match its content.
        'Ratio_of_SD1_SD2': measures['sd1/sd2'],
        # Poincare ellipse area S: previously a duplicate of 'hr_mad'.
        'InfoS': measures['s'],
        # Add more metrics as needed
    }

    return heart_rate_parameters


In [None]:
# Compute the HeartPy metrics for every selected recording / lead pair.
# NOTE(review): `selected_ecg_df` is not defined in any cell shown in this notebook; it
# must provide the columns 'Filename', 'LEAD', 'Age', 'Gender' and 'labels' — confirm
# where it is built so that Restart Kernel -> Run All works.
#
# Records are collected in a list and turned into a DataFrame once, instead of
# concatenating a one-row frame per iteration (quadratic). Loop-local names are used so
# the `age` / `gender` lists created by the loading cell are no longer overwritten.
metric_records = []
for _, ecg_row in selected_ecg_df.iterrows():
    ecg_path = ecg_row['Filename']
    lead_index = pd.to_numeric(ecg_row['LEAD'])

    # Metrics first, then the descriptive columns, to keep the original column order
    record = dict(calculate_metrics(ecg_path, lead_index))
    record['Age'] = ecg_row['Age']
    record['Gender'] = ecg_row['Gender']
    record['Encoded_labels'] = ecg_row['labels']
    record['Filename'] = ecg_path
    record['LEAD'] = lead_index

    metric_records.append(record)

calculated_metrics_df = pd.DataFrame(metric_records)

In [None]:
# Persist the per-recording metrics table as CSV, without the DataFrame index column.
calculated_metrics_df.to_csv(
    path_or_buf='Final_frame_to_work_withFilterSignalLengthCheck_new.csv',
    index=False,
)