In [None]:
import wfdb
import pandas as pd
import numpy as np
import pywt
import heartpy as hp
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Define the record name (without extension)
record_name = "rec_1"  # Change this to your file

# Read the WFDB record identified by record_name.
record = wfdb.rdrecord(record_name)

# Keep only the first channel of the physical-unit signal matrix
# (the original author labels channel 0 as the raw ECG).
raw_signal = record.p_signal[:, 0]  # Signal 0: Raw ECG

# Derive the sample spacing from the record's own sampling frequency instead
# of assuming 500 Hz (2 ms per sample), so the time axis stays correct for
# recordings made at a different rate.
# NOTE(review): later cells in this notebook still pass fs=500 to the filter
# and to HeartPy; confirm the recording really is 500 Hz or update them too.
sample_period_ms = 1000.0 / record.fs

# Create a DataFrame for the ECG signal (only raw data)
df_ecg = pd.DataFrame({
    "Time (ms)": np.arange(len(raw_signal)) * sample_period_ms,
    "Raw ECG": raw_signal
})

# Display missing data summary (count of NaN entries per column)
print("Missing Data Summary:\n", df_ecg.isnull().sum())

In [None]:
# Handle missing data: ask the user which strategy to apply to NaN entries.
print("\nChoose how to handle missing values:")
print("1: Drop missing values")
print("2: Forward fill (use previous value)")
print("3: Backward fill (use next value)")
# Strip surrounding whitespace so an answer such as "2 " is still recognised.
choice = input("Enter your choice (1/2/3): ").strip()

if choice == "1":
    # Removes every row that contains at least one NaN.
    df_ecg = df_ecg.dropna()
    print("\nDropped missing values.")
elif choice == "2":
    # DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated
    # in recent pandas releases. Leading NaNs have no previous value and
    # therefore stay NaN; the summary below will show them.
    df_ecg = df_ecg.ffill()
    print("\nApplied forward fill.")
elif choice == "3":
    # DataFrame.bfill() replaces the deprecated fillna(method='bfill').
    # Trailing NaNs have no next value and therefore stay NaN.
    df_ecg = df_ecg.bfill()
    print("\nApplied backward fill.")
else:
    print("\nInvalid choice. No changes applied.")

# Report how many missing values remain in each column after the chosen step.
print("\nFinal Missing Data Summary:\n", df_ecg.isnull().sum())

In [None]:
# Butterworth Bandpass Filter
def butter_bandpass(lowcut, highcut, fs, order=4):
    """Design a Butterworth band-pass filter.

    Parameters
    ----------
    lowcut, highcut : float
        Lower and upper band edges, in the same unit as ``fs``.
    fs : float
        Sampling frequency; the band edges are divided by ``fs / 2``
        before being handed to ``scipy.signal.butter``.
    order : int, optional
        Filter order passed to ``butter`` (default 4).

    Returns
    -------
    tuple
        The ``(b, a)`` coefficient arrays returned by ``butter``.
    """
    half_rate = fs * 0.5
    # Express both edges as fractions of the Nyquist frequency.
    band_edges = [edge / half_rate for edge in (lowcut, highcut)]
    return butter(order, band_edges, btype='band')

def apply_bandpass_filter(signal, lowcut=0.5, highcut=50, fs=500, order=4):
    """Band-pass filter ``signal`` with a Butterworth design.

    The coefficients come from ``butter_bandpass`` and are applied with
    ``scipy.signal.filtfilt`` (forward-backward filtering).

    Parameters
    ----------
    signal : array-like
        Samples to filter.
    lowcut, highcut : float, optional
        Band edges (defaults 0.5 and 50).
    fs : float, optional
        Sampling frequency (default 500).
    order : int, optional
        Butterworth order (default 4).

    Returns
    -------
    The filtered samples as returned by ``filtfilt``.
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    filtered = filtfilt(numerator, denominator, signal)
    return filtered

# Ask the user how strongly the signal should be smoothed; the answer is
# stored in choice1 and interpreted by the following cell.
noise_menu = (
    "\nChoose noise reduction strength:",
    "1: Weak (5-point moving average)",
    "2: Moderate (10-point moving average)",
    "3: Strong (14-point moving average)",
)
for menu_line in noise_menu:
    print(menu_line)
choice1 = input("Enter your choice (1/2/3): ")

In [None]:
# Translate the menu answer into a moving-average window length (samples).
window_sizes = {"1": 5, "2": 10, "3": 14}
if choice1 in window_sizes:
    window_size = window_sizes[choice1]
else:
    print("Invalid choice, using default (10)")
    window_size = 10

# Stage 1: band-pass filter the raw samples with the function defaults
# (0.5 - 50 Hz, fs=500, order 4).
bandpassed = apply_bandpass_filter(df_ecg["Raw ECG"].values)

# Stage 2: smooth with a uniform (boxcar) kernel of the selected length;
# mode='same' keeps the output the same length as the input.
smoothing_kernel = np.ones(window_size) / window_size
df_ecg["Denoised ECG"] = np.convolve(bandpassed, smoothing_kernel, mode='same')

# The unfiltered column is not carried into later cells.
df_ecg = df_ecg.drop(columns=["Raw ECG"])

In [None]:
# Outlier handling using the IQR method
Q1 = df_ecg["Denoised ECG"].quantile(0.25)
Q3 = df_ecg["Denoised ECG"].quantile(0.75)
IQR = Q3 - Q1

# Tukey fences: values more than 1.5 * IQR outside the quartiles are outliers.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Limit outliers to the fences instead of deleting their rows. Deleting rows
# makes the series non-uniformly sampled, while the later wavelet step and the
# HeartPy call (sample_rate=fs) treat consecutive rows as equally spaced, so
# every peak position and RR interval after a deleted row would be shifted.
# Clipping keeps one row per original sample and leaves "Time (ms)" intact.
# NOTE(review): on an ECG these fences may also flatten genuine QRS peaks;
# confirm that amplitude-based outlier handling is wanted at this stage.
df_ecg = df_ecg.copy()
df_ecg["Denoised ECG"] = df_ecg["Denoised ECG"].clip(lower=lower_bound, upper=upper_bound)

In [None]:
# Motion Artifact Removal using Wavelet Transform
def remove_motion_artifacts_wavelet(signal, wavelet="db6", level=4):
    """Shrink wavelet detail coefficients and rebuild the signal.

    The input is decomposed with ``pywt.wavedec``; every detail band is
    soft-thresholded at 0.8 times its own standard deviation, while the
    approximation band is left untouched. The result of ``pywt.waverec`` is
    cut to ``len(signal)`` samples.

    Parameters
    ----------
    signal : array-like
        One-dimensional samples to process.
    wavelet : str, optional
        Wavelet name passed to PyWavelets (default ``"db6"``).
    level : int, optional
        Decomposition level (default 4).

    Returns
    -------
    The reconstructed samples, at most ``len(signal)`` long.
    """
    bands = pywt.wavedec(signal, wavelet, level=level)
    approximation, details = bands[0], bands[1:]

    shrunk_details = []
    for band in details:
        cutoff = np.std(band) * 0.8
        shrunk_details.append(pywt.threshold(band, cutoff, mode="soft"))

    rebuilt = pywt.waverec([approximation] + shrunk_details, wavelet)
    # Reconstruction can come back longer than the input; trim to match.
    return rebuilt[:len(signal)]

# Run the wavelet clean-up on the denoised column and store the result back.
wavelet_cleaned = remove_motion_artifacts_wavelet(df_ecg["Denoised ECG"])
df_ecg["Denoised ECG"] = wavelet_cleaned

# Rescale the cleaned column linearly into the range [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_values = scaler.fit_transform(df_ecg[["Denoised ECG"]])
df_ecg["ECG Signal"] = scaled_values

# Only the normalised column is kept for the remaining cells.
df_ecg = df_ecg.drop(columns=["Denoised ECG"])

# Draw the normalised ECG against time on a single 10x5 inch figure.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df_ecg['Time (ms)'], df_ecg['ECG Signal'], label="ECG Signal", color='b')
ax.set(xlabel="Time (ms)", ylabel="ECG Signal", title="ECG Signal Over Time")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Extract ECG Features
fs = 500  # Sampling frequency (Hz) handed to HeartPy; hard-coded here
ecg_signal = df_ecg["ECG Signal"].values

try:
    # wd holds HeartPy's working data (peak and RR lists), m the measures.
    wd, m = hp.process(ecg_signal, sample_rate=fs)

    # Extract features
    heart_rate = m["bpm"]

    # Use HeartPy's corrected RR list (milliseconds, rejected peaks removed)
    # rather than differencing wd["peaklist"]: the raw peak list still
    # contains peaks HeartPy rejected, so statistics derived from it would
    # not agree with the reported BPM.
    rr_intervals = np.asarray(wd["RR_list_cor"], dtype=float)
    if rr_intervals.size == 0:
        # Fail explicitly instead of silently reporting NaN statistics.
        raise ValueError("no valid RR intervals available")

    mean_rr = np.mean(rr_intervals)
    std_rr = np.std(rr_intervals)
    median_rr = np.median(rr_intervals)

    # Sum of squared samples of the (normalised) signal.
    energy = np.sum(ecg_signal**2)
    # -sum(x * log|x|) over the samples; the small constant avoids log(0).
    # NOTE(review): the samples are not normalised to a probability
    # distribution, so this is not a Shannon entropy in the strict sense.
    entropy = -np.sum(ecg_signal * np.log(np.abs(ecg_signal) + 1e-10))  # Avoid log(0)

    # Display features
    ecg_features = {
        "Heart Rate (BPM)": heart_rate,
        "Mean RR Interval (ms)": mean_rr,
        "Median RR Interval (ms)": median_rr,
        "RR Interval Std Dev (ms)": std_rr,
        "Signal Energy": energy,
        "Signal Entropy": entropy
    }

    for key, value in ecg_features.items():
        print(f"{key}: {value:.2f}")

except Exception as e:
    # Best-effort cell: report the failure and let the notebook continue.
    print("\nError in feature extraction:", e)
    print("Possible causes: No clear R-peaks detected, signal distortion.")