# Main Pipeline File

The pipeline consists of the following steps:
<ol>
   <li>Read in the files and deal with the missing data. </li>
   <li>Preprocess the signals</li>
   <li>Complete feature extraction</li>
   <li>Put data into a model.</li>
   <li>Optimize, compare, and iterate; try other models.</li>
</ol>

In [1]:
# Import libraries. 
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt 
import os
import wfdb
import pickle
import sys
import glob
from scipy.signal import butter, lfilter
import pprint


In [2]:
# Load helper files.
import dataloaders
import visualize
import preprocess
import segment 
# import cart_model

In [3]:
# Report which compute device is available (the helper prints whether a GPU was found).
dataloaders.get_device_info()

Using device: cpu

Using the CPU, no GPU found


In [4]:
# Root directory of the MIT-BIH Arrhythmia Database records.
# Set the MITBIH_DATA_DIR environment variable to point at your local copy of
# the dataset; when it is unset (or empty) the original default location is used,
# so existing setups keep working unchanged.
file_path = os.environ.get('MITBIH_DATA_DIR') or 'G:/Datasets/mit-bih-arrhythmia-database-1.0.0/'

# Keep a trailing separator: the loader's "completed pt #.../100" log lines
# suggest record names are appended directly to this prefix.
if not file_path.endswith(('/', '\\')):
    file_path += '/'

In [5]:
# Load all records under file_path; the result is indexed by record ID string
# (e.g. patient_data['103']) and each value is a list of per-entry dicts.
patient_data = dataloaders.load_all_records(file_path)

# Debug helpers (uncomment to inspect the loaded data):
# print(patient_data)
# print(patient_data['102'][0])  # Prints the first entry for patient 102

completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/100
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/101
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/102
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/103
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/104
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/105
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/106
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/107
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/108
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/109
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/111
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/112
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/113
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/114
completed pt #G:/Datasets/mit-bih-arrhythmia-database-1.0.0/115
completed pt #G:/Datasets/mit-bih-arrhyt

In [6]:
# Load raw ECG and annotations for a single record.
# signal, annotations = dataloaders.load_record(file_path + '/103')
# print(signal.shape)

# record = wfdb.rdrecord(file_path+'/100')  
# test check for loading lead count, weird bug
# print("Number of leads:", record.p_signal.shape[1])


In [7]:
# Shared pretty-printer, reused by later inspection cells.
pp = pprint.PrettyPrinter(indent=2)

# Show the first three entries for record 103 to illustrate the entry structure:
# each entry carries a 'label' plus one scalar value per lead
# ('signal_lead_1', 'signal_lead_2').
pp.pprint(patient_data['103'][:3])

[ {'label': 'normal', 'signal_lead_1': -0.375, 'signal_lead_2': 0.05},
  {'label': 'normal', 'signal_lead_1': -0.375, 'signal_lead_2': 0.05},
  {'label': 'normal', 'signal_lead_1': -0.375, 'signal_lead_2': 0.05}]


In [8]:
# Visualize the raw ECG with annotations for the 1 record.
#visualize.visualize_ecg_with_labels(signal, annotations, fs=250, duration=15)
#visualize.visualize_ecg_with_annotationsV2(patient_data, patient_id='221', num_beats=5)

In [9]:
# check for missing values now before preprocessing. 
#missing_values_report = preprocess.check_for_missing_values(patient_data)

In [10]:
# # R-R interval calculation. 
# rr_intervals_report = preprocess.calculate_rr_intervals(patient_data, fs=360)
# # print("R-R intervals for patient 100:", rr_intervals_report.get('100', []))

# unique_rr_counts = {patient_id: len(set(rr_intervals)) for patient_id, rr_intervals in rr_intervals_report.items()}
# print(unique_rr_counts)


In [11]:
# NOTE: commented out unless you really want everything to be printed.
# Label check for all labels that aren't 'Other'
# for patient_id, beats in patient_data.items():
#     print(f"Patient ID: {patient_id}")
    
#     for beat in beats: 
#         label = beat['label']
#         if label != 'Other': 
#             print(f"Label: {label}") 


In [20]:
# Tabulate how many entries carry each rhythm label for record 102.
visualize.summarize_rhythm_counts(patient_data, "102")



Summary of Rhythm Counts for Patient 102:

Rhythm Type  Count
     normal 621010
      Other  28990


# Preprocessing Stage

1. High-pass filter to remove baseline wander
2. Notch filter to remove powerline interference (if any)
3. Low-pass filter to remove high-frequency noise (cutoff set to 40 Hz for now; to be confirmed)
4. Moving-average filter to smooth the remaining signal
5. Normalization to the range 0 to 1, because the leads all behave differently

<p>We may want to consider an FFT or a wavelet transform because this is time-series data; this can be decided later on.</p>

In [13]:
# Testing the new -1 to 1 normalization (couldn't access it earlier).

# for patient_id, beats in patient_data.items():
#     for beat in beats:
#         # Apply preprocessing to each lead in the beat entry
#         for key in beat:
#             if key.startswith('signal_lead_'):
#                 beat[key] = preprocess.test_signal(beat[key])

# patient_id = '100'
# if patient_id in patient_data:
#     print("After preprocessing:")
#     pp.pprint(patient_data[patient_id][:3])  # Print first 3 beats for verification
# else:
#     print(f"Patient ID {patient_id} not found in data.")

In [14]:
# Pre-Preprocessing visual check.
#visualize.visualize_ecg_with_annotationsV2(patient_data, patient_id='100', original_fs=360, target_fs=360)

In [15]:
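# NOTE: the MIT-BIH Arrhythmia Database is sampled at 360 Hz, and the R-R interval
# cell in this notebook uses fs=360; confirm the fs=250 below before re-enabling this call.
#patient_data = preprocess.apply_filters_and_normalization(patient_data, fs=250)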

In [16]:
# Post-preprocessing visual check.
#visualize.visualize_ecg_with_annotationsV2(patient_data, patient_id='100')
#visualize.visualize_ecg_with_annotationsV2(patient_data, patient_id='221')

In [17]:
# Segment the data
# NOTE(review): this cell currently fails with
#   TypeError: object of type 'numpy.float64' has no len()
# The pprint output earlier in this notebook shows each patient_data entry holding
# a single scalar per lead (e.g. 'signal_lead_1': -0.375), so presumably one of the
# segment helpers calls len() on a per-lead value that it expects to be a sequence.
# Confirm the entry layout expected by segment.py against dataloaders.load_all_records.
# NOTE(review): samples_before + samples_after = 180, which differs from
# snippet_length=200 -- confirm how segment_ecg_data reconciles the two.
segmented_data = segment.segment_ecg_data(patient_data, snippet_length=200, samples_before=90, samples_after=90)

# Visualize the first three segmented beats for record 100.
# (The commented-out call below references `segment2`, which is not imported in this notebook.)
#segment2.visualize_segmented_data(segmented_data, patient_id='100', num_beats=3)
segment.visualize_segmented_data(segmented_data, patient_id='100', num_beats=3)

TypeError: object of type 'numpy.float64' has no len()