In [1]:
import sys

# Start every (re-)run of this cell from a clean namespace: remove all public
# globals left over from a previous execution. `this` and `was_mounted` are
# kept, and so is every name that starts with an underscore.
this = sys.modules[__name__] # type: ignore
for n in dir():
    if n in ('this', 'was_mounted') or n.startswith('_'):
        continue
    delattr(this, n)


# `was_mounted` survives the clean-up above so that a re-run can tell whether
# the drive mount further down has already happened. On the very first run the
# name does not exist yet, which raises NameError; only that error is handled
# so that interrupts and unrelated failures are not silently swallowed.
try:
    was_mounted = was_mounted
except NameError:
    was_mounted = False


import os

# The COLAB_RELEASE_TAG environment variable is used as the marker for
# "this notebook is running on Google Colab".
is_running_on_colab = bool(os.getenv("COLAB_RELEASE_TAG"))

if is_running_on_colab:
  # Imported locally: the namespace clean-up at the top of this cell removes
  # public globals (including `sys`) before this point is reached.
  import subprocess
  import sys

  packages_to_install = ['pandas==2.1.3','neurokit2', 'wfdb']

  for package in packages_to_install:
    # Install with the interpreter that runs this kernel and report failures
    # instead of ignoring the exit status.
    completed = subprocess.run([sys.executable, '-m', 'pip', 'install', package])
    if completed.returncode != 0:
      print(f'WARNING: pip install {package} failed with exit code {completed.returncode}')

  from google.colab import drive, files

  # Absolute path below the mount point used by drive.mount(). A path relative
  # to the working directory stops resolving once os.chdir() below has run,
  # which made a second execution of this cell fail in os.path.samefile().
  code_directory = '/content/gdrive/MyDrive/TCC/ectopic_beats_detection'
  if not was_mounted:
      drive.mount('/content/gdrive')
  was_mounted = True

  # Work from the project directory so that the local imports (utils, globals)
  # and relative data paths resolve.
  if not os.path.samefile(os.getcwd(), code_directory):
    os.chdir(code_directory)

from utils import create_compare_df, create_df_beats, create_dict_results, plot_results, calculate_metrics, resolve_relative_path
from globals import *
import sys
import neurokit2 as nk
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
import pyarrow as pa
from os.path import join
import pyarrow as pa
from typing import Any, Dict, Iterable, List, Tuple, Union
from numpy import typing as npt
from utils import Processor, Processors
from datetime import datetime
import pickle
from multiprocessing import Pool, cpu_count
from timeit import default_timer as timer


# Do not truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Read the parquet tables stored in `dataframes_directory`; the file names are
# paired positionally with the variables on the left-hand side.
(
    df_record_lead_ann,
    df_lead_ann_summery,
    df_ann_summery,
    df_code_description,
) = (
    pd.read_parquet(join(dataframes_directory, parquet_name))
    for parquet_name in (
        'df_record_lead_ann.parquet',
        'df_lead_ann_summery.parquet',
        'df_ann_summery.parquet',
        'df_code_description.parquet',
    )
)

# Multiple record analysis

In [2]:
# ---- Configuration of the multi-record run ----
total_time = 300 # seconds
offset = 500 # seconds
# Seconds trimmed from the start and from the end of every analysed window
# before the detections are compared with the annotations.
discard_start_sec = discard_end_sec = 2

methods = ['neurokit', 'pantompkins1985', 'hamilton2002', 'martinez2004', 'christov2004',
               'gamboa2008', 'elgendi2010', 'engzeemod2012', 'kalidas2017', 'rodrigues2020']


list_processors = [
    Processor(None),
    Processor('detrend', method = 'polynomial', order = 0),
    Processor('detrend', method = 'polynomial', order = 1),
]

# Results, nested as record number -> processor name -> method -> metrics.
dict_multi_analysis = {}

print(f'Total time = {total_time} seconds')

# Timestamp used in the name of the pickle written at the end of the cell.
time_str = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

# A single worker pool serves the whole run and is shut down when the block
# exits (also on errors or interrupts). Creating a new pool for every record
# and processor without closing it leaked worker processes.
with Pool(cpu_count()) as pool:
    for idx, row in df_record_lead_ann.iterrows():
        # Only records with an MLII signal are analysed; signal_track says
        # whether it is the upper (0) or the lower (1) signal of the record.
        if row['upper_signal'] == 'MLII':
            signal_track = 0
        elif row['lower_signal'] == 'MLII':
            signal_track = 1
        else:
            continue

        record_num = row['record']

        # Start measuring time
        start_time = timer()

        print(f'Processing record {record_num}')

        dict_multi_analysis[record_num] = {}

        for processor in list_processors:
            # NOTE(review): `processor` is only used for its name here; none of
            # the calls below receive it. Unless the helpers pick it up through
            # another channel, every processor produces the same metrics.
            # TODO confirm and apply the processor to the signal.
            processor_name = processor.processor_name
            print(f'Processor =  {processor_name}')

            df_beats, ecg, start_samples, end_samples, fs = create_df_beats(record_num, total_time, offset, LIST_BEATS_1, signal_track)

            # Length of the requested window in samples (not used further in this cell).
            samples = int(total_time * fs)

            first_used_sample = start_samples + discard_start_sec * fs
            last_used_sample = end_samples - discard_end_sec * fs

            # Run every detection method in its own worker: one call per
            # method, each receiving a single-element method list.
            list_results = pool.starmap(
                create_dict_results,
                [
                    (ecg, [method], start_samples, first_used_sample, last_used_sample, fs, discard_start_sec, discard_end_sec)
                    for method in methods
                ],
            )
            # Merge the per-method result dicts into one.
            dict_results = {}
            for result in list_results:
                dict_results.update(result)

            # Now the operations are performed on the time window, we can discard the first and last n seconds
            ecg = ecg.loc[first_used_sample:last_used_sample]
            df_beats = df_beats[(df_beats.peak_index >= first_used_sample) & (df_beats.peak_index <= last_used_sample)].reset_index(drop = True)

            df_comp_methods = create_compare_df(df_beats, dict_results)

            dict_metrics = calculate_metrics(df_comp_methods, methods)

            # Store the window description next to the metrics of each method.
            for method in dict_metrics:
                dict_metrics[method]['signal_track'] = signal_track
                dict_metrics[method]['start_samples'] = start_samples
                dict_metrics[method]['end_samples'] = end_samples

            dict_multi_analysis[record_num][processor_name] = dict_metrics

        # Elapsed time for the whole record (all processors).
        end_time = timer()
        print(f'Time elapsed = {end_time - start_time} seconds')

# save results as pickle
with open(join(dataframes_directory, f'dict_multi_analysis_{time_str}.pickle'), 'wb') as handle:
    pickle.dump(dict_multi_analysis, handle, protocol=pickle.HIGHEST_PROTOCOL)

Total time = 300 seconds
Processing record 100
Processor =  None
Time elapsed = 35.10825809999983 seconds
Processor =  detrend|{'method': 'polynomial', 'order': 0}
Time elapsed = 69.04168519999985 seconds
Processor =  detrend|{'method': 'polynomial', 'order': 1}
Time elapsed = 103.35121439999989 seconds
Processing record 101
Processor =  None
Time elapsed = 34.68639729999995 seconds
Processor =  detrend|{'method': 'polynomial', 'order': 0}
Time elapsed = 70.52362369999992 seconds
Processor =  detrend|{'method': 'polynomial', 'order': 1}
Time elapsed = 105.52813719999995 seconds
Processing record 103
Processor =  None
Time elapsed = 35.28421759999992 seconds
Processor =  detrend|{'method': 'polynomial', 'order': 0}
Time elapsed = 70.21772190000002 seconds
Processor =  detrend|{'method': 'polynomial', 'order': 1}
Time elapsed = 104.23426649999988 seconds
Processing record 105
Processor =  None
Time elapsed = 34.50319149999996 seconds
Processor =  detrend|{'method': 'polynomial', 'order':

KeyboardInterrupt: 

In [6]:
# Column-oriented accumulator: one list per output column, in the column order
# of the resulting DataFrame.
dict_multi_analysis_df = {
    column: []
    for column in (
        'record_num',
        'processor',
        'method',
        'accuracy',
        'precision',
        'signal_track',
        'start_samples',
        'end_samples',
    )
}

# Flatten record -> processor -> method -> metrics into one row per
# (record, processor, method) combination.
for record_num, dict_processor in dict_multi_analysis.items():
    for processor, dict_metrics in dict_processor.items():
        for method, metrics in dict_metrics.items():
            # The identifying columns come from the loop position; every other
            # column is read from the metrics dict under the same name.
            identifiers = {'record_num': record_num, 'processor': processor, 'method': method}
            for column, values in dict_multi_analysis_df.items():
                values.append(identifiers[column] if column in identifiers else metrics[column])

df_multi_analysis = pd.DataFrame(dict_multi_analysis_df)

# Saving the table is currently disabled:
#df_multi_analysis.to_parquet(join(dataframes_directory, f'df_multi_analysis_{time_str}.parquet'))

# Single record analysis
Here we analyze a single record from the dataset; the analysis across all records is done in the "Multiple record analysis" section above.


In [None]:
# Detection methods compared in the single-record analysis.
methods = ['hamilton2002', 'kalidas2017', 'rodrigues2020']

# Load record

record_num = 107
total_time = 180 # seconds
offset = 500 # seconds

df_beats, ecg, start_samples, end_samples, fs = create_df_beats(record_num, total_time, offset, derised_anns = LIST_BEATS_1)

# Usable part of the window for THIS record. These bounds used to be read
# before being assigned in this cell, so they silently carried over whatever
# the last iteration of the multi-record loop had left behind.
# discard_start_sec / discard_end_sec come from the multi-record configuration cell.
first_used_sample = start_samples + discard_start_sec * fs
last_used_sample = end_samples - discard_end_sec * fs

# Same call shape as in the multi-record analysis, where the return value is
# consumed as a dict of results.
dict_results = create_dict_results(ecg, methods, start_samples, first_used_sample, last_used_sample, fs, discard_start_sec, discard_end_sec)

# Now the operations are performed on the time window, we can discard the first and last n seconds
ecg = ecg.loc[first_used_sample:last_used_sample]
df_beats = df_beats[(df_beats.peak_index >= first_used_sample) & (df_beats.peak_index <= last_used_sample)].reset_index(drop = True)

x_xis_factor = 1 #1/fs # 1/fs = seconds, 1 = samples


fig = plot_results(
    dict_results, df_beats, ecg,
    [
        # Annotation symbols that are not in LIST_BEATS_1: small black markers.
        (
            df_code_description.symbol[~df_code_description.symbol.isin(
                LIST_BEATS_1)],
            dict(mode="markers", marker=dict(size=4, color="black"))
        ),

        # Annotation symbols in LIST_BEATS_1: larger red markers.
        (LIST_BEATS_1, dict(mode="markers", marker=dict(size=9, color="red")))
    ],

    # Pass the variable so the plot and the zoom range below stay in sync.
    x_xis_factor=x_xis_factor
)

#Define x zoom
fig.update_xaxes(range=[first_used_sample * x_xis_factor, (first_used_sample + 10 * fs) * x_xis_factor ])

fig.show()


df_comp_methods = create_compare_df(df_beats, dict_results)
for method in methods:
    # Read the metrics by key rather than by position in the dict.
    method_metrics = calculate_metrics(df_comp_methods, [method])[method]
    precision = method_metrics['precision']
    accuracy = method_metrics['accuracy']

    print(f'{method}:\n\tPrecision: {100*precision:.2f} %\n\tAccuracy: {100*accuracy:.2f} %\n')