In [None]:
import sys

# Start from a clean namespace when the notebook is re-run: every public
# (non-underscore) module-level name is removed, except the module handle
# `this` that is needed to perform the deletion itself.
this = sys.modules[__name__]
for n in dir():
    if n != 'this' and n[0] != '_':
        delattr(this, n)

from utils import *
from globals import *
import sys
import neurokit2 as nk
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
import pyarrow as pa
from os.path import join
import pyarrow as pa

In [None]:
# Read the two precomputed tables from `dataframes_directory`, in the same
# order as before: the record/lead/annotation table first, then its summary.
df_record_lead_ann, df_lead_ann_summery = (
    pd.read_parquet(join(dataframes_directory, parquet_name))
    for parquet_name in ('df_record_lead_ann.parquet', 'df_lead_ann_summery.parquet')
)

In [None]:
# The only feature extracted from the ECG is the R wave. Initially, a point in the QRS complex is detected (QRS point), using the algorithm proposed by Hamilton and Tompkins [25,26]. Then, the main wave of the QRS complex (R wave) is identified in the window [QRS - 280 ms, QRS + 120 ms] by locating the point where the signal has its maximum absolute value. The RR-interval signal is constructed by measuring the time interval between successive R waves.



In [95]:
!pip install black

Collecting black
  Downloading black-23.11.0-cp39-cp39-win_amd64.whl (1.3 MB)
     ---------------------------------------- 1.3/1.3 MB 2.1 MB/s eta 0:00:00
Collecting pathspec>=0.9.0
  Downloading pathspec-0.11.2-py3-none-any.whl (29 kB)
Collecting mypy-extensions>=0.4.3
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Collecting tomli>=1.1.0
  Downloading tomli-2.0.1-py3-none-any.whl (12 kB)
Collecting packaging>=22.0
  Downloading packaging-23.2-py3-none-any.whl (53 kB)
     ---------------------------------------- 53.0/53.0 kB 1.3 MB/s eta 0:00:00
Installing collected packages: tomli, pathspec, packaging, mypy-extensions, black
  Attempting uninstall: packaging
    Found existing installation: packaging 21.3
    Uninstalling packaging-21.3:
      Successfully uninstalled packaging-21.3
Successfully installed black-23.11.0 mypy-extensions-1.0.0 packaging-23.2 pathspec-0.11.2 tomli-2.0.1


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-cloud-appengine-logging 1.1.0 requires google-api-core[grpc]<3.0.0dev,>=1.22.2, which is not installed.
drf-yasg 1.20.0 requires Django>=2.2.16, which is not installed.
mapillary 1.0.11 requires bleach==3.3.0, but you have bleach 5.0.1 which is incompatible.
mapillary 1.0.11 requires certifi==2021.5.30, but you have certifi 2022.12.7 which is incompatible.
mapillary 1.0.11 requires packaging==21.3, but you have packaging 23.2 which is incompatible.
mapillary 1.0.11 requires protobuf==3.17.3, but you have protobuf 3.20.3 which is incompatible.
mapillary 1.0.11 requires tqdm==4.61.1, but you have tqdm 4.65.0 which is incompatible.
mapillary 1.0.11 requires urllib3==1.26.5, but you have urllib3 1.26.16 which is incompatible.


# Single record analysis
We analyze just one record from the dataset. Later we will analyze all the records.


In [None]:
# Load one record and define the analysis window, expressed in samples.

record_num = 212
record, ann = load_record(record_num)
fs = record.fs  # sampling frequency of the loaded record

total_time = 34 # seconds
offset = 80 # seconds
samples = int(total_time * fs)
start_samples = int(offset * fs)
end_samples = start_samples + samples

# Margin trimmed from both ends of the window after processing.
# Cast to int like the other sample counts so that the window bounds stay
# integer sample indexes even when fs is not an integer.
discard_samples = int(2 * fs) # 2 seconds
first_used_sample = start_samples + discard_samples
last_used_sample = end_samples - discard_samples

In [None]:
# First signal column of the record, cut to the analysis window. The Series
# keeps its original integer labels, so its index is the sample number.
ecg = pd.Series(record.p_signal[:, 0], dtype=ECG_TYPE)[start_samples:end_samples]

# Reference annotations: sample position and symbol of every annotated beat
ann_beat_indexes = pd.Series(ann.sample, dtype=INDEX_TYPE)
ann_beat_symbols = pd.Series(ann.symbol, dtype=ANN_TYPE)

# Desired annotation symbols (N - normal beat)
derised_anns = ['N', 'P', 'f', 'L', 'R', 'Q']

# Keep an annotation only if its symbol is desired ...
mask_derised_ann = ann_beat_symbols.isin(derised_anns)

# ... and it falls inside the time window [start_samples, end_samples)
mask_time_window = ann_beat_indexes.ge(start_samples) & ann_beat_indexes.lt(end_samples)

mask_used_ann = mask_time_window & mask_derised_ann

# Filter both series with the combined mask and renumber them from 0
ann_beat_indexes = ann_beat_indexes[mask_used_ann].reset_index(drop=True)
ann_beat_symbols = ann_beat_symbols[mask_used_ann].reset_index(drop=True)

df_beats = correct_peaks(ecg, ann_beat_indexes, fs)

# Attach the annotation symbol to every beat; each peak index must match
# exactly one annotation (validate='one_to_one').
df_ann_symbols = pd.DataFrame({'peak_index': ann_beat_indexes, 'symbol': ann_beat_symbols})
df_beats = df_beats.rename(columns={'index': 'peak_index', 'local_max': 'cor_peak_index'})
df_beats = df_beats.merge(df_ann_symbols, on='peak_index', how='left', validate='one_to_one')

# Beats without a corrected position fall back to the annotated peak index
mask_uncorrected = df_beats.cor_peak_index.isna()
df_beats.loc[mask_uncorrected, 'cor_peak_index'] = df_beats.peak_index

df_beats.tail()

In [None]:
# Peak-detection methods to compare (names are passed through to find_peaks)
methods = ['hamilton2002', 'kalidas2017', 'rodrigues2020']

# Maps method name -> DataFrame of that method's beats inside the trimmed window
dict_results = {}

for method in methods:
    method_beat_indexes = find_peaks(ecg, fs, method)
    # Fix index: shift by the window start so the positions line up with the
    # sample-number labels of `ecg` (presumably find_peaks returns positions
    # relative to the start of the signal it is given).
    method_beat_indexes += start_samples
    df_method_beats = correct_peaks(ecg, method_beat_indexes, fs)

    # When the correction yields no position (NaN), fall back to the original,
    # uncorrected index. The previous code assigned `local_max` to itself, which
    # left the NaNs in place and let the window filter below drop those beats.
    # The column is read with ['index'] because `.index` is the row index.
    mask_uncorrected = df_method_beats.local_max.isna()
    df_method_beats.loc[mask_uncorrected, 'local_max'] = df_method_beats.loc[mask_uncorrected, 'index']

    # Keep only beats inside the trimmed window
    local_max = df_method_beats.local_max
    df_method_beats = df_method_beats[(local_max >= first_used_sample) & (local_max < last_used_sample)]

    # Store results in dict
    dict_results[method] = df_method_beats


# Now that the operations have been performed on the full time window, discard
# the first and last n seconds.
# NOTE: this re-binds `ecg` and `df_beats`; re-run the cell that builds them
# before re-running this cell, otherwise detection runs on the trimmed signal.
ecg = ecg.loc[first_used_sample:last_used_sample]
df_beats = df_beats[(df_beats.peak_index >= first_used_sample) & (df_beats.peak_index <= last_used_sample)].reset_index(drop = True)

In [None]:
# Interactive plot: ECG trace plus annotated beats and the beats of each method

# Multiplier applied to sample indexes on the x axis: 1 keeps samples, 1/fs gives seconds
x_xis_factor = 1

fig = go.Figure()
fig.add_trace(go.Scatter(x=ecg.index * x_xis_factor, y=ecg, name="ECG"))

# One red marker trace per desired annotation symbol, placed at the corrected peaks
for desired_ann in derised_anns:
    df_beats_desired = df_beats[df_beats.symbol == desired_ann]
    desired_peak_indexes = df_beats_desired.cor_peak_index
    fig.add_trace(
        go.Scatter(
            x=desired_peak_indexes * x_xis_factor,
            y=ecg.loc[desired_peak_indexes],
            mode="markers",
            name=desired_ann,
            marker=dict(size=7, color="red"),
        )
    )

# One marker trace per detection method (colors left to plotly's defaults)
for method in methods:
    df_method_beats = dict_results[method]
    peak_indexes = df_method_beats.local_max
    fig.add_trace(
        go.Scatter(
            x=peak_indexes * x_xis_factor,
            y=ecg.loc[peak_indexes],
            mode="markers",
            name=method,
            marker=dict(size=7),
        )
    )

# Initial zoom: the first 10 seconds of the used window
fig.update_xaxes(range=[first_used_sample * x_xis_factor, (first_used_sample + 10 * fs) * x_xis_factor])

# Tight margins on a white background, plus a range slider on a linear x axis.
# This call is the last expression of the cell, so the figure it returns is displayed.
fig.update_layout(
    margin=dict(l=0, r=0, t=15, b=0),
    paper_bgcolor="white",
    xaxis=dict(rangeslider=dict(visible=True), type="linear"),
)

In [None]:
# Per-method confusion counts against the reference annotations, then
# precision / recall / accuracy.
# NOTE(review): `df_comp_methods` is not defined in the cells visible here; it
# must be built before this cell runs. From its use below it is expected to hold
# a boolean 'ann' column and one boolean column per method - confirm.
for method in methods:
    ann_true = df_comp_methods['ann'] == True
    ann_false = df_comp_methods['ann'] == False
    detected_true = df_comp_methods[method] == True
    detected_false = df_comp_methods[method] == False

    # Each comparison is completed before combining with `&`. `&` binds tighter
    # than `==`, so the earlier `(a == False) & (b) == False` was evaluated as
    # `((a == False) & b) == False` and miscounted the true negatives.
    true_positive = (ann_true & detected_true).values.sum()
    true_negative = (ann_false & detected_false).values.sum()
    false_positive = (ann_false & detected_true).values.sum()
    false_negative = (ann_true & detected_false).values.sum()


    precision = true_positive / (true_positive + false_positive)
    recall = true_positive / (true_positive + false_negative)
    accuracy = (true_positive + true_negative) / (true_positive + true_negative + false_positive + false_negative)


    print(f'{method}:\n\tPrecision: {100*precision:.2f} %\n\tRecall: {100*recall:.2f} %\n\tAccuracy: {100*accuracy:.2f} %\n')

In [None]:
# Window definition for the pass over all records.
# NOTE(review): `fs` here is still the value left by an earlier cell; the records
# loaded in the loop below only update it afterwards.
total_time = 30 # seconds
offset = 17 # seconds
samples = int(total_time * fs)
start_samples = int(offset * fs)
end_samples = start_samples + samples

# Discard first n seconds.
# Cast to int like the other sample counts so the bound stays an integer sample
# index even when fs is not an integer.
# NOTE(review): `last_used_sample` is not recomputed for this window - confirm
# whether the value from the single-record cell is meant to be reused.
discard_samples = int(2 * fs) # 2 seconds
first_used_sample = start_samples + discard_samples

for idx, row in df_record_lead_ann.iterrows():
    # Select the signal column that carries lead MLII; skip records without it
    if row['upper_signal'] == 'MLII':
        signal_track = 0
    elif row['lower_signal'] == 'MLII':
        signal_track = 1
    else:
        continue

    record_num = row['record']
    record, ann = load_record(record_num)
    fs = record.fs
    # Unfinished: a bare `raise` with no active exception raises RuntimeError, so
    # execution stops here after the first record that has an MLII lead.
    raise