# Arrhythmia

This example demonstrates how to create a dataset for arrhythmia classification. It is available on [GitHub](https://github.com/devnums/pyheartlib/blob/main/examples/).

<a target="_blank" href="https://colab.research.google.com/github/devnums/pyheartlib/blob/main/examples/dataset/arrhythmia.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a> 

### Install pyheartlib

In [None]:
try:
    import pyheartlib
    print('pyheartlib is already installed')
except ModuleNotFoundError:
    print('Installing pyheartlib...')
    %pip install pyheartlib

pyheartlib is already installed


### Download raw data

In [None]:
# download the raw data and store them in the data directory
import io
import zipfile
from pathlib import Path
from urllib.request import urlopen

data_dir = Path('data')
config_path = data_dir / 'config.yaml'

# The archive is only fetched when the data directory does not exist yet,
# so rerunning the cell does not download it again.
if not data_dir.is_dir():
    print('downloading raw data...')
    url = 'https://www.physionet.org/static/published-projects/mitdb/mit-bih-arrhythmia-database-1.0.0.zip'
    with urlopen(url) as rs:
        # The response is buffered in memory because ZipFile needs a seekable
        # file object; the context manager closes the archive after extraction.
        with zipfile.ZipFile(io.BytesIO(rs.read())) as zipf:
            zipf.extractall(data_dir)

# create the config file, for this example just download it from the original repository
# This is checked independently of the data directory so that a rerun after a
# failed config download still fetches the file.
if not config_path.is_file():
    with urlopen("https://raw.githubusercontent.com/devnums/pyheartlib/main/src/pyheartlib/config.yaml") as response:
        content = response.read().decode()
    # Write with the same encoding used for decoding (UTF-8) instead of the
    # platform default.
    with open(config_path, 'w', encoding='utf-8') as config_file:
        config_file.write(content)

### Create dataset

In [None]:
import numpy as np
import pandas as pd
from pyheartlib.io import load_data
from pyheartlib.data_arrhythmia import ArrhythmiaData, ECGSequence

# Record identifiers used to build the training set.
train_set = [201, 203]

arrhythmia_data = ArrhythmiaData(
    base_path="data",
    remove_bl=False,
    lowpass=False,
    progress_bar=False,
)

# Generate the samples and save them to a file ...
annotated_records, samples_info = arrhythmia_data.save_samples(
    rec_list=train_set,
    file_name="train.arr",
    win_size=3600,
    stride=64,
)
# ... then load the saved file back, which is how a later session would
# obtain the same data.
annotated_records, samples_info = load_data("data/train.arr")

# The fourth field of every sample entry is used as its label.
labels = [sample[3] for sample in samples_info]

# Summarise how many samples each label has.
unique_labels, label_counts = np.unique(labels, return_counts=True)
df = pd.DataFrame((unique_labels, label_counts), index=["Label", "Count"])
print(df)
class_labels = list(unique_labels)


File saved at: data/train.arr
File loaded from: data/train.arr
           0     1     2     3
Label  (AFIB  (AFL    (N    (T
Count  10330  1656  3594  1584


In [None]:
# if raw=True
trainseq = ECGSequence(
    annotated_records, samples_info, class_labels=None, batch_size=3, raw=True
)
bt = 0  # batch number
# Indexing trainseq[bt] invokes ECGSequence.__getitem__ every time, so the
# batch is fetched once and reused: the label and the inputs then come from
# the same lookup and the batch is not rebuilt for each field.
batch = trainseq[bt]
batch_inputs = batch[0]
batch_label = batch[1]  # excerpt label
batch_seq = batch_inputs[0]  # excerpt values
batch_rri = batch_inputs[1]  # rr intervals
batch_rri_feat = batch_inputs[2]  # calculated rri features
print("batch_label shape:", batch_label.shape)
print(
    "batch_seq shape:",
    batch_seq.shape,
    " , batch_rri shape:",
    batch_rri.shape,
    " , batch_rri_feat shape:",
    batch_rri_feat.shape,
)

batch_label shape: (3,)
batch_seq shape: (3, 3600)  , batch_rri shape: (3, 150)  , batch_rri_feat shape: (3, 9)


In [None]:
# if raw=False
trainseq = ECGSequence(
    annotated_records, samples_info, class_labels=None, batch_size=3, raw=False
)
bt = 0  # batch number
# Indexing trainseq[bt] invokes ECGSequence.__getitem__ every time, so the
# batch is fetched once and reused: the label and the inputs then come from
# the same lookup and the batch is not rebuilt for each field.
batch = trainseq[bt]
batch_inputs = batch[0]
batch_label = batch[1]  # excerpt label
batch_seq = batch_inputs[0]  # excerpt values
batch_rri = batch_inputs[1]  # rr intervals
batch_rri_feat = batch_inputs[2]  # calculated rri features
print("batch_label shape:", batch_label.shape)
print(
    "batch_seq shape:",
    batch_seq.shape,
    " , batch_rri shape:",
    batch_rri.shape,
    " , batch_rri_feat shape:",
    batch_rri_feat.shape,
)

batch_label shape: (3,)
batch_seq shape: (3, 100, 14)  , batch_rri shape: (3, 150)  , batch_rri_feat shape: (3, 9)
