# Rpeak

This example demonstrates how to create an R-peak dataset. It is available on [GitHub](https://github.com/devnums/pyheartlib/blob/main/examples/).

<a target="_blank" href="https://colab.research.google.com/github/devnums/pyheartlib/blob/main/examples/dataset/rpeak.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install pyheartlib

In [None]:
try:
    import pyheartlib
    print('pyheartlib is already installed')
except ModuleNotFoundError:
    print('Installing pyheartlib...')
    !pip install pyheartlib

pyheartlib is already installed


### Download raw data

In [None]:
# Download the raw data and the pyheartlib config file into the "data"
# directory. Everything is skipped when that directory already exists.
import io
import zipfile
from pathlib import Path
from urllib.request import urlopen

if not Path('data').is_dir():
    print('downloading raw data...')
    url = 'https://www.physionet.org/static/published-projects/mitdb/mit-bih-arrhythmia-database-1.0.0.zip'
    # The archive is read fully into memory and then unpacked. The context
    # managers close both the HTTP response and the ZipFile handle.
    with urlopen(url) as rs:
        with zipfile.ZipFile(io.BytesIO(rs.read())) as zipf:
            zipf.extractall('data/')

    # create the config file, for this example just download it from the original repository
    config_url = "https://raw.githubusercontent.com/devnums/pyheartlib/main/src/pyheartlib/config.yaml"
    with urlopen(config_url) as response:
        content = response.read().decode()
    # Write with an explicit UTF-8 encoding to match the decode() above, so the
    # file does not depend on the platform's default locale encoding.
    Path("data/config.yaml").write_text(content, encoding="utf-8")

### Create dataset

In [None]:
import numpy as np
import pandas as pd
from pyheartlib.io import load_data
from pyheartlib.data_rpeak import RpeakData, ECGSequence

# Records used to build the dataset.
train_set = [201, 203]

# Generate the samples for the selected records and save them as
# "train.rpeak" (remove_bl, lowpass and the progress bar are switched off).
rpeak_data = RpeakData(
    base_path="data",
    remove_bl=False,
    lowpass=False,
    progress_bar=False,
)
annotated_records, samples_info = rpeak_data.save_samples(
    rec_list=train_set,
    file_name="train.rpeak",
    win_size=30 * 360,
    stride=360,
)

# Read the saved file back; this replaces the values returned by save_samples.
annotated_records, samples_info = load_data("data/train.rpeak")

# Tally the labels: element 3 of every entry in samples_info is collected as
# that sample's label data, then the unique values are counted.
labels = [sample[3] for sample in samples_info]
label_counts = np.unique(labels, return_counts=True)
df = pd.DataFrame(label_counts, index=["Label", "Count"])
print(df)

File saved at: data/train.rpeak
File loaded from: data/train.rpeak
            0    1   2   3       4    5      6     7    8
Label       0    A   F   J       N    Q      V     a    j
Count  919749  900  90  30  122358  120  19163  2923  267


In [None]:
# if raw=True
trainseq = ECGSequence(
    annotated_records, samples_info, binary=False, batch_size=3, raw=True, interval=72
)
bt = 0  # batch number
# Index the sequence once and take both parts from the same result, instead of
# indexing it separately for x and y (which presumably builds the batch twice).
batch = trainseq[bt]
batch_x = batch[0]  # excerpt values
batch_y = batch[1]  # excerpt label
print("batch_x shape:", batch_x.shape, ", batch_y shape:", batch_y.shape)

batch_x shape: (3, 10800) , batch_y shape: (3, 150)


In [None]:
# if raw=False
trainseq = ECGSequence(
    annotated_records, samples_info, binary=False, batch_size=3, raw=False, interval=72
)
bt = 0  # batch number
# Index the sequence once and take both parts from the same result, instead of
# indexing it separately for x and y (which presumably builds the batch twice).
batch = trainseq[bt]
batch_x = batch[0]  # excerpt values
batch_y = batch[1]  # excerpt label
print("batch_x shape:", batch_x.shape, ", batch_y shape:", batch_y.shape)


batch_x shape: (3, 150, 14) , batch_y shape: (3, 150)
