# setup

In [1]:
import os

# When the kernel starts inside `notebooks/`, step up one directory --
# presumably so the `data_loader` import and the relative `data/` output path
# used later resolve against the project root (TODO confirm layout).
# basename() honours the platform's path separator; splitting on '/' would
# never match on Windows, where getcwd() uses backslashes.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')

In [2]:
import numpy as np
import h5py

from tqdm import tqdm

In [3]:
from data_loader.util import load_label_files, load_challenge_data, resample, slide_and_cut_beat_aligned

# read

In [4]:
# Configuration constants: the weights file name, the SNOMED CT code of the
# normal class, and the pairs of SNOMED CT codes treated as equivalent.
normal_class = '426783006'
weights_file = 'weights.csv'
equivalent_classes = [
    ['713427006', '59118001'],
    ['284470004', '63593006'],
    ['427172004', '17338001'],
]

In [5]:
# Directory that is searched for label files. The default is the original
# author's local path; set the CHALLENGE2020_DATA_DIR environment variable to
# point somewhere else without editing the notebook.
input_directory_label = os.environ.get(
    'CHALLENGE2020_DATA_DIR', '/home/josegfer/datasets/challenge2020/data')
# label_dir is passed to load_challenge_data in the loading loop. It was the
# same directory as input_directory_label, so derive it instead of repeating
# the path.
label_dir = input_directory_label
# Find the label files.
print('Finding label and output files...')
label_files = load_label_files(input_directory_label)

Finding label and output files...


# loop

In [6]:
# Sizes used for the shapes of the HDF5 datasets created below.
# NOTE(review): presumably leads per recording, segments per recording and
# samples per beat -- confirm against slide_and_cut_beat_aligned.
n_lead, n_segment, beat_length = 12, 10, 400

# One dataset row per label file. For a quick dry run, temporarily override
# this with a small number (e.g. 5).
num_files = len(label_files)

In [7]:
# Output HDF5 file; mode 'w' creates it, truncating any existing file.
output_path = 'data/challenge2020.h5'
# Create the `data/` directory first so opening the file does not fail on a
# checkout where it does not exist yet.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
h5f = h5py.File(output_path, 'w')
# 'recording': one (n_lead, n_segment, beat_length) float64 block per label file.
X = h5f.create_dataset('recording', (num_files, n_lead, n_segment, beat_length), dtype='f8')
# 'ratio': one (1, n_segment) float64 block per label file, filled from the
# second return value of slide_and_cut_beat_aligned in the loading loop.
r = h5f.create_dataset('ratio', (num_files, 1, n_segment), dtype='f8')

In [8]:
# Load every recording, preprocess it and write it into row i of the HDF5
# datasets `X` ('recording') and `r` ('ratio').
for i in tqdm(range(num_files)):
    recording, header, name = load_challenge_data(label_files[i], label_dir)
    # Replace NaN samples with zeros (in place).
    recording[np.isnan(recording)] = 0

    # divide ADC_gain and resample
    # NOTE(review): 500 is presumably the target sampling rate in Hz -- confirm.
    recording = resample(recording, header, 500)

    # slide and cut
    # Pass the configured beat_length instead of a literal 400 so the segment
    # length always agrees with the last axis of `X`, which is sized from the
    # same variable.
    recording, info2save = slide_and_cut_beat_aligned(recording, 1, 5000, 500,
                                                      seg_with_r=False, beat_length=beat_length)
    # Swap axes 1 and 2 of the 4-D result before storing it.
    # NOTE(review): assumes this yields (1, n_lead, n_segment, beat_length) -- confirm.
    X[i, :, :, :] = np.transpose(recording, (0, 2, 1, 3))
    r[i, :, :] = info2save

100%|██████████| 42976/42976 [27:11<00:00, 26.34it/s]   


In [9]:
# Close the HDF5 file that was opened for writing earlier in the notebook.
h5f.close()