In [3]:
import numpy as np
import pandas as pd
import os
import json
import scipy.io as sio
import tqdm

In [4]:
TRAIN_DATA_PATH = "/home/jovyan/ecg/examples/cinc17/train.json"
DEV_DATA_PATH = "/home/jovyan/ecg/examples/cinc17/dev.json"

In [5]:
STEP = 256

In [6]:
def load_ecg(record):
    if os.path.splitext(record)[1] == '.npy':
        ecg = np.load(record)
    elif os.path.splitext(record)[1] == '.mat':
        ecg = sio.loadmat(record)['val'].squeeze()
    else:
        with open(record, 'r') as fid:
            ecg = np.fromfile(fid, dtype = np.int16)
    
    trunc_samp = STEP * len(ecg) // STEP    
    return ecg[:trunc_samp]

In [7]:
def load_dataset(data_json):
    with open(data_json, 'r') as fid:
        data = [json.loads(l) for l in fid]
    labels = []; ecgs = []
    
    for d in tqdm.tqdm(data):
        labels.append(d['labels'])
        ecgs.append(load_ecg(d['ecg']))
    
    return ecgs, labels

In [8]:
ecgs, labels = load_dataset(TRAIN_DATA_PATH)

100%|██████████| 7676/7676 [00:01<00:00, 4037.88it/s]


In [12]:
def compute_mean_std(x):
    x = np.hstack(x)
    return np.mean(x).astype(np.float32), np.std(x).astype(np.float32)

In [16]:
def get_data_info(ecg, labels):
    mean, std = compute_mean_std(ecg)
    classes = sorted(set(l for label in labels for l in label))
    int_to_class = dict(zip(range(len(classes)), classes))
    class_to_int = {c : i for i, c in int_to_class.items()}
    return mean, std, int_to_class, class_to_int

In [17]:
mean, std, int_to_class, class_to_int = get_data_info(ecgs, labels)

In [18]:
mean, std

(7.5123463, 237.11787)

In [19]:
len(int_to_class)

4

In [20]:
int_to_class

{0: 'A', 1: 'N', 2: 'O', 3: '~'}