In [1]:
import os
import tensorflow as tf

from heartkit.tasks import TaskFactory
from typing import Type, TypeVar
from argdantic import ArgField, ArgParser
from pydantic import BaseModel
from heartkit.utils import env_flag, set_random_seed, setup_logger

from heartkit.tasks.AFIB_Ident.utils import (
    create_model,
    load_datasets,
    load_test_datasets,
    load_train_datasets,
    prepare,
)

from heartkit.defines import (
    HKDemoParams
)
from heartkit.tasks.AFIB_Ident.defines import (
    get_class_mapping,
    get_class_names,
    get_class_shape,
    get_classes,
    get_feat_shape,
)

cli = ArgParser()
B = TypeVar("B", bound=BaseModel)


def parse_content(cls: Type[B], content: str) -> B:
    """Parse file or raw content into Pydantic model.

    Args:
        cls (B): Pydantic model subclasss
        content (str): File path or raw content

    Returns:
        B: Pydantic model subclass instance
    """
    if os.path.isfile(content):
        with open(content, "r", encoding="utf-8") as f:
            content = f.read()

    return cls.model_validate_json(json_data=content)


config = 'configs/arrhythmia-100class-2.json'
params = parse_content(HKDemoParams, config)


params.seed = set_random_seed(params.seed)
params.data_parallelism = 8

class_names = get_class_names(params.num_classes)
class_map = get_class_mapping(params.num_classes)
input_spec = (
    tf.TensorSpec(shape=get_feat_shape(params.frame_size), dtype=tf.float32),
    tf.TensorSpec(shape=get_class_shape(params.frame_size, params.num_classes), dtype=tf.int32),
)

datasets = load_datasets(
    ds_path=params.ds_path,
    frame_size=params.frame_size,
    sampling_rate=params.sampling_rate,
    class_map=class_map,
    spec=input_spec,
    datasets=params.datasets,
)

# this is where they get the test signal and the label
test_x, test_y = load_test_datasets(datasets=datasets, params=params)

2024-03-08 10:24:31.371638: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-08 10:24:31.375092: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-08 10:24:31.412030: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-08 10:24:31.412061: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-08 10:24:31.413306: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [18]:
patient_ids = datasets[0].get_test_patient_ids()
signal_label = next(datasets[0].signal_label_generator(datasets[0].uniform_patient_generator(patient_ids=patient_ids, repeat=False)))
x = signal_label[0]
x.shape[0]



400

In [53]:
import numpy as np
patient_ids = datasets[0].get_test_patient_ids()
pat_gen = datasets[0].uniform_patient_generator(patient_ids=patient_ids, repeat=False)
first_pat = next(pat_gen)

segment = first_pat[1][np.random.choice(list(first_pat[1].keys()))]
# rlabels are the rhythm type
segment["rlabels"]

<HDF5 dataset "rlabels": shape (170, 2), type "<i4">

In [55]:
rlabels = segment["rlabels"][:]
rlabels
xs, xe, xl = rlabels[0::2, 0], rlabels[1::2, 0], rlabels[0::2, 1]

In [57]:
class_map

{<HeartRhythm.normal: 0>: 0,
 <HeartRhythm.afib: 1>: 1,
 <HeartRhythm.aflut: 2>: 1}

In [58]:
tgt_labels = list(set(class_map.values()))

In [56]:
for tgt_idx, tgt_class in enumerate(tgt_labels):
    idxs = np.where((xe - xs >= input_size) & (xl == tgt_class))
    seg_vals = np.vstack((seg_idx * np.ones_like(idxs), xs[idxs], xe[idxs])).T
    pt_tgt_seg_map[tgt_idx] += seg_vals.tolist()

(array([     52,    3962,   18663,   34626,   57329,   71571,   76812,
          82715,   87918,  104960,  112756,  132606,  137982,  152348,
         202974,  213073,  218853,  228933,  232088,  249612,  258502,
         263270,  266398,  274437,  280667,  285314,  294555,  305669,
         320068,  322925,  338470,  347251,  364046,  447457,  466155,
         470569,  477145,  486670,  505660,  511791,  519046,  530079,
         539286,  543507,  554905,  577473,  600118,  623972,  631531,
         691403,  707933,  712483,  736607,  748321,  760132,  776557,
         782432,  788802,  800985,  814846,  822109,  825522,  830221,
         835957,  843167,  852011,  859901,  867537,  870036,  875985,
         883043,  890699,  895353,  915967,  930465,  954581,  966862,
         982178,  989543,  998418, 1001865, 1012333, 1018545, 1032639,
        1043838], dtype=int32),
 array([   2769,   16631,   31710,   56096,   69703,   75658,   81539,
          86725,  103837,  111611,  128868,  

In [8]:
patient_ids = ds.get_test_patient_ids()
# this is where the signal being extracted by the first patient_id, patient_data
signal_label = next(ds.signal_label_generator(ds.uniform_patient_generator(patient_ids=patient_ids, repeat=False)))

AttributeError: 'list' object has no attribute 'get_test_patient_ids'

In [7]:
test_y
y_prob = tf.nn.softmax(model.predict(test_x)).numpy()
y_pred = np.argmax(y_prob, axis=-1)
test_acc = np.sum(y_pred == y_true) / len(y_true)

array([[1, 0],
       [1, 0],
       [1, 0],
       ...,
       [1, 0],
       [0, 1],
       [0, 1]], dtype=int32)

In [4]:
import numpy as np

y_true = np.argmax(test_y, axis=-1)
y_true

array([0, 0, 0, ..., 0, 1, 1])

In [3]:
test_x

array([[[ 1.3694090e-01],
        [ 6.9739319e-02],
        [-1.0512085e-01],
        ...,
        [-1.0516641e+00],
        [-7.4819469e-01],
        [-1.3595276e-03]],

       [[-6.5554589e-02],
        [-1.2801187e+00],
        [-2.4050586e+00],
        ...,
        [-2.2867651e-01],
        [-2.4683766e-02],
        [ 1.1924816e-01]],

       [[-1.1958856e-01],
        [ 1.0844608e-01],
        [ 1.4857377e-01],
        ...,
        [ 3.3686897e-01],
        [ 2.2104344e-01],
        [ 5.0419647e-02]],

       ...,

       [[ 2.3302076e-02],
        [ 4.0059674e-01],
        [ 6.0084641e-01],
        ...,
        [ 1.9712287e-01],
        [ 9.5048994e-02],
        [-6.3366622e-02]],

       [[ 1.1873671e-03],
        [-8.3239913e-02],
        [-1.9081929e-01],
        ...,
        [-3.5842064e-01],
        [-4.3141744e-01],
        [-2.1297677e-01]],

       [[ 1.3475962e-01],
        [-1.2744451e+00],
        [-1.5975510e+00],
        ...,
        [-4.0347967e+00],
        [-3.768

In [2]:
test_y

array([[1, 0],
       [1, 0],
       [1, 0],
       ...,
       [1, 0],
       [0, 1],
       [0, 1]], dtype=int32)

## We need a method to connect back the rlabels to the demo data

In [1]:
import os

from heartkit.tasks import TaskFactory
from typing import Type, TypeVar
from argdantic import ArgField, ArgParser
from pydantic import BaseModel

from heartkit.defines import (
    HKDemoParams,
    HKDownloadParams,
    HKExportParams,
    HKMode,
    HKTestParams,
    HKTrainParams,
)


cli = ArgParser()
B = TypeVar("B", bound=BaseModel)


def parse_content(cls: Type[B], content: str) -> B:
    """Parse file or raw content into Pydantic model.

    Args:
        cls (B): Pydantic model subclasss
        content (str): File path or raw content

    Returns:
        B: Pydantic model subclass instance
    """
    if os.path.isfile(content):
        with open(content, "r", encoding="utf-8") as f:
            content = f.read()

    return cls.model_validate_json(json_data=content)

task="AFIB_Ident"
task_handler = TaskFactory.get(task)

# In demo we will cover 5 regions at a time, frame_size*5
config = 'configs/arrhythmia-test.json'


2024-03-14 12:00:24.433413: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-14 12:00:24.436969: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-14 12:00:24.478222: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-14 12:00:24.478285: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-14 12:00:24.479703: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
task_handler.evaluate(parse_content(HKTestParams, config))

Instructions for updating:
This API was designed for TensorFlow v1. See https://www.tensorflow.org/guide/migrate for instructions on how to migrate your code to TensorFlow v2.




## Get all the mis-predicted results from the model prediction

In [2]:
task_handler.export(parse_content(HKExportParams, config))

Instructions for updating:
This API was designed for TensorFlow v1. See https://www.tensorflow.org/guide/migrate for instructions on how to migrate your code to TensorFlow v2.


INFO:tensorflow:Assets written to: /tmp/tmpucadpn8k/assets


2024-03-14 12:01:40.732215: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-03-14 12:01:40.732257: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-03-14 12:01:40.733058: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpucadpn8k
2024-03-14 12:01:40.742531: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-03-14 12:01:40.742555: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmpucadpn8k
2024-03-14 12:01:40.764211: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-03-14 12:01:40.772540: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-03-14 12:01:41.059287: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmpucadpn8k
2024-03

INFO:tensorflow:Assets written to: /tmp/tmpcvkvf6og/assets


2024-03-14 12:03:13.365300: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-03-14 12:03:13.365352: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-03-14 12:03:13.365569: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpcvkvf6og
2024-03-14 12:03:13.380799: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-03-14 12:03:13.380828: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmpcvkvf6og
2024-03-14 12:03:13.412159: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-03-14 12:03:13.712895: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmpcvkvf6og
2024-03-14 12:03:13.800888: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status: success: OK. Took 435319 



ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 2, the array at index 0 has size 400 and the array at index 1 has size 1

In [22]:
import pandas as pd
import numpy as np


# those non-labeled peaks are not documented here
overall_stats = pd.read_parquet('statistics/Icentia11k_global_stats.parquet')

overall_stats['pt']

0           9517
1           9517
2           9517
3           9517
4           9517
            ... 
17236062    8490
17236063    8490
17236064    8490
17236065    8490
17236066    8490
Name: pt, Length: 17236067, dtype: int64

In [23]:
np.bincount(overall_stats['pt'])

array([ 875, 2189, 2338, ..., 1413, 1936, 2032])

In [23]:
np.bincount(overall_stats['rhythm'])

array([       0, 16074753,   848238,   313076])

In [8]:
import numpy as np

mislabeled_data = np.load('results/arrhythmia-test/mislabeled_data.npy')

predict_labels = np.load('results/arrhythmia-test/predicted_labels.npy')

true_labels = np.load('results/arrhythmia-test/true_labels.npy')

In [11]:
len(mislabeled_data) == len(predict_labels) == len(true_labels)

True

## Longer window prediction evaluation

In [2]:
import os

from heartkit.tasks import TaskFactory
from typing import Type, TypeVar
from argdantic import ArgField, ArgParser
from pydantic import BaseModel

from heartkit.defines import (
    HKDemoParams
)


cli = ArgParser()
B = TypeVar("B", bound=BaseModel)


def parse_content(cls: Type[B], content: str) -> B:
    """Parse file or raw content into Pydantic model.

    Args:
        cls (B): Pydantic model subclasss
        content (str): File path or raw content

    Returns:
        B: Pydantic model subclass instance
    """
    if os.path.isfile(content):
        with open(content, "r", encoding="utf-8") as f:
            content = f.read()

    return cls.model_validate_json(json_data=content)

task="AFIB_Ident"
task_handler = TaskFactory.get(task)

# In demo we will cover 5 regions at a time, frame_size*5
config = 'configs/arrhythmia-100class-2.json'


In [8]:
import tensorflow as tf

from heartkit.rpc.backends import EvbBackend, PcBackend
from heartkit.defines import HeartRhythm
from enum import IntEnum
from heartkit.defines import (
    HKDemoParams,
    HKDownloadParams,
    HKExportParams,
    HKMode,
    HKTestParams,
    HKTrainParams,
)


from heartkit.tasks.AFIB_Ident.defines import (
    get_class_mapping,
    get_class_names,
    get_class_shape,
    get_classes,
    get_feat_shape,
)

from heartkit.tasks.AFIB_Ident.utils import (
    create_model,
    load_datasets,
    load_test_datasets,
    load_train_datasets,
    prepare,
)


bg_color = "rgba(38,42,50,1.0)"
primary_color = "#11acd5"
plotly_template = "plotly_dark"

color_dict = {
    -1: "#505050",  # Grey color for -1
    0: "#11acd5",  # Blue color for 0
    1: "#ce6cff",  # Purple color for 1
    2: "#a1d34f"   # Green color for 2
}

params=parse_content(HKTrainParams, config)

# Load backend inference engine
BackendRunner = EvbBackend if params.backend == "evb" else PcBackend
runner = BackendRunner(params=params)

# Load data
class_names = get_class_names(params.num_classes)
class_map = get_class_mapping(params.num_classes)
input_spec = (
    tf.TensorSpec(shape=get_feat_shape(params.frame_size), dtype=tf.float32),
    tf.TensorSpec(shape=get_class_shape(params.frame_size, params.num_classes), dtype=tf.int32),
)

class IcentiaRhythm(IntEnum):
    """Icentia rhythm labels"""

    noise = 0
    normal = 1
    afib = 2
    aflut = 3
    end = 4

HeartRhythmMap = {
    IcentiaRhythm.noise: HeartRhythm.noise,
    IcentiaRhythm.normal: HeartRhythm.normal,
    IcentiaRhythm.afib: HeartRhythm.afib,
    IcentiaRhythm.aflut: HeartRhythm.aflut,
    IcentiaRhythm.end: HeartRhythm.noise,
}

tgt_map = {k: class_map.get(v, -1) for (k, v) in HeartRhythmMap.items()}
row_idx = 0
n_hour=1
hour_frame = 15 * 60 #one minute * 60

datasets = load_datasets(
    ds_path=params.ds_path,
    frame_size=params.frame_size,
    sampling_rate=params.sampling_rate,
    class_map=class_map,
    spec=input_spec,
    datasets=params.datasets,
)
test_x, test_y = load_test_datasets(datasets=datasets, params=params)
y_true = np.argmax(test_y, axis=-1)

In [49]:
# 0-9999 for train, 10000-10999 for test
print(f"{datasets[0].get_train_patient_ids()}, {datasets[0].get_test_patient_ids()}")

[   0    1    2 ... 9997 9998 9999], [10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011
 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023
 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035
 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047
 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059
 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071
 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083
 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095
 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107
 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119
 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131
 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143
 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155
 10156 10157 1

In [None]:
datasets[0].load_test_dataset(
                test_pt_samples=params.test_samples_per_patient,
                # preprocess=preprocess,
                num_workers=params.data_parallelism,
            )

In [38]:
ds = datasets[0].load_test_dataset(
                test_pt_samples=params.test_samples_per_patient,
                # preprocess=preprocess,
                num_workers=params.data_parallelism,
            )

test = ds.batch(params.test_size)

In [22]:
from collections import Counter

y_true = np.argmax(test_y, axis=-1)
# Flatten the array
flat_arr = y_true.flatten()

# Count the frequency of each value
value_counts = Counter(flat_arr)

# Print the frequency of each value
for value, count in value_counts.items():
    print(f"Value: {value}, Count: {count}")

Value: 0, Count: 10636
Value: 1, Count: 9364


In [6]:
import numpy as np

train_ds, val_ds = load_train_datasets(datasets=datasets, params=params)

test_labels = [label.numpy() for _, label in val_ds]
y_true = np.argmax(np.concatenate(test_labels), axis=-1)

In [7]:
len(train_ds)

TypeError: The dataset length is unknown.

In [47]:
[label.numpy() for _, label in train_ds]

2024-03-14 17:14:40.760584: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:396: Filling up shuffle buffer (this may take a while): 45 of 100000
2024-03-14 17:15:01.089680: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:396: Filling up shuffle buffer (this may take a while): 3043 of 100000
2024-03-14 17:15:11.745199: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:396: Filling up shuffle buffer (this may take a while): 4682 of 100000
2024-03-14 17:15:30.319373: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:396: Filling up shuffle buffer (this may take a while): 7860 of 100000
2024-03-14 17:15:40.396381: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:396: Filling up shuffle buffer (this may take a while): 9588 of 100000
2024-03-14 17:16:00.333193: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:396: Filling up shuffle buffe

In [29]:
patient_ids = ds.get_test_patient_ids()
patient_ids[-1]

10999

In [18]:
patient_ids

array([10000, 10001, 10002, 10003, 10004, 10005, 10006, 10007, 10008,
       10009, 10010, 10011, 10012, 10013, 10014, 10015, 10016, 10017,
       10018, 10019, 10020, 10021, 10022, 10023, 10024, 10025, 10026,
       10027, 10028, 10029, 10030, 10031, 10032, 10033, 10034, 10035,
       10036, 10037, 10038, 10039, 10040, 10041, 10042, 10043, 10044,
       10045, 10046, 10047, 10048, 10049, 10050, 10051, 10052, 10053,
       10054, 10055, 10056, 10057, 10058, 10059, 10060, 10061, 10062,
       10063, 10064, 10065, 10066, 10067, 10068, 10069, 10070, 10071,
       10072, 10073, 10074, 10075, 10076, 10077, 10078, 10079, 10080,
       10081, 10082, 10083, 10084, 10085, 10086, 10087, 10088, 10089,
       10090, 10091, 10092, 10093, 10094, 10095, 10096, 10097, 10098,
       10099, 10100, 10101, 10102, 10103, 10104, 10105, 10106, 10107,
       10108, 10109, 10110, 10111, 10112, 10113, 10114, 10115, 10116,
       10117, 10118, 10119, 10120, 10121, 10122, 10123, 10124, 10125,
       10126, 10127,