In [None]:
import os
import h5py
import socket
import numpy as np
import pandas as pd

from ml4cvd.arguments import _get_tmap
from ml4cvd.TensorMap import TensorMap
from ml4cvd.tensor_maps_ecg import TMAPS, build_ecg_time_series_tensor_maps

In [None]:
# Filename suffix (no leading dot) that identifies tensor files; get_fpaths()
# uses it as the default for its ``ext`` argument.
TENSOR_EXT = "hd5"

In [None]:
def get_fpaths(dirpath: str, ext: str = TENSOR_EXT) -> list:
    """Recursively collect paths of files under ``dirpath`` whose name ends with ``ext``.

    Args:
        dirpath: Root directory to walk; all subdirectories are searched too.
        ext: Filename suffix to match. Defaults to ``TENSOR_EXT``. The match is
            a plain ``str.endswith`` test, so the suffix may be given with or
            without a leading dot.

    Returns:
        A list of matching file paths, each built by joining the file name onto
        the directory it was found in, in ``os.walk`` order. The list is empty
        when nothing matches (``os.walk`` ignores listing errors by default, so
        a missing ``dirpath`` also yields an empty list).

    Side effects:
        Prints a one-line summary with the number of files found.
    """
    fpaths = [
        os.path.join(root, fname)
        for root, _, files in os.walk(dirpath)
        for fname in files
        # Filter on the caller-supplied suffix, not the module-level default.
        if fname.endswith(ext)
    ]
    print(f"Found {len(fpaths)} {ext} files at {dirpath}")
    return fpaths

In [None]:
def _get_path_to_ecgs() -> str:
    """Map this machine's hostname to the directory that holds its ECG data.

    Recognized hosts are exactly "mithril", exactly "anduril", or any hostname
    containing "stultzlab". For every other host the function returns None, so
    a caller that joins the result onto a sub-path will fail.
    """
    hostname = socket.gethostname()

    # Hosts that must match the full hostname.
    exact_host_paths = {
        "mithril": "/data/ecg",
        "anduril": "/media/4tb1/ecg",
    }
    if hostname in exact_host_paths:
        return exact_host_paths[hostname]

    # This host is matched by substring rather than by exact name.
    if "stultzlab" in hostname:
        return "/storage/shared/ecg_deidentified"

    # Unrecognized host: no path is known.
    return None

# Resolve the host-specific ECG root first so that an unrecognized host fails
# with an actionable message instead of an opaque TypeError raised by
# os.path.join(None, ...).
ecg_root = _get_path_to_ecgs()
if ecg_root is None:
    raise RuntimeError(
        f"No ECG data path is configured for host {socket.gethostname()!r}; "
        "add it to _get_path_to_ecgs()."
    )

# Index every tensor file under the "mgh" subdirectory of the ECG root.
fpaths = get_fpaths(dirpath=os.path.join(ecg_root, "mgh"))

In [None]:
# Single list of the tensor maps this notebook reads, shared by the build
# request and the sanity check so the two cannot drift apart (the check
# previously looked for "ecg_age_greater_than_newest", a name that is neither
# requested here nor used when reading the files).
NEEDED_TMAPS = ["ecg_age_newest", "ecg_age_binary_newest"]

TMAPS.update(build_ecg_time_series_tensor_maps(needed_tensor_maps=NEEDED_TMAPS))

# Report any requested tensor map that is still missing from TMAPS.
for tmap in NEEDED_TMAPS:
    if tmap not in TMAPS:
        print(f"{tmap} not in TMAPS")

In [None]:
# Spot-check the first five files: print each path, followed by whatever the
# two age tensor maps return for that file.
for fpath in fpaths[:5]:
    print(fpath)
    with h5py.File(fpath, "r") as hf:
        # Each tensor map is passed to its own tensor_from_file together with
        # the open HD5 handle.
        age_tmap = TMAPS['ecg_age_newest']
        ecg_age = age_tmap.tensor_from_file(tm=age_tmap, hd5=hf)
        print(ecg_age)

        age_binary_tmap = TMAPS['ecg_age_binary_newest']
        ecg_age_binary_newest = age_binary_tmap.tensor_from_file(
            tm=age_binary_tmap,
            hd5=hf,
        )
        print(ecg_age_binary_newest)

        # Blank separator between files.
        print('\n')