# Load a dataset and visualize its cuts for a visual check.

Set `START_INDEX` to the row at which you would like to start.

In [8]:
import functools
import os
from pathlib import Path

import numpy as np
import scipy
from cci.utils import project_dir
import polars as pl
import rerun as rr
from rerun.blueprint import (
    Blueprint,
    BlueprintPanel,
    SelectionPanel,
    TimePanel,
    TimeSeriesView,
    Viewport,
)

# NOTE: Point the OOCHA_DIR environment variable at the folder containing the .mat files.
# The lookup raises KeyError when the variable is not set.
OOCHA_DIR = Path(os.environ["OOCHA_DIR"])

# Folder holding the CSV files read by this notebook (dfb.csv, override_df.csv).
DATASET_FOLDER = project_dir() / "data"
# Line colour per episode label; each value is a three-integer list that is passed
# as `color=` to rr.SeriesLine (presumably RGB in 0-255 -- confirm against rerun).
# Some labels share a colour: "hvf"/"cun", "Dvf"/"dfb" and "hpe"/"HPR".
COLOR = {
    "HAS": [82, 239, 153],
    "HVT": [17, 94, 65],
    "HVF": [148, 210, 207],
    "HPE": [44, 69, 125],
    "HPR": [209, 121, 248],
    "DAS": [153, 28, 100],
    "DVF": [172, 130, 180],
    "DVT": [102, 51, 180],
    "DPE": [238, 200, 241],
    "DPR": [41, 140, 192],
    "CAS": [183, 209, 101],
    "CVT": [58, 166, 9],
    "CVF": [44, 245, 43],
    "CPE": [93, 64, 48],
    "CPR": [254, 183, 134],
    "cas": [177, 75, 50],
    "cvt": [251, 45, 76],
    "cvf": [251, 189, 19],
    "cpe": [116, 141, 19],
    "cpr": [37, 128, 254],
    "hun": [246, 18, 168],
    "cun": [194, 24, 241],
    "dfb": [160, 127, 61],
    "hvf": [194, 24, 241],
    "Dvf": [160, 127, 61],
    "hpe": [209, 121, 248],
}


@functools.lru_cache(maxsize=4)
def get_signal(signal_path: Path):
    """Return the ``SIGNALS.ecg_diff`` array stored in a .mat file as float32.

    Results are memoised per path (the four most recently used files are kept),
    so repeated calls with the same path return the same array object.

    Args:
        signal_path: Location of the .mat file to read.

    Returns:
        The ``ecg_diff`` entry of the ``SIGNALS`` struct, cast to ``np.float32``.
    """
    mat_contents = scipy.io.loadmat(signal_path, simplify_cells=True)
    ecg_diff = mat_contents["SIGNALS"]["ecg_diff"]
    return ecg_diff.astype(np.float32)


# Viewer layout: a single time-series view named "ECG" in the viewport; the
# blueprint panel starts expanded, the selection and time panels start collapsed.
ecg_view = TimeSeriesView(name="ECG")
blueprint = Blueprint(
    Viewport(ecg_view),
    BlueprintPanel(expanded=True),
    SelectionPanel(expanded=False),
    TimePanel(expanded=False),
)

Load dataset as an iterator

In [9]:
# Number of leading rows of dfb.csv to skip before the visual check begins.
START_INDEX = 419

# Columns pulled for every sample, in the order the visualisation cell unpacks them.
SAMPLE_COLUMNS = [
    "index",
    "file",
    "EPI",
    "epi1",
    "epi2",
    "epi3",
    "Start",
    "start1",
    "start2",
    "start3",
    "Stop",
    "stop1",
    "stop2",
    "stop3",
]

dfb_df = pl.read_csv(DATASET_FOLDER / "dfb.csv")

# `.rows()` already materialises a list of tuples, so the first START_INDEX rows
# are skipped by slicing rather than by calling next() START_INDEX times and
# throwing the collected rows away. A START_INDEX past the end simply yields an
# exhausted iterator.
iterator = iter(dfb_df.select(SAMPLE_COLUMNS).rows()[START_INDEX:])

Run this cell repeatedly; each run visualizes the next sample.

In [16]:
# Take the next dataset row and split it into its id, file name, and the four
# (label, start, stop) triples, then load that file's signal.
(
    index,
    file,
    epi,
    epi1,
    epi2,
    epi3,
    start,
    start1,
    start2,
    start3,
    stop,
    stop1,
    stop2,
    stop3,
) = next(iterator)
signal = get_signal(OOCHA_DIR / f"{file}.mat")


def clamp(v, minimum=-4, maximum=4):
    """Limit ``v`` to the closed range [minimum, maximum].

    Args:
        v: Value to limit.
        minimum: Lower bound returned when ``v`` is not above it.
        maximum: Upper bound returned when ``v`` exceeds it.

    Returns:
        ``v`` itself when it lies strictly above ``minimum`` and not above
        ``maximum``; otherwise the bound it crossed.
    """
    # Apply the upper bound first, then the lower bound.
    upper_bounded = maximum if maximum < v else v
    return upper_bounded if upper_bounded > minimum else minimum


# Cap the last segment at 9000 samples past its start. The bound used to be
# passed positionally to clamp(), where it landed on `minimum` (with maximum=4)
# and forced stop3 to start3 + 9000 even when the segment ended earlier.
stop3 = min(stop3, start3 + 9000)

rr.init(f"{index}_{file}", spawn=True, blueprint=blueprint)

# All segments are laid out back to back on one "step" timeline, one sample
# every 1/500 s. The time is derived from an integer sample counter so that
# floating-point error does not accumulate over thousands of additions.
sample_count = 0
segments = zip(
    [epi, epi1, epi2, epi3],
    [start, start1, start2, start3],
    [stop, stop1, stop2, stop3],
)
# Distinct loop names keep the row values (epi, start, stop) from being overwritten.
for i, (seg_epi, seg_start, seg_stop) in enumerate(segments):
    lname = f"{seg_epi}_{i}"
    rr.log(lname, rr.SeriesLine(color=COLOR[seg_epi], name=seg_epi), timeless=True)
    for v in signal[seg_start:seg_stop]:
        rr.set_time_seconds("step", sample_count / 500)
        rr.log(
            lname,
            rr.Scalar(clamp(v)),
        )
        sample_count += 1

Found existing process on port 9876. Trying to connect.


In [4]:
# Every ordered (label, next label) pair of distinct labels among AS, VT, VF, PE
# and PR. Not referenced by any other cell visible in this notebook -- presumably a
# hand-maintained checklist of transitions already reviewed (TODO confirm).
completed_epis = [
    # AS
    ("AS", "VT"),
    ("AS", "VF"),
    ("AS", "PE"),
    ("AS", "PR"),
    # VT
    ("VT", "AS"),
    ("VT", "VF"),
    ("VT", "PE"),
    ("VT", "PR"),
    # VF
    ("VF", "AS"),
    ("VF", "VT"),
    ("VF", "PE"),
    ("VF", "PR"),
    # PE
    ("PE", "AS"),
    ("PE", "VT"),
    ("PE", "VF"),
    ("PE", "PR"),
    # PR
    ("PR", "AS"),
    ("PR", "VT"),
    ("PR", "VF"),
    ("PR", "PE"),
]

# Transition under inspection: episodes labelled EPI that are followed by EPI_NEXT.
# Both values are also read as globals by the cut-loading/plotting code below.
EPI = "PR"
EPI_NEXT = "PE"

# Keep only rows whose first episode is "H<EPI>" and whose next episode is "H<EPI_NEXT>".
df = pl.read_csv(DATASET_FOLDER / "override_df.csv").filter(
    pl.col("EPI") == f"H{EPI}", pl.col("epi1") == f"H{EPI_NEXT}"
)

# One DataFrame per distinct `file` value.
# NOTE: this rebinds `iterator`, replacing the row iterator built in the
# dataset-loading cell; re-run that cell before using the 14-column
# "visualize next sample" cell again.
iterator = iter(df.partition_by(by="file"))


def get_segments(cuts: str, epi=None, epi_next=None):
    """Load one cuts file and return its segments as a polars DataFrame.

    Reads ``../Database/{cuts}_cuts/cuts_{epi}_{epi_next}.mat`` (relative to the
    current working directory) and collects one row per entry of its
    ``segment`` field.

    Args:
        cuts: Name of the cut set, used in the folder name. ``"Original"``
            selects the ``transition_time`` field; any other value selects
            ``new_transition_time``.
        epi: Label of the first episode; defaults to the global ``EPI``.
        epi_next: Label of the following episode; defaults to the global
            ``EPI_NEXT``.

    Returns:
        A DataFrame with columns ``reg`` (1-based position of the segment in
        the file), ``ecg``, ``file``, ``start``, ``stop`` and ``transition``.

    Raises:
        FileNotFoundError: If the cuts file does not exist.
    """
    epi = EPI if epi is None else epi
    epi_next = EPI_NEXT if epi_next is None else epi_next
    cuts_name = f"cuts_{epi}_{epi_next}"

    segments = scipy.io.loadmat(
        f"../Database/{cuts}_cuts/{cuts_name}.mat",
        simplify_cells=True,
    )[cuts_name]["segment"]
    # simplify_cells collapses a one-element struct array into a single dict;
    # wrap it so the loop below iterates over segments, not over dict keys.
    if isinstance(segments, dict):
        segments = [segments]

    # The field holding the transition depends only on the cut set, not on the segment.
    transition_key = "transition_time" if cuts == "Original" else "new_transition_time"

    collected = [
        {
            "reg": reg_num,
            "ecg": segment["ECG"],
            "file": segment["info"]["episode_name"],
            "start": segment["info"]["start"],
            "stop": segment["info"]["stop"],
            "transition": segment[transition_key],
        }
        for reg_num, segment in enumerate(segments, start=1)
    ]
    return pl.DataFrame(collected)


# org = get_segments("Original")
revised = get_segments("Revised")

# A notebook cell renders only its last expression, so a bare `df` line placed
# before `revised` is evaluated and discarded. Show both frames explicitly.
display(df, revised)

FileNotFoundError: [Errno 2] No such file or directory: '../Database/Revised_cuts/cuts_PR_PE.mat'

In [5]:
def clamp(v, minimum=-3, maximum=3):
    """Limit ``v`` to the closed range [minimum, maximum].

    Args:
        v: Value to limit.
        minimum: Lower bound returned when ``v`` is not above it.
        maximum: Upper bound returned when ``v`` exceeds it.

    Returns:
        ``v`` itself when it lies strictly above ``minimum`` and not above
        ``maximum``; otherwise the bound it crossed.
    """
    capped = maximum if maximum < v else v
    if capped > minimum:
        return capped
    return minimum


# start = clamp(start, minimum=stop - 9 * 500, maximum=start)
# stop1 = clamp(stop1, minimum=0, maximum=start1 + 9 * 500)


def view_signal(index, file, epi, epi1, start, start1, stop, stop1):
    """Log two consecutive episodes of one dataset row to rerun.

    The signal for ``file`` is loaded through ``get_signal`` and each episode is
    logged as its own series under ``/{start}/{index}/``. Every sample is
    clamped and placed on the "step" sequence timeline at its own sample
    position in the signal.

    Args:
        index: Row identifier, used in the entity path.
        file: File name without extension, resolved inside ``OOCHA_DIR``.
        epi, epi1: Labels of the first and second episode (keys of ``COLOR``).
        start, start1: First sample of each episode.
        stop, stop1: End (exclusive) of each episode.
    """
    entity_root = f"{start}/{index}"
    ecg_signal = get_signal(OOCHA_DIR / f"{file}.mat")

    episodes = ((epi, start, stop), (epi1, start1, stop1))
    for position, (label, first, last) in enumerate(episodes):
        entity_path = f"/{entity_root}/{label}_{position}"
        rr.log(entity_path, rr.SeriesLine(color=COLOR[label], name=label), timeless=True)
        # The timeline position of a sample is its index in the full signal.
        for step, sample in enumerate(ecg_signal[first:last], start=first):
            rr.set_time_sequence("step", step)
            rr.log(entity_path, rr.Scalar(clamp(sample)))


def view_alonso(reg, transition, ecg, start, name):
    """Log one pre-cut segment to rerun, split at its transition.

    The segment is drawn as two series, ``H{EPI}`` up to and including the
    transition sample and ``H{EPI_NEXT}`` from the transition sample onwards,
    under ``/{int(start * 500)}/{name}_{reg}/``.

    Timeline positions are computed from each sample's index in ``ecg``
    (``int(start * 500) + 2 * sample_index``). Both series contain the
    transition sample, so deriving the position from the index keeps that
    sample at the same step in both; a running counter would draw the second
    series one sample late.

    Args:
        reg: Segment number, used in the entity path.
        transition: Transition position; multiplied by 250 to index ``ecg``
            (presumably seconds at a 250 Hz ECG rate -- TODO confirm).
        ecg: Samples of the segment.
        start: Segment start; multiplied by 500 to place it on the "step"
            timeline (presumably seconds on a 500 Hz timeline -- TODO confirm).
        name: Tag for the cut set, used in the entity path.
    """
    base_tick = int(start * 500)
    transition_time = int(transition * 250)
    entity_root = f"{base_tick}/{name}_{reg}"

    segments = (
        (f"H{EPI}", 0, transition_time + 1),
        (f"H{EPI_NEXT}", transition_time, len(ecg)),
    )
    for i, (epi, seg_start, seg_stop) in enumerate(segments):
        lname = f"/{entity_root}/{epi}_{i}"
        rr.log(lname, rr.SeriesLine(color=COLOR[epi], name=epi), timeless=True)
        for sample_index, v in enumerate(ecg[seg_start:seg_stop], start=seg_start):
            # Each ecg sample advances the timeline by two steps.
            rr.set_time_sequence("step", base_tick + 2 * sample_index)
            rr.log(
                lname,
                rr.Scalar(clamp(v)),
            )

In [6]:
# Advance to the next file's group of rows; re-running this cell moves on to the
# following file.
df = next(iterator)
file = df.select("file").unique(keep="first").item()
rr.init(f"{EPI}_{EPI_NEXT}_{file}", spawn=True)

# Our own cuts for this file.
cut_columns = ["index", "EPI", "epi1", "Start", "start1", "Stop", "stop1"]
for index, epi, epi1, start, start1, stop, stop1 in df.select(cut_columns).rows():
    view_signal(index, file, epi, epi1, start, start1, stop, stop1)

# for reg, transition, ecg, start in (
#     original.filter(pl.col("file") == file).select(["reg", "transition", "ecg", "start"]).rows()
# ):
#     view_alonso(reg, transition, ecg, start, "original")

# Revised reference cuts for the same file.
# NOTE: this uses the five-argument view_alonso(reg, transition, ecg, start, name);
# a later cell rebinds view_alonso to a three-parameter function.
segment_columns = ["reg", "transition", "ecg", "start"]
revised_for_file = revised.filter(pl.col("file") == file).select(segment_columns)
for reg, transition, ecg, start in revised_for_file.rows():
    view_alonso(reg, transition, ecg, start, "revised")

[2024-05-10T12:07:24Z INFO  egui_wgpu] There were 2 available wgpu adapters: {backend: Vulkan, device_type: DiscreteGpu, name: "AMD Radeon RX 5700 XT (RADV NAVI10)", driver: "radv", driver_info: "Mesa 23.3.6", vendor: 0x1002, device: 0x731F}, {backend: Vulkan, device_type: Cpu, name: "llvmpipe (LLVM 17.0.6, 256 bits)", driver: "llvmpipe", driver_info: "Mesa 23.3.6 (LLVM 17.0.6)", vendor: 0x10005}
[2024-05-10T12:07:24Z INFO  tracing::span] perform;
[2024-05-10T12:07:24Z INFO  zbus::handshake] write_command; command=Auth(Some(External), Some([49, 48, 48, 48]))
[2024-05-10T12:07:24Z INFO  tracing::span] read_command;
[2024-05-10T12:07:24Z INFO  zbus::handshake] write_command; command=NegotiateUnixFD
[2024-05-10T12:07:24Z INFO  tracing::span] read_command;
[2024-05-10T12:07:24Z INFO  zbus::handshake] write_command; command=Begin
[2024-05-10T12:07:24Z INFO  tracing::span] socket reader;
[2024-05-10T12:07:24Z INFO  tracing::span] perform;
[2024-05-10T12:07:24Z INFO  zbus::handshake] write_co

NameError: name 'revised' is not defined

In [11]:
# Original or Revised cuts, selected by `cuts`
def view_alonso(cuts: str, epi: str, epi_next: str):
    """Print and log every segment of one cuts file to rerun.

    Reads ``../Database/{cuts}_cuts/cuts_{epi}_{epi_next}.mat`` and, for each
    segment, prints ``index - file, start - transition - stop`` followed by the
    raw transition value, then logs the ECG as two series split at the
    transition.

    Args:
        cuts: Cut set name. ``"Original"`` reads ``transition_time`` and logs
            under ``/{file}/reg_{index}/``; any other value reads
            ``new_transition_time`` and logs under
            ``/{file}/reg_{index}_revised/`` so the two sets do not overwrite
            each other.
        epi: Label of the first episode; selects the file and names the first
            series (``H{epi}``).
        epi_next: Label of the following episode; selects the file and names
            the second series (``H{epi_next}``).

    Raises:
        FileNotFoundError: If the cuts file does not exist.
    """
    cuts_name = f"cuts_{epi}_{epi_next}"
    segments = scipy.io.loadmat(
        f"../Database/{cuts}_cuts/{cuts_name}.mat",
        simplify_cells=True,
    )[cuts_name]["segment"]
    # simplify_cells collapses a one-element struct array into a single dict.
    if isinstance(segments, dict):
        segments = [segments]

    if cuts == "Original":
        transition_key, reg_suffix = "transition_time", ""
    else:
        transition_key, reg_suffix = "new_transition_time", "_revised"

    # Series are labelled from the arguments, not from the EPI / EPI_NEXT globals.
    labels = (f"H{epi}", f"H{epi_next}")

    for index, segment in enumerate(segments, start=1):
        info = segment["info"]
        file = info["episode_name"]
        ecg = segment["ECG"]
        transition_value = segment[transition_key]
        # Index into `ecg` (presumably 250 Hz samples -- TODO confirm).
        transition_time = int(transition_value * 250)

        # Print in our format
        start = int(info["start"] * 500)
        stop = int(info["stop"] * 500)
        transition = int(start + (transition_value * 500))
        print(f"{index} - {file}, {start} - {transition} - {stop}")
        print(f"{transition_value}")

        bounds = ((0, transition_time + 1), (transition_time, len(ecg)))
        for i, (label, (seg_start, seg_stop)) in enumerate(zip(labels, bounds)):
            lname = f"/{file}/reg_{index}{reg_suffix}/{label}_{i}"
            rr.log(lname, rr.SeriesLine(color=COLOR[label], name=label), timeless=True)
            # Position each sample from its index in `ecg`. Both series include
            # the transition sample, and a running counter would shift the
            # second series one sample (two steps) to the right.
            for sample_index, v in enumerate(ecg[seg_start:seg_stop], start=seg_start):
                rr.set_time_sequence("step", start + 2 * sample_index)
                rr.log(
                    lname,
                    rr.Scalar(clamp(v)),
                )


# Plot both cut sets for the selected transition, original cuts first.
for cuts in ("Original", "Revised"):
    view_alonso(cuts, EPI, EPI_NEXT)

1 - a_14, 311168 - 311458 - 317872
0.58
2 - a_23, 831422 - 856626 - 859695
50.408
3 - a_27, 889300 - 898742 - 898887
18.884
4 - a_35, 127599 - 127704 - 130949
0.21
5 - l_61, 613094 - 613205 - 622101
0.222
6 - l_63, 581894 - 600498 - 633536
37.208
7 - l_226, 423140 - 426788 - 429317
7.296
8 - l_313, 1172472 - 1179265 - 1181368
13.586


In [12]:
# Revised
revised_segments = scipy.io.loadmat(
    f"../Database/Revised_cuts/cuts_{EPI}_{EPI_NEXT}.mat",
    simplify_cells=True,
)[f"cuts_{EPI}_{EPI_NEXT}"]["segment"]
# simplify_cells collapses a one-element struct array into a single dict; wrap it
# so the loop iterates over segments rather than over the dict's keys.
if isinstance(revised_segments, dict):
    revised_segments = [revised_segments]

# Segment numbers are 1-based.
for index, segment in enumerate(revised_segments, start=1):
    file = segment["info"]["episode_name"]
    # Index into `ecg` (presumably 250 Hz samples -- TODO confirm).
    transition_time = int(segment["new_transition_time"] * 250)
    ecg = segment["ECG"]

    # Print in our format
    info = segment["info"]
    start = int(info["start"] * 500)
    stop = int(info["stop"] * 500)
    transition = int(start + (segment["new_transition_time"] * 500))
    print(f"{index} - {file}, {start} - {transition} - {stop}")
    print(f"{segment['new_transition_time']}")

    for i, (epi, seg_start, seg_stop) in enumerate(
        zip([f"H{EPI}", f"H{EPI_NEXT}"], [0, transition_time], [transition_time + 1, len(ecg)])
    ):
        lname = f"/{file}/reg_{index}_revised/{epi}_{i}"
        rr.log(lname, rr.SeriesLine(color=COLOR[epi], name=epi), timeless=True)
        # Position each sample from its index in `ecg`. Both series include the
        # transition sample, and a running counter would shift the second series
        # one sample (two steps) to the right.
        for sample_index, v in enumerate(ecg[seg_start:seg_stop], start=seg_start):
            rr.set_time_sequence("step", start + 2 * sample_index)
            rr.log(
                lname,
                rr.Scalar(clamp(v)),
            )

1 - a_23, 831422 - 856714 - 859695
50.58467741935484
2 - a_27, 889300 - 897569 - 898887
16.538978494623656
3 - l_226, 423140 - 426040 - 429317
5.800403225806453
4 - l_313, 1172472 - 1179037 - 1181368
13.131048387096778
