In [1]:
%load_ext autoreload
%autoreload 2

Create a dataframe with all cut information.

In [2]:
import os
from pathlib import Path

import polars as pl
import scipy

from cci.utils import project_dir

# Output directory for the generated CSV files; created when it does not exist yet.
DATA_DIR = project_dir() / "data"
DATA_DIR.mkdir(exist_ok=True)

# Directory with the source .mat files, taken from the OOCHA_DIR environment variable.
oocha_dir = Path(os.environ["OOCHA_DIR"])

# The variable read from each .mat file has the same name as the file stem.
arecs, oohrepr = (
    scipy.io.loadmat(oocha_dir / f"{stem}.mat", simplify_cells=True)[stem]
    for stem in ("arecs", "oohrepr")
)

# Flatten the per-record annotations into four parallel lists, one entry per segment.
files, epi, smp_start, smp_stop = [], [], [], []
for rec_name, rec_labels, rec_spans in zip(arecs, oohrepr["EPI"], oohrepr["SMP"]):
    # Records whose EPI entry is a bare string (not a sequence of labels) are skipped.
    if not isinstance(rec_labels, str):
        segments = list(zip(rec_labels, rec_spans))
        files.extend([rec_name] * len(segments))
        # Labels are kept as-is (no .upper()): the original file uses both upper and
        # lower case; lower case presumably marks a noisy signal — TODO confirm.
        epi.extend(label for label, _ in segments)
        smp_start.extend(span[0] for _, span in segments)
        smp_stop.extend(span[1] for _, span in segments)

# One row per segment, widened with the label and sample bounds of the next two rows.
# NOTE: shift() runs over the whole frame, so the look-ahead columns of the last rows
# of a file hold values from the following file.
original_df = (
    pl.LazyFrame(dict(files=files, EPI=epi, SMP_start=smp_start, SMP_stop=smp_stop))
    .filter(pl.col("files").is_not_null())
    .with_columns(
        [
            pl.col(column).shift(-ahead).alias(f"{column}_{ahead}")
            for ahead in (1, 2)
            for column in ("EPI", "SMP_start", "SMP_stop")
        ]
    )
    .collect()
)

original_df.write_csv(DATA_DIR / "original.csv")
original_df.head()

files,EPI,SMP_start,SMP_stop,EPI_1,SMP_start_1,SMP_stop_1,EPI_2,SMP_start_2,SMP_stop_2
str,str,i32,i32,str,i32,i32,str,i32,i32
"""S_1""","""un""",1,11188,"""VF""",11189,19352,"""dfb""",19353,20192
"""S_1""","""VF""",11189,19352,"""dfb""",19353,20192,"""VF""",20193,21272
"""S_1""","""dfb""",19353,20192,"""VF""",20193,21272,"""AS""",21273,22846
"""S_1""","""VF""",20193,21272,"""AS""",21273,22846,"""CAS""",22847,38680
"""S_1""","""AS""",21273,22846,"""CAS""",22847,38680,"""AS""",38681,40186


# Min sample length

In [3]:
# Keep only the segments whose SMP_stop - SMP_start exceeds 1500 samples.
df = original_df.filter((pl.col("SMP_stop") - pl.col("SMP_start")).gt(1500))

# Row counts before and after the length filter, one per line.
print(original_df.height, df.height, sep="\n")

23182
19532


In [4]:
def classify_class_label(df, next_epi: int):
    """Label every row by whether its rhythm is followed by a desired rhythm.

    Class 0 is a good transition, class 1 a bad one. Desired transitions:

    - VF / VT -> PR
    - AS -> PR / VF / VT
    - PE -> PR
    - PR -> sROSC (TODO: not implemented, PR rows are always labelled 1)

    Args:
        df: Frame with an ``EPI`` column and an ``EPI_<next_epi>`` column. Only
            ``df.select([...]).rows()`` is used.
        next_epi: Which look-ahead column to compare against: 1 for ``EPI_1``,
            2 for ``EPI_2``.

    Returns:
        A list with one 0/1 label per row of ``df``, in row order.

    Raises:
        ValueError: If a row's ``EPI`` is not one of AS, VF, VT, PE or PR. Such
            rows would otherwise get no label and the result would no longer
            line up with the rows of ``df``.
    """
    # Rhythm -> rhythms that count as a good (class 0) follow-up.
    good_followers = {
        "AS": ("VF", "VT", "PR"),
        "VF": ("PR",),
        "VT": ("PR",),
        "PE": ("PR",),
        "PR": (),
    }
    class_label = []
    # Distinct loop names so the `next_epi` parameter is not overwritten.
    for current, following in df.select(["EPI", f"EPI_{next_epi}"]).rows():
        if current not in good_followers:
            raise ValueError(f"Cannot classify rhythm {current!r}; expected one of {list(good_followers)}")
        class_label.append(0 if following in good_followers[current] else 1)
    return class_label

# Clean DF

In [5]:
# Rhythm labels accepted both for the current segment and for the one that follows.
clean_labels = ["AS", "VF", "VT", "PE", "PR"]
clean_df = df.filter(
    pl.col("EPI").is_in(clean_labels),
    # A following segment that starts at sample 1 is presumably the first segment of
    # the next file (the look-ahead columns are shifted across file boundaries).
    pl.col("SMP_start_1") != 1,
    pl.col("EPI_1").is_in(clean_labels),
)
class_label = classify_class_label(clean_df, 1)

clean_df = clean_df.hstack([pl.Series("Class Label", class_label)])
clean_df.write_csv(DATA_DIR / "clean_df.csv")
# Preview goes last: only the final expression of a cell is rendered.
clean_df.head(20)

# Clean DFB DF

In [6]:
# Rhythm labels accepted for the segment before and the segment after the "dfb" row.
clean_labels = ["AS", "VF", "VT", "PE", "PR"]
clean_dfb_df = df.filter(
    pl.col("EPI").is_in(clean_labels),
    # A look-ahead segment that starts at sample 1 is presumably the first segment of
    # the next file (the look-ahead columns are shifted across file boundaries).
    pl.col("SMP_start_1") != 1,
    pl.col("SMP_start_2") != 1,
    pl.col("EPI_1") == "dfb",
    pl.col("EPI_2").is_in(clean_labels),
)
# Classify against the segment after the "dfb" row (EPI_2).
class_label = classify_class_label(clean_dfb_df, 2)
clean_dfb_df = clean_dfb_df.hstack([pl.Series("Class Label", class_label)])
clean_dfb_df.write_csv(DATA_DIR / "clean_df_dfb.csv")
# Preview goes last: only the final expression of a cell is rendered.
clean_dfb_df.head()

In [7]:
# Number of rows per class label in the dfb frame.
clean_dfb_df.get_column("Class Label").value_counts()

Class Label,count
i64,u32
1,941


In [8]:
# Save every row of the unfiltered frame whose following segment is labelled "dfb".
(
    original_df
    .filter(pl.col("EPI_1").eq("dfb"))
    .write_csv(DATA_DIR / "original_dfb.csv")
)

# Dataset V2

## All segments from the same episode are placed in the same subfold to avoid cross-contamination.
## rhythm1 -> dfb -> rhythm1 ->
S_1,VF,11189,19352,dfb,19353,20192,VF,20193,21272
S_1,dfb,19353,20192,VF,20193,21272,AS,21273,22846
S_1,VF,20193,21272,AS,21273,22846,CAS,22847,38680
S_1,AS,21273,22846,CAS,22847,38680,AS,38681,40186

VF -> AS if it happens shortly enough after the dfb,
and name them: DVF
HVF
HAS

spontaneous vs. defibrillated

In [1]:
import os
from pathlib import Path

import numpy as np
import polars as pl
import scipy

from cci.utils import project_dir

# Folder the generated dataset CSV files are written to; created if it is missing.
DATASET_FOLDER = project_dir() / "data"
DATASET_FOLDER.mkdir(exist_ok=True)
# Directory with the source .mat files, read from the OOCHA_DIR environment variable
# (a KeyError is raised here when the variable is not set).
OOCHA_DIR = Path(os.environ["OOCHA_DIR"])

In [2]:
# Record names (arecs) and the per-record annotation fields (oohrepr), read from OOCHA_DIR.
arecs, oohca_info = (
    scipy.io.loadmat(OOCHA_DIR / mat_file, simplify_cells=True)[variable]
    for mat_file, variable in (("arecs.mat", "arecs"), ("oohrepr.mat", "oohrepr"))
)


def replace_object(x):
    """Convert every entry of ``x`` into a plain Python list.

    A string entry becomes a one-element list; any other entry is converted
    through its ``tolist()`` method.
    """
    converted = []
    for entry in x:
        if isinstance(entry, str):
            converted.append([entry])
        else:
            converted.append(entry.tolist())
    return converted


# Turn every annotation field into plain lists (in place), then attach the record names.
oohca_info.update({field: replace_object(values) for field, values in oohca_info.items()})
oohca_info["file"] = arecs

In [3]:
# Build one row per segment: file, EPI label and the Start/Stop sample bounds.
df = (
    pl.LazyFrame(
        {key: oohca_info[key] for key in ["file", "EPI", "SMP"]},
    )
    .filter(
        pl.col("file").is_not_null(),  # Some of the entries are missing a filename
        # Keep only rows with as many SMP entries as EPI labels
        # (this removes the rows with a single rhythm).
        pl.col("EPI").list.len() == pl.col("SMP").list.len(),
    )
    .explode("EPI", "SMP")
    .with_columns(
        # Split every SMP entry into Start/Stop struct fields. `upper_bound` tells the
        # lazy engine how many fields to expect, so the schema after `unnest` contains
        # Start and Stop and later lazy projections (select/drop) keep them.
        pl.col("SMP").list.to_struct(
            fields=["Start", "Stop"],
            upper_bound=2,
        ),
    )
    .unnest("SMP")
)


# Annotate
def annotate_hands_off(epi: str) -> str:
    """Prefix a hands-off rhythm label with H/h, e.g. AS -> HAS and vf -> hvf.

    Upper-case AS, VF, VT, PE, PR get an "H" prefix; lower-case as, vf, vt, pe,
    pr, un get an "h" prefix. Every other label is returned unchanged.
    """
    upper_case = frozenset({"AS", "VF", "VT", "PE", "PR"})
    lower_case = frozenset({"as", "vf", "vt", "pe", "pr", "un"})
    if epi in upper_case:
        return "H" + epi
    if epi in lower_case:
        return "h" + epi
    return epi


def map_dfb(vals: dict) -> str:
    """Rename a "dfb" segment after the rhythm found on both sides of it.

    VT -> dfb -> VT  =>  VT -> DVT -> VT

    Args:
        vals: One row with the keys "epi_-1", "epi_0" and "epi_1" (previous,
            current and next label), as passed for each struct element.

    Returns:
        ``"D" + previous label`` when the current label is "dfb" and the previous
        and next labels are equal and not missing; otherwise the current label
        unchanged.
    """
    previous, current, following = vals["epi_-1"], vals["epi_0"], vals["epi_1"]
    # Two missing neighbours compare equal as well; that must not yield "DNone".
    if current == "dfb" and previous is not None and previous == following:
        return f"D{previous}"
    return current


# Annotate 'dfb' with the corresponding rhythm, then prefix the hands-off rhythms.
df = (
    df.with_columns(
        # epi_-1 / epi_0 / epi_1: previous, current and next label of every row.
        [pl.col("EPI").shift(-i).alias(f"epi_{i}") for i in range(-1, 2)],
    )
    .with_columns(
        # An explicit return dtype keeps the lazy schema known instead of having it
        # inferred from the first value the Python function returns.
        pl.struct(["epi_-1", "epi_0", "epi_1"])
        .map_elements(map_dfb, return_dtype=pl.Utf8)
        .alias("EPI")
    )
    .with_columns(pl.col("EPI").map_elements(annotate_hands_off, return_dtype=pl.Utf8))
)

# Collect and save.
# NOTE: the helper columns are dropped only after collect(); dropping them on the lazy
# frame was observed to remove Start and Stop as well.
df = df.collect().drop(
    [f"epi_{i}" for i in range(-1, 2)],
)
df.write_csv(DATASET_FOLDER / "full.csv")

In [4]:
def dfb_df(full_df: pl.DataFrame) -> pl.DataFrame:
    """Select the rows that are directly followed by an annotated defibrillation.

    Every row is widened with the label and Start/Stop of the following three
    rows (epi1..3, start1..3, stop1..3). Rows are kept when ``epi1`` starts with
    "D", giving rhythm -> D(fb)rhythm -> rhythm -> transition.
    """
    steps = range(1, 4)
    lookahead = (
        [pl.col("EPI").shift(-k).alias(f"epi{k}") for k in steps]
        + [pl.col("Start").shift(-k).alias(f"start{k}") for k in steps]
        + [pl.col("Stop").shift(-k).alias(f"stop{k}") for k in steps]
    )
    after_dfb = full_df.with_columns(lookahead).filter(pl.col("epi1").str.starts_with("D"))
    # Drop sequences that occur at the end of a file: one of the look-ahead segments
    # then starts at sample 1.
    earliest_start = pl.min_horizontal(pl.col("start1"), pl.col("start2"), pl.col("start3"))
    return after_dfb.filter(earliest_start != 1)


dfb_df(df).write_csv(DATASET_FOLDER / "dfb_full.csv")

In [5]:
# NOTE: this rebinds `dfb_df`; from here on the name refers to the loaded frame,
# no longer to the function of the same name.
dfb_df = pl.read_csv(DATASET_FOLDER / "dfb_full.csv")
# Show every sequence whose transition (start3) occurs fewer than 1000 samples after
# start2, described in the original note as less than 2 seconds after the dfb.
dfb_df.filter((pl.col("start3") - pl.col("start2")).lt(1000))

file,EPI,Start,Stop,epi1,epi2,epi3,start1,start2,start3,stop1,stop2,stop3
str,str,i64,i64,str,str,str,i64,i64,i64,i64,i64,i64
"""S_1""","""HVF""",87942,98341,"""DVF""","""HVF""","""HAS""",98342,99183,99798,99182,99797,110361
"""S_1""","""HVF""",216520,224252,"""DVF""","""HVF""","""HAS""",224253,225093,225846,225092,225845,233027
"""S_1""","""HVF""",311663,324771,"""DVF""","""HVF""","""HAS""",324772,325613,326261,325612,326260,336096
"""a_2""","""HVF""",81103,88484,"""DVF""","""HVF""","""HPE""",88485,89326,89910,89325,89909,93133
"""a_2""","""HVF""",422639,428891,"""DVF""","""HVF""","""HPE""",428892,429736,430615,429735,430614,433345
…,…,…,…,…,…,…,…,…,…,…,…,…
"""s_375""","""HVF""",335895,360975,"""DVF""","""HVF""","""HAS""",360976,361817,362594,361816,362593,370340
"""s_381""","""HVF""",497504,505394,"""DVF""","""HVF""","""HPE""",505395,506219,506877,506218,506876,537259
"""s_382""","""HVF""",120088,132475,"""DVF""","""HVF""","""HAS""",132476,133301,134029,133300,134028,153908
"""s_387""","""HVF""",437556,447422,"""DVF""","""HVF""","""HPE""",447423,448246,449017,448245,449016,464622


In [6]:
# Palette source: http://vrl.cs.brown.edu/color
# Maps a rhythm label to a list of three integers used as the series colour
# (presumably [R, G, B] in 0-255 — TODO confirm). Looked up as color[label] when a
# series is logged; a label without an entry raises KeyError there.

color = {
    "HAS": [82, 239, 153],
    "HVT": [17, 94, 65],
    "HVF": [148, 210, 207],
    "HPE": [44, 69, 125],
    "HPR": [209, 121, 248],
    "DAS": [153, 28, 100],
    "DVF": [172, 130, 180],
    "DVT": [102, 51, 180],
    "DPE": [238, 200, 241],
    "DPR": [41, 140, 192],
    "CAS": [183, 209, 101],
    "CVT": [58, 166, 9],
    "CVF": [44, 245, 43],
    "CPE": [93, 64, 48],
    "CPR": [254, 183, 134],
    "cas": [177, 75, 50],
    "cvt": [251, 45, 76],
    "cvf": [251, 189, 19],
    "cpe": [116, 141, 19],
    "cpr": [37, 128, 254],
    "hun": [246, 18, 168],
    "cun": [194, 24, 241],
    "dfb": [160, 127, 61],
}

In [7]:
import functools
from pathlib import Path


@functools.lru_cache(maxsize=4)
def get_signal(signal_path: Path):
    """Load the ``SIGNALS["ecg_diff"]`` entry of a .mat file as a float32 array.

    Results are memoised per path (up to four paths), so repeated calls with the
    same path return the same array object without reading the file again.
    """
    mat_contents = scipy.io.loadmat(signal_path, simplify_cells=True)
    ecg_diff = mat_contents["SIGNALS"]["ecg_diff"]
    return ecg_diff.astype(np.float32)

In [37]:
import rerun as rr
from rerun.blueprint import (
    Blueprint,
    BlueprintPanel,
    Grid,
    SelectionPanel,
    Spatial2DView,
    TimePanel,
    TimeSeriesView,
    Viewport,
)

# One time-series view named "ECG"; the blueprint, selection and time panels all
# start collapsed.
blueprint = Blueprint(
    Viewport(TimeSeriesView(name="ECG")),
    *(panel(expanded=False) for panel in (BlueprintPanel, SelectionPanel, TimePanel)),
)
rr.init("dfb_viewer", spawn=True, blueprint=blueprint)

[2024-03-22T18:13:18Z WARN  re_sdk::log_sink] Dropping data in BufferedSink
[2024-03-22T18:13:18Z INFO  egui_wgpu] There were 2 available wgpu adapters: {backend: Vulkan, device_type: DiscreteGpu, name: "AMD Radeon RX 5700 XT (RADV NAVI10)", driver: "radv", driver_info: "Mesa 23.3.6", vendor: 0x1002, device: 0x731F}, {backend: Vulkan, device_type: Cpu, name: "llvmpipe (LLVM 17.0.6, 256 bits)", driver: "llvmpipe", driver_info: "Mesa 23.3.6 (LLVM 17.0.6)", vendor: 0x10005}
[2024-03-22T18:13:18Z INFO  tracing::span] perform;
[2024-03-22T18:13:18Z INFO  zbus::handshake] write_command; command=Auth(Some(External), Some([49, 48, 48, 48]))
[2024-03-22T18:13:18Z INFO  tracing::span] read_command;
[2024-03-22T18:13:18Z INFO  zbus::handshake] write_command; command=NegotiateUnixFD
[2024-03-22T18:13:18Z INFO  tracing::span] read_command;
[2024-03-22T18:13:18Z INFO  zbus::handshake] write_command; command=Begin
[2024-03-22T18:13:18Z INFO  tracing::span] socket reader;
[2024-03-22T18:13:18Z INFO  t

In [38]:
# Sequences whose transition rhythm (epi3) is not HAS. The columns are selected in the
# order in which they are unpacked below.
iterator = iter(
    dfb_df.select(
        [
            "file",
            "EPI",
            "epi1",
            "epi2",
            "epi3",
            "Start",
            "start1",
            "start2",
            "start3",
            "Stop",
            "stop1",
            "stop2",
            "stop3",
        ]
    )
    .filter(pl.col("epi3") != "HAS")
    .rows()
)
# Skip the first two sequences and visualise the third one.
next(iterator)
next(iterator)
file, epi, epi1, epi2, epi3, start, start1, start2, start3, stop, stop1, stop2, stop3 = next(iterator)
# The segment after the dfb is drawn with the label (and colour) of the one before it.
epi2 = f"{epi}"
signal = get_signal(OOCHA_DIR / f"{file}.mat")

# Total number of samples logged so far. The time stamp is derived from this counter
# instead of repeatedly adding 1 / 500, which accumulates floating-point error.
logged_samples = 0
segments = zip([epi, epi1, epi2, epi3], [start, start1, start2, start3], [stop, stop1, stop2, stop3])
# Distinct loop names so the values unpacked above are not overwritten.
for i, (seg_epi, seg_start, seg_stop) in enumerate(segments):
    lname = f"{seg_epi}_{i}"
    rr.log(lname, rr.SeriesLine(color=color[seg_epi], name=seg_epi), timeless=True)
    # Start/Stop are 1-based and inclusive (the first segment of a file starts at 1 and
    # every segment starts at the previous Stop + 1), so the 0-based slice begins at
    # seg_start - 1.
    for v in signal[seg_start - 1 : seg_stop]:
        # 500 samples per second, presumably the ECG sampling rate — TODO confirm.
        rr.set_time_seconds("step", logged_samples / 500)
        rr.log(
            lname,
            rr.Scalar(v),
        )
        logged_samples += 1