In [None]:
%load_ext autoreload
%autoreload 2

import os
import re
import sys
from collections import defaultdict
from itertools import product
from pathlib import Path
from typing import Dict, List, Literal, Optional, Tuple, Union
from sklearn.metrics import  r2_score
import plotly.express as px
import scipy
from tqdm import tqdm
import IPython
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from matplotlib.axes import Axes as Axes
import logging
from time import perf_counter

# Locate this notebook on disk via the namespace returned by IPython.
# NOTE(review): "__vsc_ipynb_file__" is presumably injected by the VS Code Jupyter
# extension; under another front end this lookup would raise KeyError - confirm.
NOTEBOOK_PATH:Path = Path(IPython.extract_module_locals()[1]["__vsc_ipynb_file__"])
# The project root is taken to be two directory levels above the notebook file.
PROJECT_DIR:Path = NOTEBOOK_PATH.parent.parent
# Make the project root importable so the `src` package imports below resolve.
sys.path.append(str(PROJECT_DIR))
import src.utils.custom_log as custom_log
from src.utils.PathChecker import PathChecker
from src.utils.iso18571 import rating_iso_18571_short
from src.utils.set_rcparams import set_rcparams
from src._StandardNames import StandardNames

# Switch the working directory to the project root for the rest of the notebook.
os.chdir(PROJECT_DIR)
LOG:logging.Logger = logging.getLogger(__name__)

# Initialise the project logger at INFO level and emit a first message.
custom_log.init_logger(log_lvl=logging.INFO)
LOG.info("Log initialized")

# Apply the project's plotting defaults (see src.utils.set_rcparams).
set_rcparams()

In [None]:
# Shared name registry; its attributes (e.g. perc, id, time, fname_channels) are used
# below as index-level names and file names.
STR: StandardNames = StandardNames()

# Input data location. Both paths are passed through PathChecker
# (presumably this fails or logs when the path is missing - confirm).
DATA_DIR = PathChecker().check_directory(path=PROJECT_DIR / "data" / "doe" / "doe_big_grid_20230922_154140")
CHANNEL_FILE = PathChecker().check_file(path=DATA_DIR / STR.fname_channels)
# Figures are written to a folder named after this notebook; it is created on demand.
FIG_DIR = PROJECT_DIR / "reports" / "figures" / NOTEBOOK_PATH.stem
FIG_DIR.mkdir(parents=True, exist_ok=True)
PathChecker().check_directory(path=FIG_DIR)

# Channel codes used as the default column selection when reading the channel file
# and as the default set of channels to plot / evaluate.
RELEVANT_CHANNELS: List[str] = [
    "03HEADLOC0OCCUDSXD",
    "03HEADLOC0OCCUDSYD",
    "03HEADLOC0OCCUDSZD",
    "03HEAD0000OCCUACXD",
    "03HEAD0000OCCUACYD",
    "03HEAD0000OCCUACZD",
    "03CHSTLOC0OCCUDSXD",
    "03CHSTLOC0OCCUDSYD",
    "03CHSTLOC0OCCUDSZD",
    "03CHST0000OCCUDSXD",
    "03CHST0000OCCUACXD",
    "03CHST0000OCCUACYD",
    "03CHST0000OCCUACZD",
    "03PELVLOC0OCCUDSXD",
    "03PELVLOC0OCCUDSYD",
    "03PELVLOC0OCCUDSZD",
    "03PELV0000OCCUACXD",
    "03PELV0000OCCUACYD",
    "03PELV0000OCCUACZD",
    "03NECKUP00OCCUFOXD",
    "03NECKUP00OCCUFOZD",
    "03NECKUP00OCCUMOYD",
    "03FEMRRI00OCCUFOZD",
    "03FEMRLE00OCCUFOZD",
]

# Common time axis: 1401 evenly spaced points from 0 to 140 (step 0.1);
# plotted as "Time [ms]" further down.
TIME: np.ndarray = np.linspace(0, 140, 1401)

In [3]:
def read_data(
    file: Path = CHANNEL_FILE,
    columns: List[str] = RELEVANT_CHANNELS,
    percentiles: List[int] = [5],
    sim_ids: Optional[List[int]] = None,
) -> pd.DataFrame:
    """Load selected channel columns from a parquet file.

    Rows are filtered while reading: only rows whose ``STR.perc`` value is in
    ``percentiles`` are kept and, if ``sim_ids`` is given, only rows whose
    ``STR.id`` value is in ``sim_ids``. The ``STR.perc`` index level is then
    dropped and every column is converted with ``pd.to_numeric`` using float
    downcasting.

    Args:
        file: parquet file to read.
        columns: columns to load.
        percentiles: values of ``STR.perc`` to keep.
        sim_ids: optional values of ``STR.id`` to keep; ``None`` keeps all.

    Returns:
        The filtered, downcast data without the ``STR.perc`` index level.
    """
    LOG.info("Read data from %s", file)

    # row filters handed to the parquet reader
    row_filters = [(STR.perc, "in", percentiles)]
    if sim_ids is not None:
        row_filters = row_filters + [(STR.id, "in", sim_ids)]

    loaded = pd.read_parquet(path=file, columns=columns, filters=row_filters)
    without_perc = loaded.droplevel(STR.perc)
    db = without_perc.apply(pd.to_numeric, downcast="float")

    LOG.info("Got data with shape %s", db.shape)
    return db

In [None]:
def full_channels(channels: List[str] = RELEVANT_CHANNELS, percentiles: List[int] = [95]):
    """Plot every simulation's signal per channel together with quantile curves.

    For each channel one figure is written to ``FIG_DIR``. It contains the
    individual signals of all simulation ids plus the 5 %, 25 %, 50 %, 75 % and
    95 % quantiles computed per time step across the loaded rows.

    Args:
        channels: channels to load and plot (one figure each).
        percentiles: values of ``STR.perc`` to load; also used in the figure
            title and the file name.
    """
    LOG.info("Read data")
    # Load exactly the channels that are plotted, so that channels outside
    # RELEVANT_CHANNELS work and no unused columns are read.
    db: pd.DataFrame = read_data(columns=channels, percentiles=percentiles)

    LOG.info("Calculate percentiles")
    quantile = db.groupby(STR.time).quantile((0.05, 0.25, 0.5, 0.75, 0.95))

    # Loop-invariant values, computed once instead of once per channel.
    sim_ids = db.index.get_level_values(STR.id).unique()
    perc_tag = "_".join([str(per) for per in percentiles])

    # (quantile, line kwargs) in drawing order; only one line per band carries a
    # legend label so the legend shows each band once.
    quantile_lines = (
        (0.50, {"label": "Median", "ls": "-"}),
        (0.25, {"label": "IQR", "ls": "--"}),
        (0.05, {"label": "Percentile 5-95", "ls": ":"}),
        (0.75, {"ls": "--"}),
        (0.95, {"ls": ":"}),
    )

    LOG.info("Plot data")
    for channel in tqdm(channels):
        # init
        fig, ax = plt.subplots()

        # plot single signals
        for idx in sim_ids:
            ax.plot(TIME, db.loc[(slice(None), idx), channel], alpha=0.8)

        # plot percentiles
        for quant, line_kwargs in quantile_lines:
            ax.plot(TIME, quantile.loc[(slice(None), quant), channel], c="black", lw=2, **line_kwargs)

        # style
        ax.legend()
        ax.set_title(f"HIII {percentiles}")
        ax.set_ylabel(channel)
        ax.set_xlabel("Time [ms]")
        ax.grid()

        fig.savefig(FIG_DIR / f"all_ids_{channel}_perc_{perc_tag}.png")
        # close explicitly: many figures are created in this loop
        plt.close(fig)

    LOG.info("Done")


full_channels()

In [None]:
def some_isos(channels: List[str] = RELEVANT_CHANNELS, percentiles: List[int] = [95]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Score how closely the quantile curves of each channel follow the median curve.

    Per-time-step quantiles (5 %, 25 %, 50 %, 75 %, 95 %) are computed across the
    loaded rows. For every channel, every non-median quantile and several time
    windows, the quantile curve (reference) is compared with the median curve
    using ``rating_iso_18571_short`` and ``r2_score``. R2 values that are not
    greater than zero are stored as 0.

    Args:
        channels: channels to load and evaluate.
        percentiles: values of ``STR.perc`` to load.

    Returns:
        ``(ratings, r2s)``: two DataFrames indexed by ``("channel", "quantile")``
        with one column per evaluated time window.
    """
    LOG.info("Read data")
    # Load exactly the evaluated channels, so that channels outside
    # RELEVANT_CHANNELS work and no unused columns are read.
    db: pd.DataFrame = read_data(columns=channels, percentiles=percentiles)

    LOG.info("Calculate percentiles")
    quantile = db.groupby(STR.time).quantile((0.05, 0.25, 0.5, 0.75, 0.95))
    del db  # only the aggregated curves are needed from here on

    LOG.info("Calculate")
    quants = (0.05, 0.25, 0.75, 0.95)
    t_ranges = ((0, 140), (20, 120), (40, 120), (60, 120), (40, 100), (60, 100))
    cases = list(product(channels, quants, t_ranges))

    # Keyed directly by (channel, quantile); the inner dict maps time window -> score.
    ratings = defaultdict(dict)
    r2s = defaultdict(dict)
    for channel, quant, t_range in tqdm(cases):
        # data: label-based slice on the time level, start and end taken from t_range
        signal_ref = quantile.loc[(slice(*t_range), quant), channel].to_numpy()
        signal_comp = quantile.loc[(slice(*t_range), 0.5), channel].to_numpy()

        # iso
        ratings[(channel, quant)][t_range] = rating_iso_18571_short(signal_ref=signal_ref, signal_comp=signal_comp)

        # r2; anything not > 0 (including NaN) is stored as 0
        r2 = r2_score(y_true=signal_ref, y_pred=signal_comp)
        r2s[(channel, quant)][t_range] = r2 if r2 > 0 else 0

    LOG.info("Convert to DataFrame")
    ratings_df = pd.DataFrame.from_dict(dict(ratings), orient="index")
    ratings_df.index.names = ["channel", "quantile"]

    r2s_df = pd.DataFrame.from_dict(dict(r2s), orient="index")
    r2s_df.index.names = ["channel", "quantile"]

    LOG.info("Done")

    return ratings_df, r2s_df


RATINGS, R2 = some_isos()
RATINGS

In [None]:
# Median over channels per quantile: first the ISO ratings, then the R2 scores.
for table in (RATINGS, R2):
    display(table.groupby("quantile").median())

In [None]:
# Minimum over channels per quantile: first the ISO ratings, then the R2 scores.
for table in (RATINGS, R2):
    display(table.groupby("quantile").min())

In [None]:
# Maximum over channels per quantile: first the ISO ratings, then the R2 scores.
for table in (RATINGS, R2):
    display(table.groupby("quantile").max())

In [None]:
# Show, channel by channel, the ISO ratings next to the R2 scores.
# A plain loop replaces the side-effect-only list comprehension, which also
# rebound IPython's special `_` name to a list of None values.
for ch in RATINGS.index.get_level_values("channel").unique():
    display(pd.concat([RATINGS.loc[(ch, slice(None)), :], R2.loc[(ch, slice(None)), :]], axis=1))

In [None]:
from scipy.signal import resample
import matplotlib.colors as mcolors
from scipy.interpolate import CubicSpline


def resampler(
    n_samples=5,
    channels: List[str] = RELEVANT_CHANNELS,
    percentiles: List[int] = [95],
    seed: Optional[int] = None,
):
    """Estimate the information lost by down-sampling signals to 50 points.

    A random set of simulations is loaded, each signal is cubic-spline
    interpolated from the ``TIME`` grid onto 50 evenly spaced points and back
    again. For every channel the summed absolute round-trip error, divided by
    the channel's value range, is printed.

    Args:
        n_samples: number of distinct simulation ids to draw.
        channels: channels to load and evaluate.
        percentiles: values of ``STR.perc`` to load.
        seed: optional seed for the id selection; ``None`` draws differently
            on every call.

    Raises:
        ValueError: if no data is found for the drawn ids, or if ``n_samples``
            exceeds the number of available ids.
    """
    LOG.info("Read data")
    t_coarse = np.linspace(0, 140, 50)

    # Draw ids without replacement: duplicates would yield fewer simulations
    # than requested.
    # NOTE(review): the upper bound 3124 is exclusive, so id 3124 is never drawn -
    # confirm against the number of simulations in the data set.
    rng = np.random.default_rng(seed)
    ids = rng.choice(3124, size=n_samples, replace=False).tolist()
    db: pd.DataFrame = read_data(columns=channels, percentiles=percentiles, sim_ids=ids)

    # One (time, channel) matrix per simulation id. Grouping on the id level is
    # independent of the physical row order, unlike an equal-size row split.
    per_sim = [sim.to_numpy() for _, sim in db.groupby(level=STR.id)]
    if not per_sim:
        raise ValueError("No data found for the sampled simulation ids")
    if len(per_sim) != n_samples:
        LOG.warning("Requested %s simulations but got %s", n_samples, len(per_sim))
    db_ = np.stack(per_sim)

    db_resample = CubicSpline(x=TIME, y=db_, axis=1)(t_coarse)

    db_back_sampled = CubicSpline(x=t_coarse, y=db_resample, axis=1)(TIME)
    LOG.info("Resample from %s to %s and back to %s", db_.shape, db_resample.shape, db_back_sampled.shape)

    for channel in channels:
        # look the column up by name rather than relying on positional order
        idx = db.columns.get_loc(channel)
        original = db_[:, :, idx]
        print(channel, np.sum(np.abs(original - db_back_sampled[:, :, idx])) / (np.max(original) - np.min(original)))

    LOG.info("Done")


resampler()

In [None]:
import tensorflow as tf
import numpy as np
from scipy.interpolate import CubicSpline

class MyDenseLayer(tf.keras.layers.Layer):
    """Layer that resamples its input along axis 1 with a cubic spline.

    No weights are created. The input is expected to have 1401 entries along
    axis 1, matching the fixed source grid used in ``call``.
    """

    def __init__(self, num_outputs):
        """Remember the number of points on the output grid."""
        super().__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        """Print the incoming shape; nothing is built."""
        print("###", input_shape)

    def call(self, inputs):
        """Interpolate ``inputs`` from the 1401-point grid onto ``num_outputs`` points.

        The value returned is whatever the SciPy spline evaluation yields
        (presumably a NumPy array rather than a tensor - confirm).
        """
        source_grid = np.linspace(0, 140, 1401)
        target_grid = np.linspace(0, 140, self.num_outputs)

        spline = CubicSpline(x=source_grid, y=inputs, axis=1)
        return spline(target_grid)


MyDenseLayer(num_outputs=50)(np.random.random((100, 1401, 25))).shape

In [8]:
def inter(inputs, num_outputs):
    """Resample signals along axis 1 onto ``num_outputs`` evenly spaced points.

    Both the input samples and the output points are treated as evenly spaced
    over the same interval (0 to 140), and a cubic spline is used for the
    interpolation. The number of input samples is taken from the data instead
    of being fixed to 1401, so arrays with any length >= 2 along axis 1 work.

    Args:
        inputs: array-like with at least two dimensions; axis 1 is the axis
            that is resampled.
        num_outputs: number of points along axis 1 in the result.

    Returns:
        Array with the shape of ``inputs`` except that axis 1 has length
        ``num_outputs``.
    """
    samples = np.asarray(inputs)
    t_in = np.linspace(0, 140, samples.shape[1])
    t_out = np.linspace(0, 140, num_outputs)

    return CubicSpline(x=t_in, y=samples, axis=1)(t_out)

In [None]:
from time import perf_counter
# Benchmark input with 1401 entries along axis 1, as both resamplers require.
# 6553 * 1401 * 43 float64 values are roughly 3.2 GB.
d = np.random.random((6553, 1401, 43))
print(d.shape)

# Time the plain function first, then the layer; constructing the layer is
# part of the timed region.
for run in (
    lambda: inter(d, 50),
    lambda: MyDenseLayer(num_outputs=50)(d),
):
    tic = perf_counter()
    _ = run()
    print(perf_counter() - tic)

In [None]:
import tensorflow as tf
import numpy as np
from scipy.interpolate import CubicSpline

# NOTE(review): this cell defines MyDenseLayer again with the same behaviour as the
# definition in an earlier cell; the later definition shadows the earlier one.
# Consider keeping only one of them.
class MyDenseLayer(tf.keras.layers.Layer):
    """Layer that resamples its input along axis 1 with a cubic spline.

    No weights are created. The input is expected to have 1401 entries along
    axis 1, matching the fixed source grid used in ``call``.
    """

    def __init__(self, num_outputs):
        """Remember the number of points on the output grid."""
        super().__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        """Print the incoming shape; nothing is built."""
        print("###", input_shape)

    def call(self, inputs):
        """Interpolate ``inputs`` from the 1401-point grid onto ``num_outputs`` points.

        The value returned is whatever the SciPy spline evaluation yields
        (presumably a NumPy array rather than a tensor - confirm).
        """
        source_grid = np.linspace(0, 140, 1401)
        target_grid = np.linspace(0, 140, self.num_outputs)

        spline = CubicSpline(x=source_grid, y=inputs, axis=1)
        return spline(target_grid)


MyDenseLayer(num_outputs=50)(np.random.random((100, 1401, 25))).shape