In [None]:
import sys
from logging import Logger
from pathlib import Path
from typing import Optional, List

import IPython
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from sklearn.preprocessing import RobustScaler
import tqdm
from itertools import product

# Resolve this notebook's own path from the kernel namespace. The "__vsc_ipynb_file__" key
# is presumably injected by the VS Code Jupyter extension, in which case this lookup raises
# KeyError in other front ends (TODO confirm).
NOTEBOOK_PATH: Path = Path(IPython.extract_module_locals()[1]["__vsc_ipynb_file__"])
# The project root is taken to be the parent of the directory that contains the notebook.
PROJECT_DIR: Path = NOTEBOOK_PATH.parent.parent
# Make the project root importable; this has to run before the `src` import below.
sys.path.append(str(PROJECT_DIR))
import src.utils.custom_log as custom_log

# Notebook-wide logger, initialised at INFO level; the first message reports whether the
# derived project directory actually exists.
LOG: Logger = custom_log.init_logger(__name__, log_lvl=custom_log.LEVELS.INFO)
LOG.info("Log start, project directory is %s (exist: %s)", PROJECT_DIR, PROJECT_DIR.is_dir())

In [None]:
# Input folder: the parquet files loaded further down are read from here.
DATA_DIR: Path = PROJECT_DIR.joinpath("data", "raw", "doe_big_grid_20230922_154140")
LOG.info("Data directory is %s (exist: %s)", DATA_DIR, DATA_DIR.is_dir())

# Experiment folder; only its existence is reported at this point.
AI_DIR: Path = PROJECT_DIR.joinpath("experiments", "2024-07-05-07-45-58_trial_doe_from_fe")
LOG.info("AI directory is %s (exist: %s)", AI_DIR, AI_DIR.is_dir())

# Figure output folder, named after this notebook and created on demand.
PIC_DIR: Path = PROJECT_DIR.joinpath("reports", "figures", NOTEBOOK_PATH.stem)
PIC_DIR.mkdir(parents=True, exist_ok=True)
LOG.info("Picture directory is %s (exist: %s)", PIC_DIR, PIC_DIR.is_dir())

In [3]:
# Seeded generator used for the random simulation sampling in the plotting cells, so a
# fresh top-to-bottom run draws the same samples.
RNG: np.random.Generator = np.random.default_rng(seed=42)
# Value matched against the "PERC" column when the DOE table is loaded.
PERCENTILE: int = 50

In [None]:
# Load only the DOE rows whose "PERC" equals the configured percentile. The filter column
# carries no information afterwards, so it is dropped as part of the same expression.
DOE: pd.DataFrame = (
    pd.read_parquet(DATA_DIR / "doe.parquet", filters=[("PERC", "==", PERCENTILE)])
    .drop(columns=["PERC"])
)
DOE

In [None]:
# Channel data, restricted at read time to the simulations that are present in DOE.
CHANNELS: pd.DataFrame = pd.read_parquet(
    DATA_DIR / "channels.parquet",
    filters=[("SIM_ID", "in", set(DOE.index))],
)
CHANNELS

In [None]:
# Injury criteria, restricted at read time to the simulations that are present in DOE.
INJURY_VALUES: pd.DataFrame = pd.read_parquet(
    DATA_DIR / "injury_criteria.parquet",
    filters=[("SIM_ID", "in", set(DOE.index))],
)
INJURY_VALUES

In [None]:
# Scatter matrix of every DOE column against every other one. The trailing ";" keeps the
# returned grid object from being echoed as cell output.
sns.pairplot(DOE);

In [None]:
# DOE factors on the x axes against injury criteria on the y axes. Both frames are put
# side by side first, aligned on their index.
sns.pairplot(
    pd.concat([DOE, INJURY_VALUES], axis=1),
    x_vars=DOE.columns,
    y_vars=INJURY_VALUES.columns,
);

In [None]:
def plot_channel(channel: str, ax: Optional[Axes] = None, n_samples: int = 10, choose_from: Optional[List[int]] = None) -> None:
    """Plot one channel over time for a random subset of simulations.

    Args:
        channel: Column of ``CHANNELS`` to plot.
        ax: Axes to draw on. A new figure is created when omitted.
        n_samples: Maximum number of distinct simulations to draw.
        choose_from: Simulation IDs to sample from. Defaults to every ID in ``DOE.index``.
    """
    LOG.info("Plotting channel %s", channel)
    # init plot
    if ax is None:
        _, ax = plt.subplots()

    # selection
    if choose_from is None:
        choose_from = sorted(DOE.index)
    # Draw without replacement so no simulation is plotted (and listed in the legend) twice,
    # and cap the draw at the pool size because fewer candidates than n_samples may exist.
    n_draw = min(n_samples, len(choose_from))
    samples = sorted(RNG.choice(choose_from, size=n_draw, replace=False)) if n_draw > 0 else []

    # plot
    for idx in samples:
        # The first index level is matched against the simulation ID; all rows of that
        # simulation are taken and plotted over the "TIME" level.
        data = CHANNELS.loc[(idx, slice(None)), channel]
        ax.plot(data.index.get_level_values("TIME"), data.values, label=idx)

    # format
    ax.grid()
    if samples:
        # Skipped for an empty draw: legend() without labelled artists only emits a warning.
        ax.legend()
    ax.set_title(channel)


# Quick visual check of the helper on a single channel.
plot_channel(channel="03CHST0000OCCUACXD")

In [None]:
def plot_channel_conditional_doe(factor: str, channel: str, store: Optional[bool] = False) -> None:
    """Plot a channel in one panel per distinct value of a DOE factor.

    Each panel shows a random subset (drawn by ``plot_channel``) of the simulations whose
    ``factor`` equals that panel's value. Panels share both axes and are ordered by
    increasing factor value.

    Args:
        factor: Column of ``DOE`` to condition on.
        channel: Column of ``CHANNELS`` to plot.
        store: When truthy, save the figure below ``PIC_DIR`` and close it instead of
            leaving it open for inline display.
    """
    LOG.info("Plotting channel %s conditional on %s", channel, factor)

    # Sorted so the panels read left to right in increasing factor value rather than in
    # order of first appearance in the table.
    factor_values = sorted(DOE[factor].unique())
    if not factor_values:
        # plt.subplots cannot build a grid with zero columns.
        LOG.warning("No values found for factor %s, nothing to plot", factor)
        return

    # squeeze=False always returns a 2-D array of Axes, so a factor with a single level
    # is handled the same way as one with many.
    fig, axes = plt.subplots(
        ncols=len(factor_values),
        sharex=True,
        sharey=True,
        figsize=(25, 5),
        layout="constrained",
        squeeze=False,
    )
    for panel, factor_value in zip(axes[0], factor_values):
        plot_channel(
            channel=channel,
            ax=panel,
            choose_from=sorted(DOE[DOE[factor].eq(factor_value)].index),
        )
        # Replaces the channel title set by plot_channel; the channel goes in the suptitle.
        panel.set_title(f"{factor}={factor_value:.2f}")
    fig.suptitle(channel)

    if store:
        pic_path = PIC_DIR / "channel_conditional_doe" / f"{channel}_{factor}.png"
        pic_path.parent.mkdir(parents=True, exist_ok=True)
        LOG.info("Saving picture to %s", pic_path)
        fig.savefig(pic_path)
        # Closed after saving so batch runs do not accumulate open figures.
        plt.close(fig)

    LOG.info("Done")


# Quick visual check on one factor/channel pair; the figure is shown inline, not saved.
plot_channel_conditional_doe(factor="PAB_M_Scal", channel="03CHST0000OCCUACXD")

In [None]:
# Batch export: one figure per (channel, factor) pair, saved to disk instead of shown.
# Per-figure INFO messages are silenced while the loop runs; the finally clause restores
# the level even if a plot raises or the cell is interrupted.
LOG.setLevel(custom_log.LEVELS.WARNING)
try:
    for channel, factor in tqdm.tqdm(list(product(CHANNELS.columns, DOE.columns))):
        plot_channel_conditional_doe(factor=factor, channel=channel, store=True)
finally:
    LOG.setLevel(custom_log.LEVELS.INFO)

In [None]:
def plot_parameter_influence(channel: str, para_pos: int = 2, store: bool = False) -> None:
    """Plot one-factor-at-a-time sweeps of a channel, one panel per DOE factor.

    For each DOE column, every other column is pinned to a reference level and the channel
    is plotted for all simulations that match. Each line is labelled with the value of the
    swept factor and the simulation ID.

    Args:
        channel: Column of ``CHANNELS`` to plot.
        para_pos: Position within each factor's sorted distinct values that selects the
            reference level the other factors are pinned to.
        store: When true, save the figure below ``PIC_DIR`` and close it instead of
            leaving it open for inline display.

    Raises:
        IndexError: If a factor that has to be pinned has no level at ``para_pos``.
    """
    factors = list(DOE.columns)

    # Reference level per factor, computed once instead of re-sorting inside the nested
    # loop. With a single factor nothing is ever pinned, so no level is looked up.
    pinned_level = {}
    if len(factors) > 1:
        for name in factors:
            levels = sorted(DOE[name].unique())
            try:
                pinned_level[name] = levels[para_pos]
            except IndexError:
                raise IndexError(
                    f"para_pos={para_pos} is out of range for factor {name!r} with {len(levels)} distinct values"
                ) from None

    # squeeze=False always returns a 2-D array of Axes, so a DOE with a single factor
    # is handled the same way as one with many.
    fig, axes = plt.subplots(
        ncols=len(factors),
        figsize=(20, 10),
        sharex=True,
        sharey=True,
        layout="constrained",
        squeeze=False,
    )
    fig.suptitle(channel)

    for panel, col in zip(axes[0], factors):
        # Start from an explicit all-True mask, then keep only the rows where every
        # other factor sits at its reference level.
        mask = pd.Series(True, index=DOE.index)
        for other_col in factors:
            if other_col != col:
                mask &= DOE[other_col].eq(pinned_level[other_col])

        for idx in DOE[mask].index:
            ch_data = CHANNELS.loc[(idx, slice(None)), channel]
            panel.plot(ch_data.index.get_level_values("TIME"), ch_data.values, label=f"{DOE.loc[idx, col]:.2f} (ID {idx})")
        panel.set_title(col)
        panel.grid()
        if mask.any():
            # Skipped for empty panels: legend() without labelled artists only warns.
            panel.legend()

    if store:
        pic_path = PIC_DIR / "plot_parameter_influence" / f"{channel}.png"
        pic_path.parent.mkdir(parents=True, exist_ok=True)
        LOG.info("Saving picture to %s", pic_path)
        fig.savefig(pic_path)
        # Closed after saving so batch runs do not accumulate open figures.
        plt.close(fig)

# Quick visual check on a single channel; the figure is shown inline, not saved.
plot_parameter_influence(channel="03CHST0000OCCUACXD")

In [None]:
# Batch export: one parameter-influence figure per channel, saved to disk instead of shown.
# Per-figure INFO messages are silenced while the loop runs; the finally clause restores
# the level even if a plot raises or the cell is interrupted.
LOG.setLevel(custom_log.LEVELS.WARNING)
try:
    for ch in tqdm.tqdm(CHANNELS.columns):
        plot_parameter_influence(channel=ch, store=True)
finally:
    LOG.setLevel(custom_log.LEVELS.INFO)