In [None]:
import json
import logging
import os
import sys
from collections import defaultdict
from itertools import product
from pathlib import Path
from typing import Dict, List, Optional

import IPython
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import pyarrow.parquet as pq
import seaborn as sns
import textalloc as ta
from IPython.display import display
from sklearn.dummy import DummyClassifier
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, f1_score, precision_score, r2_score, recall_score
from tqdm import tqdm

# Path of this notebook file, read from the IPython module locals.
# NOTE(review): the "__vsc_ipynb_file__" key looks VS Code specific — confirm this
# cell is not expected to run under another Jupyter front end.
NOTEBOOK_PATH: Path = Path(IPython.extract_module_locals()[1]["__vsc_ipynb_file__"])
# The project root is taken to be two directory levels above the notebook file.
PROJECT_DIR: Path = NOTEBOOK_PATH.parent.parent
# Must run before the `src.*` imports below so that the package can be resolved.
sys.path.append(str(PROJECT_DIR))
import src.utils.custom_log as custom_log
import src.utils.json_util as json_util
from src._StandardNames import StandardNames
from src.evaluate._Data import Data
from src.load.LoadForClassification import RENAMER, LoadForClassification
from src.utils.PathChecker import PathChecker
from src.utils.Csv import Csv
from src.utils.set_rcparams import set_rcparams

# Work relative to the project root from here on and apply the project's plot settings.
os.chdir(PROJECT_DIR)
set_rcparams()

# Notebook-wide logger at INFO level.
LOG: logging.Logger = logging.getLogger(__name__)
custom_log.init_logger(log_lvl=logging.INFO)
LOG.info("Log start, project directory is %s (exist: %s)", PROJECT_DIR, PROJECT_DIR.is_dir())

# Shared helper instances used by the cells below.
CHECK: PathChecker = PathChecker()
STR: StandardNames = StandardNames()

# Figures go to reports/figures/<notebook name>; the sub-directory is created on demand.
FIG_DIR: Path = CHECK.check_directory(PROJECT_DIR / "reports" / "figures", exit=False)
FIG_DIR /= NOTEBOOK_PATH.stem
FIG_DIR.mkdir(parents=True, exist_ok=True)
LOG.info("Figure directory is %s (exist: %s)", FIG_DIR, FIG_DIR.is_dir())

# Directory holding the experiment runs, and the glob pattern (used with EXP_DIR.glob
# in the following cells) that selects the runs to aggregate.
EXP_DIR:Path = CHECK.check_directory(PROJECT_DIR /"experiments")
DIR_SEARCH_STR:str = "2024-12-*-*-*-*_pure_cnn_95HIII_injury_criteria_from_doe_sobol_20240705_194200"


In [None]:
# Directories below EXP_DIR that match the search pattern, in sorted order
sorted(run_dir for run_dir in EXP_DIR.glob(DIR_SEARCH_STR) if run_dir.is_dir())

In [None]:
def get_results() -> pd.DataFrame:
    """Gather the rows labelled ``-1`` from every matching experiment's results file.

    The directories matching ``DIR_SEARCH_STR`` below ``EXP_DIR`` are visited in
    sorted order; the position of a directory in that order becomes its
    "Repetition" number.

    Returns:
        pd.DataFrame: all selected rows stacked, indexed by ("Repetition", "Data")
        and sorted by that index.
    """
    run_dirs = sorted(path for path in EXP_DIR.glob(DIR_SEARCH_STR) if path.is_dir())

    frames = []
    for repetition, run_dir in enumerate(run_dirs):
        selected = Csv(csv_path=run_dir / STR.fname_results_csv).read().loc[-1]
        # replace the original row labels by the repetition number
        selected.index = [repetition] * selected.shape[0]
        selected.index.name = "Repetition"
        # second index level: the data split stored in the "Data" column
        frames.append(selected.set_index("Data", append=True))

    return pd.concat(frames).sort_index()


RESULTS: pd.DataFrame = get_results()
RESULTS

In [None]:
# Per-split median over all repetitions; the heading names the statistic that is
# actually computed (it previously read "MEAN" although the median is shown).
print("MEDIAN")
display(RESULTS.groupby("Data").median().round(4))

In [None]:
# Half of the (max - min) range over repetitions, per data split
RESULTS.groupby("Data").pipe(lambda by_split: (by_split.max() - by_split.min()) / 2).round(4)

In [None]:
# Half of the (max - min) range per data split, then the median across columns
(
    RESULTS.groupby("Data")
    .pipe(lambda by_split: (by_split.max() - by_split.min()) / 2)
    .round(4)
    .median(axis=1)
)

In [None]:
# Centre of the [min, max] interval over repetitions, per data split
(
    RESULTS.groupby("Data")
    .pipe(lambda by_split: by_split.min() + (by_split.max() - by_split.min()) / 2)
    .round(4)
)

In [None]:
# Centre of the [min, max] interval per data split, then the median across columns
(
    RESULTS.groupby("Data")
    .pipe(lambda by_split: by_split.min() + (by_split.max() - by_split.min()) / 2)
    .round(4)
    .median(axis=1)
)

In [None]:
# Rows labelled -1 from the results file of the single 05HIII run, indexed by "Data"
db_5 = (
    Csv(
        csv_path=PROJECT_DIR
        / "experiments"
        / "2024-12-04-12-45-14_pure_cnn_05HIII_injury_criteria_from_doe_sobol_20240705_194200"
        / STR.fname_results_csv
    )
    .read()
    .loc[-1]
    .set_index("Data")
)
db_5.round(2)

In [None]:
# Train-minus-test difference for the 05HIII run ...
display(db_5.loc["Train"].sub(db_5.loc["Test"]).round(3))
# ... and the largest of these differences
db_5.loc["Train"].sub(db_5.loc["Test"]).round(3).max()

In [None]:
# Train-minus-test difference for every repetition ...
display(
    RESULTS.loc[pd.IndexSlice[:, "Train"], :]
    .droplevel("Data")
    .sub(RESULTS.loc[pd.IndexSlice[:, "Test"], :].droplevel("Data"))
    .round(3)
)
# ... and the single largest difference over all repetitions and columns
(
    RESULTS.loc[pd.IndexSlice[:, "Train"], :]
    .droplevel("Data")
    .sub(RESULTS.loc[pd.IndexSlice[:, "Test"], :].droplevel("Data"))
    .round(3)
    .max()
    .max()
)

In [None]:
def get_classification_results() -> pd.DataFrame:
    """Gather the fold ``-1`` rows of all matching classification experiments.

    For every entry below ``EXP_DIR`` matching the pattern, the results file is read
    with its first three columns as index, a row-wise "Median" column is added, and
    the rows whose "Fold" level equals ``-1`` are kept. The number of classes and the
    percentile are parsed from the directory name and appended as index levels.

    Returns:
        pd.DataFrame: all selected rows stacked and sorted by index; the "Fold"
        level is removed, "Classes" and ``STR.perc`` are the last two levels.
    """
    pattern = "2024-12-0*-*-*-*_pure_cnn_*HIII_injury_criteria_classes_*_from_doe_sobol_20240705_194200"

    frames = []
    for run_dir in EXP_DIR.glob(pattern):
        scores = Csv(csv_path=run_dir / STR.fname_results_csv).read(index_cols=[0, 1, 2])
        scores["Median"] = scores.median(axis=1)

        # keep fold -1 only, then drop the now constant "Fold" level
        fold_rows = scores.loc[pd.IndexSlice[:, -1, :], :].copy().droplevel("Fold")

        # directory name: ..._<PP>HIII_..._classes_<N>_from_doe_sobol_<date>_<time>
        name_parts = run_dir.stem.split("_")
        fold_rows["Classes"] = int(name_parts[-6])
        fold_rows[STR.perc] = int(name_parts[3][:2])

        frames.append(fold_rows.set_index(["Classes", STR.perc], append=True))

    return pd.concat(frames).sort_index()

CLASS_RESULTS = get_classification_results()
CLASS_RESULTS

In [None]:
# Test-split rows for 5 classes and the 95th percentile
CLASS_RESULTS.loc[pd.IndexSlice[:, "Test", 5, 95], :]

In [None]:
# Group-wise minimum per (Classes, percentile, data split), rounded to 2 decimals,
# with "Classes" unstacked and the table transposed; only the listed rows are shown.
# The percentile level is addressed through STR.perc in both places (the index was
# previously rebuilt with a hard-coded "PERC", which only works while STR.perc == "PERC").
CLASS_RESULTS.groupby(["Classes", STR.perc, "Data"]).min().round(2).reset_index().set_index(
    ["Classes", STR.perc, "Data"]
).unstack("Classes").T.loc[
    (
        [
            "Head_HIC15",
            "Head_a3ms",
            "Neck_My_Extension",
            "Neck_Fz_Max_Tension",
            "Neck_Fx_Shear_Max",
            "Chest_Deflection",
            "Chest_VC",
            "Femur_Fz_Max_Compression",
            "Median",
        ],
        slice(None),
    ),
]

In [None]:
def get_channel_results() -> pd.DataFrame:
    """Gather the rows labelled ``-1`` of all matching channel-prediction experiments.

    For every entry below ``EXP_DIR`` matching the pattern, the results file is read,
    the rows labelled ``-1`` are kept, and the percentile parsed from the directory
    name is added as column ``STR.perc``.

    Returns:
        pd.DataFrame: all selected rows stacked, indexed by ("Data", ``STR.perc``)
        and sorted by that index.
    """
    search_str = "2024-12-*-*-*-*_pure_cnn_*HIII_channels_from_doe_sobol_20240705_194200"
    results = []
    for exp_dir in EXP_DIR.glob(search_str):
        # .copy() detaches the selection from the frame returned by read(), so the
        # column assignment below does not write into a slice (no
        # SettingWithCopyWarning); get_classification_results does the same.
        db = Csv(csv_path=exp_dir / STR.fname_results_csv).read().loc[-1].copy()
        # directory name: <timestamp>_pure_cnn_<PP>HIII_... -> first two characters of the 4th part
        db[STR.perc] = int(exp_dir.stem.split("_")[3][:2])
        results.append(db.set_index(["Data", STR.perc]))

    return pd.concat(results).sort_index()

CH_RESULTS = get_channel_results()
CH_RESULTS


In [None]:
# Channel results transposed and rounded to 2 decimals, restricted to the rows
# labelled with the channel codes listed below (in this order).
CH_RESULTS.T.round(2).loc[
    [
        "03HEAD0000OCCUACRD",
        "03CHST0000OCCUACRD",
        "03CHST0000OCCUDSXD",
        "03NECKUP00OCCUMOYD",
        "03NECKUP00OCCUFOZD",
        "03NECKUP00OCCUFOXD",
        "03FEMRLE00OCCUFOZD",
        # NOTE(review): exact duplicate of the previous entry, so this row is shown
        # twice — possibly a different channel was intended here; confirm.
        "03FEMRLE00OCCUFOZD",
    ]
]