In [1]:
import sys
from collections import defaultdict
from itertools import combinations
from pathlib import Path
import multiprocessing
from typing import Tuple, Union, List

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.graph_objects as go

from IPython.display import display


# Put the parent of the current working directory on the import path so that
# the ``src`` package imported right below can be resolved.
project_dir = Path(".").absolute().parent
# sys.path holds plain strings, so the string form has to be compared; testing
# the Path object itself never matches and appends a duplicate on every re-run.
if str(project_dir) not in sys.path:
    sys.path.append(str(project_dir))
import src.utils.custom_log as custom_log
import src.utils.json_util as json_util
from src.utils.Csv import Csv
from src.utils.csv_data_load import load_single_channel

In [None]:
# Notebook-wide logger used by all cells below.
# NOTE(review): log_lvl=10 presumably corresponds to logging.DEBUG - confirm
# against custom_log.init_logger.
log = custom_log.init_logger(log_lvl=10)

In [3]:
# Base directory that is searched for "V*" entries by the cells below.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
b_path = Path(r"Q:\Honda_Accord_2014_Sled_with_HIII_Rigid_Seat_SpeedOpt_BigDOE\doe_big_grid_20230922_154140")

In [None]:
# Sanity check: report every "V*" entry below b_path that has no channel archive.
log.info("Start")
for path in b_path.glob("V*"):
    # A Path object is always truthy, so the existence of the archive has to be
    # tested explicitly - otherwise the error branch can never be reached.
    if not (path / "channels.csv.zip").is_file():
        log.error("Issue in %s", path)
log.info("Done")

In [None]:
def load_data(ch_name: str, processes: int = 8) -> pd.DataFrame:
    """Collect one channel from every ``V*`` directory below ``b_path``.

    Each existing ``channels.csv.zip`` is handed, together with ``ch_name``,
    to ``load_single_channel`` in a worker process.

    Args:
        ch_name: Channel name forwarded to ``load_single_channel``.
        processes: Number of worker processes of the pool (default 8).

    Returns:
        DataFrame built from the worker results, using the index of the first
        archive as read by ``Csv``.

    Raises:
        FileNotFoundError: If no ``V*`` directory below ``b_path`` contains a
            ``channels.csv.zip``.
    """
    log.info("Start")

    archives = (run_dir / "channels.csv.zip" for run_dir in b_path.glob("V*") if run_dir.is_dir())
    p_tuples: List[Tuple[Path, str]] = [(archive, ch_name) for archive in archives if archive.is_file()]
    if not p_tuples:
        # Fail early with a clear message instead of starting the pool and
        # hitting an IndexError on p_tuples[0] further down.
        raise FileNotFoundError(f"No channels.csv.zip found in any V* directory below {b_path}")

    with multiprocessing.Pool(processes=processes) as p:
        log.info("Run parallel")
        col = p.map(func=load_single_channel, iterable=p_tuples, chunksize=1)
    log.info("Data collected - transform to DataFrame")
    # Workers may hand back None; those entries are dropped.
    # NOTE(review): the remaining entries are presumably (name, values) pairs,
    # since they are fed to dict() below - confirm against load_single_channel.
    col = [x for x in col if x is not None]

    # The first archive is read once more only to obtain the index.
    # NOTE(review): assumes all archives share that index - confirm.
    db = Csv(csv_path=p_tuples[0][0], log=log).read()

    return pd.DataFrame(dict(col), index=db.index)


# Load the channel that all following cells analyse and show the resulting frame.
ch_data = load_data(ch_name="03HEAD0000H395ACRC")
ch_data

In [None]:
# Row-wise percentiles over all columns of ch_data, drawn as one curve per level.
percentile_levels = [0.05, 0.25, 0.50, 0.75, 0.95]
quants = ch_data.quantile(percentile_levels, axis=1).T
f, ax = plt.subplots()
for level, band in quants.items():
    # Legend entry is the level expressed as a two-digit percentage.
    ax.plot(quants.index, band, label=f"{level*100:02.0f}")
ax.grid()
ax.legend(title="Percentile")

In [None]:
# Overlay every column of ch_data; only curves whose maximum exceeds the
# threshold receive a legend entry so that they can be identified.
f, ax = plt.subplots()
y_lim = 1500  # labelling threshold on the curve maximum (not the axis limit set below)
for c in ch_data.columns:
    if ch_data[c].max() > y_lim:
        ax.plot(ch_data.index, ch_data[c], label=c)
    else:
        ax.plot(ch_data.index, ch_data[c])
ax.grid()
# The legend lists the curves ABOVE the threshold, so the title has to say that.
ax.legend(title=f"max > {y_lim}")
ax.set_ylim([0,1000]);

In [None]:
def plot_outlier(index_min: float = 90, whisker: float = 1.5, min_count: int = 250) -> None:
    """Find and plot the columns of ``ch_data`` that often exceed the upper fence.

    For every row with an index value above ``index_min`` the upper fence
    ``Q3 + whisker * IQR`` is computed across all columns. Per column, the
    number of rows lying above that fence is counted. Only the upper fence is
    applied; values below the lower fence are not counted.

    Args:
        index_min: Only rows whose index value is greater than this are evaluated.
        whisker: Multiplier of the inter-quartile range used for the fence.
        min_count: Minimum number of rows above the fence for a column to be
            displayed and plotted.

    Returns:
        None. Displays the selected counts and draws two figures: a box plot of
        the counts of all columns and the full curves of the selected columns.
    """
    f1 = ch_data[ch_data.index > index_min]
    q1 = f1.quantile(0.25, axis=1)
    q3 = f1.quantile(0.75, axis=1)
    fence_up = q3 + whisker * (q3 - q1)

    # Compare each row against its own fence, then count the hits per column.
    is_outlier = f1.gt(fence_up, axis=0)
    n_outliers = is_outlier.sum(axis=0)
    f2 = n_outliers[n_outliers.ge(min_count)]
    display(f2)

    fig, ax = plt.subplots()
    ax.boxplot(n_outliers)
    ax.set_title("Rows above the upper fence per column")
    ax.set_ylabel("Count")

    fig, ax = plt.subplots()
    for sid in f2.index:
        # The full curve is drawn, not only the part above index_min.
        ax.plot(ch_data.index, ch_data[sid].values)
    ax.set_title(f"Columns with >= {min_count} rows above the upper fence")


# Run the outlier analysis on the ch_data loaded above.
plot_outlier()