In [None]:
import plotly.express as px

from core.pandas_utils import *
from core.stats_utils import *
from transform.raw_results.tesla_results import get_results
from transform.raw_results.get_tesla_soh_readouts import get_aviloo_soh_readouts

In [None]:
# Load the Tesla results from the local parquet cache into a DataFrame.
# NOTE(review): presumably this file is the cached output of `get_results` (imported above) -- confirm how it is produced.
results = pd.read_parquet("data_cache/tesla_results.parquet")

In [None]:
import warnings
# Globally ignore warnings whose message starts with "invalid value encountered in subtract".
# NOTE(review): presumably the numpy RuntimeWarning emitted when NaN/inf values are subtracted during the evaluation below -- confirm.
warnings.filterwarnings("ignore", message="invalid value encountered in subtract")

# Reference slope that the per-vehicle regression slopes (SoH vs. odometer) are compared against.
# NOTE(review): 0.8 / 1e4 reads as "0.8 SoH units per 10,000 km"; the SoH unit (percent vs. fraction) is not visible here -- confirm.
BASE_SLOPE = 0.8 / 1e4 # base soh loss per kilometer

def evaluate_esimtations(results:DF, soh_cols:list[str]) -> DF:
    """Evaluate several SoH estimation columns and stack their metrics.

    Args:
        results: Frame handed unchanged to ``evaluate_single_estimation``
            for every column in ``soh_cols``.
        soh_cols: Names of the columns of ``results`` to evaluate.

    Returns:
        A frame with one row per entry of ``soh_cols`` and one column per
        metric returned by ``evaluate_single_estimation``.
    """
    # The readouts do not depend on the column being evaluated, so fetch them
    # once instead of once per column.
    aviloo_soh_readouts = get_aviloo_soh_readouts()
    metrics_per_column = {
        soh_col: evaluate_single_estimation(results, soh_col, aviloo_soh_readouts)
        for soh_col in soh_cols
    }
    # Each value is a Series of metrics: concatenating them as columns and
    # transposing yields one row per evaluated column.
    return pd.concat(metrics_per_column, axis="columns").T

def evaluate_single_estimation(results:DF, soh_col:str, aviloo_soh_readouts:DF) -> pd.Series:
    """Compute quality metrics for one SoH estimation column.

    Args:
        results: Frame with at least the columns ``vin``, ``odometer`` and
            ``soh_col``.
        soh_col: Name of the column of ``results`` holding the estimation to
            evaluate.
        aviloo_soh_readouts: Frame with at least the columns ``vin`` and
            ``soh_readout``.

    Returns:
        A Series with three entries:

        * ``abs_diff_to_base_trendline``: mean of ``|slope - BASE_SLOPE|``.
        * ``abs_soh_residual``: mean of ``|median(soh_col) - soh_readout|``.
        * ``soh_std_mean``: mean over vehicles of the standard deviation of
          the residuals around each vehicle's regression line.
    """
    # One regression of `soh_col` against the odometer per vehicle; the merged
    # frame is expected to expose `slope` and `intercept` columns.
    lr_params:DF = (
        results
        .groupby("vin")
        .apply(lr_params_as_series, "odometer", soh_col, include_groups=False)
        .reset_index(drop=False)
    )
    # Plain column arithmetic is used instead of `eval` strings so that
    # `soh_col` does not have to be a valid Python identifier.
    per_vehicle:DF = (
        results
        .merge(lr_params, on="vin", how="left")
        .assign(lr_soh=lambda df: df["odometer"] * df["slope"] + df["intercept"])
        .assign(soh_lr_diff=lambda df: df[soh_col] - df["lr_soh"])
        .groupby("vin")
        .agg(**{
            "std_soh_lr_diff":pd.NamedAgg("soh_lr_diff", "std"),
            "nb_soh_counts":pd.NamedAgg(soh_col, "count"),
            soh_col:pd.NamedAgg(soh_col, "median"),
        })
        # Blank out the std of vehicles with at most two non-null values: a
        # line fitted through so few points leaves (almost) no residual, so
        # their std of ~0 would drag the mean down.
        .assign(std_soh_lr_diff=lambda df: df["std_soh_lr_diff"].where(df["nb_soh_counts"] > 2))
    )
    evaluation = (
        per_vehicle
        # The aggregation above dropped `slope`, so the regression parameters
        # have to be merged in again.
        .merge(lr_params, on="vin", how="left")
        # A left merge repeats a vehicle's row once per matching readout, so a
        # vin with several readouts weighs more in both means below.
        .merge(aviloo_soh_readouts[["vin", "soh_readout"]], on="vin", how="left")
        .assign(abs_soh_residual=lambda df: (df[soh_col] - df["soh_readout"]).abs())
        # NOTE(review): BASE_SLOPE is described as a SoH *loss* per kilometer
        # (positive), while a decreasing SoH gives a negative regression
        # slope -- confirm the sign convention of this difference.
        .assign(abs_diff_to_base_trendline=lambda df: (df["slope"] - BASE_SLOPE).abs())
        .agg({
            "abs_diff_to_base_trendline": "mean",
            "abs_soh_residual": "mean",
        })
    )
    # NOTE(review): the original comment here said the std should be averaged
    # per charge so that vehicles with more charges count more, but this is an
    # unweighted mean over vehicles -- confirm which one is intended.
    evaluation["soh_std_mean"] = per_vehicle["std_soh_lr_diff"].mean()
    return evaluation

# Evaluate the "soh" estimation column; the resulting frame has one row per evaluated column.
evaluations = evaluate_esimtations(results, ["soh"])
# Bare expression at the end of the cell so the notebook displays the frame.
evaluations