# SoH estimation evaluation
The goal of this notebook is to implement a function to evaluate our SoH estimations.  
> Note:  
> Our implementation is based on the assumption that the change of the SoH over the odometer should be linear.  
> This is because the span of data we have per vehicle is short enough that the evolution of SoH over odometer can be approximated by a straight line.  

The evaluation will have three scores:
-   Standard deviation of the SoH around the regression line (fitted per vehicle): 
    1. We compute the linear regression (LR) of SoH over odometer.
    1. We compute the difference between each SoH point and the regression line.
    1. We compute the standard deviation of these differences.
-   The residual of the fitted slope to a "base slope", i.e. the average SoH lost per km traveled.  
-   The residual to physical SoH estimations sent by Ayvens

## Setup

### Imports

In [None]:
import warnings

import plotly.express as px
import numpy as np
from sklearn.linear_model import LinearRegression

from core.pandas_utils import *
from core.stats_utils import *
from transform.raw_results.tesla_results import get_results
from transform.raw_results.get_tesla_soh_readouts import aviloo_readouts

### Data extraction

In [None]:
# Load the Tesla results table scored by the evaluation below. The code further
# down reads its `vin`, `odometer` and `soh` columns.
# The trailing comment keeps an alternative source: a local parquet cache.
results = get_results() #pd.read_parquet("data_cache/tesla_results.parquet")

## Evaluation

In [None]:

# Reference SoH change per kilometre; each vehicle's fitted slope is compared to it.
# NOTE(review): the value is 0.08 per 10,000 km. The intercept is compared to 1
# elsewhere in this notebook, which suggests SoH is a fraction, so this would be
# 8% per 10k km rather than the 0.8% stated in the inline comment - confirm
# which one is intended.
# NOTE(review): a SoH *loss* should produce a negative SoH-vs-odometer slope,
# yet this constant is positive - confirm the sign convention of the fitted slope.
BASE_SLOPE = 0.08 / 1e4 # base soh loss per kilometer (0.8% per 10k km)

def evaluate_soh_esimtations(results:DF, soh_cols:list[str], lr_func:callable) -> DF:
    """Score several SoH estimation columns and gather the scores in one table.

    Args:
        results: Table containing every column listed in ``soh_cols``; it is
            passed unchanged to ``evaluate_single_soh_estimation``.
        soh_cols: Names of the SoH estimation columns to score.
        lr_func: Regression callable forwarded to ``evaluate_single_soh_estimation``.

    Returns:
        A DataFrame with one row per entry of ``soh_cols`` and one column per
        score returned by ``evaluate_single_soh_estimation``.
    """
    with warnings.catch_warnings():
        # Silence this specific numeric warning for the duration of the scoring only.
        warnings.filterwarnings("ignore", message="invalid value encountered in subtract")
        scores_by_col = {}
        for soh_col in soh_cols:
            scores_by_col[soh_col] = evaluate_single_soh_estimation(results, soh_col, lr_func)
        # concat puts each estimation in its own column; transpose so that each
        # estimation becomes a row instead.
        scores_as_columns = pd.concat(scores_by_col, axis="columns")
        return scores_as_columns.transpose()

def evaluate_single_soh_estimation(results:DF, soh_col:str, lr_func:callable) -> pd.Series:
    """Score one SoH estimation column.

    Args:
        results: Table holding at least the ``vin`` and ``odometer`` columns
            plus the column named by ``soh_col``.
        soh_col: Name of the column of ``results`` containing the SoH
            estimation to score.
        lr_func: Callable applied to every ``vin`` group as
            ``lr_func(group, "odometer", soh_col)``. Its output must provide
            the ``slope`` and ``intercept`` values used below.

    Returns:
        A Series with four scores:

        - ``std_soh_diff_to_lr``: standard deviation, over all rows, of the
          difference between the vehicle's regression line and the estimation.
        - ``MAE_to_aviloo_soh_readouts``: mean absolute difference between
          ``soh_readout`` (from ``aviloo_readouts``) and the per-vehicle median
          of the estimation.
        - ``MAE_to_base_trendline``: mean absolute difference between the
          fitted slopes and ``BASE_SLOPE``.
        - ``MAE_to_1_intercept``: mean absolute difference between the fitted
          intercepts and 1.
    """
    # Regression parameters fitted separately for each vehicle.
    lr_params:DF = (
        results
        .groupby("vin", observed=True, as_index=False)
        .apply(lr_func, "odometer", soh_col, include_groups=False)
    )

    # Columns are addressed directly instead of being interpolated into an
    # eval() string, so a soh_col that is not a valid Python identifier
    # (spaces, dashes, ...) works too.
    with_lr = results.merge(lr_params, "left", "vin")
    diff_to_lr = with_lr["intercept"] + with_lr["odometer"] * with_lr["slope"] - with_lr[soh_col]

    # Left merge: vehicles without a readout yield NaN differences, which
    # mean() skips by default.
    median_vs_readout = (
        results
        .groupby("vin", observed=True)
        .agg({soh_col: "median"})
        .merge(aviloo_readouts, "left", "vin")
    )
    diff_to_readout = median_vs_readout["soh_readout"] - median_vs_readout[soh_col]

    return pd.Series({
        "std_soh_diff_to_lr": diff_to_lr.std(),
        "MAE_to_aviloo_soh_readouts": diff_to_readout.abs().mean(),
        "MAE_to_base_trendline": lr_params["slope"].sub(BASE_SLOPE).abs().mean(),
        "MAE_to_1_intercept": lr_params["intercept"].sub(1).abs().mean(),
    })

# Score the real `soh` column next to a column of random values, which gives a
# reference for what the scores look like for a meaningless estimation.
# There is deliberately no comma after the closing parenthesis: it would turn
# the cell's value into a 1-tuple and the notebook would show a tuple repr
# instead of the scores table.
evaluate_soh_esimtations(
    results.assign(random_soh=np.random.rand(len(results))),
    ["soh", "random_soh"],
    lr_params_as_series,
)