# SOH estimation hyperparameter tuning
Using the method demonstrated in the [estimation evaluation notebook](./soh_estimation_evaluation.ipynb), we will try to improve the model's features and degrees of freedom.  

## Setup

### Imports

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import linregress
import pandas as pd
from pandas import Series
from pandas import DataFrame as DF
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, FunctionTransformer

from core.pandas_utils import floor_to
from core.plt_utils import plt_3d_df
from transform.watea.soh_estimation import get_processed_cluster, get_soh_per_charges
from transform.watea.watea_config import SOH_ESTIMATION_FEATURES

### Data extraction

In [None]:
# Fetch the processed cluster through the helper imported from
# transform.watea.soh_estimation and keep it in a notebook-level variable.
processed_cluster = get_processed_cluster()


### Constants

In [None]:
# Labels given, in order, to the values of a `linregress` result when it is
# wrapped in a Series. "intercept_stderr" is deliberately not part of the list.
INDEX = ["slope", "intercept", "rvalue", "pvalue", "stderr"]


### Functions

In [None]:
# scorer
def score_estimation(processed_cluster: DF, soh_col: str = "soh") -> Series:
    """Score an SOH estimation from per-``id`` linear fits of SOH vs. odometer.

    For every group of rows sharing the same ``id`` (presumably one vehicle),
    ``soh_col`` is linearly regressed against ``odometer`` with
    ``scipy.stats.linregress``. The regression statistics of all groups are
    then averaged.

    Args:
        processed_cluster: Frame containing at least the columns ``id``,
            ``odometer`` and ``soh_col``.
        soh_col: Name of the column holding the SOH estimate to score.

    Returns:
        A Series (not a scalar) whose entries are the mean, over all ``id``
        groups, of each statistic listed in ``INDEX`` plus ``r2``
        (``rvalue ** 2``).
    """
    per_id_fit = processed_cluster.groupby("id").apply(
        lambda df: Series(linregress(df["odometer"], df[soh_col]), INDEX),
        include_groups=False,
    )
    # `per_id_fit` already holds exactly one row per id, so the column means
    # can be taken directly without grouping on the index a second time.
    return per_id_fit.eval("r2 = rvalue ** 2").mean()

# soh estimation
def soh_estimation(
    X: DF,
    features: list[str] = SOH_ESTIMATION_FEATURES,
    degrees: int = 6,
) -> tuple[DF, Pipeline]:
    """Fit a polynomial energy model and derive an SOH estimate from it.

    Steps:
        1. Fit a polynomial regression of ``energy_added`` on ``features``
           over all rows of ``X``.
        2. Shift the regressor's intercept by the mean residual observed on
           the rows flagged ``is_default_100_soh``, so that the model is
           centred on those reference rows.
        3. Use the shifted model's prediction as the reference energy and
           compute ``soh = 100 * energy_added / default_100_energy_added``.

    Args:
        X: Frame containing the ``features`` columns, ``energy_added`` and
            the boolean column ``is_default_100_soh``. It is not modified.
        features: Columns used as regression inputs.
        degrees: Degree of the polynomial feature expansion. This parameter
            is now honoured; the degree was previously hard-coded to 10.

    Returns:
        A tuple ``(cluster, soh_estimator)`` where ``cluster`` is a copy of
        ``X`` with the added columns ``general_energy_added`` (prediction
        before the intercept shift), ``default_100_energy_added`` (prediction
        after the shift), ``soh`` and ``residual`` (absolute difference
        between ``default_100_energy_added`` and ``energy_added``), and
        ``soh_estimator`` is the fitted, intercept-adjusted pipeline.
    """
    # Work on a copy so the caller's frame is left untouched.
    cluster = X.copy()
    x = cluster[features].values
    y = cluster["energy_added"].values

    soh_estimator = Pipeline([
        ("poly_features", PolynomialFeatures(degree=degrees)),
        ("regressor", LinearRegression()),
    ]).fit(X=x, y=y)
    cluster["general_energy_added"] = soh_estimator.predict(X=x).squeeze()

    # Re-centre the model on the reference rows. Plain arrays are passed to
    # `predict` everywhere because the pipeline was fitted on plain arrays.
    reference = cluster.query("is_default_100_soh")
    reference_prediction = soh_estimator.predict(reference[features].values)
    residuals = reference["energy_added"] - reference_prediction
    regressor = soh_estimator.named_steps["regressor"]
    regressor.intercept_ = regressor.intercept_ + residuals.mean()

    cluster = (
        cluster
        .assign(default_100_energy_added=soh_estimator.predict(x))
        .eval("soh = 100 * energy_added / default_100_energy_added")
        .eval("residual = default_100_energy_added - energy_added")
    )
    cluster["residual"] = cluster["residual"].abs()

    return cluster, soh_estimator

## Tuning

### Base score