# SoH estimation improvement 2
The goal of this notebook is to further improve the state-of-health (SoH) estimation by examining the estimation results in detail.


# Setup

### Imports

In [None]:
import plotly.express as px

from core.pandas_utils import *
from core.caching_utils import cache_result
from core.ev_models_info import models_info
from core.stats_utils import lr_params_as_series
from transform.fleet_info.main import fleet_info
from transform.processed_tss.ProcessedTimeSeries import ProcessedTimeSeries

### Data extraction

In [None]:
@cache_result('data_cache/tesla_tss.parquet', on="local_storage")
def get_tss() -> DF:
    """Build the processed time series for the "tesla" make.

    The ``cache_result`` decorator is configured with the parquet path
    ``data_cache/tesla_tss.parquet`` on local storage; presumably it reuses
    that file instead of rebuilding when it exists (confirm in
    ``core.caching_utils``).
    """
    tesla_tss = ProcessedTimeSeries("tesla")
    return tesla_tss

@cache_result('data_cache/tesla_charges.parquet', on="local_storage")
def get_charges() -> DF:
    """Aggregate the Tesla time series into one row per charging session.

    Rows flagged ``trimmed_in_charge`` are grouped by
    ``(vin, trimmed_in_charge_idx)`` and summarised (energy added, SoC
    bounds, temperatures, charger metadata, ...). Three derived columns are
    then appended:

    * ``soh``: ``energy_added / (soc_diff / 100 * capacity)``
    * ``energy_added_min_end``: last minus minimum ``charge_energy_added``
    * ``soh_min_end``: same ratio as ``soh`` but using
      ``energy_added_min_end`` as the numerator

    NOTE(review): ``series_start_end_diff`` comes from ``core.pandas_utils``;
    presumably it is "last value minus first value" of the series -- confirm.
    Charges whose ``soc_diff`` is 0 are not filtered here, so their SoH
    columns will be inf/NaN.

    Returns:
        The per-charge DataFrame, with ``vin`` and ``trimmed_in_charge_idx``
        restored as regular columns.
    """
    charges = (
        get_tss()
        .query("trimmed_in_charge")
        .groupby(["vin", "trimmed_in_charge_idx"])
        .agg(
            energy_added=pd.NamedAgg("charge_energy_added", series_start_end_diff),
            energy_added_min=pd.NamedAgg("charge_energy_added", "min"),
            energy_added_end=pd.NamedAgg("charge_energy_added", "last"),
            soc_min=pd.NamedAgg("soc", "min"),
            soc_max=pd.NamedAgg("soc", "max"),
            soc_diff=pd.NamedAgg("soc", series_start_end_diff),
            soc_start=pd.NamedAgg("soc", "first"),
            soc_end=pd.NamedAgg("soc", "last"),
            inside_temp=pd.NamedAgg("inside_temp", "mean"),
            outside_temp=pd.NamedAgg("outside_temp", "mean"),
            capacity=pd.NamedAgg("capacity", "first"),
            odometer=pd.NamedAgg("odometer", "first"),
            fast_charger_type=pd.NamedAgg("fast_charger_type", "first"),
            # Number of time-series samples that make up the charge.
            size=pd.NamedAgg("soc", "size"),
            model=pd.NamedAgg("model", "first"),
            version=pd.NamedAgg("version", "first"),
            date=pd.NamedAgg("date", "first"),
            charge_rate=pd.NamedAgg("charge_rate", "median"),
            fast_charger_present=pd.NamedAgg("fast_charger_present", "median"),
            charge_current_request=pd.NamedAgg("charge_current_request", "median"),
            tesla_code=pd.NamedAgg("tesla_code", "first"),
            battery_heater=pd.NamedAgg("battery_heater", "median"),
        )
        .reset_index(drop=False)
        .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
        .eval("energy_added_min_end = energy_added_end - energy_added_min")
        .eval("soh_min_end = energy_added_min_end / (soc_diff / 100.0 * capacity)")
    )
    # The frame was previously built but never returned, so the function
    # (and whatever the cache decorator stored) evaluated to None.
    return charges

In [None]:
# Per-charge table: one row per (vin, trimmed_in_charge_idx) with the SoH
# estimates computed in get_charges().
charges = get_charges()

In [None]:
# Flag each charge by the band its `soh_min_end` estimate falls in.
# Series.between includes both bounds, so a value of exactly 0.9 is flagged
# in both bands.
charges = charges.eval("top_soh = soh_min_end.between(0.9, 1.0)")
charges = charges.eval("bottom_soh = soh_min_end.between(0.75, 0.9)")

In [None]:
# Number of distinct VINs per tesla_code, most represented codes first.
(
    charges
    .groupby("tesla_code")
    .agg({"vin": "nunique"})
    .reset_index(drop=False)
    .sort_values(by="vin", ascending=False)
)

In [None]:
# SoH estimate against mileage for every charge that spans more than 40 SoC
# points, coloured by tesla_code.
px.scatter(
    data_frame=(
        charges
        .sort_values(by="tesla_code")
        .dropna(subset=["soh_min_end", "odometer", "tesla_code"])
        .query("soc_diff > 40")
    ),
    x="odometer",
    y="soh_min_end",
    color="tesla_code",
    opacity=0.25,
    color_continuous_scale="Rainbow",
    # facet_row="tesla_code",  # enable to get one panel per tesla_code
)

In [None]:
# Inspect charges with an implausibly low SoH estimate, ordered by the number
# of samples they contain (`size`).
(
    charges
    .query("soh_min_end < 0.2")
    .sort_values(by="size")
)

In [None]:
# Raw time series of a single vehicle: SoC against mileage, coloured by the
# energy-added counter.
tss = get_tss()
px.scatter(
    data_frame=tss.query("vin == '5YJ3E7EA0KF370880'"),
    x="odometer",
    y="soc",
    color_continuous_scale="Rainbow",
    color="charge_energy_added",
)

In [None]:
# MTY13 only: SoH estimate against mileage for charges spanning more than
# 40 SoC points, coloured by whether the estimate is in the top band.
px.scatter(
    data_frame=(
        charges
        .dropna(subset=["soh_min_end", "odometer", "tesla_code"])
        .query("soc_diff > 40 & tesla_code == 'MTY13'")
    ),
    x="odometer",
    y="soh_min_end",
    color="top_soh",
    opacity=0.25,
    color_continuous_scale="Rainbow",
)

In [None]:
# MTY13 only: SoH estimate over time for charges spanning more than 40 SoC
# points, coloured by whether the estimate is in the top band.
px.scatter(
    data_frame=(
        charges
        .query("tesla_code == 'MTY13'")
        .dropna(subset=["soh_min_end", "odometer", "tesla_code"])
        .query("soc_diff > 40")
    ),
    x="date",
    y="soh_min_end",
    color="top_soh",
    color_continuous_scale="Rainbow",
    opacity=0.25,
)

In [None]:
# Per-VIN count of MTY13 charges falling in the top / bottom SoH band.
soh_sections = (
    charges
    .query("tesla_code == 'MTY13'")
    .eval("top_soh = soh_min_end.between(0.9, 1.0)")
    .eval("bottom_soh = soh_min_end.between(0.75, 0.9)")
    .groupby("vin")
    .agg({"top_soh": "sum", "bottom_soh": "sum", "odometer": "last"})
    .reset_index(drop=False)
)

# Share of each band among the charges that landed in either band.
soh_sections = soh_sections.eval("top_soh_ratio = top_soh / (top_soh + bottom_soh)")
soh_sections = soh_sections.eval("bottom_soh_ratio = bottom_soh / (top_soh + bottom_soh)")

soh_sections

In [None]:
# One point per VIN: number of top-band charges against bottom-band charges.
px.scatter(
    data_frame=soh_sections,
    color="vin",
    x='top_soh',
    y='bottom_soh',
)