# SoH estimation improvement 2
The goal of this notebook is to further improve the SoH (State of Health) estimation by thoroughly examining the estimation results.


# Setup

### Imports

In [None]:
import plotly.express as px

from core.pandas_utils import *
from core.caching_utils import cache_result
from core.ev_models_info import models_info
from core.stats_utils import lr_params_as_series
from core.plt_utils import plt_3d_df
from transform.fleet_info.main import fleet_info
from transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries

### Data extraction

In [None]:
@cache_result('data_cache/tesla_tss.parquet', on="local_storage")
def get_tss() -> DF:
    """Return the processed Tesla time series.

    The call is wrapped by ``cache_result``, configured here with a parquet
    path under ``data_cache/`` and ``on="local_storage"``.
    """
    tesla_tss = TeslaProcessedTimeSeries("tesla")
    return tesla_tss

# Time-series column names.
# NOTE(review): USE_COLS is not referenced anywhere else in the visible part of
# this notebook — presumably it was meant to restrict the columns loaded by
# get_tss(); confirm or remove.
USE_COLS = [
    "vin",
    "trimmed_in_charge",
    "trimmed_in_charge_idx",
    'date',
    'battery_heater',
    'charger_power',
    'soc',
    'charge_current_request',
    'charge_rate',
    'charge_energy_added',
    'inside_temp',
    'age',
    'outside_temp',
    'fast_charger_type',
    'odometer',
    'model',
    'fast_charger_present',
]
# fleet_info columns joined onto the time series in get_charges()
# (the "vin" join key is appended there).
FLEET_INFO_COLS = [
    'version',
    'capacity',
    'tesla_code',
    'region_name',
]

@cache_result('data_cache/tesla_charges.parquet', on="local_storage")
def get_charges() -> DF:
    """Build one row per charge from the Tesla time series.

    Rows flagged ``trimmed_in_charge`` are joined with the fleet-info columns
    and grouped by ``(vin, trimmed_in_charge_idx)``. Three derived columns are
    appended after the aggregation:

    * ``soh``: ``energy_added / (soc_diff / 100.0 * capacity)``
    * ``energy_added_min_end``: ``energy_added_end - energy_added_min``
    * ``soh_min_end``: same ratio as ``soh`` but using ``energy_added_min_end``

    The call is wrapped by ``cache_result`` (parquet path under ``data_cache/``).
    """
    # Output column -> (source column, aggregation). The dict order fixes the
    # column order of the result.
    # NOTE(review): series_start_end_diff is defined outside this notebook —
    # presumably "last minus first" of the series; confirm.
    per_charge_aggs = {
        "region_name": ("region_name", "first"),
        "energy_added": ("charge_energy_added", series_start_end_diff),
        "energy_added_min": ("charge_energy_added", "min"),
        "energy_added_end": ("charge_energy_added", "last"),
        "soc_min": ("soc", "min"),
        "soc_max": ("soc", "max"),
        "soc_diff": ("soc", series_start_end_diff),
        "soc_start": ("soc", "first"),
        "soc_end": ("soc", "last"),
        "inside_temp": ("inside_temp", "mean"),
        "outside_temp": ("outside_temp", "mean"),
        "capacity": ("capacity", "first"),
        "odometer": ("odometer", "first"),
        "fast_charger_type": ("fast_charger_type", "first"),
        "size": ("soc", "size"),
        "model": ("model", "first"),
        "version": ("version", "first"),
        "date": ("date", "first"),
        "charge_rate": ("charge_rate", "median"),
        "fast_charger_present": ("fast_charger_present", "median"),
        "charge_current_request": ("charge_current_request", "median"),
        "tesla_code": ("tesla_code", "first"),
        "battery_heater": ("battery_heater", "median"),
        "charger_power": ("charger_power", "median"),
        "age": ("age", "first"),
        "age_in_days": ("age_in_days", "first"),
    }

    # Keep only the samples that belong to a charge.
    in_charge_samples = (
        get_tss()
        .eval("age_in_days = age.dt.days")
        .query("trimmed_in_charge")
    )
    # Attach the per-vehicle fleet information to every sample.
    fleet_columns = fleet_info[FLEET_INFO_COLS + ["vin"]]
    samples_with_fleet_info = in_charge_samples.merge(fleet_columns, on="vin", how="left")

    per_charge = (
        samples_with_fleet_info
        .groupby(["vin", "trimmed_in_charge_idx"])
        .agg(**per_charge_aggs)
        .reset_index(drop=False)
    )
    return (
        per_charge
        .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
        .eval("energy_added_min_end = energy_added_end - energy_added_min")
        .eval("soh_min_end = energy_added_min_end / (soc_diff / 100.0 * capacity)")
    )


In [None]:
# tesla_code values grouped by the chemistry label assigned to them below.
LFP_TESLA_CODES = [
    "MT351",
    "MT336",
    "MT322",
]
NCA_TESLA_CODES = [
    "MT353",
    "MT308",
]
NMC_TESLA_CODES = [
    "MTY09",
]
# Flat tesla_code -> chemistry lookup built from the three lists above.
CHEMISTRY_BY_TESLA_CODE = {
    code: chemistry
    for chemistry, codes in (
        ("LFP", LFP_TESLA_CODES),
        ("NCA", NCA_TESLA_CODES),
        ("NMC", NMC_TESLA_CODES),
    )
    for code in codes
}

# Per-charge table, ordered by tesla_code, then vin, then date, so that
# per-vin "first"/"last" aggregations follow the charge date.
charges = (
    get_charges()
    .sort_values(["tesla_code", "vin", "date"])
    # SoH bands computed on the uncorrected soh_min_end. `between` is inclusive
    # on both ends, so a value of exactly 0.9 falls in both bands.
    .eval("top_soh = soh_min_end.between(0.9, 1.0)")
    .eval("bottom_soh = soh_min_end.between(0.75, 0.9)")
    # MTY13-only correction: rescale by 1 / 0.94, then lift the bottom band by 0.08.
    # NOTE(review): both constants are hard-coded; their origin is not shown here.
    .eval("fixed_soh_min_end = soh_min_end.mask(tesla_code == 'MTY13', soh_min_end / 0.94)")
    .eval("fixed_soh_min_end = fixed_soh_min_end.mask(bottom_soh & (tesla_code == 'MTY13'), fixed_soh_min_end + 0.08)")
    # One boolean membership flag per chemistry list.
    .eval("LFP = tesla_code in @LFP_TESLA_CODES")
    .eval("NCA = tesla_code in @NCA_TESLA_CODES")
    .eval("NMC = tesla_code in @NMC_TESLA_CODES")
)
# Rows whose tesla_code is in none of the lists get a missing chemistry (NaN)
# instead of silently receiving a default label.
charges["chemistry"] = charges["tesla_code"].map(CHEMISTRY_BY_TESLA_CODE)

In [None]:
# One row per vehicle: last odometer value in row order, median SoH estimates,
# and the first tesla_code / chemistry seen for that vin.
per_vin_charges = charges.groupby("vin")
agg_charges = (
    per_vin_charges
    .agg(
        odometer=("odometer", "last"),
        soh_min_end=("soh_min_end", "median"),
        fixed_soh_min_end=("fixed_soh_min_end", "median"),
        tesla_code=("tesla_code", "first"),
        chemistry=("chemistry", "first"),
    )
    .reset_index()
    .sort_values(["tesla_code", "vin"])
)
agg_charges

### Exploratory analysis

In [None]:
# Number of distinct vehicles per tesla_code, largest group first.
vins_per_tesla_code = charges.groupby("tesla_code")["vin"].nunique().reset_index()
vins_per_tesla_code.sort_values("vin", ascending=False)

In [None]:
# Look up the MT352 entry for Tesla in models_info. Both spellings of the code
# are accepted because the rest of this notebook uses upper-case tesla codes.
# NOTE(review): the casing used by models_info is not visible here — confirm.
models_info.query("(manufacturer == 'tesla') and (tesla_code in ['MT352', 'mt352'])")

In [None]:
# Distribution of the capacity values attached to MT352 charges.
mt352_charges = charges.query("tesla_code == 'MT352'")
mt352_charges["capacity"].value_counts()

In [None]:
# Shared input for both tables below: charges with the needed columns present,
# an SoC swing above 40 and soh_min_end within [0.8, 1.05].
soh_fit_input = (
    charges
    .dropna(subset=["odometer", "soh_min_end", "tesla_code"])
    .query("soc_diff > 40 & soh_min_end.between(0.8, 1.05)")
)

# lr_params_as_series applied per tesla_code (x=odometer, y=soh_min_end),
# ordered by the resulting "slope" column.
lr_params: DF = (
    soh_fit_input
    .groupby("tesla_code")
    .apply(lr_params_as_series, x="odometer", y="soh_min_end", include_groups=False)
    .sort_values("slope")
)

# Per-tesla_code summary joined with the regression parameters, restricted to
# codes with more than 300 distinct vehicles.
stats: DF = (
    soh_fit_input
    .groupby("tesla_code")
    .agg(
        soh_min_end_mean=("soh_min_end", "mean"),
        vin_nunique=("vin", "nunique"),
        soh_count=("soh_min_end", "count"),
    )
    .reset_index()
    .merge(lr_params, on="tesla_code", how="left")
    .query("vin_nunique > 300")
    .sort_values("slope")
)
stats

In [None]:
# Per-charge SoH against odometer for four selected tesla codes, with one OLS
# trendline per code.
soh_vs_odometer_df = (
    charges.sort_values("tesla_code")
    .dropna(subset=["soh_min_end", "odometer", "tesla_code"])
    .query("soc_diff > 40 & (tesla_code == 'MTY09' | tesla_code == 'MT322' | tesla_code == 'MT351' | tesla_code == 'MT308') & soh_min_end.between(0.75, 1.05)")
)
px.scatter(
    soh_vs_odometer_df,
    x="odometer",
    y="soh_min_end",
    color="tesla_code",
    color_continuous_scale="Rainbow",
    trendline="ols",
    opacity=0.25,
)

In [None]:
# Share of charges that have / lack an age_in_days value.
has_age_in_days = charges["age_in_days"].notna()
has_age_in_days.value_counts(normalize=True)

In [None]:
# Per-charge SoH against vehicle age in days, coloured by tesla code.
soh_vs_age_df = (
    charges.sort_values("tesla_code")
    .dropna(subset=["soh_min_end", "odometer", "tesla_code"])
    .query("soc_diff > 40")
)
px.scatter(
    soh_vs_age_df,
    x="age_in_days",
    y="soh_min_end",
    color="tesla_code",
    color_continuous_scale="Rainbow",
    opacity=0.5,
)

In [None]:
# Distinct vehicles per tesla_code (largest first), also exported as CSV.
nunique_vins = (
    charges
    .groupby("tesla_code")[["vin"]]
    .nunique()
    .sort_values("vin", ascending=False)
)
nunique_vins.to_csv("data_cache/tesla_nunique_vins.csv")
nunique_vins

In [None]:
# Charges with an implausibly low SoH estimate, ordered by their "size" column.
low_soh_charges = charges.query("soh_min_end < 0.2")
low_soh_charges.sort_values("size")

In [None]:
# MTY13 charges only: SoH against odometer, coloured by the top_soh flag.
mty13_plot_df = (
    charges
    .dropna(subset=["soh_min_end", "odometer", "tesla_code"])
    .query("soc_diff > 40 & tesla_code == 'MTY13'")
)
px.scatter(
    mty13_plot_df,
    x="odometer",
    y="soh_min_end",
    color="top_soh",
    color_continuous_scale="Rainbow",
    opacity=0.25,
)

In [None]:
# Number of non-null soh values per MTY13 vehicle, fewest first.
mty13_long_charges = charges.query("tesla_code == 'MTY13' & soc_diff > 40")
mty13_long_charges.groupby("vin").agg(soh=("soh", "count")).sort_values("soh")

In [None]:
# Correlation of every numeric column with the top_soh / bottom_soh flags,
# for MTY13 charges with soh in [0.80, 1] and an SoC swing above 40.
mty13_corr = (
    charges
    .query("tesla_code == 'MTY13' & soh.between(0.80, 1) & soc_diff > 40")
    .corr(numeric_only=True)
)
mty13_corr[["top_soh", "bottom_soh"]]

In [None]:
# MTY13 charges: SoH against energy added, coloured by odometer.
mty13_energy_df = (
    charges
    .query("tesla_code == 'MTY13' & soh.between(0.80, 1)")
    .dropna(subset=["soh_min_end", "odometer", "tesla_code"])
    .query("soc_diff > 40")
)
px.scatter(
    mty13_energy_df,
    x="energy_added",
    y="soh_min_end",
    color="odometer",
    opacity=0.25,
    color_continuous_scale="Rainbow",
)

In [None]:
# MTY13 charges in 3D: odometer x charge_rate x SoH.
mty13_3d_df = (
    charges
    .query("tesla_code == 'MTY13' & soh.between(0.80, 1)")
    .dropna(subset=["soh_min_end", "odometer", "tesla_code"])
    .query("soc_diff > 40")
)
plt_3d_df(
    mty13_3d_df,
    x="odometer",
    y="charge_rate",
    z="soh_min_end",
    opacity=0.25,
)

In [None]:
# Per MTY13 vehicle: how many charges fall in the top / bottom SoH band, and
# each band's share of the two combined.
mty13_banded = (
    charges
    .query("tesla_code == 'MTY13'")
    .eval("top_soh = soh_min_end.between(0.9, 1.0)")
    .eval("bottom_soh = soh_min_end.between(0.75, 0.9)")
)
soh_sections = (
    mty13_banded
    .groupby("vin")
    .agg(
        top_soh=("top_soh", "sum"),
        bottom_soh=("bottom_soh", "sum"),
        odometer=("odometer", "last"),
    )
    .reset_index(drop=False)
    .eval("top_soh_ratio = top_soh / (top_soh + bottom_soh)")
    .eval("bottom_soh_ratio = bottom_soh / (top_soh + bottom_soh)")
)

soh_sections

In [None]:
# Top-band count against bottom-band count, one point per vehicle.
px.scatter(soh_sections, x="top_soh", y="bottom_soh", color="vin")

### Plots for presentation

In [None]:
# Per-vehicle corrected SoH against odometer, coloured by tesla code.
by_code_df = agg_charges.dropna(subset=["fixed_soh_min_end", "odometer", "tesla_code"])
# The vehicle count in the title is computed from the plotted data so that it
# always matches what is shown.
by_code_vehicle_count = by_code_df["vin"].nunique()
px.scatter(
    by_code_df,
    x="odometer",
    y="fixed_soh_min_end",
    color="tesla_code",
    opacity=0.25,
    title=f"SoH(State of Health) over odometer for {by_code_vehicle_count} Tesla vehicles",
    labels={
        "odometer": "Odometer (km)",
        "fixed_soh_min_end": "SoH(State of Health)",
    },
)

In [None]:
# Per-vehicle corrected SoH against odometer, coloured by chemistry.
by_chemistry_df = (
    agg_charges
    .dropna(subset=["fixed_soh_min_end", "odometer", "tesla_code"])
    # Vehicles with no chemistry value get an explicit label so they form
    # their own legend entry.
    .fillna({"chemistry": "unknown"})
)
# The vehicle count in the title is computed from the plotted data so that it
# always matches what is shown.
by_chemistry_vehicle_count = by_chemistry_df["vin"].nunique()
px.scatter(
    by_chemistry_df,
    x="odometer",
    y="fixed_soh_min_end",
    color="chemistry",
    opacity=0.4,
    title=f"SoH(State of Health) over odometer for {by_chemistry_vehicle_count} Tesla vehicles",
    labels={
        "odometer": "Odometer (km)",
        "fixed_soh_min_end": "SoH(State of Health)",
    },
)

In [None]:
# Per-vehicle corrected SoH against odometer for four selected tesla codes,
# limited to SoH in [0.9, 1.05], with one OLS trendline per chemistry.
chemistry_trend_df = (
    agg_charges
    .dropna(subset=["fixed_soh_min_end", "odometer", "tesla_code", "chemistry"], how="any")
    .query("(tesla_code == 'MTY09' | tesla_code == 'MT322' | tesla_code == 'MT351' | tesla_code == 'MT308')")
    .query("fixed_soh_min_end.between(0.9, 1.05)")
)
# The filters above remove vehicles, so the count in the title is computed
# from the filtered frame instead of being hard-coded.
chemistry_trend_vehicle_count = chemistry_trend_df["vin"].nunique()
px.scatter(
    chemistry_trend_df,
    x="odometer",
    y="fixed_soh_min_end",
    color="chemistry",
    trendline="ols",
    opacity=0.25,
    title=f"SoH(State of Health) over odometer for {chemistry_trend_vehicle_count} Tesla vehicles",
    labels={
        "odometer": "Odometer (km)",
        "fixed_soh_min_end": "SoH(State of Health)",
    },
)