# SoH estimation experiments on Ford vehicles
Based on the file `ford_eda.ipynb` we have seen that we could use the `charging.battery_energy` to estimate the SoH.  

All of these results are based on what was analysed in `ford_eda.ipynb`.

We added one filter on the SoC for the SoH calculation; a second one on charge/discharge could also be added.

In [None]:
# Create the local output folder if it is missing; a plotting cell further down targets
# `data_cache/soh_plot.html`.
! mkdir -p data_cache/

## Imports

In [None]:
from datetime import datetime as DT
from datetime import timedelta as TD
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import numpy as np
from rich import print
from rich.progress import track
import pandas as pd
from pandas import Series
from pandas import DataFrame as DF
import plotly.express as px
from scipy import stats

from core.constants import *
from core.ev_models_info import models_info
from transform.ayvens.ayvens_fleet_info import fleet_info
from transform.ayvens.ayvens_get_raw_tss import get_ayvens_raw_tss

## Setup

In [None]:
# ZOE version labels known to this notebook.
KNOW_MODEL_TYPES = [
    "R90 Life (batterijkoop) 5d",
    "R135 Edition One (batterijkoop) 5d",
    "R135 Intens (batterijkoop) 5d",
    "R135",
]

# Battery capacity in kWh, keyed by model name and then by version label.
# Every known ZOE version maps to the same value.
KWH_BATTERY_CAPCITY_DICT = {
    "ZOE": {version_label: 52 for version_label in KNOW_MODEL_TYPES},
}

### Data extraction

In [None]:
# Fetch the raw time series; the loop at the end of this cell iterates it as
# (brand, DataFrame) pairs.
raw_tss = get_ayvens_raw_tss()
# Filled by that loop with one cleaned frame per brand, keyed by brand.
tss_dict = {}

# Columns copied from `fleet_info` onto every time-series row.
COLS_TO_CPY_FROM_FLEET_INFO = [
    "make",
    "model",
    "version",
    "dummy_soh_maker_offset",
    "dummy_soh_model_offset",
    "dummy_soh_model_slope",
    "dummy_soh_vehicle_offset",
]

# Maps the source-specific column names onto one shared schema.
# Several source names intentionally map to the same target (e.g. "odometer", "soc").
# Each source name appears exactly once: a repeated key in a dict literal is silently
# overwritten, which hides copy/paste mistakes.
RENAME_COLS_DICT = {
    "date_of_value": "date",
    "diagnostics.odometer": "odometer",
    "odometer.value": "odometer",
    "mileage": "odometer",
    "charging.battery_energy": "battery_energy",
    "charging.estimated_range": "estimated_range",
    "charging.battery_level": "soc",
    "soc_hv_header": "soc",
}

# Columns (after renaming) that survive the per-brand clean-up; everything else is dropped.
COLS_TO_KEEP = [
    "date",
    "soc",
    "odometer",
    "estimated_range",
    "battery_energy",
    "vin",
]

# Target dtypes applied with `astype` once all brands are concatenated.
COL_DTYPES = {
    "soc": "float",
    "odometer": "float",
    "estimated_range": "float",
    "battery_energy": "float",
    "dummy_soh_maker_offset": "float",
    "dummy_soh_model_offset": "float",
    "dummy_soh_model_slope": "float",
    "dummy_soh_vehicle_offset": "float",
    "dummy_soh_offset": "float",
    "vin": "string",
}

for brand, brand_raw_tss in track(raw_tss.items()):
    # Harmonise the brand-specific column names, then discard every column
    # that is not part of the shared schema.
    brand_raw_tss = brand_raw_tss.rename(columns=RENAME_COLS_DICT)
    is_kept = brand_raw_tss.columns.isin(COLS_TO_KEEP)
    cols_to_drop = brand_raw_tss.columns[~is_kept]
    brand_raw_tss = brand_raw_tss.drop(columns=cols_to_drop)

    # Copy make/model/version and the dummy SoH parameters onto each row; the rows of
    # `fleet_info` are looked up by VIN and assigned positionally.
    fleet_rows = fleet_info.loc[brand_raw_tss["vin"], COLS_TO_CPY_FROM_FLEET_INFO]
    brand_raw_tss[COLS_TO_CPY_FROM_FLEET_INFO] = fleet_rows.values

    # Total dummy offset = maker + model + vehicle offsets.
    tss_dict[brand] = brand_raw_tss.eval(
        "dummy_soh_offset = dummy_soh_maker_offset + dummy_soh_model_offset + dummy_soh_vehicle_offset"
    )



In [None]:
# Add the capacity of the zoes: look up `kwh_capacity` by version for every Renault row.
zoe_models = models_info.query("model == 'zoe'").set_index("version")
renault_versions = tss_dict["renault"]["version"]
tss_dict["renault"]["capacity"] = zoe_models.loc[renault_versions, "kwh_capacity"].values

In [None]:
# Add the default range of the mercedes: look up `default_km_range` by model for every row.
models_by_name = models_info.set_index("model")
mercedes_models = tss_dict["mercedes-benz"]["model"]
tss_dict["mercedes-benz"]["range"] = models_by_name.loc[mercedes_models, "default_km_range"].values

In [None]:
# Stack every brand into a single frame with a fresh 0..n-1 index.
tss = pd.concat(tss_dict, ignore_index=True)
# Enforce the shared dtypes.
tss = tss.astype(COL_DTYPES)
# Order the rows by make, then vehicle, then date.
tss = tss.sort_values(by=["make", "vin", "date"])


In [None]:
# Parse the timestamps (the sources use several formats) and drop the timezone information.
tss["date"] = pd.to_datetime(tss["date"], format="mixed").dt.tz_localize(None)
# Registration date of each row's vehicle, looked up by VIN in `fleet_info`.
tss["registration_date"] = pd.to_datetime(fleet_info.loc[tss["vin"], "registration_date"].values, format="mixed")
# Vehicle age at the time of each record, in years (365-day years).
tss["age_in_years"] = tss.eval("date - registration_date").dt.days.div(365)

# Re-sort now that `date` holds real datetimes: the sort done when `tss` was built ran on
# the raw, mixed-format values, which do not necessarily order chronologically. The
# per-vehicle "last" aggregations further down rely on chronological order within a VIN.
tss = tss.sort_values(by=["make", "vin", "date"])

In [None]:
# Display the computed ages, largest first, to eyeball implausible values.
tss["age_in_years"].sort_values(ascending=False)

## SOH

Let's visualize the SoH estimation.

In [None]:
# List the makes present in the combined time series.
tss["make"].unique()

In [None]:
# Ford rows only, restricted to soc > 0.5, with the SoH (%) derived from the reported
# battery energy.
# NOTE(review): the formula treats `soc` as a 0-1 fraction; confirm the unit of the raw data.
# NOTE(review): in the cells visible above, `capacity` is only assigned on the renault
# frame; confirm Ford rows carry a non-null capacity, otherwise `soh` is NaN.
ford_rows = tss.query("make == 'ford'")
ford_rows = ford_rows.query("soc > 0.5")
ford_tss: DF = ford_rows.eval("soh = battery_energy / soc / capacity * 100")

# One row per vehicle (indexed by VIN): average SoH plus the last seen
# odometer/date/age/version and the first seen model.
ford_vehicles = ford_tss.groupby("vin").agg(
    soh=("soh", "mean"),
    odometer=("odometer", "last"),
    model=("model", "first"),
    date=("date", "last"),
    age_in_years=("age_in_years", "last"),
    version=("version", "last"),
)

In [None]:
# `ford_vehicles` is indexed by VIN (it comes from a groupby on "vin"). Plotly express only
# resolves column names, so the VIN is exposed as a regular column for `hover_data`.
fig = px.scatter(
    ford_vehicles.reset_index(),
    x="odometer",
    y="soh",
    color="model",
    height=600,
    title="Average State-of-Health (SoH) vs Mileage",
    trendline="ols",
    # trendline_scope="overall",
    hover_data=["vin"]
)
fig.update_layout(
    xaxis_title="Latest mileage (km)",
    yaxis_title="SoH (%)",
    legend_title="Model",
)
# Draw the trendlines as dashed black lines.
fig.update_traces(line=dict(color='black', dash='dash'))

fig.show()
# `to_html` only returns the markup as a string (its first positional argument is `config`);
# `write_html` is the call that actually saves the figure to disk.
fig.write_html("data_cache/soh_plot.html")

In [None]:
# Per-vehicle summary used below. It is built from `ford_vehicles` (average SoH and latest
# odometer per VIN, indexed by VIN); `reset_index` exposes the VIN as a regular column so
# that it can be referenced in `hover_data`.
ford_df = ford_vehicles.reset_index()

# Remove any rows with NaN values so they do not propagate into the regression
ford_df = ford_df.dropna(subset=['odometer', 'soh'])

# Create scatter plot
fig = px.scatter(
    ford_df,
    x="odometer",
    y="soh",
    color="model",
    height=600,
    title="Average State-of-Health (SoH) vs Mileage",
    trendline="ols",
    # trendline_scope="overall",
    hover_data=["vin"]
)

# Overall linear fit of SoH against mileage (all models together).
slope, intercept, r_value, p_value, std_err = stats.linregress(ford_df["odometer"], ford_df["soh"])
line_x = np.array([ford_df["odometer"].min(), ford_df["odometer"].max()])
line_y = slope * line_x + intercept
# The overall fit is not drawn: the figure already carries the plotly OLS trendlines.
# To display it, add `go.Scatter(x=line_x, y=line_y, mode='lines', name='Trendline')`.

fig.update_layout(
    xaxis_title="Latest mileage (km)",
    yaxis_title="SoH (%)",
    legend_title="Model",
)
# Draw the trendlines as dashed black lines.
fig.update_traces(line=dict(color='black', dash='dash'))

fig.show()
# `to_html` only returns the markup as a string; `write_html` saves the figure to disk.
fig.write_html("soh_plot.html")

In [None]:
# Average SoH per vehicle, limited to vehicles that have both a mileage and a SoH value.
# Read from `ford_vehicles` so the cell does not depend on a name defined in another cell.
ford_vehicles.dropna(subset=["odometer", "soh"])["soh"]

-> There is no trend of battery degrading over the mileage.  
This leads us to conclude that the SoH is not a good estimator of battery degradation.  