# SoH estimation experiments on Ford vehicles
Based on the analysis in `ford_eda.ipynb`, we have seen that `charging.battery_energy` can be used to estimate the SoH.  

All these results are based on what was analysed in `ford_eda.ipynb`.

We added one filter on the SoC for the SoH calculation; a second one on the charge/discharge status could also be added.

## Imports

In [None]:
import logging
from datetime import datetime as DT
from datetime import timedelta as TD
from dateutil import parser
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from dotenv import load_dotenv
import os
import random

import numpy as np
from rich import print
import pandas as pd
from pandas import Series
from pandas import DataFrame as DF
import plotly.express as px

from core.s3_utils import S3_Bucket
from core.config import *
from core.time_series_processing import preprocess_date

## Setup

In [None]:
# Battery capacity lookup, keyed by model name and then by version ("Type")
# name. Per the constant's name the values are expressed in kWh.
KWH_BATTERY_CAPCITY_DICT = {
    "ZOE": dict.fromkeys(
        (
            "R90 Life (batterijkoop) 5d",
            "R135 Edition One (batterijkoop) 5d",
            "R135 Intens (batterijkoop) 5d",
            "R135",
        ),
        52,
    ),
}
# Version names with a known capacity, in the same order as the ZOE entries above.
KNOW_MODEL_TYPES = list(KWH_BATTERY_CAPCITY_DICT["ZOE"])

In [None]:
# Fleet description: one row per vehicle, indexed by VIN (the `vin` column is
# kept alongside the index).
fleet_columns = ["VIN","Make","Model","Type", "Autonomie", "Capacity"]
fleet_info = pd.read_csv("../ayvens/fleet_info.csv", usecols=fleet_columns, dtype={"Make":"string"})

# Maker names are lower-cased; "mercedes" is additionally spelled "mercedes-benz".
lowercase_make = fleet_info["Make"].str.lower()
normalized_make = lowercase_make.map({"mercedes": "mercedes-benz"}).fillna(lowercase_make)
fleet_info = (
    fleet_info
    .rename(columns={"VIN": "vin"})
    .assign(Make=normalized_make)
    .set_index("vin", drop=False)
)

# Random dummy parameters: one draw per group, broadcast to every vehicle of
# that group by `transform`.
vins_by_make = fleet_info.groupby("Make")["vin"]
fleet_info["maker_offset"] = vins_by_make.transform(lambda vins: random.uniform(-1, 0.1))

vins_by_version = fleet_info.groupby(["Make", "Type"])["vin"]
fleet_info["model_offset"] = vins_by_version.transform(lambda vins: random.uniform(-1, 0.1))

slope_low = SOH_LOST_PER_KM_DUMMY_RATIO - 0.00001
slope_high = SOH_LOST_PER_KM_DUMMY_RATIO + 0.00001
fleet_info["model_slope"] = (
    fleet_info
    .groupby(["Make", "Type"])["vin"]
    .transform(lambda vins: random.uniform(slope_low, slope_high))
)

# Displayed as the cell output: the distinct maker names after normalisation.
fleet_info["Make"].unique()

In [None]:
# Shared S3 handle used by the loader below.
bucket = S3_Bucket()


def get_ford_raw_ts(vin: str, brand: str) -> DF:
    """Read the raw time series of one vehicle from the bucket.

    Args:
        vin: Vehicle identifier, used as the parquet file name.
        brand: Maker name, used as the folder name under ``raw_ts/``.

    Returns:
        The parquet content indexed by its ``date`` column (the column itself
        is kept) and sorted by that index.
    """
    raw_ts = bucket.read_parquet_df(f"raw_ts/{brand}/time_series/{vin}.parquet")
    indexed_ts = raw_ts.set_index("date", drop=False)
    return indexed_ts.sort_index()

# Raw telemetry column names -> short names used in the rest of the notebook.
RENAME_COLS_DICT = {
    "diagnostics.odometer": "odometer",
    "charging.battery_energy": "battery_energy",
    "charging.battery_level": "battery_level",
    "charging.status": "status",
}

# Load the raw time series of every vehicle listed in `fleet_info` and tag each
# one with the vehicle's fleet metadata.
tss = {}
# vin -> exception raised while loading it, kept for inspection.
failed_vins = {}
for vin, vehicle_info in fleet_info.iterrows():
    # Capacity from the lookup table when the (model, version) pair is known,
    # NaN otherwise. The looked-up value used to be overwritten by NaN right
    # after being computed, which made the lookup dead code.
    default_100_soc_energy = (
        KWH_BATTERY_CAPCITY_DICT
        .get(vehicle_info["Model"], {})
        .get(vehicle_info["Type"], np.nan)
    )
    try:
        tss[vin] = (
            get_ford_raw_ts(vin, vehicle_info["Make"])
            .assign(
                vin=vin,
                maker=vehicle_info["Make"],
                model=vehicle_info["Model"],
                version=vehicle_info["Type"],
                autonomie=vehicle_info["Autonomie"],
                capacity=pd.to_numeric(vehicle_info["Capacity"], errors='coerce'),
                default_100_soc_energy=default_100_soc_energy
            )
            .rename(columns=RENAME_COLS_DICT)
        )
    except Exception as e:
        # Best effort: a vehicle that cannot be loaded is skipped, but the
        # failure is recorded instead of being silently discarded.
        failed_vins[vin] = e
        continue

if failed_vins:
    logging.warning(
        "Skipped %d of %d vehicles whose raw time series could not be loaded: %s",
        len(failed_vins),
        len(fleet_info),
        list(failed_vins),
    )

if not tss:
    # pd.concat would raise an opaque "No objects to concatenate" ValueError.
    raise ValueError("No raw time series could be loaded for any vehicle of the fleet.")

# Single frame for the whole fleet, with the vin as the outer index level.
tss = pd.concat(tss, axis="index", keys=tss.keys(), names=["vin"])


## SOH

Let's visualize the SoH estimation.

In [None]:
import plotly.graph_objects as go
import numpy as np
from scipy import stats

# Ford SoH estimation and plot of the average SoH against the latest mileage.

# Keep only the samples with a battery level above 0.5 for the SoH calculation.
# The copy detaches the filtered frame so that the `soh` column assigned below
# is written to `tss` itself.
tss = tss.query("battery_level > 0.5").copy()

# The mask is built AFTER the filter so that it is aligned with the rows of the
# filtered `tss`; a mask built before the filter still refers to dropped rows.
ford_soh_mask = tss["maker"] == "ford"

# SoH (%) = (battery_energy / battery_level) / capacity * 100.
# NOTE(review): this presumably expects `battery_level` as a 0-1 fraction and
# `capacity` in the same unit as `battery_energy` -- confirm against the raw data.
tss.loc[ford_soh_mask, "soh"] = (
    tss.loc[ford_soh_mask]
    .eval("battery_energy / battery_level / capacity * 100")
)

# Average SoH and last odometer reading for each VIN.
# `reset_index(drop=True)` removes the index levels so that `vin` only exists
# as a column when grouping.
ford_df = (
    tss[ford_soh_mask]
    .reset_index(drop=True)
    .groupby("vin")
    .agg({
        "soh": "mean",
        "odometer": "last",
        "model": "first",
        "date": "last",
    })
    .reset_index()
)

# Ensure odometer and soh are numeric
ford_df['odometer'] = pd.to_numeric(ford_df['odometer'], errors='coerce')
ford_df['soh'] = pd.to_numeric(ford_df['soh'], errors='coerce')

# Remove any rows with NaN values
ford_df = ford_df.dropna(subset=['odometer', 'soh'])

# Scatter plot with a single OLS trendline fitted over all models.
fig = px.scatter(
    ford_df,
    x="odometer",
    y="soh",
    color="model",
    height=600,
    title="Average State-of-Health (SoH) vs Mileage",
    trendline="ols",
    trendline_scope="overall",
    hover_data=["vin"]
)

# Linear fit of SoH against mileage. It is not drawn on the figure (px.scatter
# already adds the OLS trendline); the fit parameters are kept for inspection.
slope, intercept, r_value, p_value, std_err = stats.linregress(ford_df["odometer"], ford_df["soh"])
line_x = np.array([ford_df["odometer"].min(), ford_df["odometer"].max()])
line_y = slope * line_x + intercept

fig.update_layout(
    xaxis_title="Latest mileage (km)",
    yaxis_title="SoH (%)",
    legend_title="Model",
)
fig.update_traces(line=dict(color='black', dash='dash'))

fig.show()
# `write_html` saves the figure to a file; `to_html` only returns the HTML as a
# string and would treat its first positional argument as the plot config.
fig.write_html("soh_plot.html")

-> There is no visible trend of battery degradation with mileage.  
This leads us to conclude that the SoH is not a good estimator of battery degradation.  