# SoH estimation experimentation for Renault vehicles
In this notebook, we try to express the SoH at any point in time as the energy the battery actually holds divided by the energy it would hold, at the same battery level, if it had 100% SoH.  
```
soh = charging.battery_energy / (charging.battery_level * model_battery_capacity) 
```

This method is based on the assumption that the variable `charging.battery_energy` represents the actual energy present in the battery rather than simply `charging.battery_level * model_battery_capacity`.

## Imports

In [None]:
import logging
from datetime import datetime as DT
from datetime import timedelta as TD
from dateutil import parser
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from dotenv import load_dotenv
import os
import random

import numpy as np
from rich import print
import pandas as pd
from pandas import Series
from pandas import DataFrame as DF
import plotly.express as px

from core.s3_utils import S3_Bucket
from jobs.base_jobs.job_interval import Jobinterval
from core.constants import *
from core.time_series_processing import preprocess_date
from jobs.high_mobility.constants import *

## Setup

In [None]:
# Fleet-file "Type" label -> short Zoe version name.
ZOE_TYPE_MAP = dict(
    [
        ("R90 Life (batterijkoop) 5d", "R90"),
        ("R135", "R135"),
        ("R110 Limited (batterijkoop) 5d", "R110"),
        ("R135 Intens (batterijkoop) 5d", "R135"),
        ("R135 Edition One (batterijkoop) 5d", "R135"),
    ]
)
# Battery capacity (kWh, per the constant's name), keyed by model then by version.
KWH_BATTERY_CAPCITY_DICT = {"ZOE": dict(R90=41, R110=52, R135=52)}
# Versions for which the Renault-specific SoH formula is applied.
KNOW_MODEL_TYPES = ["R90", "R135", "R110"]

In [None]:
# Load the fleet description; only the columns used in this notebook are read.
fleet_info = pd.read_csv("fleet_info.csv", sep=";", usecols=["VIN","Make","Model","Type", "Autonomie"], dtype={"Make":"string"})
# Lower-case the maker once, then rename "mercedes" to "mercedes-benz"; every other maker keeps its
# lower-cased name.
make_lowercase = fleet_info["Make"].str.lower()
fleet_info = (
    fleet_info
    .rename(columns={"VIN": "vin"})
    .assign(Make=make_lowercase.map({"mercedes": "mercedes-benz"}).fillna(make_lowercase))
    .set_index("vin", drop=False)
)
# For dummy soh
# A dedicated, seeded generator keeps the synthetic offsets/slopes reproducible across
# "Restart Kernel -> Run All" without touching the global `random` state.
dummy_soh_rng = random.Random(42)
# One random value is drawn per group and broadcast to every vehicle of that group.
fleet_info["maker_offset"] = fleet_info.groupby("Make")["vin"].transform(lambda vins: dummy_soh_rng.uniform(-1, 0.1))
fleet_info["model_offset"] = fleet_info.groupby(["Make", "Type"])["vin"].transform(lambda vins: dummy_soh_rng.uniform(-1, 0.1))
fleet_info["model_slope"] = fleet_info.groupby(["Make", "Type"])["vin"].transform(lambda vins: dummy_soh_rng.uniform(SOH_LOST_PER_KM_DUMMY_RATIO - 0.00001, SOH_LOST_PER_KM_DUMMY_RATIO + 0.00001))
# For renault, only retain the R version (like R90, R100 or R135)
# Labels missing from ZOE_TYPE_MAP (including missing values) become "x".
is_renault_mask = fleet_info["Make"] == 'renault'
fleet_info.loc[is_renault_mask, "Type"] = fleet_info.loc[is_renault_mask, "Type"].map(ZOE_TYPE_MAP).fillna("x")
fleet_info["Make"].unique()

In [None]:
# Count the (Model, Type) combinations among the Renault vehicles of the fleet.
renault_fleet = fleet_info.query("Make == 'renault'")
renault_fleet[["Model", "Type"]].value_counts()

In [None]:
# Populate os.environ from a local .env file (if one exists) before reading the credentials.
# Without this call the lookups below return None unless the variables are already exported
# in the shell that started the kernel.
load_dotenv()

# Credentials are read from the environment so that no secret is stored in the notebook.
PROD_CREDS = {
    "bucket_name":os.getenv("PROD_S3_BUCKET"),
    "aws_access_key_id":os.getenv("PROD_S3_KEY"),
    "aws_secret_access_key":os.getenv("PROD_S3_SECRET"),
}

bucket = S3_Bucket(PROD_CREDS)

# Makers whose raw time series are read from the shared "stellantis" prefix.
# 'citroÃ«n' is the mis-decoded (UTF-8 read as Latin-1) spelling of 'citroën'; both are
# accepted so the lookup works whichever way the fleet file was decoded.
STELLANTIS_BRANDS = ('citroën', 'citroÃ«n', 'ds', 'fiat', 'opel', 'peugeot')

def get_renault_raw_ts(vin:str, brand:str) -> DF:
    """Read the raw time series of one vehicle from the bucket.

    Despite its name, this function is called for every maker of the fleet, not only Renault.

    Args:
        vin: Vehicle identification number; used as the parquet file name.
        brand: Lower-cased maker name. Makers listed in ``STELLANTIS_BRANDS`` are read from
            the ``stellantis`` prefix.

    Returns:
        The time series indexed by its ``date`` column (the column is kept) and sorted by index.

    Raises:
        Whatever ``bucket.read_parquet_df`` raises when the key cannot be read, and ``KeyError``
        if the frame has no ``date`` column.
    """
    if brand in STELLANTIS_BRANDS:
        brand = "stellantis"
    key = f"raw_ts/{brand}/time_series/{vin}.parquet"
    return (
        bucket.read_parquet_df(key)
        .set_index("date", drop=False)
        .sort_index()
    )

# Raw column name -> short column name used in the rest of the notebook.
# Two raw odometer spellings map to the same "odometer" column.
# (The duplicated "diagnostics.odometer" entry has been removed; a repeated key in a dict
# literal is silently overwritten and only hides typos.)
RENAME_COLS_DICT = {
    "diagnostics.odometer": "odometer",
    "odometer.value": "odometer",
    "charging.battery_energy": "battery_energy",
    "charging.battery_level": "battery_level",
    "charging.estimated_range": "estimated_range",
}

# Load the raw time series of every vehicle of the fleet and tag each one with its
# vehicle-level attributes, then stack them into a single frame keyed by vin.
ts_by_vin = {}
failed_vins = []
for vin, vehicle_info in fleet_info.iterrows():
    # Battery capacity of the (model, version) pair; NaN when the pair is unknown.
    default_100_soc_energy = (
        KWH_BATTERY_CAPCITY_DICT
        .get(vehicle_info["Model"], {})
        .get(vehicle_info["Type"], np.nan)
    )
    try:
        ts_by_vin[vin] = (
            get_renault_raw_ts(vin, vehicle_info["Make"])
            .assign(
                vin=vin,
                maker=vehicle_info["Make"],
                model=vehicle_info["Model"],
                version=vehicle_info["Type"],
                dummy_soh_slope=vehicle_info["model_slope"],
                dummy_soh_offset=vehicle_info["maker_offset"] + vehicle_info["model_offset"],
                autonomie=vehicle_info["Autonomie"],
                default_100_soc_energy=default_100_soc_energy,
            )
            .rename(columns=RENAME_COLS_DICT)
        )
    except Exception as e:
        # Best effort: a vehicle whose time series cannot be loaded is skipped, but the
        # failure is recorded instead of being silently discarded.
        logging.debug("Could not load time series of vin %s: %r", vin, e)
        failed_vins.append(vin)
        continue
if failed_vins:
    logging.warning(
        "Skipped %d/%d vehicles whose time series could not be loaded (first ones: %s)",
        len(failed_vins),
        len(fleet_info),
        failed_vins[:5],
    )
# The first index level is named "vin"; the original (date) index becomes the second level.
tss = pd.concat(ts_by_vin, axis="index", keys=list(ts_by_vin.keys()), names=["vin"])

## Preprocessing

In [None]:
# 0.621371 is the kilometre-to-mile factor, so this presumes the odometer is in km — TODO confirm.
tss["mileage"] = tss["odometer"].mul(0.621371)

## SOH

In [None]:
# Dummy soh
# Linear decay with the odometer plus a small Gaussian noise, used for every vehicle that has
# no dedicated method below. The generator is seeded so the notebook is reproducible.
dummy_noise_rng = np.random.default_rng(42)
tss["soh"] = (
    tss
    .eval("100 + dummy_soh_offset - odometer * dummy_soh_slope")
    .groupby(level=0)
    .transform(lambda soh: soh + dummy_noise_rng.normal(0, 0.02, len(soh)))
    .clip(0, 100)
)
tss["soh_method"] = "general"

# Renault soh
# soh = 100 * battery_energy / (default_100_soc_energy * battery_level)
# NOTE(review): the formula only yields a percentage if battery_level is a 0-1 fraction — confirm.
renault_soh_mask:Series = tss["version"].isin(KNOW_MODEL_TYPES)
tss["default_100_soc_energy"] = tss["default_100_soc_energy"].astype("float")
tss.loc[renault_soh_mask, "soh"] = (
    tss[renault_soh_mask]
    .eval("100 * battery_energy / (default_100_soc_energy * battery_level)")
    # A battery_level of 0 makes the ratio +/-inf; treat those points as missing.
    .replace([np.inf, -np.inf], np.nan)
)
# Points with little energy left in the battery are discarded (soh set to NaN).
MIN_BATTERY_ENERGY_FOR_SOH = 15
renault_soh_battery_level_too_low:Series = renault_soh_mask & (tss["battery_energy"] < MIN_BATTERY_ENERGY_FOR_SOH)
tss.loc[renault_soh_battery_level_too_low, "soh"] = np.nan
tss.loc[renault_soh_mask, "soh_method"] = "renault"

# Mercedes soh
# Estimated range extrapolated to a full battery, relative to the "Autonomie" of the fleet file.
mercedes_soh_mask = tss["maker"] == "mercedes-benz"
tss.loc[mercedes_soh_mask, "soh"] = (
    tss.loc[mercedes_soh_mask]
    .eval("estimated_range / battery_level / autonomie * 100")
    # Same guard as above: a zero battery_level (or autonomie) would give +/-inf.
    .replace([np.inf, -np.inf], np.nan)
)
tss.loc[mercedes_soh_mask, "soh_method"] = "mercedes-benz"

fig = px.scatter(
    # Sample a fraction of the dataframe otherwise plotly makes everything crash.
    # random_state makes the sampled subset (and therefore the figure) reproducible.
    tss.sample(frac=0.2, random_state=42),
    x="odometer",
    y="soh",
    color="maker",
    facet_col="soh_method",
    trendline="ols",
    facet_col_wrap=1,
    height=1000
)
fig.update_xaxes(title_text="Mileage")
fig.update_yaxes(title_text="SoH (%)", matches=None)
fig.show()
fig.write_html("soh_plot.html")

In [None]:
# Average SoH per mileage bin, one facet per SoH method.
# When `y` is given, px.histogram defaults to histfunc="sum", which makes the bar height grow
# with the number of samples in the bin; "avg" shows the SoH level itself.
fig = px.histogram(
    tss,
    nbins=15,
    x="mileage",
    y="soh",
    color="soh_method",
    facet_col="soh_method",
    histfunc="avg",
)
fig.update_xaxes(title_text="Mileage")
fig.show()
fig.write_html("soh_distribution_over_mileage.html")

### zoe soh

In [None]:
# One point per vehicle: the median SoH and median mileage of its Renault-method samples.
soze_sohs_points = tss[renault_soh_mask].groupby(level=0)[["soh", "mileage"]].median()
# Title and axis label describe what is actually plotted: per-vehicle medians, not averages or
# latest values.
# NOTE(review): "miles" follows from mileage = odometer * 0.621371, presuming a km odometer — confirm.
fig = px.scatter(soze_sohs_points, x="mileage", y="soh", trendline="ols", title="Median SoH vs median mileage (one point per vehicle)")
# Restyle only the trendline trace (the one drawn in "lines" mode).
fig.update_traces(line=dict(color="red", dash="dash"), selector=dict(mode="lines"))
fig.update_xaxes(title_text="Median mileage (miles)")
fig.update_yaxes(title_text="SoH (%)")
fig.show()
fig.write_html("zoe_soh_over_mileage.html")

## odometer

In [None]:
# Highest mileage reached by each vehicle, and the maker of each vehicle (both indexed by vin).
max_miles = tss.groupby(level=0)["mileage"].max()
brands = tss.groupby(level=0)["maker"].first()
# The figure is a distribution of per-vehicle maximum mileage; it does not involve SoH,
# so the title says so.
fig = px.histogram(max_miles, nbins=15, color=brands, facet_col=brands, title="Distribution of maximum mileage per vehicle, by maker")
fig.update_xaxes(title_text="Mileage")
fig.show()
fig.write_html("mileage_distribution.html")