# SoH estimation experimentation for Renault vehicles
In this notebook, we will try to express the SoH at any point as the energy the battery actually holds divided by the energy it would hold, at the same battery level, if it had 100% SoH.  
```
soh = charging.battery_energy / (charging.battery_level * model_battery_capacity) 
```

This method is based on the assumption that the variable `charging.battery_energy` represents the actual energy present in the battery rather than simply `charging.battery_level * model_battery_capacity`.

## Imports

In [None]:
import logging
from datetime import datetime as DT
from datetime import timedelta as TD
from dateutil import parser
from plotly.subplots import make_subplots
import plotly.graph_objects as go


from rich import print
import pandas as pd
from pandas import Series
from pandas import DataFrame as DF
import plotly.express as px

from core.s3_utils import S3_Bucket
from jobs.base_jobs.job_interval import Jobinterval
from core.constants import *
from core.time_series_processing import preprocess_date
from jobs.high_mobility.constants import *

## Setup

We must ensure that the data points of the time series can be compared together.  
To do this, we will extract their corresponding car model from `fleet_info.csv`("List finale des vin a activer" on the drive).

In [None]:
# Load only the columns needed to identify each vehicle and its battery variant.
fleet_info = pd.read_csv(
    "fleet_info.csv",
    sep=";",
    usecols=["VIN", "Make", "Model", "Type"],
    dtype={"Make": "string"},
)
print(fleet_info.columns)

# Keep the Renault vehicles only (case-insensitive match on `Make`) and index them
# by VIN while keeping `vin` available as a regular column.
fleet_info = (
    fleet_info
    .rename(columns={"VIN": "vin"})
    .assign(Make=lambda df: df["Make"].str.lower())
    .query("Make == 'renault'")
    .set_index("vin", drop=False)
)
fleet_info[["Model", "Type"]].value_counts()

Then we will use data found online to get the default battery capacity of each model.  
Note: *Here a model is a combination of the `Model` and `Type` fleet_info variables, since cars of the same model with different types can have different battery capacities*.

In [None]:
# Default (100% SoH) battery capacity in kWh, keyed by `Model` then `Type`.
KWH_BATTERY_CAPCITY_DICT = {
    "ZOE": {
        "R90 Life (batterijkoop) 5d": 41,
        "R135 Edition One (batterijkoop) 5d": 52,
        "R135 Intens (batterijkoop) 5d": 52,
        "R135": 52,
    },
}

# Every `Type` for which a default capacity is known, in declaration order.
KNOW_MODEL_TYPES = [
    model_type
    for capacity_by_type in KWH_BATTERY_CAPCITY_DICT.values()
    for model_type in capacity_by_type
]

Let's remove the VINs for which we don't have a known default battery capacity.

In [None]:
# Boolean mask: True for vehicles whose `Type` is listed in KNOW_MODEL_TYPES.
has_known_capcity = fleet_info["Type"].isin(KNOW_MODEL_TYPES)
fleet_info = fleet_info.loc[has_known_capcity]
fleet_info

Let's extract the raw time series of all the cars we have into a multi-indexed df. 

In [None]:
bucket = S3_Bucket()


def get_renault_raw_ts(vin: str) -> DF:
    """Read the raw Renault time series of one vehicle from the bucket.

    Args:
        vin: Vehicle identification number, used to build the parquet key.

    Returns:
        The time series indexed by `date` (the `date` column is kept) and
        sorted by that index.
    """
    ts_key = f"raw_ts/renault/time_series/{vin}.parquet"
    raw_ts = bucket.read_parquet_df(ts_key)
    return raw_ts.set_index("date", drop=False).sort_index()

# Load the raw time series of every vehicle of the fleet and tag each one with its
# VIN, its type and the default (100% SoH) battery capacity of its model.
raw_ts_by_vin = {}
for vin, vehicle_info in fleet_info.iterrows():
    default_100_soc_energy = KWH_BATTERY_CAPCITY_DICT[vehicle_info["Model"]][vehicle_info["Type"]]
    try:
        raw_ts = get_renault_raw_ts(vin)
    except Exception as e:
        # Best effort: a vehicle whose time series cannot be loaded is skipped, but the
        # failure is reported so that missing vehicles do not go unnoticed.
        logging.warning("Skipping vin %s: could not load its raw time series (%r)", vin, e)
        continue
    raw_ts_by_vin[vin] = raw_ts.assign(
        default_100_soc_energy=default_100_soc_energy,
        vin=vin,
        type=vehicle_info["Type"],
    )

if not raw_ts_by_vin:
    # pd.concat would raise a less explicit ValueError on an empty mapping.
    raise ValueError("No raw time series could be loaded for any vehicle of the fleet.")

# Stack all vehicles into one frame; the VIN becomes the outer index level.
raw_tss = pd.concat(raw_ts_by_vin, axis="index", keys=list(raw_ts_by_vin), names=["vin"])

raw_tss["type"].unique()

**Note**: *There are only R135 models.*

### Time series processing
Let's implement a naive soh estimation pipeline.  

In [None]:
# Naive SoH pipeline:
#   - `soc` is the battery level expressed in percent,
#   - `expected_battery_energy` is the energy a battery at 100% SoH would hold at this battery level,
#   - `soh` is the measured energy relative to that expected energy, in percent
#     (actual / expected, as in the formula given in the introduction).
tss: DF = (
    raw_tss
    .rename(columns={
        "charging.battery_energy": "battery_energy",
        "diagnostics.odometer": "odometer",
        "charging.battery_level": "battery_level",
        "charging.estimated_range": "estimated_range",
    })
    .eval("soc = battery_level * 100")
    .eval("expected_battery_energy = default_100_soc_energy * battery_level")
    .eval("soh = 100 * battery_energy / expected_battery_energy")
)
tss.columns

In [None]:
# Show the last 10 samples of one vehicle to eyeball the computed columns.
tss.loc[tss["vin"] == "VF1AG000366046670"].tail(10)

## EDA

## Assumption verification
First, we will verify that the `soc` and `battery_energy` are two "real" variables.  
That is, none of them is calculated from the other.

In [None]:
# Measured battery energy against state of charge, one color per vehicle.
px.scatter(
    data_frame=tss,
    x="soc",
    y="battery_energy",
    color="vin",
)


Looking at this scatter plot we can see that:
- The two variables are in fact two real variables instead of one being a synthetic variable calculated from the other.  
- The difference is much more important at high `soc` values.

Let's verify that the `soh` is not correlated with the `soc` or `odometer`.

In [None]:
# SoH estimate against state of charge, one color per vehicle.
px.scatter(
    data_frame=tss,
    x="soc",
    y="soh",
    color="vin",
)

In [None]:
# SoH estimate against odometer, one color per vehicle.
px.scatter(
    data_frame=tss,
    x="odometer",
    y="soh",
    color="vin",
)

# Adding filters


In [None]:
# Keep only the samples taken above 40% SoC and restart from a flat positional index.
tss_filtered = tss.query("soc > 40").reset_index(drop=True)

# Per-vehicle rolling mean of the SoH.
# NOTE(review): with window=1 the rolling mean is the `soh` value itself, so no
# smoothing is applied here — confirm whether a larger window was intended.
tss_filtered['soh_mean'] = (
    tss_filtered
    .groupby('vin')['soh']
    .transform(lambda soh: soh.rolling(window=1, min_periods=1).mean())
)
px.scatter(tss_filtered, x="odometer", y="soh_mean", color="vin")



In [None]:
# Filtered SoH estimate against odometer, one color per vehicle.
px.scatter(
    data_frame=tss_filtered,
    x="odometer",
    y="soh_mean",
    color="vin",
)


In [None]:
# Focus on a single vehicle to see how the SoH estimate moves together with the SoC.
filtered_data = tss_filtered[tss_filtered['vin'] == 'VF1AG000X64802717']

# Create the subplot with a secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Primary y-axis: SoH estimate (soh_mean)
fig.add_trace(
    go.Scatter(x=filtered_data["odometer"], y=filtered_data["soh_mean"], name="SOH Mean"),
    secondary_y=False,
)

# Secondary y-axis: state of charge (soc)
fig.add_trace(
    go.Scatter(x=filtered_data["odometer"], y=filtered_data["soc"], name="SoC"),
    secondary_y=True,
)

# Update layout (the title names the two series that are actually plotted)
fig.update_layout(
    title_text="SOH Mean and SoC vs Odometer",
    xaxis_title="Odometer",
)

# Update y-axes labels so that each axis is named after the series it carries
fig.update_yaxes(title_text="SOH Mean", secondary_y=False)
fig.update_yaxes(title_text="SoC", secondary_y=True)

# Show the plot
fig.show()

# Trying with the estimated range


In [None]:
# Second SoH estimate, derived from the estimated range instead of the battery energy.
# NOTE(review): 400 is presumably the nominal full-charge range of the vehicles — confirm.
tss: DF = tss.eval("soh2 = estimated_range/400/soc*100")
px.scatter(
    data_frame=tss,
    x="soc",
    y="soh2",
    color="vin",
)

In [None]:
# `tss_filtered` was derived from `tss` before the `soh2` column existed, so `soh2` has
# to be computed on the filtered frame itself; otherwise the groupby below raises a
# KeyError on a fresh top-to-bottom run. The expression is the same as the one used on `tss`.
tss_filtered = tss_filtered.eval("soh2 = estimated_range/400/soc*100")

# Per-vehicle rolling mean of `soh2` over up to 100 consecutive samples.
tss_filtered['soh_mean2'] = (
    tss_filtered
    .groupby('vin')['soh2']
    .transform(lambda x: x.rolling(window=100, min_periods=1).mean())
)
px.scatter(tss_filtered, x="odometer", y="soh_mean2", color="vin")

In [None]:
# Focus on a single vehicle to see how the range-based SoH estimate evolves over time.
filtered_data = tss_filtered[tss_filtered['vin'] == 'VF1AG000X64802717']

# Create the subplot with a secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Primary y-axis: range-based SoH estimate (soh_mean2)
fig.add_trace(
    go.Scatter(x=filtered_data["date"], y=filtered_data["soh_mean2"], name="SOH Mean 2"),
    secondary_y=False,
)

# Secondary y-axis: state of charge (soc)
fig.add_trace(
    go.Scatter(x=filtered_data["date"], y=filtered_data["soc"], name="SoC"),
    secondary_y=True,
)

# Update layout (the x-axis carries the `date` column, not the odometer)
fig.update_layout(
    title_text="SOH Mean 2 and SoC vs Date",
    xaxis_title="Date",
)

# Update y-axes labels so that each axis is named after the series it carries
fig.update_yaxes(title_text="SOH Mean 2", secondary_y=False)
fig.update_yaxes(title_text="SoC", secondary_y=True)

# Show the plot
fig.show()


# Comparing the 2 SoH

In [None]:
# Compare, for a single vehicle, the energy-based and the range-based SoH estimates.
filtered_data_2 = tss_filtered[tss_filtered['vin'] == 'VF1AG000966427889']

# Create the subplot with a secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Primary y-axis: SoH based on battery_energy (soh_mean)
fig.add_trace(
    go.Scatter(x=filtered_data_2["date"], y=filtered_data_2["soh_mean"], name="soh_mean"),
    secondary_y=False,
)

# Secondary y-axis: SoH based on estimated_range (soh_mean2)
fig.add_trace(
    go.Scatter(x=filtered_data_2["date"], y=filtered_data_2["soh_mean2"], name="soh_mean2"),
    secondary_y=True,
)

# Update layout (the x-axis carries the `date` column, not the odometer)
fig.update_layout(
    title_text="Comparing the two SOH decay",
    xaxis_title="Date",
)

# Label each y-axis with the series it carries, since the two axes have their own ranges
fig.update_yaxes(title_text="soh_mean", secondary_y=False)
fig.update_yaxes(title_text="soh_mean2", secondary_y=True)

# Show the plot
fig.show()

Using SoH based on `battery_energy` and SoH2 based on `estimated_range`

In [None]:
# Combine the two estimates into a single indicator.
# `soh` is computed with a factor 100 (percent scale) whereas `soh2` is
# estimated_range / (400 * battery_level), i.e. a ratio around 1. `soh_mean2` is
# therefore rescaled to percent before averaging, so that both terms weigh the same.
tss_filtered['soh_tot'] = (tss_filtered['soh_mean'] + 100 * tss_filtered['soh_mean2']) / 2
filtered_data_3 = tss_filtered[tss_filtered['vin'] == 'VF1AG000966427889']

# Create the subplot with a secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Primary y-axis: combined SoH estimate (soh_tot)
fig.add_trace(
    go.Scatter(x=filtered_data_3["date"], y=filtered_data_3["soh_tot"], name="soh_tot"),
    secondary_y=False,
)

# Secondary y-axis: state of charge (soc)
fig.add_trace(
    go.Scatter(x=filtered_data_3["date"], y=filtered_data_3["soc"], name="soc"),
    secondary_y=True,
)

# Update layout (the x-axis carries the `date` column; the series are soh_tot and soc)
fig.update_layout(
    title_text="Combined SOH and SoC vs Date",
    xaxis_title="Date",
)

# Label each y-axis with the series it carries
fig.update_yaxes(title_text="soh_tot", secondary_y=False)
fig.update_yaxes(title_text="soc", secondary_y=True)

# Show the plot
fig.show()