# SoH estimation experiments on Ford vehicles


## Setup

In [None]:
# Shell escape: create the data_cache directory; -p makes this a no-op if it already exists.
! mkdir -p data_cache

### Imports

In [None]:
import plotly.express as px
from scipy import stats
import numpy as np
from core.stats_utils import *
from core.pandas_utils import *
from core.config import valid_soh_points
from core.stats_utils import filter_results_by_lines_bounds
from transform.fleet_info.main import fleet_info
from transform.processed_tss.main import get_processed_tss

### Data extraction

In [None]:
# Load the processed time series for the "ford" make.
# force_update=False presumably reuses a cached result instead of recomputing — confirm in get_processed_tss.
tss = get_processed_tss("ford", force_update=False)

In [None]:
# List the columns available in the processed time series.
tss.columns

In [None]:
# Run the project's sanity_check helper on the loaded frame
# (presumably provided by one of the star imports; the checks it performs are not visible here).
sanity_check(tss)

In [None]:
# Frequency of each value of the "range" column among Ford rows of fleet_info,
# missing values included, most frequent first.
ford_fleet = fleet_info.query("make == 'ford'")
ford_fleet["range"].value_counts(dropna=False, sort=True, ascending=False)

## Time series

In [None]:
# VIN with the largest number of rows in the time series. idxmax already
# returns the label of the maximum, so sorting beforehand is unnecessary.
most_common_vin = tss.groupby("vin").size().idxmax()
# Only the last expression of a cell is displayed, so print the value explicitly.
print(f"Most common VIN: {most_common_vin}")

# The single-vehicle analysis below uses this hard-coded VIN, not most_common_vin.
vin = "WF0TK3SU4MMA37317"
ts = tss.query(f"vin == '{vin}'")

In [None]:
# px.scatter(ts, x="date", y="soc", title=f"{vin}")

In [None]:
# px.scatter(ts, x="date", y="estimated_range", title=f"{vin}")

In [None]:
# px.scatter(ts, x="date", y="max_range", title=f"{most_common_vin}")

In [None]:
# Odometer readings over time for the selected VIN.
px.scatter(ts, x="date", y="odometer", title=f"{vin}")



In [None]:
# Preview the first rows of the fleet-wide time series.
tss.head()

In [None]:
# Rank every numeric column by its correlation with one target column.
selected_column = "battery_energy"
corr = tss.corr(numeric_only=True)
selected_corr = corr[[selected_column]].sort_values(
    by=selected_column,
    ascending=False,
)

# Single-column heat map, strongest positive correlation at the top.
px.imshow(
    selected_corr,
    title=f"Correlation Matrix for {selected_column}",
)


#### Rolling variance


In [None]:
# Rolling variance of battery_energy, computed independently for each vehicle.
ROLLING_WINDOW = 3  # number of consecutive samples per window


def _rolling_variance(energy):
    """Return the rolling variance of one vehicle's battery_energy series."""
    return energy.rolling(window=ROLLING_WINDOW).var()


tss["rolling_variance"] = tss.groupby("vin")["battery_energy"].transform(_rolling_variance)
# Keep only the rows where the rolling variance is defined.
var = tss[tss["rolling_variance"].notna()]


In [None]:
# Correlation heat map between soc, battery_energy and the rolling variance.
columns_of_interest = ['soc', 'battery_energy', 'rolling_variance']
correlation_matrix = tss[columns_of_interest].corr()

# Axis and colour-bar captions for the figure.
axis_labels = dict(x="Variables", y="Variables", color="Correlation")

fig = px.imshow(
    correlation_matrix,
    labels=axis_labels,
    x=correlation_matrix.columns,
    y=correlation_matrix.index,
    title="Heatmap de la Corrélation",
)
fig.show()

## First filtering 

In [None]:
# Drop rows whose odometer is exactly 0, for both the single-vehicle series
# and the fleet-wide frame.
non_zero_odometer = "odometer != 0"
ts = ts.query(non_zero_odometer)
tss = tss.query(non_zero_odometer)

## Reducing dependencies on factors


In [None]:
def _soh(frame):
    """Return battery_energy / soc / capacity for every row of *frame*."""
    return frame['battery_energy'] / frame['soc'] / frame['capacity']


# Both frames are results of DataFrame.query, so writing a column with
# frame['soh'] = ... can raise SettingWithCopyWarning. assign returns a new
# frame with the column added, and the formula is defined only once.
tss = tss.assign(soh=_soh)
ts = ts.assign(soh=_soh)


### Dependency on SoC -> we keep only values above 0.5

In [None]:
# SoH against SoC for the selected vehicle, coloured by capacity, with a single
# OLS trendline fitted over all points. The title names the plotted axes
# (x is soc, and the points are raw samples rather than per-vehicle averages).
fig = px.scatter(
    ts,
    x="soc",
    y="soh",
    color="capacity",
    height=600,
    title="State-of-Health (SoH) vs State-of-Charge (SoC)",
    trendline="ols",
    trendline_scope="overall",
    hover_data=["vin"],
)

fig.show()

### Dependency on discharging -> SoH is much more stable while charging, so we keep only the values recorded while charging

In [None]:
# SoH against odometer for the selected vehicle, restricted to soc > 0.5 and
# coloured by the in_charge column.
high_soc_points = ts.query("soc > 0.5")
px.scatter(high_soc_points, x="odometer", y="soh", color="in_charge")

##  Final SOH

### Estimation

In [None]:
# SoH estimation pipeline: filter the samples, correct per model, then aggregate per VIN.
# NOTE(review): this comment originally read "Mercedes soh", yet `tss` is loaded
# for "ford" earlier in the notebook — confirm which make this cell targets.
tss_filtered:DF = (
    tss
    # keep the 0.5 < soc < 0.99 band
    .query('soc > 0.5')
     .query('soc < 0.99')
    # keep rows where the in_charge_perf_mask column is true
    .query("in_charge_perf_mask")
    # SoH = battery_energy / soc / capacity
    .eval("soh = battery_energy / soc / capacity ")
    # charge_size = number of remaining rows sharing the same (vin, in_charge_idx)
    .assign(charge_size = lambda df: df.groupby(["vin", "in_charge_idx"]).transform("size"))
    # drop (vin, in_charge_idx) groups with 10 rows or fewer
    .query("charge_size > 10")

)
# Filter on the number of charges.
# 1. Find the VINs whose maximum in_charge_idx (among the remaining rows) is > 3.
valid_vins = (
    tss_filtered.groupby('vin')['in_charge_idx']
    .max()
    .reset_index()
    .query('in_charge_idx > 3')
    ['vin']
)

# 2. Keep only the rows belonging to those VINs.
tss_filtered = tss_filtered[tss_filtered['vin'].isin(valid_vins)]


# Outlier filter via the project helper; presumably keeps the points lying within
# the line bounds described by valid_soh_points — confirm against core.stats_utils.
tss_filtered = filter_results_by_lines_bounds(tss_filtered, valid_soh_points)

# Per-model correction: SoH is divided by a fixed factor for two models.
# NOTE(review): 'vito' and 'sprinter' look like leftovers from another make's
# notebook, and the origin of the 0.97 / 0.98 factors is not visible here — confirm.
mask = tss_filtered['model'] == 'vito'
tss_filtered.loc[mask, 'soh'] = tss_filtered.loc[mask, 'soh'] /0.97

mask = tss_filtered['model'] == 'sprinter'
tss_filtered.loc[mask, 'soh'] = tss_filtered.loc[mask, 'soh'] /0.98


# Disabled: a second pass of the outlier filter after the model correction.
# tss_filtered = filter_results_by_lines_bounds(tss_filtered, valid_soh_points)

# One row per VIN: mean SoH, highest odometer, most frequent model,
# latest date and highest battery_energy.
soh_per_vehicle = (
    tss_filtered
    .groupby("vin")
    .agg({
        "soh": "mean",
        "odometer": "max",
        "model": Series.mode,
        "date": "max",
        "battery_energy": "max",
    })
    .reset_index()
)

In [None]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Per-vehicle mean SoH against the highest odometer reading, coloured by model.
fig = px.scatter(
    soh_per_vehicle,
    x="odometer",
    y="soh",
    labels={"soh": "SoH", 'vin': 'VIN'},
    color="model",
    title="Average State-of-Health (SoH) vs Mileage",
    hover_data={"vin": True}
)

# Overlay one least-squares straight line per model.
for model_name, group in soh_per_vehicle.groupby('model'):
    # Fit only on complete (odometer, soh) pairs, since NaN values would break
    # or poison the fit. Sorting by odometer draws the line left to right.
    points = group[["odometer", "soh"]].dropna().sort_values("odometer")
    x = points["odometer"]
    y = points["soh"]

    # A straight line needs at least two distinct x values; with fewer, the
    # fit is rank-deficient and the slope is meaningless.
    if x.nunique() < 2:
        continue

    coefficients = np.polyfit(x, y, 1)  # linear fit (degree 1)
    trendline = np.polyval(coefficients, x)

    fig.add_trace(
        go.Scatter(
            x=x,
            y=trendline,
            mode='lines',
            name=f'Trendline {model_name}',
            line=dict(dash='dash'),  # same dashed style for every model
        )
    )

fig.show()

In [None]:
# Round SoH to two decimals in place, so later cells see the rounded values,
# then export the rows whose model is "vito" or "sprinter" to CSV.
soh_per_vehicle['soh'] = soh_per_vehicle['soh'].round(2)

export_columns = ['vin', 'model', 'soh', 'odometer']
exported_rows = soh_per_vehicle[export_columns].query('model == "vito" | model == "sprinter"')
exported_rows.to_csv("soh_per_vehicle.csv", index=False)



In [None]:
# Inspect the aggregated row of one specific VIN.
soh_per_vehicle.query("vin == 'W1V44760313886610'")

## Visualization

In [None]:
# SoH against SoC for one specific VIN, using the filtered points and coloured by in_charge.
px.scatter(tss_filtered.query("vin == 'WF0TK3SU8MMA46439'"), x="soc", y="soh", color="in_charge")


In [None]:
# Load the raw time series for "mercedes-benz" and list its columns.
# NOTE(review): the rest of this notebook loads "ford" data — confirm this make is intended.
from transform.raw_tss.main import get_raw_tss
raw_tss = get_raw_tss("mercedes-benz", force_update=False)
raw_tss.columns

The resulting SoH values follow the overall trend, which makes a lot more sense than the previous results.  
We can assume that the default ranges recorded in fleet info are wrong.

## Conclusion

SoH from estimated range seems promising and could be used as our final results for Ayvens.  
We would, however, need to improve the accuracy of the estimator.  