# Presentation of the Ayvens dataset

## Setup

### Create data cache folder if it does not exist already.

In [None]:
# IPython shell escape; `-p` makes this a no-op when data_cache/ already exists.
! mkdir -p data_cache

### Imports

In [None]:
from datetime import datetime as DT
from datetime import timedelta as TD
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import numpy as np
from rich import print
from rich.progress import track
import pandas as pd
from pandas import Series
from pandas import DataFrame as DF
import plotly.express as px

from core.constants import *
from core.ev_models_info import models_info
from transform.ayvens.ayvens_fleet_info import fleet_info
from transform.ayvens.ayvens_get_raw_tss import get_ayvens_raw_tss

### Data extraction

In [None]:
# Keep a single row per VIN, then index the table by VIN for .loc lookups.
# drop=False leaves "vin" available as a regular column as well.
fleet_info = fleet_info.drop_duplicates(subset="vin").set_index("vin", drop=False)

In [None]:
# Load the raw time series and bring every brand onto a common column schema.
# raw_tss is iterated with .items() below, yielding (brand, DataFrame) pairs.
raw_tss = get_ayvens_raw_tss()
tss_dict = {}

# Per-vehicle attributes copied from fleet_info onto every time-series row.
COLS_TO_CPY_FROM_FLEET_INFO = [
    "make",
    "model",
    "version",
    "dummy_soh_maker_offset",
    "dummy_soh_model_offset",
    "dummy_soh_model_slope",
    "dummy_soh_vehicle_offset",
]

# Source column name -> common column name.
# Each key appears once: a repeated key in a dict literal is silently
# overwritten, which hides typos.
RENAME_COLS_DICT = {
    "date_of_value": "date",
    "diagnostics.odometer": "odometer",
    "odometer.value": "odometer",
    "mileage": "odometer",
    "charging.battery_energy": "battery_energy",
    "charging.estimated_range": "estimated_range",
    "charging.battery_level": "soc",
    "soc_hv_header": "soc",
}

# Columns kept after renaming; everything else is dropped.
COLS_TO_KEEP = [
    "date",
    "soc",
    "odometer",
    "estimated_range",
    "battery_energy",
    "vin",
]

# Dtypes for the concatenated time series (consumed by a later cell).
COL_DTYPES = {
    "soc": "float",
    "odometer": "float",
    "estimated_range": "float",
    "battery_energy": "float",
    "dummy_soh_maker_offset": "float",
    "dummy_soh_model_offset": "float",
    "dummy_soh_model_slope": "float",
    "dummy_soh_vehicle_offset": "float",
    "dummy_soh_offset": "float",
    "vin": "string",
}

for brand, brand_raw_tss in track(raw_tss.items()):
    # Normalise the column names, then drop whatever is outside the schema.
    brand_raw_tss = brand_raw_tss.rename(columns=RENAME_COLS_DICT)
    cols_to_drop = brand_raw_tss.columns[~brand_raw_tss.columns.isin(COLS_TO_KEEP)]
    brand_raw_tss = brand_raw_tss.drop(columns=cols_to_drop)
    # Add make, model, version and the dummy-SoH parameters of each row's
    # vehicle. .values assigns positionally, so the differing indexes of the
    # two frames are not aligned against each other.
    brand_raw_tss[COLS_TO_CPY_FROM_FLEET_INFO] = fleet_info.loc[brand_raw_tss["vin"], COLS_TO_CPY_FROM_FLEET_INFO].values
    # Total dummy SoH offset = maker + model + vehicle offsets.
    tss_dict[brand] = brand_raw_tss.eval("dummy_soh_offset = dummy_soh_maker_offset + dummy_soh_model_offset + dummy_soh_vehicle_offset")



In [None]:
# Attach a battery capacity to every Renault row.
# The lookup table is restricted to model == 'zoe' and keyed by version, so
# this presumes every Renault row is a zoe version listed in models_info.
zoe_models = models_info.query("model == 'zoe'").set_index("version")
renault_versions = tss_dict["renault"]["version"]
tss_dict["renault"]["capacity"] = zoe_models.loc[renault_versions, "kwh_capacity"].values



In [None]:
# Show the distinct (model, version) pairs present in the Mercedes-Benz data.
mercedes = tss_dict["mercedes-benz"]
MODEL_IDX = ["model", "version"]
mercedes.loc[:, MODEL_IDX].drop_duplicates()

In [None]:
# Look up the default range for each Mercedes-Benz row's (model, version).
# NOTE(review): the lookup is only shown as the cell output and is not stored
# on `mercedes` -- confirm whether an assignment was intended.
models = models_info.set_index(["model", "version"])
mercedes = tss_dict["mercedes-benz"]

# One (model, version) key per time-series row, in row order.
indices = pd.MultiIndex.from_frame(mercedes[["model", "version"]])

models.loc[indices, "default_km_range"]

In [None]:
# Stack the per-brand frames into one table with a fresh integer index,
# enforce the column dtypes, then order the rows by make, VIN and date.
tss = pd.concat(tss_dict, ignore_index=True)
tss = tss.astype(COL_DTYPES)
tss = tss.sort_values(by=["make", "vin", "date"])

## Analytics

### Odometer

In [None]:
# One row per vehicle: the highest odometer reading and the vehicle's make.
odometers = tss.groupby("vin").agg(
    odometer=("odometer", "max"),
    make=("make", "first"),
)

odometers.to_csv("data_cache/odometers.csv")

In [None]:
# Histogram of each vehicle's maximum odometer reading, one facet per make.
fig = px.histogram(
    odometers,
    x="odometer",
    nbins=15,
    color="make",
    facet_col="make",
    title="Distribution of vehicles over odometer",
)
fig.show()
# write_html saves the figure to disk. to_html only returns the HTML as a
# string and takes no file path, so the previous call wrote nothing.
fig.write_html("data_cache/odometer_distribution.html")

### Dummy soh

In [None]:
# Dummy SoH: 100 plus the vehicle's offset, decreasing linearly with odometer.
noiseless_soh = tss.eval("soh = 100 + dummy_soh_offset - odometer * dummy_soh_model_slope")
# Add Gaussian noise (std 0.02) vehicle by vehicle, then clamp to [0, 100].
# The noise comes from numpy's global RNG, which is not seeded here.
tss["soh"] = (
    noiseless_soh
    .groupby("vin")["soh"]
    .transform(lambda vehicle_soh: vehicle_soh + np.random.normal(0, 0.02, len(vehicle_soh)))
    .clip(0, 100)
)
tss["soh_method"] = "general"

In [None]:
# Per-vehicle summary: last odometer reading, mean SoH and make.
soh_per_vehicle = tss.groupby("vin").agg(
    odometer=("odometer", "last"),
    soh=("soh", "mean"),
    make=("make", "first"),
)
# SoH against odometer with an OLS trendline per make.
px.scatter(soh_per_vehicle, x="odometer", y="soh", trendline="ols", color="make")

### Renault (only R315 ZOEs) soh

In [None]:
# Renault soh: measured battery energy as a percentage of the energy expected
# at the current state of charge (capacity * soc).
# Note soc of renault is between 0 and 1, not 0 and 100.
renault_soh_mask: Series = tss.eval("make == 'renault'")
# Write only the "soh" column. Assigning whole rows back rewrote every column
# and silently discarded the intermediate expected-energy column, which does
# not exist in tss.
tss.loc[renault_soh_mask, "soh"] = (
    tss.loc[renault_soh_mask]
    .eval("100 * battery_energy / (capacity * soc)")
)
tss.loc[renault_soh_mask, "soh_method"] = "renault"
# Per-vehicle summary: highest odometer reading and median SoH.
renault_soh = (
    tss
    .query("make == 'renault'")
    .groupby("vin")
    .agg({
        "odometer": "max",
        "soh": "median",
    })
)
renault_soh.to_csv("data_cache/renault_soh.csv")

In [None]:
# Per-vehicle Renault SoH against odometer, with an OLS trendline.
fig = px.scatter(
    renault_soh,
    x="odometer",
    y="soh",
    trendline="ols",
)
# write_html saves the figure to disk. to_html only returns the HTML as a
# string and takes no file path, so the previous call wrote nothing.
fig.write_html("data_cache/renault_soh.html")
fig.show()

### Mercedes soh

In [None]:
# Mercedes-Benz soh: estimated range divided by (soc * 100), written only on
# the Mercedes-Benz rows.
mercedes_soh_mask = tss["make"] == "mercedes-benz"
mercedes_rows = tss.loc[mercedes_soh_mask]
tss.loc[mercedes_soh_mask, "soh"] = mercedes_rows.eval("estimated_range / (soc * 100)")
tss.loc[mercedes_soh_mask, "soh_method"] = "mercedes-benz"

# Per-vehicle summary: highest odometer reading, median SoH and model.
mercedes_soh = (
    tss
    .query("make == 'mercedes-benz'")
    .groupby("vin")
    .agg(
        odometer=("odometer", "max"),
        soh=("soh", "median"),
        model=("model", "first"),
    )
)
mercedes_soh.to_csv("data_cache/mercedes_soh.csv")

In [None]:
# Per-vehicle Mercedes-Benz SoH against odometer, with an OLS trendline per
# model.
fig = px.scatter(
    mercedes_soh,
    x="odometer",
    y="soh",
    trendline="ols",
    color="model",
)
fig.show()
# write_html saves the figure to disk. to_html only returns the HTML as a
# string and takes no file path, so the previous call wrote nothing.
# The file goes to data_cache/ like the other exports of this notebook.
fig.write_html("data_cache/mercedes_soh.html")

In [None]:
# SoH over odometer bins, one facet per SoH computation method.
# The data source is the concatenated `tss` table: `tss_dict` is a dict of
# per-brand frames that have no "soh" or "soh_method" column.
# NOTE(review): with y set, px.histogram sums y per bin by default -- confirm
# whether histfunc="avg" was intended.
fig = px.histogram(
    tss,
    nbins=15,
    x="odometer",
    y="soh",
    color="soh_method",
    facet_col="soh_method",
)
fig.show()
# write_html saves the figure to disk. to_html only returns the HTML as a
# string and takes no file path, so the previous call wrote nothing.
# The file goes to data_cache/ like the other exports of this notebook.
fig.write_html("data_cache/soh_distribution_over_odometer.html")