In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
from dateutil.relativedelta import relativedelta

from  transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries, ProcessedTimeSeries
from core.caching_utils import cache_result 
from core.pandas_utils import series_start_end_diff


In [None]:
from transform.raw_results.config import *


In [None]:
# Instantiate the processed time series for the "tesla" make.
# NOTE(review): force_update=True presumably forces a recomputation instead of
# reusing a cached result — confirm against ProcessedTimeSeries.
test = ProcessedTimeSeries("tesla", force_update=True)

In [None]:
# Build one row per charging session, i.e. per (vin, trimmed_in_charge_idx) pair,
# then derive per-session energy added, SoC gain, an SoH estimate and the share of
# the SoC gain attributed to each charging-power band.
#
# Derived columns:
#   energy_added = energy_added_end - energy_added_min
#   soc_diff     = soc_end - soc_min
#   soh          = energy_added / (soc_diff / 100 * capacity)
#   level_1/2/3  = soc_diff / 100 when the session's median charging_power falls
#                  below LEVEL_1_MAX_POWER / between the two thresholds (inclusive) /
#                  above LEVEL_2_MAX_POWER, and 0 otherwise.
#
# soc_diff is intentionally NOT aggregated in .agg(): it is defined by the eval
# below, which would overwrite any aggregated value of the same name anyway.
df = (
    test.query("trimmed_in_charge")  # keep only the rows flagged as being in charge
    .groupby(["vin", "trimmed_in_charge_idx"])  # one group per vehicle and charge index
    .agg(
        energy_added_min=("charge_energy_added", "min"),
        energy_added_end=("charge_energy_added", "last"),
        soc_end=("soc", "last"),
        soc_min=("soc", "min"),
        outside_temp=("outside_temp", "mean"),
        capacity=("capacity", "first"),
        odometer=("odometer", "first"),
        version=("version", "first"),
        size=("soc", "size"),  # number of time-series rows in the session
        model=("model", "first"),
        date=("date", "first"),
        charging_power=("charging_power", "median"),
        tesla_code=("tesla_code", "first"),
        start_date=("start_date", "first"),
        battery_chemistry=("battery_chemistry", "first"),
    )
    .reset_index(drop=False)  # bring vin / trimmed_in_charge_idx back as columns
    .eval("energy_added = energy_added_end - energy_added_min")
    .eval("soc_diff = soc_end - soc_min")
    .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
    .eval("level_1 = soc_diff * (charging_power < @LEVEL_1_MAX_POWER) / 100")
    .eval("level_2 = soc_diff * (charging_power.between(@LEVEL_1_MAX_POWER, @LEVEL_2_MAX_POWER)) / 100")
    .eval("level_3 = soc_diff * (charging_power > @LEVEL_2_MAX_POWER) / 100")
    # Keep only sessions with an SoC gain above 40 and an SoH estimate within [0.75, 1.05].
    .query("soc_diff > 40 & soh.between(0.75, 1.05)")
    .sort_values(["tesla_code", "vin", "date"])
)

In [None]:
# Bucket each odometer reading to the nearest multiple of 10 000.
df['round_odometer'] = df['odometer'].div(10000).round().mul(10000)


In [None]:
# Make sure both date columns are real datetimes before using the .dt accessor.
df['date'] = pd.to_datetime(df['date'])
df['start_date'] = pd.to_datetime(df['start_date'])

# Vectorised difference between `date` and `start_date` in calendar months
# (year gap * 12 + month gap, day of month ignored), taken as an absolute value.
df['life_battery'] = (
    df['date'].dt.year.sub(df['start_date'].dt.year)
    .mul(12)
    .add(df['date'].dt.month.sub(df['start_date'].dt.month))
    .abs()
)

# EDA

## Battery chemistry

In [None]:
# Number of rows of df for each battery_chemistry value.
df["battery_chemistry"].value_counts()

In [None]:
# Share of df rows for each battery_chemistry value (normalize=True returns fractions).
df["battery_chemistry"].value_counts(normalize=True)

In [None]:
# Median SoH per (round_odometer, battery_chemistry) pair, drawn as a scatter
# with one OLS trendline per chemistry.
(
    df.groupby(['round_odometer', 'battery_chemistry'], as_index=False, observed=True)[['soh']]
    .median()
    .pipe(px.scatter, x="round_odometer", y='soh', color='battery_chemistry', trendline='ols')
)

In [None]:
# Median SoH per (life_battery, battery_chemistry) pair, drawn as a scatter
# with one OLS trendline per chemistry.
(
    df.groupby(['life_battery', 'battery_chemistry'], as_index=False, observed=True)[['soh']]
    .median()
    .pipe(px.scatter, x="life_battery", y='soh', color='battery_chemistry', trendline='ols')
)

## Odometer

In [None]:
# Distribution of SoH, with one colour per round_odometer value.
px.histogram(data_frame=df, x='soh', color='round_odometer')

In [None]:
# Correlation of SoH with the numeric session features; only the 'soh' column
# of the correlation matrix is displayed.
px.imshow(
    df[['soh', 'charging_power', 'odometer', 'level_1', 'level_2', 'level_3', 'outside_temp']]
    .corr()
    .loc[:, ['soh']],
    text_auto=True,
    color_continuous_scale='viridis',
    width=800,
    height=800,
)

In [None]:

# SoH of every row of df against its odometer reading, coloured by tesla_code.
# hover_data is passed as a list of column names: a bare string is only accepted
# by recent plotly releases, whereas the list form works across versions.
px.scatter(df, x='odometer', y='soh', title="SoH VS odometer", hover_data=['tesla_code'], color="tesla_code")

There are two distinct point clouds for MTY13 and only a single line for MT336; MT337 looks the same.

## Study of discharge 

In [None]:
# List the columns currently available in df.
df.columns

In [None]:
# For each (soc_min, battery_chemistry) pair: count of rows, median SoH and median
# odometer; plotted as SoH against soc_min with one OLS trendline per chemistry
# and the median odometer shown on hover.
(
    df.groupby(['soc_min', 'battery_chemistry'], as_index=False, observed=True)
    .agg(
        nbr_charges=pd.NamedAgg('vin', "count"),
        soh=pd.NamedAgg('soh', 'median'),
        odometer=pd.NamedAgg('odometer', 'median'),
    )
    .pipe(
        px.scatter,
        x='soc_min',
        y='soh',
        color="battery_chemistry",
        trendline='ols',
        hover_data={'odometer': True},
    )
)