In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
from dateutil.relativedelta import relativedelta


In [None]:
from  transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries
from core.caching_utils import cache_result 
from core.pandas_utils import series_start_end_diff
from transform.raw_results.config import *
from transform.raw_results.tesla_results import get_results


# Load data

In [None]:
# Column names of interest in the processed Tesla time series.
# NOTE(review): no visible cell reads USE_COLS (the local get_results uses
# TESLA_USE_COLS) — confirm whether this list is still needed.
USE_COLS = [
    # vehicle / charge identification
    "vin", "trimmed_in_charge_idx", "trimmed_in_charge",
    # charge measurements
    "charge_energy_added", "soc", "outside_temp",
    # vehicle description
    "capacity", "odometer", "model", "date", "tesla_code",
    # charging context
    "battery_heater", "charging_power", "version", "activation_status",
]

In [None]:
# Charging-power thresholds used to split charges into three levels:
# below LEVEL_1_MAX_POWER -> level 1, between the two bounds -> level 2,
# above LEVEL_2_MAX_POWER -> level 3.
# NOTE(review): unit is presumably kW — confirm.
LEVEL_1_MAX_POWER, LEVEL_2_MAX_POWER = 8, 45

In [None]:
from core.sql_utils import *

# Load the vehicle rows joined with their vehicle, model and battery tables.
# The query runs on the connection opened by the `with` block, so the
# connection is released as soon as the frame is loaded (no dangling handle).
engine = get_sqlalchemy_engine()

with engine.connect() as connection:
    dbeaver_df = pd.read_sql(
        text("""SELECT * FROM vehicle_data vd
            join vehicle v
            on v.id = vd.vehicle_id
            join vehicle_model vm 
            on vm.id = v.vehicle_model_id
            join battery b 
            on b.id=vm.battery_id
            WHERE vm.model_name like '%model%';"""),
        connection,
    )

# One row per VIN: first battery chemistry and first start date found for it.
battery_chemistry_df = dbeaver_df.groupby('vin', as_index=False).agg(
    battery_chemistry=("battery_chemistry", 'first'),
    start_date=('start_date', 'first'),
)

In [None]:
# Load the results via the imported get_results helper, with force_update passed as False.
results = get_results(force_update=False)

In [None]:
# Left join: every results row is kept; battery_chemistry / start_date stay
# missing for VINs absent from battery_chemistry_df.
results = pd.merge(results, battery_chemistry_df, on='vin', how='left')

In [None]:
# Infinite SoH values are not meaningful: treat both +inf and -inf as missing
# so they do not distort the means and medians computed below.
results['soh'] = results['soh'].replace([np.inf, -np.inf], np.nan)

## Tesla battery label & chemistry

In [None]:
# build life battery in month
def compute_life_battery(start_date, last_date):
    """Return the whole-month difference ``start_date - last_date``.

    The value follows ``relativedelta(start_date, last_date)``: it is positive
    when the first argument is the later date, so a caller that wants a
    positive age must pass the most recent date first.

    Args:
        start_date: First date handed to ``relativedelta``.
        last_date: Second date handed to ``relativedelta``.

    Returns:
        int: ``years * 12 + months`` of the relativedelta, or 0 when either
        date is missing or the difference cannot be computed.
    """
    try:
        if pd.isna(start_date) or pd.isna(last_date):
            # Missing date (None / NaN / NaT): no age can be derived.
            return 0
        delta = relativedelta(start_date, last_date)
        return delta.years * 12 + delta.months
    except Exception:
        # Best-effort fallback for values relativedelta cannot handle.
        # Unlike a bare `except`, this lets KeyboardInterrupt / SystemExit
        # through, so a long `apply` can still be interrupted.
        return 0
    
# Most recent charge date of each vehicle, broadcast onto its rows.
# Using transform (instead of merging an aggregate back) overwrites the column
# in place, so the cell can be re-run without creating life_battery_x/_y.
last_charge_date = results.groupby('vin', observed=False)['date'].transform('max')

# compute_life_battery returns first-minus-second, so the latest date goes first
# to obtain a positive number of months since start_date.
results['life_battery'] = [
    compute_life_battery(last_date, start_date)
    for last_date, start_date in zip(last_charge_date, results['start_date'])
]

In [None]:
# build interval for vehicles odometer
# Left-closed bins: (-inf, 50k), [50k, 80k), [80k, 120k), [120k, inf).
# pd.cut keeps a missing odometer as NaN instead of letting it fall through
# every comparison and be labelled "120k >".
results['odometer_interval'] = pd.cut(
    results['odometer'],
    bins=[-np.inf, 50_000, 80_000, 120_000, np.inf],
    right=False,
    labels=["< 50k", "50k-80k", "80-120", "120k >"],
).astype(object)  # plain strings, so later groupbys behave as before

In [None]:
# One row per VIN: SoH statistics plus the vehicle attributes used in the EDA.
vin_soh = (
    results
    .groupby('vin', as_index=False, observed=True)
    .agg(
        chemistry=pd.NamedAgg(column='battery_chemistry', aggfunc='first'),
        soh_mean=pd.NamedAgg(column='soh', aggfunc='mean'),
        soh_median=pd.NamedAgg(column='soh', aggfunc='median'),
        odometer_interval=pd.NamedAgg(column='odometer_interval', aggfunc="last"),
        odometer=pd.NamedAgg(column='odometer', aggfunc="max"),
        life_battery=pd.NamedAgg(column='life_battery', aggfunc='max'),
        tesla_code=pd.NamedAgg(column='tesla_code', aggfunc='first'),
    )
)

# EDA

## Difference between the impact of odometer and battery life on SoH

In [None]:
# Correlation of the median SoH with mileage and battery age (single-column heatmap).
px.imshow(
    vin_soh[['soh_median', 'odometer', 'life_battery']].corr()[['soh_median']],
    text_auto=True,
    color_continuous_scale='viridis',
)

In [None]:
# Median SoH against battery age, coloured by mileage; only VINs with a positive age.
px.scatter(
    vin_soh.loc[
        vin_soh['life_battery'] > 0,
        ['odometer', 'soh_median', 'life_battery', 'chemistry', 'odometer_interval'],
    ],
    x='life_battery',
    y='soh_median',
    color='odometer',
    trendline='ols',
    title="Impact of battery life on SoH",
)

In [None]:
# Median SoH against mileage, coloured by battery age; only VINs with a positive age.
px.scatter(
    vin_soh.loc[
        vin_soh['life_battery'] > 0,
        ['odometer', 'soh_median', 'life_battery', 'chemistry'],
    ],
    x='odometer',
    y='soh_median',
    color='life_battery',
    trendline='ols',
    title="Impact of mileage on SoH",
)

## Chemistry impact 

Chemistry impacts the SoH, but we want to know **_how much_**.

Chemistry repartition by vin

In [None]:
# Number of VINs per chemistry, restricted to VINs that have a mean SoH.
vin_soh[vin_soh['soh_mean'].notna()].value_counts('chemistry')

In [None]:
# Same count expressed as a share of the VINs that have a mean SoH.
vin_soh[vin_soh['soh_mean'].notna()].value_counts('chemistry', normalize=True)

In [None]:
# Vehicles per chemistry, and per (chemistry, odometer interval).
# The count Series is named explicitly before reset_index, so the column name
# does not depend on the pandas version ('count' in pandas >= 2, 0 before).
chemistry_rep_v = (
    vin_soh.value_counts(['chemistry'])
    .rename('total_vehicles_chemistry')
    .reset_index()
)
chemistry_odo_rep_v = (
    vin_soh.value_counts(['chemistry', 'odometer_interval'])
    .rename('total_vehicles_chemistry_odometer')
    .reset_index()
)

# Share of each odometer interval within its chemistry.
rep = chemistry_rep_v.merge(chemistry_odo_rep_v, on='chemistry')
rep['proportion_chemistry'] = (
    rep['total_vehicles_chemistry_odometer'] / rep['total_vehicles_chemistry']
).round(2)
rep

In [None]:
# Median SoH against mileage, one colour per chemistry; the Tesla code shows on hover.
px.scatter(
    vin_soh,
    x='odometer',
    y='soh_median',
    color="chemistry",
    hover_data="tesla_code",
)

On constate un nuage de points aligné horizontalement autour de 0,9 de SoH, qui correspond au MT336. 

In [None]:
# Average of the per-VIN median / mean SoH for each (odometer interval, chemistry),
# sorted in descending order of interval label, then median SoH.
(
    vin_soh
    .groupby(["odometer_interval", "chemistry"], as_index=False)[['soh_median', 'soh_mean']]
    .mean()
    .sort_values(["odometer_interval", 'soh_median'], ascending=False)
)

In [None]:
# Same averages per (odometer interval, chemistry), leaving out the MT336 vehicles.
(
    vin_soh.loc[~vin_soh['tesla_code'].isin(['MT336'])]
    .groupby(['odometer_interval', 'chemistry'])[['soh_median', 'soh_mean']]
    .mean()
    .sort_index()
)

**Général**  
Le SOH diminue avec l’augmentation du kilométrage pour toutes les chimies de batteries (LFP, NCA, NMC).
Les batteries LFP se dégradent moins rapidement que les NCA et NMC.
Les NMC conservent généralement le meilleur SOH à travers les différentes plages de kilométrage.

**<50 000 km :**  
SOH le plus élevé pour toutes les chimies, avec NMC (~0,98) et NCA (~0,97)
et LFP (~0,987).

**50 000 - 80 000 km :**  
Dégradation modérée.
NMC (~0.948-0.953) 
NCA (~0.95-0.96)
LFP (~0.968-0.97).

**80 000 - 120 000 km :**  
LFP (~0.939-0.94) 
NCA (~0.930-0.94) 
NMC (~0.935-0.938)

**120 000+ km :**  
Dégradation la plus importante.
LFP (~0.929-0.932).
Le NCA se dégrade le plus (~0.909-0.92).
Le NMC conserve un bon SoH (~0.924-0.925).

## Charge types

The goal is to see whether the charging level (1/2/3) has an impact on the SoH.   
To do so, we need to compare cars with similar numbers of charges, mileage, battery age and chemistry. 


In [None]:
# 1 when the charge contributed to the given level (value > 0), else 0.
# Missing values compare as False and therefore yield 0.
results['is_level_1'] = results['level_1'].gt(0).astype(int)
results['is_level_2'] = results['level_2'].gt(0).astype(int)
results['is_level_3'] = results['level_3'].gt(0).astype(int)

In [None]:
# Per-VIN charging summary: summed level values, number of charges per level,
# total number of charges, mileage and SoH statistics.
charges_vin = (
    results
    .groupby(['vin'], as_index=False, observed=True)
    .agg(
        total_level_1=pd.NamedAgg(column="level_1", aggfunc="sum"),
        total_level_2=pd.NamedAgg(column="level_2", aggfunc="sum"),
        total_level_3=pd.NamedAgg(column="level_3", aggfunc="sum"),
        nbr_charge_level_1=pd.NamedAgg(column="is_level_1", aggfunc='sum'),
        nbr_charge_level_2=pd.NamedAgg(column="is_level_2", aggfunc='sum'),
        nbr_charge_level_3=pd.NamedAgg(column="is_level_3", aggfunc='sum'),
        nbr_charge=pd.NamedAgg(column='trimmed_in_charge_idx', aggfunc="count"),
        odometer=pd.NamedAgg(column="odometer", aggfunc="max"),
        odometer_interval=pd.NamedAgg(column="odometer_interval", aggfunc="last"),
        soh_mean=pd.NamedAgg(column="soh", aggfunc="mean"),
        soh_median=pd.NamedAgg(column="soh", aggfunc="median"),
        soh_min=pd.NamedAgg(column="soh", aggfunc="min"),
        soh_max=pd.NamedAgg(column="soh", aggfunc="max"),
    )
    .copy()
)

In [None]:

# For each total number of charges, how many charges of each level were recorded.
(
    charges_vin
    .groupby("nbr_charge")[["nbr_charge_level_1", "nbr_charge_level_2", "nbr_charge_level_3"]]
    .sum()
    .plot(kind='bar', title='distribution du nombre de type de charge')
)

In [None]:
# Average number of charges per vehicle (about 4.6 when this note was written).
charges_vin.nbr_charge.mean()

In [None]:
# VINs with some level-3 charging and more than 4 recorded charges.
charges_vin.query('total_level_3 > 0 and nbr_charge > 4')

In [None]:
# Mean SoH against total level-3 charging, for VINs with more than 4 charges.
px.scatter(
    charges_vin.query('total_level_3 > 0 and nbr_charge > 4'),
    x='total_level_3',
    y="soh_mean",
    color='odometer_interval',
    size='nbr_charge',
    trendline="ols",
    hover_data='soh_max',
    title="Impact of level 3 charging on SoH",
)

In [None]:
# Mean SoH against total level-2 charging, for VINs with more than 4 charges.
px.scatter(
    charges_vin.query('total_level_2 > 0 and nbr_charge > 4'),
    x='total_level_2',
    y="soh_mean",
    color='odometer_interval',
    size='nbr_charge',
    trendline="ols",
    title="Impact of level 2 charging on SoH",
)


In [None]:
# Mean SoH against total level-1 charging, for VINs with more than 4 charges.
px.scatter(
    charges_vin.query('total_level_1 > 0 and nbr_charge > 4'),
    x='total_level_1',
    y="soh_mean",
    color='odometer_interval',
    size='nbr_charge',
    trendline="ols",
    title="Impact of level 1 charging on SoH",
)

## vin with more than 5 charges

In [None]:
# VINs whose count of non-null trimmed_in_charge_idx values is strictly above 5.
vin_with_more_than_5_charges = (
    results
    .groupby('vin', observed=True, as_index=False)[['trimmed_in_charge_idx']]
    .count()
    .query('trimmed_in_charge_idx > 5')['vin']
    .unique()
)

In [None]:
# Number of vehicles kept by the "more than 5 charges" filter.
len(vin_with_more_than_5_charges)

In [None]:
# Charge rows belonging to the vehicles selected above.
df_more_5_charges = results[results['vin'].isin(vin_with_more_than_5_charges)]

In [None]:
# Correlation of the per-VIN mean SoH with mean mileage and battery age,
# restricted to rows with a positive battery age.
px.imshow(
    df_more_5_charges.loc[df_more_5_charges['life_battery'] > 0]
    .groupby("vin", observed=True)[['soh', 'odometer', 'life_battery']]
    .mean()
    .corr()[['soh']],
    text_auto=True,
    color_continuous_scale='viridis',
)

# mean discharging impact

In [None]:
#@cache_result("data_cache/tesla_results.parquet", "local_storage")
def get_results():
    """Aggregate the in-charge Tesla time series per (vin, charge) and derive SoH.

    NOTE(review): this definition shadows the ``get_results`` imported from
    ``transform.raw_results.tesla_results``; the earlier load cell calls it with
    ``force_update=False``, which this version does not accept, so re-running
    that cell after this one fails.

    Steps, in order:
      1. Read the series filtered on ``trimmed_in_charge == True``.
      2. Aggregate one row per ``(vin, trimmed_in_charge_idx)``.
      3. ``energy_added`` = last minus min of ``charge_energy_added``.
      4. ``soh`` = ``energy_added / (soc_diff / 100 * capacity)``.
      5. ``level_1/2/3`` = ``soc_diff / 100`` when the median charging power is
         below LEVEL_1_MAX_POWER, between the two thresholds (bounds included),
         or above LEVEL_2_MAX_POWER respectively, else 0.
      6. For ``tesla_code == 'MTY13'``: SoH is divided by 0.96, then 0.08 is
         added when the uncorrected SoH was within [0.75, 0.9].

    Returns:
        The aggregated frame sorted by ``tesla_code``, ``vin``, ``date``
        (presumably a pandas DataFrame — TODO confirm).
    """
    charging_series = TeslaProcessedTimeSeries(
        "tesla",
        columns=TESLA_USE_COLS,
        filters=[("trimmed_in_charge", "==", True)],
    )
    per_charge = (
        charging_series
        .groupby(["vin", "trimmed_in_charge_idx"], observed=True, as_index=False)
        .agg(
            energy_added_min=pd.NamedAgg(column="charge_energy_added", aggfunc="min"),
            energy_added_end=pd.NamedAgg(column="charge_energy_added", aggfunc="last"),
            soc_end=pd.NamedAgg(column="soc", aggfunc="last"),
            soc_min=pd.NamedAgg(column="soc", aggfunc="min"),
            soc_diff=pd.NamedAgg(column="soc", aggfunc=series_start_end_diff),
            inside_temp=pd.NamedAgg(column="inside_temp", aggfunc="mean"),
            capacity=pd.NamedAgg(column="capacity", aggfunc="first"),
            odometer=pd.NamedAgg(column="odometer", aggfunc="first"),
            version=pd.NamedAgg(column="version", aggfunc="first"),
            size=pd.NamedAgg(column="soc", aggfunc="size"),
            model=pd.NamedAgg(column="model", aggfunc="first"),
            date=pd.NamedAgg(column="date", aggfunc="first"),
            charging_power=pd.NamedAgg(column="charging_power", aggfunc="median"),
            tesla_code=pd.NamedAgg(column="tesla_code", aggfunc="first"),
        )
    )
    return (
        per_charge
        # Energy delivered during the charge, then the raw SoH ratio.
        .eval("energy_added = energy_added_end - energy_added_min")
        .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
        # SoC fraction gained, attributed to exactly one power level.
        .eval("level_1 = soc_diff * (charging_power < @LEVEL_1_MAX_POWER) / 100")
        .eval("level_2 = soc_diff * (charging_power.between(@LEVEL_1_MAX_POWER, @LEVEL_2_MAX_POWER)) / 100")
        .eval("level_3 = soc_diff * (charging_power > @LEVEL_2_MAX_POWER) / 100")
        # Flag computed on the uncorrected SoH, before the MTY13 adjustment.
        .eval("bottom_soh = soh.between(0.75, 0.9)")
        .eval("fixed_soh_min_end = soh.mask(tesla_code == 'MTY13', soh / 0.96)")
        # NOTE(review): relies on the eval parser binding `&` like `and`,
        # i.e. bottom_soh & (tesla_code == 'MTY13') — confirm.
        .eval("fixed_soh_min_end = fixed_soh_min_end.mask(bottom_soh & tesla_code == 'MTY13', fixed_soh_min_end + 0.08)")
        .eval("soh = fixed_soh_min_end")
        .sort_values(["tesla_code", "vin", "date"])
    )

In [None]:
# Per-charge results built by the get_results defined in this notebook (not the imported one).
results_bis = get_results()

In [None]:
# Leave out the MT336 vehicles. The explicit copy makes results_bis an
# independent frame, so the column assignments that follow do not write to a
# filtered slice (SettingWithCopyWarning).
results_bis = results_bis[results_bis['tesla_code'] != 'MT336'].copy()

In [None]:
# 0/1 flags on the minimum SoC of the charge: below 20, 20 to 70 (both included), above 70.
# Missing soc_min values compare as False and therefore yield 0 in all three.
results_bis["low_soc"] = results_bis["soc_min"].lt(20).astype(int)
results_bis["mid_soc"] = results_bis["soc_min"].between(20, 70).astype(int)
results_bis["hight_soc"] = results_bis["soc_min"].gt(70).astype(int)

In [None]:
# Per-VIN discharge profile: number of charges started in each SoC bucket,
# mean minimum SoC, number of charges, median SoH, last mileage and last date.
discharge_df = (
    results_bis
    .groupby('vin', observed=True, as_index=False)
    .agg(
        low_soc=pd.NamedAgg(column="low_soc", aggfunc="sum"),
        mid_soc=pd.NamedAgg(column="mid_soc", aggfunc="sum"),
        hight_soc=pd.NamedAgg(column="hight_soc", aggfunc="sum"),
        soc_min=pd.NamedAgg(column="soc_min", aggfunc="mean"),
        # count of non-null capacity values is used as the number of charges
        nbr_charges=pd.NamedAgg(column="capacity", aggfunc="count"),
        soh=pd.NamedAgg(column="soh", aggfunc="median"),
        odometer=pd.NamedAgg(column="odometer", aggfunc='last'),
        date=pd.NamedAgg(column="date", aggfunc='last'),
    )
)

In [None]:
# Share of each vehicle's charges that started in the low / mid SoC bucket.
discharge_df['proportion_low'] = discharge_df['low_soc'].div(discharge_df['nbr_charges'])
discharge_df['proportion_mid'] = discharge_df['mid_soc'].div(discharge_df['nbr_charges'])

In [None]:
# Parse the date column, then attach battery chemistry and start date per VIN (left join).
discharge_df['date'] = pd.to_datetime(discharge_df['date'])
discharge_df = pd.merge(discharge_df, battery_chemistry_df, on='vin', how='left')

In [None]:
# Mileage rounded to the nearest multiple of 20000, stored as text so plots treat it as a category.
discharge_df['round_odometer'] = (
    discharge_df['odometer'].div(20000).round().mul(20000).astype(str)
)


In [None]:
# Median SoH against the mean minimum SoC of each vehicle, coloured by chemistry.
px.scatter(
    discharge_df,
    x='soc_min',
    y='soh',
    color='battery_chemistry',
)

In [None]:
# Median SoH against the share of low-SoC charges, for vehicles with at least one such charge.
px.scatter(
    discharge_df.loc[discharge_df['proportion_low'] > 0],
    x='proportion_low',
    y='soh',
    color='battery_chemistry',
    size='nbr_charges',
)

In [None]:
# Median SoH against mean minimum SoC for vehicles with more than 5 charges,
# coloured by rounded mileage, with an OLS trendline.
px.scatter(
    discharge_df.loc[discharge_df['nbr_charges'] > 5],
    x='soc_min',
    y='soh',
    color='round_odometer',
    size='nbr_charges',
    trendline='ols',
)