Conclusion : 
Dans le premier fichier, nous avons fait l'étude au niveau du VIN.
Nous voulons voir si, en allant au niveau de chacune des charges, nous obtenons des résultats différents.

In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
from dateutil.relativedelta import relativedelta

from  transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries, ProcessedTimeSeries
from core.caching_utils import cache_result 
from core.pandas_utils import series_start_end_diff


In [None]:
from transform.raw_results.config import *


In [None]:
# Build the processed Tesla time series (force_update=True presumably bypasses
# any cached result — TODO confirm against ProcessedTimeSeries).
test = ProcessedTimeSeries("tesla", force_update=True)
# Remove these two columns in place; the merged frame built later takes
# `tesla_code` and `start_date` from the database extract instead.
test.drop(['tesla_code', 'start_date'], axis="columns", inplace=True)

In [None]:
from core.sql_utils import *
# Every vehicle_data row joined with its vehicle, vehicle model and battery,
# restricted to models whose name contains "model".
vehicle_data_query = text("""SELECT * FROM vehicle_data vd
            join vehicle v
            on v.id = vd.vehicle_id
            join vehicle_model vm 
            on vm.id = v.vehicle_model_id
            join battery b 
            on b.id=vm.battery_id
            WHERE vm.model_name like '%model%';""")

engine = get_sqlalchemy_engine()

# Run the query on the context-managed connection so it is closed as soon as
# the read is done (the previous version opened a second, never-closed one).
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(vehicle_data_query, connection)

# One row per VIN with the first chemistry / start date / version encountered.
battery_chemistry_df = dbeaver_df.groupby('vin', as_index=False).agg(
    battery_chemistry=("battery_chemistry", 'first'),
    start_date=('start_date', 'first'),
    tesla_code=('version', 'first'),
)

# Parse the timestamps BEFORE sorting so the order is chronological even if
# the database returned them as strings; merge_asof later needs sorted keys.
dbeaver_df['timestamp'] = pd.to_datetime(dbeaver_df['timestamp'])
dbeaver_df.sort_values('timestamp', inplace=True)

In [None]:
# Per-charge aggregation spec: output column -> (source column, aggregation).
charge_aggregations = {
    "energy_added_min": ("charge_energy_added", "min"),
    "energy_added_end": ("charge_energy_added", "last"),
    "soc_end": ("soc", "last"),
    "soc_min": ("soc", "min"),
    # Overwritten right after by the `soc_diff = soc_end - soc_min` eval.
    "soc_diff": ("soc", series_start_end_diff),
    "outside_temp": ("outside_temp", "mean"),
    "capacity": ("capacity", "first"),
    "odometer": ("odometer", "first"),
    "version": ("version", "first"),
    "size": ("soc", "size"),
    "model": ("model", "first"),
    "date": ("date", "first"),
    "charging_power": ("charging_power", "median"),
}

df = (
    test
    # Keep only the rows of the time series that are in charge.
    .query("trimmed_in_charge")
    # One group per vehicle and per charge index.
    .groupby(["vin", "trimmed_in_charge_idx"])
    .agg(**charge_aggregations)
    .reset_index(drop=False)
    # Energy added and SoC gained over the charge, then the SoH estimate.
    .eval("energy_added = energy_added_end - energy_added_min")
    .eval("soc_diff = soc_end - soc_min")
    .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
    # Attribute the SoC fraction gained to a charging level, based on the
    # median charging power of the charge.
    .eval("level_1 = soc_diff * (charging_power < @LEVEL_1_MAX_POWER) / 100")
    .eval("level_2 = soc_diff * (charging_power.between(@LEVEL_1_MAX_POWER, @LEVEL_2_MAX_POWER)) / 100")
    .eval("level_3 = soc_diff * (charging_power > @LEVEL_2_MAX_POWER) / 100")
    # Keep charges with more than 40 SoC points gained and an SoH in [0.75, 1.05].
    .query("soc_diff > 40 & soh.between(0.75, 1.05)")
    .sort_values(["date", "vin"])
)

# Truncate the charge timestamp to its calendar day.
charge_day = df['date'].dt.date
df['date'] = pd.to_datetime(charge_day)

In [None]:
# Cast the merge key to str on both frames so `by='vin'` compares like with like.
for frame in (df, dbeaver_df):
    frame['vin'] = frame['vin'].astype(str)

In [None]:
# Quick look at the `version` column of the database extract; it is renamed
# to `tesla_code` when the merged frame is built.
dbeaver_df['version']

In [None]:
# Left side: database SoH records, with `version` exposed as `tesla_code`.
db_soh_records = (
    dbeaver_df[['timestamp', 'soh', 'vin', 'version', 'battery_chemistry', 'start_date']]
    .rename(columns={'version': 'tesla_code'})
)

# For each database record, attach the most recent charge of the same VIN
# whose date is not after the record's timestamp (merge_asof's default
# backward search). Both sides carry a `soh` column, so the result exposes
# `soh_x` (database) and `soh_y` (computed per charge).
results = pd.merge_asof(
    db_soh_records,
    df,
    left_on='timestamp',
    right_on='date',
    by='vin',
)

In [None]:
# Bucket the odometer to the nearest multiple of 10 000.
results['round_odometer'] = results['odometer'].div(10000).round().mul(10000)


In [None]:
results['date'] = pd.to_datetime(results['date'])
results['start_date'] = pd.to_datetime(results['start_date'])

# Vectorized difference in calendar months between the charge date and the
# start date (absolute value, so the sign of the gap is discarded).
years_elapsed = results['date'].dt.year - results['start_date'].dt.year
months_elapsed = results['date'].dt.month - results['start_date'].dt.month
results['life_battery'] = (years_elapsed * 12 + months_elapsed).abs()

# EDA

## chimie

In [None]:
# Number of merged rows per battery chemistry (missing values excluded).
results["battery_chemistry"].value_counts(normalize=False)

In [None]:
# Same breakdown expressed as a share of the rows.
chemistry = results["battery_chemistry"]
chemistry.value_counts(normalize=True)

In [None]:
# Median database SoH (`soh_x`) per odometer bucket and battery chemistry,
# with one OLS trendline per chemistry.
median_soh_by_odometer = (
    results
    .groupby(['round_odometer', 'battery_chemistry'], as_index=False, observed=True)[['soh_x']]
    .median()
)
px.scatter(
    median_soh_by_odometer,
    x="round_odometer",
    y='soh_x',
    color='battery_chemistry',
    trendline='ols',
)

In [None]:
# Median per-charge SoH (`soh_y`) per battery age in months and chemistry,
# with one OLS trendline per chemistry.
median_soh_by_age = (
    results
    .groupby(['life_battery', 'battery_chemistry'], as_index=False, observed=True)[['soh_y']]
    .median()
)
px.scatter(
    median_soh_by_age,
    x="life_battery",
    y='soh_y',
    color='battery_chemistry',
    trendline='ols',
)

## Odometer

In [None]:
# Distribution of the per-charge SoH, coloured by odometer bucket.
px.histogram(
    data_frame=results,
    x='soh_y',
    color='round_odometer',
)

In [None]:
# Correlation of the database SoH with the charge-level features, shown as a
# single-column heatmap.
correlation_features = [
    'soh_x', 'charging_power',
    'odometer', 'level_1', 'level_2', 'level_3', 'outside_temp',
]
soh_correlations = results[correlation_features].corr()[['soh_x']]
px.imshow(
    soh_correlations,
    text_auto=True,
    color_continuous_scale='viridis',
    width=800,
    height=800,
)

In [None]:
# List the columns available in the merged frame.
results.columns

In [None]:

# Per-charge SoH against odometer, one colour per Tesla version code.
px.scatter(
    data_frame=results,
    x='odometer',
    y='soh_y',
    color="tesla_code",
    hover_data='tesla_code',
    title="SoH VS odometer",
)

There are two distinct point clouds for MTY13 but only a single line for MT336; MT337 looks the same.

## Charge types

The goal is to see whether the charging level (1/2/3) has an impact on the SoH.   
To do so, we need to compare cars with a similar number of charges, mileage, battery age and chemistry.


In [None]:
# Flag (1/0) whether each row has a positive SoC share attributed to the
# given charging level. The comparison is vectorized instead of a row-wise
# apply; missing values compare as False and therefore yield 0, as before.
for level in (1, 2, 3):
    results[f'is_level_{level}'] = results[f'level_{level}'].gt(0).astype(int)

In [None]:
# Build the odometer interval of each row.
# Left-closed bins reproduce the original thresholds:
#   x < 50k | 50k <= x < 80k | 80k <= x < 120k | x >= 120k
# A missing odometer (row with no matching charge) now stays missing instead
# of silently falling through every comparison into the "120k >" bucket.
odometer_bin_edges = [-np.inf, 50_000, 80_000, 120_000, np.inf]
odometer_bin_labels = ["< 50k", "50k-80k", "80-120", "120k >"]
results['odometer_interval'] = pd.cut(
    results['odometer'],
    bins=odometer_bin_edges,
    labels=odometer_bin_labels,
    right=False,
).astype(object)  # plain string labels, as before, rather than a categorical

In [None]:
# Per-vehicle summary spec: output column -> (source column, aggregation).
# NOTE(review): `results` has one row per database record (left side of the
# merge_asof), so the sums and counts below are per row of `results`, not per
# distinct charge — confirm this is the intended unit.
per_vin_aggregations = {
    # SoC share attributed to each charging level, summed over the rows.
    "total_level_1": ("level_1", "sum"),
    "total_level_2": ("level_2", "sum"),
    "total_level_3": ("level_3", "sum"),
    # Number of rows flagged for each charging level.
    "nbr_charge_level_1": ("is_level_1", 'sum'),
    "nbr_charge_level_2": ("is_level_2", 'sum'),
    "nbr_charge_level_3": ("is_level_3", 'sum'),
    # Number of non-null rows for the vehicle.
    "nbr_charge": ('is_level_3', "count"),
    "odometer": ("odometer", "max"),
    "odometer_interval": ("odometer_interval", "last"),
    # Summary statistics of the database SoH (`soh_x`).
    "soh_mean": ("soh_x", "mean"),
    "soh_median": ("soh_x", "median"),
    "soh_min": ("soh_x", "min"),
    "soh_max": ("soh_x", "max"),
}

charges_vin = (
    results
    .groupby(['vin'], as_index=False, observed=True)
    .agg(**per_vin_aggregations)
    .copy()
)

In [None]:

# For each value of `nbr_charge`, total number of level 1 / 2 / 3 flags over
# the vehicles having that count, drawn as grouped bars.
charge_type_columns = ["nbr_charge_level_1", "nbr_charge_level_2", "nbr_charge_level_3"]
(
    charges_vin
    .groupby("nbr_charge")[charge_type_columns]
    .sum()
    .plot.bar(title='distribution du nombre de type de charge')
)

In [None]:
# Average number of charges per vehicle (4.6 when this note was written).
charges_vin['nbr_charge'].mean()

In [None]:
# Vehicles with some level 3 charging and more than 4 counted charges.
charges_vin.query("total_level_3 > 0 and nbr_charge > 4")

In [None]:
# Mean SoH against the cumulated level 3 share, restricted to vehicles with
# some level 3 charging and more than 4 counted charges.
level_3_vehicles = charges_vin.query("total_level_3 > 0 and nbr_charge > 4")
px.scatter(
    level_3_vehicles,
    x='total_level_3',
    y="soh_mean",
    color='odometer_interval',
    size='nbr_charge',
    trendline="ols",
    hover_data='soh_max',
    title="Impact of level 3 charging on SoH",
)

In [None]:
# Mean SoH against the cumulated level 2 share, restricted to vehicles with
# some level 2 charging and more than 4 counted charges.
level_2_vehicles = charges_vin.query("total_level_2 > 0 and nbr_charge > 4")
px.scatter(
    level_2_vehicles,
    x='total_level_2',
    y="soh_mean",
    color='odometer_interval',
    size='nbr_charge',
    trendline="ols",
    title="Impact of level 2 charging on SoH",
)


In [None]:
# Mean SoH against the cumulated level 1 share, restricted to vehicles with
# some level 1 charging and more than 4 counted charges.
level_1_vehicles = charges_vin.query("total_level_1 > 0 and nbr_charge > 4")
px.scatter(
    level_1_vehicles,
    x='total_level_1',
    y="soh_mean",
    color='odometer_interval',
    size='nbr_charge',
    trendline="ols",
    title="Impact of level 1 charging on SoH",
)