In [None]:
from transform.processed_tss.ProcessedTimeSeries import *
import pandas as pd
from core.pandas_utils import *
from transform.raw_results.config import *
import plotly.express as px
import plotly.graph_objects as go
from itertools import combinations
from transform.raw_results.tesla_results import get_results

## Initialization - Get right dataframe


In [None]:
# Load the processed Tesla time series, passing a
# ("trimmed_in_charge", "==", True) filter to the loader.
# NOTE(review): force_update=True presumably bypasses any cached result and
# recomputes on every run — confirm this is still wanted.
tss = TeslaProcessedTimeSeries(
    "tesla",
    force_update=True,
    filters=[("trimmed_in_charge", "==", True)],
)

In [None]:
# Drop every row whose tesla_code is 'MT332'; copy so later column
# assignments are made on an independent frame.
is_excluded_code = tss['tesla_code'] == 'MT332'
tss = tss[~is_excluded_code].copy()

In [None]:
from core.sql_utils import *

# Read every vehicle_data row joined with its vehicle and vehicle model, for
# models whose name contains "model".
# The query runs on the context-managed connection so it is closed when the
# block exits (the previous version opened a second connection, `con`, that
# was never closed and ignored the managed one).
# NOTE(review): SELECT * across three joined tables may return duplicate
# column names (e.g. `id`) — confirm the downstream columns are the intended ones.
engine = get_sqlalchemy_engine()

with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT * FROM vehicle_data vd
            join vehicle v
            on v.id = vd.vehicle_id
            join vehicle_model vm 
            on vm.id = v.vehicle_model_id
            WHERE vm.model_name like '%model%';"""), connection)

dbeaver_df.head()

In [None]:
# Number of distinct VINs that have at least one non-null SoH value.
has_soh = dbeaver_df['soh'].notna()
dbeaver_df.loc[has_soh, 'vin'].nunique()

In [None]:
# Row-level join of the charging samples with the SoH values read from the
# database, matching on (date == timestamp) and vin.
soh_by_timestamp = dbeaver_df[['timestamp', 'vin', 'soh']]
df_merge = tss.merge(
    soh_by_timestamp,
    left_on=['date', 'vin'],
    right_on=['timestamp', 'vin'],
    how='inner',
)

# Output column -> (source column, aggregation), one row per (vin, charging session).
# Note that soc_min takes the *first* soc of the session, not its minimum.
charge_aggregations = {
    "energy_added_min": ("charge_energy_added", "min"),
    "energy_added_end": ("charge_energy_added", "last"),
    "soc_diff": ("soc", series_start_end_diff),
    "inside_temp": ("inside_temp", "mean"),
    "capacity": ("capacity", "first"),
    "odometer": ("odometer", "first"),
    "version": ("version", "first"),
    "size": ("soc", "size"),
    "model": ("model", "first"),
    "date": ("date", "first"),
    "charging_power": ("charging_power", "median"),
    "tesla_code": ("tesla_code", "first"),
    "start_date": ("start_date", "first"),
    "soc_min": ("soc", "first"),
    "soc_end": ("soc", "last"),
    "est_battery_range": ('est_battery_range', 'last'),
}

charge_sessions = tss.groupby(["vin", "trimmed_in_charge_idx"], observed=True, as_index=False)

# level_1 / level_2 / level_3 hold the session's SoC gain (as a fraction) when
# the median charging power falls in the corresponding power band, else 0.
ts = (
    charge_sessions
    .agg(**charge_aggregations)
    .eval("level_1 = soc_diff * (charging_power < @LEVEL_1_MAX_POWER) / 100")
    .eval("level_2 = soc_diff * (charging_power.between(@LEVEL_1_MAX_POWER, @LEVEL_2_MAX_POWER)) / 100")
    .eval("level_3 = soc_diff * (charging_power > @LEVEL_2_MAX_POWER) / 100")
)

# Truncate the session date to day precision, kept as a datetime column.
ts['date'] = pd.to_datetime(ts['date'].dt.date)

In [None]:
# Distribution of charging_power over the merged rows (1000 bins).
px.histogram(df_merge[['charging_power']], x="charging_power", nbins=1000)

Il y a deux pics un peu inexpliqués à 7 kW et 8 kW.

## Compute & Plot

In [None]:
# Keep only the samples whose charging_status is exactly 'charging'; copy so
# the columns added afterwards are written to an independent frame.
is_charging = tss['charging_status'] == 'charging'
tss_charging = tss[is_charging].copy()

In [None]:
# Per-session deltas between consecutive samples: elapsed time (in seconds)
# and SoC change. The first sample of each session gets NaN for both.
# NOTE(review): diff() follows the current row order — assumes rows are
# already sorted by date within each session; confirm.
session_keys = ["trimmed_in_charge_idx", 'vin']
elapsed = tss_charging.groupby(session_keys)["date"].diff()
# total_seconds() keeps the full duration. `.dt.seconds` only returns the
# seconds *component* (0-86399), silently dropping whole days and turning
# negative gaps into large positive values.
tss_charging["time_diff"] = elapsed.dt.total_seconds()
tss_charging["soc_diff"] = tss_charging.groupby(session_keys)["soc"].diff()

In [None]:
# Keep only the vehicles with more than 3 charging sessions that could be
# matched to an SoH record (same day and vin).
df_merge = ts.merge(
    dbeaver_df[['timestamp', 'vin', 'soh']],
    left_on=['date', 'vin'],
    right_on=['timestamp', 'vin'],
    how='inner',
)
charges_per_vin = df_merge.vin.value_counts()
index = charges_per_vin[charges_per_vin > 3].index

In [None]:
# Restrict the charging samples to the VINs selected in `index`
# (vehicles with more than 3 matched charges).
has_enough_charges = tss_charging['vin'].isin(index)
tss_charging = tss_charging[has_enough_charges].copy()

In [None]:
# Inspect the columns available on the filtered charging samples.
tss_charging.columns

In [None]:
# Expand each charging session into one row per integer SoC point.
# Between two consecutive samples (ordered by soc) the elapsed time is spread
# evenly over the SoC points gained, and every row of a session carries the
# session's median charging power, because the power varies during a charge.
expanded_columns = ["time_diff", "soc", "vin", "trimmed_in_charge_idx", "charging_power"]
new_rows = []
soc_changes = tss_charging[tss_charging['soc_diff'] != 0]
for (vin, charge_idx), group in soc_changes.groupby(["vin", "trimmed_in_charge_idx"], observed=True):
    # Drop missing soc *before* renumbering so the positional labels 0..n-1
    # used below are guaranteed to be contiguous.
    group = group.dropna(subset=['soc']).sort_values("soc").reset_index(drop=True)
    # Constant for the whole session: compute once, not per SoC step.
    charging_power = group['charging_power'].median()

    for i in range(len(group) - 1):
        soc_start, soc_end = int(group.loc[i, "soc"]), int(group.loc[i + 1, "soc"])
        time_end = group.loc[i + 1, "time_diff"]
        if np.isnan(time_end):
            continue
        # Seconds spent per SoC point on the step ending at sample i + 1.
        time_step = time_end / group.loc[i + 1, "soc_diff"]
        for soc in range(soc_start, soc_end):
            new_rows.append({"time_diff": time_step, "soc": float(soc), "vin": vin, "trimmed_in_charge_idx": charge_idx, "charging_power": charging_power})

# Passing the columns explicitly keeps sort_values from failing with a
# KeyError when no row was produced.
df_expanded = (
    pd.DataFrame(new_rows, columns=expanded_columns)
    .sort_values(["vin", "trimmed_in_charge_idx", "soc"])
    .reset_index(drop=True)
)



In [None]:
# Keep only strictly positive per-point durations. The explicit copy makes the
# result an independent frame, so the column added in the next cell does not
# trigger a SettingWithCopyWarning.
df_expanded = df_expanded[df_expanded['time_diff'] > 0].copy()

In [None]:
# Bucket each row's session charging power into a named power category.
def _power_bucket(power):
    """Return the charging-level label for a power value.

    Boundaries: <5, [5, 8], (8, 15], (15, 45), [45, 100), and everything else.
    A NaN power fails every comparison and therefore falls through to
    "level_3 > 100", exactly as the original conditional chain did.
    """
    if power < 5:
        return 'under 5'
    if power <= 8:
        return '5 to 8'
    if power <= 15:
        return 'close to 11'
    if power < 45:
        return "level_2"
    if power < 100:
        return "level_3 < 100"
    return "level_3 > 100"


df_expanded['level_charge'] = df_expanded['charging_power'].apply(_power_bucket)

In [None]:
# Rank the selected vehicles by their median SoH (ascending) and take the
# 150 lowest and the 150 highest.
# NOTE(review): with fewer than 300 ranked vehicles the two groups overlap.
median_soh_by_vin = (
    df_merge[df_merge['vin'].isin(index)]
    .groupby('vin')[['soh']]
    .median()
    .dropna()
    .sort_values('soh')
)

worst_vin_soh = median_soh_by_vin.head(150).index.values

best_vin_soh = median_soh_by_vin.tail(150).index.values

In [None]:
# Mean time per SoC point for each (soc, vin, level_charge), computed
# separately for the worst-SoH and best-SoH vehicles, then stacked with a
# 'cat' label.
# NOTE(review): the two groups are aggregated slightly differently
# (observed=True only for "worst", dropna() only for "best") — kept as is.
soc_keys = ['soc', 'vin', 'level_charge']
worst_rows = df_expanded[df_expanded['vin'].isin(worst_vin_soh)]
best_rows = df_expanded[df_expanded['vin'].isin(best_vin_soh)]

df_expanded_worst = (
    worst_rows.groupby(soc_keys, observed=True, as_index=False)[['time_diff']]
    .mean()
    .assign(cat='worst')
)
df_expanded_best = (
    best_rows.groupby(soc_keys, as_index=False)[['time_diff']]
    .mean()
    .dropna()
    .assign(cat='best')
)
df = pd.concat([df_expanded_worst, df_expanded_best])

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One subplot row per charging-power category; in each, the mean time per SoC
# point for the worst-SoH vehicles (red) and the best-SoH vehicles (green).
levels = list(df.level_charge.unique())
n_rows = len(levels)
fig = make_subplots(rows=n_rows, cols=1, subplot_titles=levels)

# (value of the 'cat' column, legend label, marker color)
cohorts = (
    ('worst', 'Worst SoH', 'red'),
    ('best', 'Best SoH', 'green'),
)

for row, level in enumerate(levels, start=1):
    level_rows = df[df['level_charge'] == level]

    for cat, label, color in cohorts:
        # Average over vehicles once and reuse the result for both axes.
        mean_time = (
            level_rows[level_rows['cat'] == cat]
            .groupby(['soc'], as_index=False)['time_diff']
            .mean()
        )
        fig.add_trace(
            go.Scatter(
                x=mean_time['soc'],
                y=mean_time['time_diff'],
                # No text is attached to the points, so plain markers render the same.
                mode="markers",
                name=label,
                # Group the per-row traces so the legend shows each cohort once.
                legendgroup=label,
                showlegend=(row == 1),
                marker=dict(color=color),
            ),
            row=row, col=1,
        )

fig.update_layout(title_text='Time spent by SoC point between best SoH and Worst SoH by charging power',
                  height=1000, width=1400)

# Axis titles follow the real number of rows instead of hard-coded positions:
# x title on the last row, y title on a middle row (row 4 when there are 6).
fig.update_xaxes(title_text='SoC point', row=n_rows, col=1)

fig.update_yaxes(title_text='Time passed in seconds', row=n_rows // 2 + 1, col=1)

fig.show()

### Mean time for a charge 

In [None]:
# Per charging session: total expanded time, SoC span and power category.
# NOTE(review): if series_start_end_diff is "last - first", a session expanded
# into n SoC points yields n - 1 here (and 0 for a single point) — confirm
# this is the intended denominator for the per-point mean computed afterwards.
time = (
    df_expanded
    .groupby(['vin', 'trimmed_in_charge_idx'], as_index=False, observed=True)
    .agg(
        total_time=pd.NamedAgg("time_diff", "sum"),
        total_soc_diff=pd.NamedAgg("soc", series_start_end_diff),
        level_charge=pd.NamedAgg("level_charge", 'first'),
    )
)

In [None]:
# Average number of seconds per SoC point for each session
# (infinite when total_soc_diff is 0).
time['mean_time_soc'] = time['total_time'].div(time['total_soc_diff'])

In [None]:
# Mean seconds per SoC point by power category for the worst-SoH vehicles,
# ignoring sessions whose ratio is +inf.
worst_sessions = time[time['vin'].isin(worst_vin_soh) & (time['mean_time_soc'] != np.inf)]
worst_sessions.groupby(["level_charge"])['mean_time_soc'].mean().round()

In [None]:
# Mean seconds per SoC point by power category for the best-SoH vehicles,
# ignoring sessions whose ratio is +inf.
best_sessions = time[time['vin'].isin(best_vin_soh) & (time['mean_time_soc'] != np.inf)]
best_sessions.groupby(["level_charge"])['mean_time_soc'].mean().round()

Pour les véhicules qui ont fait plus de 3 charges, les 100 véhicules avec le moins bon SoH ont un temps de charge plus long en moyenne pour chaque point de SoC  que les 100 avec le meilleur SoH.

### Study by SoH slice: > 99 % / 95-99 % / 90-95 % / < 90 %

In [None]:
# Number of distinct vehicles present in the session/SoH merge.
df_merge.vin.nunique()

In [None]:
# Attach each vehicle's maximum observed odometer value.
# df_merge already has an `odometer` column (first value of each session), so
# an implicit merge would join on both ['odometer', 'vin'] and add nothing.
# Join explicitly on vin and store the result under its own name.
max_odometer_by_vin = (
    tss_charging[['odometer', 'vin']]
    .groupby('vin', as_index=False, observed=True)
    .max()
    .rename(columns={'odometer': 'odometer_max'})
)
# Dropping a pre-existing odometer_max keeps the cell safe to re-run.
df_merge = (
    df_merge
    .drop(columns='odometer_max', errors='ignore')
    .merge(max_odometer_by_vin, on='vin', how='left')
)

In [None]:
# Add the per-vehicle maximum odometer as a column.
# The merge key is implicit: it is the set of columns shared by both frames.
vin_max_odometer = tss_charging[['odometer', 'vin']].groupby('vin', as_index=False, observed=True).max()
df_expanded = df_expanded.merge(vin_max_odometer, how='left')

In [None]:
# VIN values of the rows falling in each SoH band:
# [.., 0.90), [0.90, 0.95) and [0.95, 0.99).
# One entry per matching row, so a VIN can appear several times.
soh_values = df_merge['soh']
vin_under_90 = df_merge[soh_values < .9].vin.values
vin_under_95 = df_merge[(soh_values >= .9) & (soh_values < .95)].vin.values
vin_under_99 = df_merge[(soh_values >= .95) & (soh_values < .99)].vin.values

In [None]:
# Label every row with its SoH band. Intervals are left-closed (right=False):
# [0, 0.9), [0.9, 0.95), [0.95, 0.99), [0.99, inf).
soh_bin_edges = [0, 0.9, 0.95, 0.99, float('inf')]
soh_bin_labels = ['soh < 90', '90 < soh < 95', '95 < soh < 99', 'soh > 99']
df_merge['cat_soh'] = pd.cut(
    df_merge['soh'],
    bins=soh_bin_edges,
    labels=soh_bin_labels,
    right=False,
)

In [None]:
# Color associated with each SoH category label, from lowest to highest SoH.
colors = dict(zip(
    ('soh < 90', '90 < soh < 95', '95 < soh < 99', 'soh > 99'),
    ('red', 'orange', 'green', 'blue'),
))

In [None]:
# Attach to every expanded row the vehicle's SoH and SoH category, taken as
# the last value per vin (GroupBy.last) in df_merge.
last_soh_by_vin = df_merge.groupby('vin', as_index=False)[['soh', 'cat_soh']].last()
df_expanded = df_expanded.merge(last_soh_by_vin, on='vin', how="left")

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One subplot row per charging-power category; in each, the mean time per SoC
# point for every SoH category.
levels = ['under 5', '5 to 8', 'close to 11', 'level_2', 'level_3 < 100', 'level_3 > 100']
# The grid is sized from the same list that drives the loop and the titles, so
# a category absent from the data can no longer make add_trace target a row
# that does not exist.
n_rows = len(levels)
fig = make_subplots(rows=n_rows, cols=1, subplot_titles=levels)

# (value of cat_soh, legend label, marker color)
# NOTE(review): these colors differ from the `colors` mapping defined earlier
# in the notebook for two categories; the plotted colors are kept unchanged.
soh_traces = (
    ('soh < 90', 'SoH < 90', 'red'),
    ('90 < soh < 95', '90 < SoH < 95', 'orange'),
    ('95 < soh < 99', '95 < SoH < 99', 'blue'),
    ('soh > 99', 'SoH > 99', '#2A9D8F'),
)

for row, level in enumerate(levels, start=1):
    level_rows = df_expanded[df_expanded['level_charge'] == level]

    for cat_soh, label, color in soh_traces:
        in_category = level_rows['cat_soh'] == cat_soh
        if cat_soh == 'soh > 99':
            # As before, the top category only keeps vehicles under 100,000 on the odometer.
            in_category = in_category & (level_rows['odometer'] < 100_000)
        # Average over vehicles once and reuse the result for both axes.
        mean_time = level_rows[in_category].groupby(['soc'], as_index=False)['time_diff'].mean()

        fig.add_trace(
            go.Scatter(
                x=mean_time['soc'],
                y=mean_time['time_diff'],
                # No text is attached to the points, so plain markers render the same.
                mode="markers",
                name=label,
                # Group the per-row traces so the legend shows each category once.
                legendgroup=label,
                showlegend=(row == 1),
                marker=dict(color=color),
            ),
            row=row, col=1,
        )


fig.update_layout(
    # The previous title was copied from the best-vs-worst figure.
    title_text='Time spent by SoC point per SoH category by charging power',
    height=1000, width=1400,
    showlegend=True
)


# x title on the last row, y title on a middle row (row 4 of 6).
fig.update_xaxes(title_text='SoC point', row=n_rows, col=1)

fig.update_yaxes(title_text='Time passed in seconds', row=n_rows // 2 + 1, col=1)

fig.show()

On peut voir que plus la charge semble puissante, plus les véhicules à SoH faible ont un temps de charge élevé par rapport aux autres.

In [None]:
# Check the charging power distribution for each (power category, SoH category).
df_expanded.groupby(['level_charge', 'cat_soh'], as_index=False)['charging_power'].describe()

Les puissances semblent les mêmes pour l'ensemble des catégories. Elles sont un peu plus faibles pour les SoH < 90 pour les charges de niveau 3 supérieures à 100 kW.

In [None]:
# For every (power category, SoH category) pair, collect the set of SoC
# points for which expanded rows exist.
df_soc_by_charge = df_expanded.groupby(['level_charge', 'cat_soh'], as_index=False)['soc'].apply(set)

In [None]:
# For each power category, list the SoC points that are NOT covered by every
# SoH category, so the categories can later be compared on common SoC points.
# "Present in some sets but not in all of them" is the union minus the
# intersection; this replaces the accumulation of pairwise set differences.
d = {}
for level in df_soc_by_charge.level_charge.unique():
    level_rows = df_soc_by_charge[df_soc_by_charge['level_charge'] == level]
    # NOTE(review): .values[0] raises IndexError if a SoH category has no row
    # for this power category — same as before; decide how such gaps should count.
    soc_sets = [
        level_rows[level_rows['cat_soh'] == cat]['soc'].values[0]
        for cat in df_soc_by_charge.cat_soh.unique()
    ]
    d[level] = list(set.union(*soc_sets) - set.intersection(*soc_sets))




In [None]:
# Drop the rows whose SoC point is listed in `d` for their power category,
# i.e. the SoC points not shared by every SoH category.
is_unshared_soc = pd.Series(
    [soc in d.get(level, []) for soc, level in zip(df_expanded["soc"], df_expanded["level_charge"])],
    index=df_expanded.index,
    dtype=bool,
)
df_expanded_clean = df_expanded[~is_unshared_soc].copy()

In [None]:
# Per charging session, on the SoC points shared by all SoH categories:
# total expanded time, SoC span, power category and SoH category.
# NOTE(review): if series_start_end_diff is "last - first", a session with n
# SoC points yields n - 1 here — confirm this is the intended denominator.
time = (
    df_expanded_clean
    .groupby(['vin', 'trimmed_in_charge_idx'], as_index=False, observed=True)
    .agg(
        total_time=pd.NamedAgg("time_diff", "sum"),
        total_soc_diff=pd.NamedAgg("soc", series_start_end_diff),
        level_charge=pd.NamedAgg("level_charge", 'first'),
        soh_cat=pd.NamedAgg('cat_soh', 'first'),
    )
)

In [None]:
# Average number of seconds per SoC point for each session
# (infinite when total_soc_diff is 0).
time['mean_time_by_soc'] = time['total_time'].div(time['total_soc_diff'])

In [None]:
# Inspect the columns of the per-session summary.
time.columns

In [None]:
# Mean seconds per SoC point for each (power category, SoH category),
# ignoring sessions whose ratio is +inf.
finite_sessions = time[time['mean_time_by_soc'] != np.inf]
finite_sessions.groupby(["level_charge", "soh_cat"])['mean_time_by_soc'].mean().round()



Pour 1 point de SoC, si la charge est de niveau 3 avec une puissance comprise entre 45 et 100 kW, il y a une différence d'environ 8 secondes entre un véhicule avec un SoH supérieur à 99 % et un véhicule avec un SoH inférieur à 90 %, soit 9 min 20 s pour une charge de 20 % à 90 %, et une différence de 4 secondes avec les autres catégories, soit 4 min 40 s.
Pour 1 point de SoC, si la charge est de niveau 3 avec une puissance supérieure à 100 kW, il y a une différence d'environ 2 secondes entre un véhicule avec un SoH supérieur à 99 % et un véhicule avec un SoH inférieur à 90 %, soit 2 min 20 s pour une charge de 20 % à 90 %.


## Check répartitions au sein de chacun des groupes 

In [None]:
# Tag every database row with its vehicle's SoH category (first value found
# per vin in df_expanded); vehicles without a category are dropped by the
# inner join. start_date is then parsed as datetime.
soh_category_by_vin = df_expanded[['vin', 'cat_soh']].groupby('vin', as_index=False).first()
corr_df = dbeaver_df.merge(soh_category_by_vin, on='vin', how='inner')
corr_df['start_date'] = pd.to_datetime(corr_df['start_date'])

In [None]:
info_90 = corr_df[corr_df['cat_soh']=='soh < 90'][["battery_id", "version", "autonomy", "type", "model_name", "start_date", "odometer", "vin"]].groupby('vin').agg(
    battery_id=("battery_id", "first"),
    version=("version", "first"),
    autonomy=("autonomy", "first"),
    type=("type", "first"),
    model_name=("model_name", "first"),
    start_date=("start_date", "last"),
    odometer=("odometer", "last")
)

In [None]:
info_95 = corr_df[corr_df['cat_soh']=='90 < soh < 95'][["battery_id", "version", "autonomy", "type", "model_name", "start_date", "odometer", "vin"]].groupby('vin').agg(
    battery_id=("battery_id", "first"),
    version=("version", "first"),
    autonomy=("autonomy", "first"),
    type=("type", "first"),
    model_name=("model_name", "first"),
    start_date=("start_date", "last"),
    odometer=("odometer", "last")
)

In [None]:
info_99 = corr_df[corr_df['cat_soh']=='95 < soh < 99'][["battery_id", "version", "autonomy", "type", "model_name", "start_date", "odometer", "vin"]].groupby('vin').agg(
    battery_id=("battery_id", "first"),
    version=("version", "first"),
    autonomy=("autonomy", "first"),
    type=("type", "first"),
    model_name=("model_name", "first"),
    start_date=("start_date", "last"),
    odometer=("odometer", "last")
)

In [None]:
info_100 = corr_df[corr_df['cat_soh']=='soh > 99'][["battery_id", "version", "autonomy", "type", "model_name", "start_date", "odometer", "vin"]].groupby('vin').agg(
    battery_id=("battery_id", "first"),
    version=("version", "first"),
    autonomy=("autonomy", "first"),
    type=("type", "first"),
    model_name=("model_name", "first"),
    start_date=("start_date", "last"),
    odometer=("odometer", "last")
)

#### Odometer value

In [None]:
# Print the odometer summary statistics (describe()) of each SoH category.
print(f"""odometer infos: under 90% SoH:\n {info_90.odometer.describe()}, 
      between 90-95% SoH:\n {info_95.odometer.describe()},
      between 95-99% SoH: \n{info_99.odometer.describe()}, 
      over 99% SoH: \n{info_100.odometer.describe()}""")


Sans surprise, les véhicules avec les plus gros odomètres sont les véhicules avec le moins bon SoH.
Par contre, on trouve un véhicule avec un SoH supérieur à 99 % qui a parcouru 287 000 km.



In [None]:
# Print the start_date summary statistics (describe()) of each SoH category.
print(f"""start_date infos: under 90% SoH:\n {info_90['start_date'].describe()}, 
      between 90-95% SoH:\n {info_95['start_date'].describe()},
      between 95-99% SoH: \n{info_99['start_date'].describe()}, 
      over 99% SoH: \n{info_100['start_date'].describe()}""")


Comme pour l'odomètre, les véhicules avec une batterie plus vieille ont les moins bons SoH.

In [None]:
# For each SoH category, print the number of distinct model names and the
# share (normalized value counts) of the five most frequent ones.
print(f"""model infos: under 90% SoH:\n number of different model: {info_90.model_name.nunique()}\n model répartition: {info_90.model_name.value_counts(normalize=True).head(5)}, 
      between 90-95% SoH:\n number of different model: {info_95.model_name.nunique()}\n model répartition: {info_95.model_name.value_counts(normalize=True).head(5)},
      between 95-99% SoH: \n number of different model: {info_99.model_name.nunique()}\n model répartition: {info_99.model_name.value_counts(normalize=True).head(5)}, 
      over 99% SoH: \n number of different model: {info_100.model_name.nunique()}\n model répartition: {info_100.model_name.value_counts(normalize=True).head(5)}""")


La proportion de modèles Y augmente à mesure que le SoH augmente.
Corrélation entre l'ancienneté des véhicules et le modèle Y ? Ils ont peut-être commencé avec des modèles 3 puis des modèles Y.

In [None]:
# For each SoH category, print the number of distinct versions and the share
# (normalized value counts) of the five most frequent ones.
# The labels previously read "model infos" / "top 3 model" (copied from the
# model cell) although the values are `version` statistics limited by head(5).
print(f"""version infos: under 90% SoH:\n number of different versions: {info_90.version.nunique()}\n top 5 versions: {info_90.version.value_counts(normalize=True).head(5)}, 
      between 90-95% SoH:\n number of different versions: {info_95.version.nunique()}\n top 5 versions: {info_95.version.value_counts(normalize=True).head(5)},
      between 95-99% SoH: \n number of different versions: {info_99.version.nunique()}\n top 5 versions: {info_99.version.value_counts(normalize=True).head(5)}, 
      over 99% SoH: \n number of different versions: {info_100.version.nunique()}\n top 5 versions: {info_100.version.value_counts(normalize=True).head(5)}""")


Pas de surreprésentation d'un tesla_code en particulier dans aucun des groupes.