In [None]:
# Enable IPython's autoreload extension in mode 2 (reload modules before running each cell),
# so that edits to the imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Notebook pour explorer les SoH calculés via la pipeline de fleet-telemetry

Conclusion :

Nous avons 557 VIN qui renvoient des données.

Parmi eux, 190 VIN disposent d’un SoH si l’on conserve la pipeline actuelle.
→ Nous avons donc décidé d’étendre le critère jusqu’à 5 points de SoC, ce qui permet de couvrir davantage de véhicules.

Avec le processed_result classique, beaucoup de SoH sont rejetés à cause de la variance (fonction filter_results_by_lines_bounds).
→ Nous avons donc supprimé cette fonction pour le moment.

Au final, nous obtenons 511 VIN avec un SoH.

Pour les VIN manquants :

- 2 n'ont pas de données batterie → Calcul du SoH impossible.

- 4 n'ont pas de données pendant les phases de charge → Calcul du SoH impossible.

- 10 ont des SoH hors des bornes [0.75, 1.1] → À explorer pour comprendre ces anomalies.

- 7 VIN ont un SoH dans les bornes mais avec une différence de SoC sur la charge inférieure à 5 points → Attendre une charge plus significative.

- 23 VIN n'ont pas de données sur l’énergie ajoutée durant la charge → Supposition d’un problème dans les données récupérées.


In [None]:
from transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries
import pandas as pd
import numpy as np
import plotly.express as px
from core.pandas_utils import series_start_end_diff
from core.stats_utils import *
from core.pandas_utils import *
from transform.processed_results.main import *

## Get Data

In [None]:
# Load the processed time series for the "tesla-fleet-telemetry" source.
# NOTE(review): force_update=True presumably forces a recomputation instead of a cached read — confirm in TeslaProcessedTimeSeries.
processed_tss = TeslaProcessedTimeSeries("tesla-fleet-telemetry", force_update=True)
# Cast the charge-session index to strings; later cells compare it against labels such as "2.0".
# NOTE(review): astype(str) turns missing values into the literal string "nan", so rows without a
# charge index (if any) end up in their own "nan" group in the per-(vin, in_charge_idx) aggregation.
processed_tss['in_charge_idx'] = processed_tss['in_charge_idx'].astype(str)

In [None]:
# One output column per entry: output_name=(source column, aggregation).
charge_aggregations = dict(
    ac_energy_added_min=("ac_charge_energy_added", "min"),
    dc_energy_added_min=("dc_charge_energy_added", "min"),
    ac_energy_added_end=("ac_charge_energy_added", "last"),
    dc_energy_added_end=("dc_charge_energy_added", "last"),
    soc_diff=("soc", series_start_end_diff),
    inside_temp=("inside_temp", "mean"),
    net_capacity=("net_capacity", "first"),
    range=("range", "first"),
    odometer=("odometer", "first"),
    version=("version", "first"),
    size=("soc", "size"),
    model=("model", "first"),
    date=("date", "first"),
    ac_charging_power=("ac_charging_power", "median"),
    dc_charging_power=("dc_charging_power", "median"),
    tesla_code=("tesla_code", "first"),
)

# Collapse the time series to one row per (vin, in_charge_idx) and derive a raw SoH estimate.
raw_results = (
    processed_tss
    .groupby(["vin", "in_charge_idx"], observed=True, as_index=False)
    .agg(**charge_aggregations)
    # NOTE(review): if only one of the AC/DC medians is available the sum is NaN, which makes
    # the three level_* columns below evaluate to 0 — confirm this is intended.
    .eval("charging_power = ac_charging_power + dc_charging_power")
    # Energy added over the group = last counter value minus the smallest counter value.
    .eval("ac_energy_added = ac_energy_added_end  - ac_energy_added_min")
    .eval("dc_energy_added = dc_energy_added_end  - dc_energy_added_min")
    # Keep the larger of the AC and DC figures, treating a missing figure as 0.
    .assign(
        energy_added=lambda df: np.maximum(
            df["ac_energy_added"].fillna(0),
            df["dc_energy_added"].fillna(0),
        )
    )
    # SoH = energy added / energy that `soc_diff` percent of net_capacity represents.
    # A zero soc_diff yields +/-inf here; get_processed_results replaces those with NaN.
    .eval("soh = energy_added / (soc_diff / 100.0 * net_capacity)")
    # Attribute the SoC gain (as a fraction) to one of three charging-power bands.
    .eval("level_1 = soc_diff * (charging_power < 8) / 100")
    .eval("level_2 = soc_diff * (charging_power.between(8, 45)) / 100")
    .eval("level_3 = soc_diff * (charging_power > 45) / 100")
    .sort_values(["tesla_code", "vin", "date"])
)

In [None]:
# DataFrame.eval expressions keyed by "tesla-fleet-telemetry-<minimum SoC gain>".
# Each one blanks out (NaN) every SoH whose charge gained no more than the threshold in SoC
# points or whose value lies outside [0.75, 1.1].
# The un-parenthesised `&` relies on the default "pandas" eval parser, in which `&` has the
# precedence of `and`; under plain Python precedence the expression would mean something else.
SOH_FILTER_EVAL = {
    f"tesla-fleet-telemetry-{min_soc_gain}": (
        f"soh = soh.where(soc_diff > {min_soc_gain} & soh.between(0.75, 1.1))"
    )
    for min_soc_gain in (30, 25, 20, 15, 8, 5)
}


In [None]:
def get_processed_results(brand:str) -> DF:
    """Build the processed SoH results from the notebook-level ``raw_results`` frame.

    Args:
        brand: Key into ``SOH_FILTER_EVAL`` selecting the SoH filter expression to apply.

    Returns:
        The processed results sorted by ``vin`` and ``date``, with ``soh`` and ``odometer``
        forward- then backward-filled within each VIN.

    Raises:
        KeyError: If ``brand`` is not a key of ``SOH_FILTER_EVAL``.
    """
    logger.info(f"{'Processing ' + brand + ' results.':=^{50}}")

    # Infinite raw estimates would make the downstream outlier masking / monotonic-decrease
    # steps fail, so they are turned into NaN first.
    finite_soh = raw_results.assign(soh=lambda df: df["soh"].replace([np.inf, -np.inf], np.nan))

    filtered = (
        finite_soh
        .sort_values(["vin", "date"])
        .pipe(make_charge_levels_presentable)
        .eval(SOH_FILTER_EVAL[brand])
    )

    per_vehicle = (
        filtered
        .pipe(agg_results_by_update_frequency)
        .groupby('vin', observed=True)
        .apply(make_soh_presentable_per_vehicle, include_groups=False)
        .reset_index(level=0)
    )

    # The line-bounds filter is deliberately disabled for this exploration:
    # .pipe(filter_results_by_lines_bounds, VALID_SOH_POINTS_LINE_BOUNDS, logger=logger)
    results = per_vehicle.sort_values(["vin", "date"])

    # Propagate the known values inside each vehicle: forward first, then backward for leading gaps.
    for column in ("soh", "odometer"):
        results[column] = results.groupby("vin", observed=True)[column].ffill()
        results[column] = results.groupby("vin", observed=True)[column].bfill()
    return results

In [None]:
# One processed-results frame per minimum SoC-gain threshold, computed in the listed order.
(
    processed_results_30,
    processed_results_25,
    processed_results_20,
    processed_results_15,
    processed_results_8,
    processed_results_5,
) = (
    get_processed_results(f"tesla-fleet-telemetry-{min_soc_gain}")
    for min_soc_gain in (30, 25, 20, 15, 8, 5)
)


## Exploration

In [None]:
# Raw SoH estimates strictly between 0.7 and 1.1, against odometer, one colour per VIN.
px.scatter(
    raw_results.loc[raw_results['soh'].between(.7, 1.1, inclusive="neither")],
    x='odometer',
    y='soh',
    color='vin',
)

In [None]:
# Distinct tesla_code values present in the raw results.
raw_results['tesla_code'].unique().tolist()

In [None]:
# Processed SoH (30-point threshold) strictly between 0.75 and 1.05, against odometer.
# Rows with a missing SoH never satisfy the range test, so no separate dropna is needed.
px.scatter(
    processed_results_30.loc[processed_results_30['soh'].between(.75, 1.05, inclusive="neither")],
    x='odometer',
    y='soh',
    color='vin',
)

In [None]:
# Number of distinct VINs with at least one non-null SoH at the 5-point threshold.
processed_results_5.dropna(subset='soh').vin.nunique()

In [None]:
# Processed SoH (5-point threshold) against odometer, one colour per VIN; rows without SoH are dropped.
px.scatter(processed_results_5.dropna(subset='soh'), x='odometer', y='soh', color='vin')

# Problèmes vin

In [None]:
# Number of distinct VINs present in the raw results.
raw_results.vin.nunique()

**On a 557 VIN pour CAPFM**

Pour le moment, on a des SoH pour ce nombre de VIN :

In [None]:
# Number of distinct VINs with a non-null SoH, for each minimum SoC-gain threshold.
for _threshold, _results in (
    (30, processed_results_30),
    (25, processed_results_25),
    (20, processed_results_20),
    (15, processed_results_15),
    (8, processed_results_8),
    (5, processed_results_5),
):
    print(f"nbr de tesla si on filtre les charges à {_threshold}:", _results.dropna(subset='soh').vin.nunique())

Soit au maximum 33 % du nombre de véhicules de la flotte.

## Missing vin

In [None]:
# VINs present in raw_results that have no non-null SoH at the 5-point threshold.
missing_vin = set(raw_results.vin.unique()).difference(
    processed_results_5.dropna(subset='soh').vin.unique()
)

In [None]:
# Per-VIN summary of the raw estimates for the vehicles listed in missing_vin.
(
    raw_results
    .loc[raw_results['vin'].isin(missing_vin)]
    .groupby('vin', observed=True)
    .agg(
        energy_added=("energy_added", "max"),
        soh=("soh", 'mean'),
        soh_max=('soh', "max"),
        soh_min=('soh', "min"),
        tesla_code=('tesla_code', 'first'),
        net_capacity=('net_capacity', 'first'),
    )
)

## Vin sans net_capacity

In [None]:
# Flag the VINs for which groupby(...).first() finds no net_capacity value, then count them.
# groupby .first() returns the first non-null entry, so a VIN is flagged only when every
# one of its net_capacity values is missing.
net_capcity_missing = raw_results.groupby('vin', observed=True)['net_capacity'].first().isna()
int(net_capcity_missing.sum())

Conclusion:

261 VIN (soit 49 %) sans `net_capacity` → on ne peut pas calculer de SoH



### Vin sans charge 

In [None]:
# VINs without any charge: at least one group with soc_diff <= 0 and no group with soc_diff > 0.
vin_list_sans_charge = (
    set(raw_results.loc[raw_results['soc_diff'] <= 0, 'vin'].unique())
    - set(raw_results.loc[raw_results['soc_diff'] > 0, 'vin'].unique())
)
print(len(vin_list_sans_charge))

In [None]:
# soc_diff against odometer for the VINs in vin_list_sans_charge, one colour per VIN.
px.scatter(raw_results[raw_results['vin'].isin(vin_list_sans_charge)], x='odometer', y="soc_diff", color='vin')

Conclusion:

35 vin n'ont pas de charge. 
Certains n'ont pas d'observation d'une potentielle charge -> pas assez roulé   
D'autres n'ont pas de point lorsque la charge a eu lieu   
De manière générale on dirait qu'on ne récupère plus d'info sur ces vin depuis plusieurs jours/ semaines 

## Vin out of bound 

In [None]:
# VINs with at least one raw SoH that is >= 1.1 or in ]0, 0.8], and none strictly inside ]0.8, 1.1[.
# NOTE(review): the lower bound used here is 0.8 whereas SOH_FILTER_EVAL keeps SoH values
# from 0.75 — confirm which bound is intended.
vin_out_of_bound = (
    set(
        raw_results.loc[
            (raw_results['soh'] >= 1.1)
            | ((raw_results['soh'] <= .8) & (raw_results['soh'] > 0)),
            'vin',
        ].unique()
    )
    - set(
        raw_results.loc[
            raw_results['soh'].between(.8, 1.1, inclusive="neither"),
            'vin',
        ].unique()
    )
)

In [None]:
# Raw SoH against odometer for the VINs in vin_out_of_bound, one colour per VIN.
px.scatter(raw_results[raw_results['vin'].isin(vin_out_of_bound)], x='odometer', y='soh', color='vin')

In [None]:
# Print how many VINs are out of bounds, then display the set itself as the cell output.
print(len(vin_out_of_bound))
vin_out_of_bound

In [None]:
# For the out-of-bound VINs: take each VIN's first tesla_code and show the 10 most frequent codes.
raw_results[raw_results['vin'].isin(vin_out_of_bound)].groupby('vin', observed=True)["tesla_code"].first().value_counts().head(10)

In [None]:
# VIN inspected in the following cells.
# NOTE(review): this is a hard-coded vehicle identifier — check it may be shared before publishing the notebook.
vin = 'XP7YGCEK2RB502483'

In [None]:
# Raw SoH over time for the selected VIN.
px.scatter(raw_results[raw_results['vin']==vin], x='date', y='soh')

In [None]:
# SoC over time for the selected VIN, coloured by the in_charge column.
px.scatter(processed_tss[processed_tss['vin']==vin], x='date', y='soc', color='in_charge')

In [None]:
# Rows of the selected VIN with in_charge_idx "2.0" where all four columns are populated.
# The final dropna() already removes rows with a missing soc, so no separate soc filter is needed.
processed_tss.loc[
    (processed_tss['vin'] == vin) & (processed_tss['in_charge_idx'] == "2.0"),
    ["soc", "charge_energy_added", 'ac_charge_energy_added', 'dc_charge_energy_added'],
].dropna()

Conclusion:  
On a 21 VIN pour lesquels on calcule un SoH, mais celui-ci n'entre pas dans les bornes.

### soc diff too small 

In [None]:
# VINs whose only in-bounds raw SoH (strictly between 0.8 and 1.1) come from charges with soc_diff <= 8.
# NOTE(review): the variable name says 15 but the threshold applied is 8 SoC points.
vin_less_than_15_soc_point = list(
    set(
        raw_results.loc[
            raw_results['soh'].between(.8, 1.1, inclusive="neither") & (raw_results['soc_diff'] <= 8),
            'vin',
        ].unique()
    )
    - set(
        raw_results.loc[
            raw_results['soh'].between(.8, 1.1, inclusive="neither") & (raw_results['soc_diff'] > 8),
            'vin',
        ].unique()
    )
)

In [None]:
# Number of VINs in vin_less_than_15_soc_point.
len(vin_less_than_15_soc_point)

In [None]:
# Raw SoH strictly between 0.7 and 1.1 for the VINs in vin_less_than_15_soc_point, against odometer.
px.scatter(
    raw_results.loc[
        raw_results['vin'].isin(vin_less_than_15_soc_point)
        & raw_results['soh'].between(.7, 1.1, inclusive="neither")
    ],
    x='odometer',
    y='soh',
    color='vin',
)

In [None]:
# For the VINs in vin_less_than_15_soc_point: take each VIN's first tesla_code and show the 10 most frequent codes.
processed_tss[processed_tss['vin'].isin(vin_less_than_15_soc_point)].groupby('vin', observed=True)['tesla_code'].first().value_counts().head(10)

## No energy_added

In [None]:
# Largest energy_added per VIN, restricted to missing_vin rows that have a positive net_capacity.
no_energy_vin = (
    raw_results
    .loc[raw_results['vin'].isin(missing_vin) & (raw_results['net_capacity'] > 0)]
    .groupby('vin', observed=True)
    .agg(energy_added=("energy_added", "max"))
)

In [None]:
# Shape of the subset of VINs whose largest energy_added is exactly 0 (first element = number of VINs).
no_energy_vin[no_energy_vin['energy_added']==0].shape