In [None]:
# Reload edited project modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

## Notebook pour tester le pipe de fleet-telemetry

### Raw tss

In [None]:
from core.s3_utils import *
from transform.raw_tss.tesla_raw_tss import *
import pandas as pd
import plotly.express as px
from transform.processed_tss.config import *
from core.constants import *
from transform.raw_tss.config import *
from transform.processed_tss.ProcessedTimeSeries import ProcessedTimeSeries
from transform.raw_results.tesla_fleet_telemetry import get_results as get_results_origin
from transform.processed_results.main import get_processed_results



In [None]:

# Key template of a brand's raw time series parquet file.
S3_RAW_TSS_KEY_FORMAT = "raw_ts/{brand}/time_series/raw_tss.parquet"
# Same template resolved for the "tesla" brand.
TESLA_RAW_TSS_KEY = S3_RAW_TSS_KEY_FORMAT.format_map({"brand": "tesla"})

In [None]:
# Bucket handle handed to get_raw_tss when loading the raw time series.
s3 = S3_Bucket()

In [None]:
def get_raw_tss(bucket: "S3_Bucket | None" = None) -> DF:
    """Return the fleet-telemetry raw time series.

    Parses the response keys that still need parsing and, when a parquet file
    already exists at ``FLEET_TELEMETRY_RAW_TSS_KEY``, returns the stored frame
    concatenated with the newly parsed rows.

    Args:
        bucket: Bucket to read from. When omitted, a new ``S3_Bucket`` is
            created for this call.

    Returns:
        The stored raw time series followed by the new rows, or only the new
        rows when nothing is stored yet.
    """
    # Resolve the default at call time: a default written as `S3_Bucket()` in the
    # signature is evaluated once, when the function is defined, and then shared.
    if bucket is None:
        bucket = S3_Bucket()
    logger.debug("Getting raw tss from responses provided by tesla fleet telemetry.")
    keys = get_response_keys_to_parse(bucket)
    new_raw_tss = get_raw_tss_from_keys(keys, bucket)
    if not bucket.check_file_exists(FLEET_TELEMETRY_RAW_TSS_KEY):
        return new_raw_tss
    return concat([bucket.read_parquet_df(FLEET_TELEMETRY_RAW_TSS_KEY), new_raw_tss])

In [None]:
raw_tss.columns

In [None]:
px.scatter(raw_tss[raw_tss["vin"] == "LRWYGCFS6PC992837"], x="readable_date", y="ACChargingEnergyIn_stringValue")

In [None]:
raw_tss = get_raw_tss(s3)

### Processed tss

In [None]:
from transform.processed_tss.ProcessedTimeSeries import ProcessedTimeSeries

In [None]:

class TeslaProcessedTimeSeries(ProcessedTimeSeries):
    """Processed time series with Tesla-specific charge/discharge computations.

    Helpers such as ``compute_idx_from_masks`` and
    ``trim_leading_n_trailing_soc_off_masks`` are not defined here; they are
    presumably inherited from ``ProcessedTimeSeries`` (verify against the parent).
    """

    def __init__(self, make:str="tesla", id_col:str="vin", log_level:str="INFO", max_td:TD=MAX_TD, force_update:bool=False, **kwargs):
        """Create a logger named after ``make``, set its level, then call the parent constructor.

        All arguments are forwarded unchanged to ``ProcessedTimeSeries.__init__``.
        """
        self.logger = getLogger(make)
        set_level_of_loggers_with_prefix(log_level, make)
        super().__init__(make, id_col, log_level, max_td, force_update, **kwargs)

    def compute_charge_n_discharge_vars(self, tss:DF) -> DF:
        """Chain the charge/discharge steps: masks, charge index, discharge index,
        trimmed masks, then the indexes of the trimmed masks.
        """
        return (
            tss
            .pipe(self.compute_charge_n_discharge_masks)
            .pipe(self.compute_charge_idx)
            .pipe(self.compute_idx_from_masks, ["in_discharge"])
            .pipe(self.trim_leading_n_trailing_soc_off_masks, ["in_charge", "in_discharge"])
            .pipe(self.compute_idx_from_masks, ["trimmed_in_charge", "trimmed_in_discharge"])
        )

    def compute_charge_n_discharge_masks(self, tss:DF) -> DF:
        """Add the ``in_charge`` and ``in_discharge`` columns derived from ``charging_status``.

        Columns are assigned on ``tss`` itself before the helper columns are dropped
        from the returned frame.
        """
        self.logger.debug("Computing tesla specific charge and discharge masks")
        # We use a nullable boolean Series to represent the rows where:
        tss["nan_charging"] = (
            Series(pd.NA, index=tss.index, dtype="boolean")# We are not sure of anything.
            .mask(tss["charging_status"].isin(IN_CHARGE_CHARGING_STATUS_VALS), True)# We are sure that the vehicle is in charge.
            .mask(tss["charging_status"].isin(IN_DISCHARGE_CHARGING_STATUS_VALS), False)# We are sure that the vehicle is not in charge.
        )
        # If a period of uncertainty (NaN) is surrounded by equal periods of certainty (True-NaN-True or False-NaN-False),
        # we fill it with the value of these certainties.
        # However, there are edge cases with uncertainty periods lasting multiple days.
        # Interestingly enough, the charge_energy_added variable does not get forwarded that far and gets reset to zero.
        # This would create outliers in our charge SoH estimation, as we estimate the energy gained as the diff between the last (0) and first value of charge_energy_added.
        # So we set a maximal uncertainty period duration over which we don't fill it.
        tss["nan_date"] = tss["date"].mask(tss["nan_charging"].isna())
        tss[["ffill_charging", "ffill_date"]] = tss.groupby("vin", observed=True)[["nan_charging", "nan_date"]].ffill()
        tss[["bfill_charging", "bfill_date"]] = tss.groupby("vin", observed=True)[["nan_charging", "nan_date"]].bfill()
        nan_period_duration:Series = tss.eval("bfill_date - ffill_date")
        fill_unknown_period = tss.eval("ffill_charging.eq(bfill_charging) & @nan_period_duration.le(@MAX_CHARGE_TD)")
        tss["nan_charging"] = tss["nan_charging"].mask(fill_unknown_period, tss["ffill_charging"])
        # The SoC oscillates in [charge_limit_soc - ~3%, charge_limit_soc], so we set these periods to NaN as well.
        tss["nan_charging"] = tss["nan_charging"].mask(tss["soc"] >= (tss["charge_limit_soc"] - 3))
        # Then we separate the Series into two, more explicit, columns.
        tss["in_charge"] = tss.eval("nan_charging.notna() & nan_charging")
        tss["in_discharge"] = tss.eval("nan_charging.notna() & ~nan_charging")
        # NOTE(review): the helper column "nan_date" is not in the drop list, so it stays in the output — confirm this is intended.
        return tss.drop(columns=["nan_charging", "ffill_charging", "bfill_charging", "ffill_date", "bfill_date"])
    
    def compute_enenergy_added(self, tss:DF) -> DF:
        """Set ``charge_energy_added`` to ``dc_charge_energy_added`` where that value is
        non-null and strictly positive, and to ``ac_charge_energy_added`` elsewhere.
        """
        tss['charge_energy_added'] = tss['dc_charge_energy_added'].where(
            tss['dc_charge_energy_added'].notnull() & 
            (tss['dc_charge_energy_added'] > 0), 
            tss['ac_charge_energy_added'])
        return tss
    
    def compute_charge_idx(self, tss:DF) -> DF:
        """Add ``in_charge_idx``, a per-vin running count of detected charge starts.

        A new charge starts when the energy counter drops faster than
        ``MIN_POWER_LOSS``, when the gap to the previous row exceeds
        ``MAX_CHARGE_TD``, or when ``in_charge`` switches on.
        """
        self.logger.debug("Computing tesla specific charge index.")
        if self.make == 'tesla-fleet-telemetry':
            tss = tss.pipe(self.compute_enenergy_added)
        tss_grp = tss.groupby("vin", observed=False)
        tss["charge_energy_added"] = tss_grp["charge_energy_added"].ffill()
        energy_added_over_time = tss_grp['charge_energy_added'].diff().div(tss["sec_time_diff"].values)
        # charge_energy_added is cumulative and forward filled.
        # We check whether charge_energy_added decreases too fast, so that two charging periods before and after a gap are correctly identified as two separate charging periods.
        new_charge_mask = energy_added_over_time.lt(MIN_POWER_LOSS, fill_value=0) 
        # For the same reason, we ensure that there are no gaps bigger than MAX_CHARGE_TD between two rows of the same charging period.
        new_charge_mask |= tss["time_diff"].gt(MAX_CHARGE_TD) 
        # And of course we also check that there is no change of status. 
        new_charge_mask |= (~tss_grp["in_charge"].shift() & tss["in_charge"]) 
        tss["in_charge_idx"] = new_charge_mask.groupby(tss["vin"], observed=True).cumsum()
        # Report the share of rows that received an index through the logger rather than stdout,
        # skipping empty frames so the ratio never divides by zero.
        if len(tss):
            self.logger.debug("Share of rows with a charge index: %s", tss["in_charge_idx"].count() / len(tss))
        # NOTE(review): -1 is not representable in uint16 — confirm the sentinel that is intended for missing indexes.
        tss["in_charge_idx"] = tss["in_charge_idx"].fillna(-1).astype("uint16")
        return tss

In [None]:
# Build the fleet-telemetry processed time series with force_update=True and a filter on trimmed_in_charge == True.
# NOTE(review): the object is indexed like a DataFrame in later cells — presumably ProcessedTimeSeries is a DataFrame subclass; confirm.
processed_tss = TeslaProcessedTimeSeries("tesla-fleet-telemetry", force_update=True, filters=[("trimmed_in_charge", "==", True)])

In [None]:
# Inspect the processed rows of a single vehicle.
processed_tss[processed_tss['vin'] == "LRWYGCFS6PC992837"]

### Raw results

In [None]:
# One row per (vin, trimmed_in_charge_idx): per-group aggregates first, derived metrics after.
raw_results = (
    processed_tss
    .groupby(["vin", "trimmed_in_charge_idx"], observed=True, as_index=False)
    .agg(**{
        # output column -> (source column, aggregation)
        out_col: pd.NamedAgg(src_col, agg_func)
        for out_col, (src_col, agg_func) in {
            "ac_energy_added_min": ("ac_charge_energy_added", "min"),
            "dc_energy_added_min": ("dc_charge_energy_added", "min"),
            "ac_energy_added_end": ("ac_charge_energy_added", "last"),
            "dc_energy_added_end": ("dc_charge_energy_added", "last"),
            "soc_diff": ("soc", series_start_end_diff),
            "inside_temp": ("inside_temp", "mean"),
            "net_capacity": ("net_capacity", "first"),
            "range": ("range", "first"),
            "odometer": ("odometer", "first"),
            "version": ("version", "first"),
            "size": ("soc", "size"),
            "model": ("model", "first"),
            "date": ("date", "first"),
            "ac_charging_power": ("ac_charging_power", "median"),
            "dc_charging_power": ("dc_charging_power", "median"),
            "tesla_code": ("tesla_code", "first"),
        }.items()
    })
    .eval("charging_power = ac_charging_power + dc_charging_power")
    # Energy gained per counter: last value minus the minimum of the group.
    .eval("ac_energy_added = ac_energy_added_end  - ac_energy_added_min")
    .eval("dc_energy_added = dc_energy_added_end  - dc_energy_added_min")
    # NOTE(review): this keeps the smaller of the AC and DC deltas, whereas compute_enenergy_added
    # prefers DC when it is positive and AC otherwise — confirm the minimum is intended.
    .assign(energy_added=lambda sessions: np.minimum(sessions["ac_energy_added"], sessions["dc_energy_added"]))
    .eval("soh = energy_added / (soc_diff / 100.0 * net_capacity)")
    # Split soc_diff into three buckets by charging_power: below 8, 8 to 45 (inclusive), above 45.
    .eval("level_1 = soc_diff * (charging_power < 8) / 100")
    .eval("level_2 = soc_diff * (charging_power.between(8, 45)) / 100")
    .eval("level_3 = soc_diff * (charging_power > 45) / 100")
    .sort_values(["tesla_code", "vin", "date"])
)

In [None]:
# Results from the project's get_results (imported as get_results_origin), called with force_update=False.
# NOTE(review): presumably kept as a reference to compare with the notebook's raw_results — confirm.
raw_results_origin = get_results_origin(force_update=False)

In [None]:
# Columns of the raw results computed in this notebook.
raw_results.columns

In [None]:
# Inspect the raw results of a single vehicle.
raw_results[raw_results['vin'] == "LRWYGCFS6PC992837"]

### Processed results

In [None]:
from transform.processed_results.main import *

In [None]:
def get_processed_results(brand:str, raw:"DF | None"=None) -> DF:
    """Turn raw SoH results into presentable, gap-filled results.

    Args:
        brand: Key used to look up the SoH filter expression in
            ``SOH_FILTER_EVAL_STRINGS``; also shown in the log banner.
        raw: Raw results to process. When omitted, the notebook-level
            ``raw_results`` variable is used, so single-argument calls keep working.

    Returns:
        The processed results sorted by ``vin`` and ``date``, with ``soh`` and
        ``odometer`` forward- then backward-filled within each ``vin``.
    """
    logger.info(f"{'Processing ' + brand + ' results.':=^{50}}")
    # Make the input explicit instead of always reading the notebook global.
    source = raw_results if raw is None else raw
    results =  (
        source
        # Some raw estimations may have inf values, which makes mask_out_outliers_by_interquartile_range and force_monotonic_decrease fail,
        # so we replace them with NaNs.
        .assign(soh=lambda df: df["soh"].replace([np.inf, -np.inf], np.nan))
        .sort_values(["vin", "date"])
        .pipe(make_charge_levels_presentable)
        .eval(SOH_FILTER_EVAL_STRINGS[brand])
        .pipe(agg_results_by_update_frequency)
        .groupby('vin', observed=True)
        .apply(make_soh_presentable_per_vehicle, include_groups=False)
        .reset_index(level=0)
        .pipe(filter_results_by_lines_bounds, VALID_SOH_POINTS_LINE_BOUNDS, logger=logger)
        .sort_values(["vin", "date"])
    )
    # Fill the remaining gaps per vehicle: forward first, then backward for leading gaps.
    for col in ("soh", "odometer"):
        results[col] = results.groupby("vin", observed=True)[col].ffill()
        results[col] = results.groupby("vin", observed=True)[col].bfill()
    return results

In [None]:
# Run the notebook's get_processed_results with the 'tesla-fleet-telemetry' brand key.
processed_results = get_processed_results('tesla-fleet-telemetry')

In [None]:
# Manual spot check with the same shape as the soh expression: energy_added / (soc_diff / 100.0 * net_capacity).
# NOTE(review): the literals are presumably the values of one charge of the inspected vehicle — confirm their origin.
(22.9) / (38.950089 / 100.0 * 60)

In [None]:
# Inspect the raw results of a single vehicle.
raw_results[raw_results['vin']=="LRWYGCFS6PC992837"]

In [None]:
# Raise the pandas display limit to 10000 rows for the frames shown in later cells.
pd.set_option('display.max_rows', 10000)


In [None]:
# Rows of one vehicle whose trimmed_in_charge_idx is greater than 0, limited to the columns of interest
# and to rows with a known soc.
# Each condition carries its own parentheses: `&` binds tighter than `>`, so the comparison
# must be closed before the two masks are combined.
processed_tss[(processed_tss['vin']=="LRWYGCFS6PC992837") & 
              (processed_tss['trimmed_in_charge_idx'] > 0)][['date', 'soc', 'odometer','dc_charge_energy_added',"ac_charge_energy_added", 'trimmed_in_charge_idx']].dropna(subset='soc')

In [None]:
# Cast the trimmed charge index to int; this overwrites the column on processed_tss itself.
processed_tss['trimmed_in_charge_idx'] = processed_tss['trimmed_in_charge_idx'].astype('int')

In [None]:
# Check whether this vehicle's highest trimmed charge index is above 1.
processed_tss[(processed_tss['vin']=="LRWYGCFS6PC992837")]['trimmed_in_charge_idx'].max() > 1