In [None]:
%load_ext autoreload
%autoreload 2

## Notebook pour tester le pipe de fleet-telemetry

### Raw tss

In [None]:
from core.s3_utils import *
from transform.raw_tss.fleet_telemetry_raw_tss import *
import pandas as pd
import plotly.express as px
from transform.processed_tss.config import *
from core.constants import *
from transform.raw_tss.config import *
from transform.processed_tss.ProcessedTimeSeries import ProcessedTimeSeries
from transform.raw_results.tesla_fleet_telemetry import get_results as get_results_origin
from transform.processed_results.main import get_processed_results
from datetime import timedelta

import numpy as np

In [None]:
# S3 key template of the consolidated raw time series parquet of a brand.
S3_RAW_TSS_KEY_FORMAT = "raw_ts/{brand}/time_series/raw_tss.parquet"
# Key of the fleet-telemetry raw time series, built from the template above.
# NOTE(review): both names may also be provided by the star imports of the first cell — confirm which definition should win.
FLEET_TELEMETRY_RAW_TSS_KEY = S3_RAW_TSS_KEY_FORMAT.format(brand="tesla-fleet-telemetry")

In [None]:
s3 = S3_Bucket()

In [None]:
def get_response_keys_to_parse(bucket:S3_Bucket) -> DF:
    """List the fleet-telemetry response keys that still have to be parsed.

    The latest `readable_date` already stored per vin is compared with the date
    of every response key; a key is kept when its vin has no stored date or when
    the key is more recent than that stored date.

    Args:
        bucket: bucket wrapper used to check/read the stored raw time series and
            to list the response keys.

    Returns:
        The response keys merged (outer, on "vin") with `last_parsed_date`,
        filtered down to the rows that still need parsing.
    """
    already_parsed = bucket.check_file_exists(FLEET_TELEMETRY_RAW_TSS_KEY)
    if not already_parsed:
        parsed_subset = DEFAULT_TESLA_RAW_TSS_DF
    else:
        parsed_subset, _ = bucket.read_parquet_df_ter(
            FLEET_TELEMETRY_RAW_TSS_KEY,
            columns=["vin", "readable_date"],
        )
    # "max" rather than "last": the stored rows are not sorted.
    newest_per_vin = parsed_subset.groupby("vin", observed=True).agg(
        last_parsed_date=pd.NamedAgg("readable_date", "max")
    )
    last_parsed_date = newest_per_vin.compute()

    response_keys = bucket.list_responses_keys_of_brand("tesla-fleet-telemetry")
    # The date of a response is its file name without the last five characters.
    response_keys = response_keys.assign(
        date=response_keys["file"].str[:-5].astype("datetime64[ns]")
    )
    merged_keys = response_keys.merge(last_parsed_date, how="outer", on="vin")
    return merged_keys.query("last_parsed_date.isna() | date > last_parsed_date")

In [None]:
def get_raw_tss(bucket: S3_Bucket = S3_Bucket()) -> DF:
    """Return the fleet-telemetry raw time series, extended with the not yet parsed responses.

    Args:
        bucket: bucket wrapper used to read the stored raw time series and the
            response files. The default instance is created once, when this
            function is defined.

    Returns:
        The stored raw time series concatenated with the newly parsed
        responses, or only the newly parsed responses when nothing is stored yet.
    """
    logger.debug("Getting raw tss from responses provided by tesla fleet telemetry.")
    keys = get_response_keys_to_parse(bucket)
    if bucket.check_file_exists(FLEET_TELEMETRY_RAW_TSS_KEY):
        # read_parquet_df_ter is unpacked as a two-item result everywhere else in this
        # notebook; passing the whole result to concat would concatenate a tuple
        # instead of the stored frame.
        raw_tss, _ = bucket.read_parquet_df_ter(FLEET_TELEMETRY_RAW_TSS_KEY)
        new_raw_tss = get_raw_tss_from_keys(keys, bucket)
        return concat([raw_tss, new_raw_tss])
    return get_raw_tss_from_keys(keys, bucket)

In [None]:
raw_tss, path = s3.read_parquet_df_ter('raw_ts/tesla-fleet-telemetry/time_series/raw_tss.parquet')

In [None]:
raw_tss.to_parquet("test_raw_tss.parquet")

In [None]:
raw_tss = dd.read_parquet('test_raw_tss.parquet')

In [None]:
last_parsed_date = raw_tss.groupby('vin', observed=True).agg(last_parsed_date=pd.NamedAgg("readable_date", "max")).compute()

In [None]:

keys_to_parse = s3.list_responses_keys_of_brand("tesla-fleet-telemetry").assign(date=lambda df: df["file"].str[:-5].astype("datetime64[ns]"))

In [None]:
keys_to_parse.date.max()

In [None]:
keys = keys_to_parse.merge(last_parsed_date, "outer", "vin")

In [None]:
raw_tss['readable_date'].max().compute()

In [None]:
# Parse the response files week by week and gather them into a single frame.
# The weekly frames are accumulated in their own list so that `raw_tss` never holds
# a list: the cells below (display, `run(raw_tss)`) expect a frame.
weekly_raw_tss = []
grouped = keys.groupby(pd.Grouper(key='date', freq='W-MON'))
# Only the first weekly group is parsed in this test notebook.
grouped_items = list(grouped)[:1]
for week, week_keys in track(grouped_items, description="Processing weekly groups"):
    week_date = week.date().strftime('%Y-%m-%d')
    logger.debug(f"Parsing the responses of the week {week_date}:")
    logger.debug(f"{len(week_keys)} keys to parse for {week_keys['vin'].nunique()} vins.")
    logger.debug(f"This represents {round(len(week_keys) / len(keys) * 100)}% of the total keys to parse.")
    responses = s3.read_multiple_json_files(week_keys["key"].tolist(), max_workers=64)
    logger.debug("Read the responses.")
    with ThreadPoolExecutor(max_workers=64) as executor:
        response_frames = list(executor.map(DF.from_records, responses))
    logger.debug("Parsed the responses:")
    week_raw_tss = pd.concat([explode_data(df) for df in response_frames])
    logger.debug("Concatenated the responses into a single DF.")
    weekly_raw_tss.append(week_raw_tss)
    logger.debug("")
# pd.concat raises on an empty list, so fall back to an empty frame when no week was parsed.
raw_tss = pd.concat(weekly_raw_tss) if weekly_raw_tss else DF()

In [None]:
raw_tss

### Processed tss

In [None]:
from transform.processed_tss.ProcessedTimeSeries import ProcessedTimeSeries

In [None]:
def compute_charge_n_discharge_masks(tss:DF) -> DF:
        """Add boolean `in_charge` / `in_discharge` columns derived from `charging_status`.

        Reads the columns `charging_status`, `date`, `vin`, `soc` and `charge_limit_soc`,
        and the globals IN_CHARGE_CHARGING_STATUS_VALS, IN_DISCHARGE_CHARGING_STATUS_VALS
        and MAX_CHARGE_TD. The helper columns are written on `tss` itself before the
        final drop, so the input frame is modified as well.

        NOTE(review): a function with the same name but a three-argument signature is
        defined further down in this notebook and replaces this one once its cell runs.
        NOTE(review): the helper column `nan_date` is not part of the final drop and
        therefore stays in the returned frame — confirm whether this is intended.
        """
        # We use a nullable boolean Series to represent the rows where:
        tss["nan_charging"] = (
            Series(pd.NA, index=tss.index, dtype="boolean")# We are not sure of anything.
            .mask(tss["charging_status"].isin(IN_CHARGE_CHARGING_STATUS_VALS), True)# We are sure that the vehicle is in charge.
            .mask(tss["charging_status"].isin(IN_DISCHARGE_CHARGING_STATUS_VALS), False)# We are sure that the vehicle is not in charge.
        )
        # If a period of uncertainty (NaN) is surrounded by equal periods of certainty (True-NaN-True or False-NaN-False),
        # we fill it with the value of these certainties.
        # However there are edge cases that have uncertainty periods of multiple days (I can't find the VIN but I'm sure you can ;-) )
        # Interestingly enough the charge_energy_added variable does not get forwarded that far and gets reset to zero.
        # This would create outliers in our charge SoH estimation as we estimate the energy_gained as the diff between the last(0) and first value of charge_energy_added.
        # So we set a maximal uncertainty period duration over which we don't fill it.
        # Date of the rows whose state is known; NaT on the uncertain rows.
        tss["nan_date"] = tss["date"].mask(tss["nan_charging"].isna())
        # Last known state/date before each row and next known state/date after it, per vin.
        tss[["ffill_charging", "ffill_date"]] = tss.groupby("vin", observed=True)[["nan_charging", "nan_date"]].ffill()
        tss[["bfill_charging", "bfill_date"]] = tss.groupby("vin", observed=True)[["nan_charging", "nan_date"]].bfill()
        # Time between the known rows surrounding each row.
        nan_period_duration:Series = tss.eval("bfill_date - ffill_date")
        fill_unknown_period = tss.eval("ffill_charging.eq(bfill_charging) & @nan_period_duration.le(@MAX_CHARGE_TD)")
        tss["nan_charging"] = tss["nan_charging"].mask(fill_unknown_period, tss["ffill_charging"])
        # As mentioned before, the SoC oscillates at [charge_limit_soc - ~3%, charge_limit_soc] so we set these periods to NaN as well.
        tss["nan_charging"] = tss["nan_charging"].mask(tss["soc"] >= (tss["charge_limit_soc"] - 3))
        # Then we separate the Series into two, more explicit, columns.
        tss["in_charge"] = tss.eval("nan_charging.notna() & nan_charging")
        tss["in_discharge"] = tss.eval("nan_charging.notna() & ~nan_charging")
        return tss.drop(columns=["nan_charging", "ffill_charging", "bfill_charging", "ffill_date", "bfill_date"])
    
def compute_enenergy_added(tss:DF) -> DF:
    """Write `charge_energy_added` on `tss` and return the same frame.

    The value is `dc_charge_energy_added` where that column is set and strictly
    positive, and `ac_charge_energy_added` everywhere else.
    """
    dc_energy = tss['dc_charge_energy_added']
    has_dc_energy = dc_energy.notnull() & (dc_energy > 0)
    tss['charge_energy_added'] = dc_energy.where(has_dc_energy, tss['ac_charge_energy_added'])
    return tss
    
def compute_charge_idx_bis(tss):
    """Number the periods of constant SoC trend of each vehicle in `in_charge_idx`.

    Only the rows with a known `soc` are used to detect the periods. A new period
    starts when the smoothed SoC trend changes after having been stable for two
    rows, or when more than 60 minutes separate two consecutive rows.

    Every step is computed per vin: the rolling mean, the forward fills and the
    shifts never mix the rows of two different vehicles.

    Args:
        tss: frame with at least the columns `vin`, `date`, `soc`, `odometer`,
            `dc_charge_energy_added` and `ac_charge_energy_added`.

    Returns:
        `tss` left-merged (on soc, date, vin) with the columns `soc_diff`,
        `in_charge_idx`, `trend`, `prev_trend`, `prev_prev_trend` and
        `trend_change`; `odometer` and `in_charge_idx` are forward filled per vin.
    """
    tss = tss.pipe(compute_enenergy_added)
    tss_na = tss.dropna(subset=['soc']).copy()
    tss_na['soc_diff'] = tss_na.groupby('vin', observed=True)['soc'].diff()
    # Smooth the SoC variations inside each vehicle only; a frame-wide rolling window
    # would include the last rows of the previous vehicle.
    tss_na['soc_diff_rolling'] = (
        tss_na.groupby('vin', observed=True)['soc_diff']
        .transform(lambda soc_diff: soc_diff.rolling(window=5, min_periods=1).mean())
    )
    # Determine the trend: 1 when the SoC rises, -1 when it falls, unknown otherwise.
    tss_na['trend'] = tss_na['soc_diff_rolling'].apply(lambda x: 1 if x > 0 else -1 if x < 0 else np.nan)
    # An unknown trend takes the previous known trend of the same vehicle.
    tss_na['trend'] = tss_na.groupby('vin', observed=True)['trend'].ffill()

    trend_by_vin = tss_na.groupby('vin', observed=True)['trend']
    tss_na['prev_trend'] = trend_by_vin.shift(1)
    tss_na['prev_prev_trend'] = trend_by_vin.shift(2)
    # More than 60 minutes between two consecutive rows of a vehicle.
    time_gap = tss_na.groupby('vin', observed=True)['date'].diff() > pd.Timedelta(minutes=60)
    # TODO: separate charge_idx and discharge_idx.
    trend_flipped = (
        tss_na['trend'].ne(tss_na['prev_trend'])
        & tss_na['prev_trend'].eq(tss_na['prev_prev_trend'])
    )
    # The first two rows of a vehicle have no complete trend history: never start a period there.
    first_two_rows = tss_na.groupby('vin', observed=True).cumcount() < 2
    tss_na['trend_change'] = (trend_flipped | time_gap) & ~first_two_rows

    # Compute the charge id.
    tss_na['in_charge_idx'] = tss_na.groupby('vin', observed=True)['trend_change'].cumsum()
    tss = tss.merge(
        tss_na[["soc", "date", "vin", 'soc_diff', 'in_charge_idx', 'trend', 'prev_trend', 'prev_prev_trend', 'trend_change']],
        on=["soc", "date", "vin"],
        how="left",
    )
    filled_cols = ["odometer", "in_charge_idx"]
    tss[filled_cols] = tss.groupby("vin", observed=True)[filled_cols].ffill()
    return tss

In [None]:
from core.pandas_utils import safe_locate, safe_astype, str_lower_columns
from transform.fleet_info.main import fleet_info 

In [None]:
make = "tesla-fleet-telemetry"
def run(tss):
    """Turn a raw time series frame into a processed one.

    Steps, in order: column renaming, column selection and typing, unit
    normalization, sort by vin and date, lower-casing of string columns, date
    variables, charge/discharge variables, join with `fleet_info` and the `age`
    column (date minus start_date, both without time zone).
    """
    processed = (
        tss
        .rename(columns=RENAME_COLS_DICT, errors="ignore")
        .pipe(safe_locate, col_loc=list(COL_DTYPES.keys()), logger=logger)
        .pipe(safe_astype, COL_DTYPES, logger=logger)
        .pipe(normalize_units_to_metric)
        .sort_values(by=["vin", "date"])
        .pipe(str_lower_columns, COLS_TO_STR_LOWER)
        .pipe(compute_date_vars)
        .pipe(compute_charge_n_discharge_vars)
        .merge(fleet_info, on="vin", how="left")
        .eval("age = date.dt.tz_localize(None) - start_date.dt.tz_localize(None)")
    )
    # The "vin" column may lose its categorical dtype along the way (it seems that
    # reset_index does not restore it after a groupby on a categorical key),
    # so its configured dtype is applied again at the end.
    return processed.astype({"vin": COL_DTYPES["vin"]})

def compute_charge_n_discharge_vars(tss:DF) -> DF:
    """Compute the charge/discharge masks, their period indices, the cumulative energies and the status column."""
    raw_masks = ["in_charge", "in_discharge"]
    trimmed_masks = ["trimmed_in_charge", "trimmed_in_discharge"]
    # Compute the in_charge and in_discharge masks.
    tss = compute_charge_n_discharge_masks(tss, IN_CHARGE_CHARGING_STATUS_VALS, IN_DISCHARGE_CHARGING_STATUS_VALS)
    # Compute the corresponding indices to perform split-apply-combine operations.
    tss = compute_idx_from_masks(tss, raw_masks)
    # Recompute the masks without the points that carry the first and last soc values of
    # each period, to reduce the noise caused by measurement noise.
    tss = trim_leading_n_trailing_soc_off_masks(tss, raw_masks)
    tss = compute_idx_from_masks(tss, trimmed_masks)
    tss = compute_cum_var(tss, "power", "cum_energy")
    tss = compute_cum_var(tss, "charger_power", "cum_charge_energy_added")
    return compute_status_col(tss)

def normalize_units_to_metric( tss:DF) -> DF:
    """Scale `odometer` by the factor registered for the global `make` (1 when none is registered)."""
    odometer_factor = ODOMETER_MILES_TO_KM.get(make, 1)
    tss["odometer"] = tss["odometer"] * odometer_factor
    return tss
from scipy.integrate import cumulative_trapezoid
def compute_cum_var( tss: DF, var_col:str, cum_var_col:str) -> DF:
    """Integrate `var_col` over time into `cum_var_col`, restarting at zero for each vin.

    When `var_col` is missing, the frame is returned untouched. Missing values of
    `var_col` count as zero. The integral is scaled by KJ_TO_KWH.
    """
    if var_col not in tss.columns:
        logger.debug(f"{var_col} not found, not computing {cum_var_col}.")
        return tss
    logger.debug(f"Computing {cum_var_col} from {var_col}.")
    # The dates are converted to second resolution before being turned into integers.
    elapsed_seconds = tss["date"].dt.as_unit("s").astype(int)
    values = tss[var_col].fillna(0).values
    # Keyword arguments on purpose: the positional order of cumulative_trapezoid is y, x.
    integral = cumulative_trapezoid(y=values, x=elapsed_seconds, initial=0)
    tss[cum_var_col] = integral.astype("float32")
    tss[cum_var_col] *= KJ_TO_KWH # Convert from kj to kwh
    # Subtracting the first value of each vin resets every vehicle to zero; this is
    # preferred to a groupby.apply of cumulative_trapezoid.
    first_per_vin = tss.groupby("vin", observed=True)[cum_var_col].transform("first")
    tss[cum_var_col] -= first_per_vin
    return tss

def compute_date_vars( tss:DF) -> DF:
    """Add, per vin, the time elapsed since the previous row as `time_diff` and, in seconds, as `sec_time_diff`."""
    elapsed = tss.groupby("vin", observed=False)["date"].diff()
    tss["time_diff"] = elapsed
    tss["sec_time_diff"] = elapsed.dt.total_seconds()
    return tss

def compute_charge_n_discharge_masks(tss:DF, in_charge_vals:list, in_discharge_vals:list) -> DF:
    """Compute the `in_charge` and `in_discharge` masks with the method registered for the global `make`.

    The masks come from the charging_status column or from the evolution of the soc
    over time.

    Raises:
        ValueError: when `make` is registered for neither method.
    """
    if make not in CHARGE_MASK_WITH_CHARGING_STATUS_MAKES:
        if make not in CHARGE_MASK_WITH_SOC_DIFFS_MAKES:
            raise ValueError(MAKE_NOT_SUPPORTED_ERROR.format(make=make))
        return charge_n_discharging_masks_from_soc_diff(tss)
    return charge_n_discharging_masks_from_charging_status(tss, in_charge_vals, in_discharge_vals)

def charge_n_discharging_masks_from_soc_diff( tss:DF) -> DF:
    """Derive `in_charge` / `in_discharge` from the direction of the soc changes of each vin.

    A row is in charge when both the previous and the next known soc change of its
    vin are positive, and in discharge when both are negative. The helper columns
    `soc_ffilled` and `soc_diff` (sign of the change) stay on the frame.
    """
    by_vin = tss.groupby("vin", observed=True)
    tss["soc_ffilled"] = by_vin["soc"].ffill()
    soc_step = by_vin["soc_ffilled"].diff()
    # Dividing by the absolute value keeps only the sign; a zero step becomes NaN.
    tss["soc_diff"] = soc_step / soc_step.abs()
    direction_before = by_vin["soc_diff"].ffill()
    direction_after = by_vin["soc_diff"].bfill()
    rising = direction_before.gt(0, fill_value=False) & direction_after.gt(0, fill_value=False)
    falling = direction_before.lt(0, fill_value=False) & direction_after.lt(0, fill_value=False)
    tss["in_charge"] = rising
    tss["in_discharge"] = falling
    return tss

def charge_n_discharging_masks_from_charging_status( tss:DF, in_charge_vals:list, in_discharge_vals:list) -> DF:
    """Return `tss` with `in_charge` / `in_discharge` set from the membership of `charging_status` in the given value lists.

    The frame must have a `charging_status` column (checked with an assert).
    """
    assert "charging_status" in tss.columns, NO_CHARGING_STATUS_COL_ERROR
    with_in_charge = tss.eval(f"in_charge = charging_status in {in_charge_vals}")
    with_both_masks = with_in_charge.eval(f"in_discharge = charging_status in {in_discharge_vals}")
    return with_both_masks

def trim_leading_n_trailing_soc_off_masks( tss:DF, masks:list[str]) -> DF:
    """Add a `trimmed_<mask>` column per mask, without the rows carrying the first or last soc of their period.

    A period is a (vin, `<mask>_idx`) group. The first and last soc of the period,
    taken over the rows where the mask is true, are written in `trailing_soc` and
    `leading_soc` (these two columns hold the values of the last mask processed).
    """
    for mask_name in masks:
        # soc restricted to the rows where the mask holds.
        tss["naned_soc"] = tss["soc"].where(tss[mask_name])
        period_soc = tss.groupby(["vin", f"{mask_name}_idx"], observed=True)["naned_soc"]
        first_soc = period_soc.transform("first")
        last_soc = period_soc.transform("last")
        tss["trailing_soc"] = first_soc
        tss["leading_soc"] = last_soc
        tss[f"trimmed_{mask_name}"] = tss[mask_name] & tss["soc"].ne(first_soc) & tss["soc"].ne(last_soc)
    return tss.drop(columns="naned_soc")
# Maximal time between two consecutive rows of the same period.
max_td = TD(hours=1, minutes=30)
def compute_idx_from_masks( tss: DF, masks:list[str]) -> DF:
    """Add a `<mask>_idx` column per mask that numbers the periods of each vin.

    The index of a vin increases each time the mask value differs from the previous
    row of that vin (the row before the first one counts as False) and, when the
    global `max_td` is set, each time `time_diff` exceeds it. Stored as uint16.
    """
    for mask_name in masks:
        previous_value = tss.groupby("vin", observed=True)[mask_name].shift(fill_value=False)
        tss["new_period_start_mask"] = previous_value.ne(tss[mask_name])
        if max_td is not None:
            tss["new_period_start_mask"] |= (tss["time_diff"] > max_td)
        period_idx = tss.groupby("vin", observed=True)["new_period_start_mask"].cumsum()
        tss[f"{mask_name}_idx"] = period_idx.astype("uint16")
        # The helper column is only needed to compute the index.
        tss.drop(columns=["new_period_start_mask"], inplace=True)
    return tss

def compute_status_col( tss:DF) -> DF:
    """Add a `status` column built from `in_charge` and the odometer variation of each vin.

    Rows in charge get "charging" and rows with a missing `in_charge` get "unknown".
    Rows not in charge get "moving" when the odometer increased since the previous
    row of the vin, "idle_discharging" otherwise.
    """
    status_labels = {True: "charging", False:"discharging", pd.NA:"unknown"}
    base_status = tss["in_charge"].map(status_labels)
    odometer_step = tss.groupby("vin", observed=True)["odometer"].diff()
    not_in_charge = tss["in_charge"].eq(False, fill_value=True)
    movement_status = np.where(odometer_step > 0, "moving", "idle_discharging")
    tss["status"] = base_status.mask(not_in_charge, movement_status)
    return tss

In [None]:
processed_tss = run(raw_tss)

In [None]:

class TeslaProcessedTimeSeries(ProcessedTimeSeries):
    """ProcessedTimeSeries with Tesla specific charge/discharge detection.

    The charge/discharge masks come from `charging_status`, and the charging
    periods are numbered from the trend of the SoC of each vehicle.
    """

    def __init__(self, make:str="tesla", id_col:str="vin", log_level:str="INFO", max_td:TD=MAX_TD, force_update:bool=False, **kwargs):
        """Set up the logger of `make`, then delegate to ProcessedTimeSeries.__init__."""
        self.logger = getLogger(make)
        set_level_of_loggers_with_prefix(log_level, make)
        super().__init__(make, id_col, log_level, max_td, force_update, **kwargs)

    def compute_charge_n_discharge_vars(self, tss:DF) -> DF:
        """Compute the charge/discharge masks, then the trend based period index."""
        return (
            tss
            .pipe(self.compute_charge_n_discharge_masks)
            .pipe(self.compute_charge_idx_bis)
            # .pipe(self.compute_idx_from_masks, ["in_charge"])
            # .pipe(self.trim_leading_n_trailing_soc_off_masks, ["in_charge", "in_discharge"])
            # # .pipe(self.compute_idx_from_masks, ["trimmed_in_charge", "trimmed_in_discharge"])
        )

    def compute_charge_n_discharge_masks(self, tss:DF) -> DF:
        """Add boolean `in_charge` / `in_discharge` columns derived from `charging_status`.

        Reads the columns `charging_status`, `date`, `vin`, `soc` and
        `charge_limit_soc`. The helper column `nan_date` is not dropped and stays in
        the returned frame.
        """
        self.logger.debug("Computing tesla specific charge and discharge masks")
        # We use a nullable boolean Series to represent the rows where:
        tss["nan_charging"] = (
            Series(pd.NA, index=tss.index, dtype="boolean")# We are not sure of anything.
            .mask(tss["charging_status"].isin(IN_CHARGE_CHARGING_STATUS_VALS), True)# We are sure that the vehicle is in charge.
            .mask(tss["charging_status"].isin(IN_DISCHARGE_CHARGING_STATUS_VALS), False)# We are sure that the vehicle is not in charge.
        )
        # If a period of uncertainty (NaN) is surrounded by equal periods of certainty (True-NaN-True or False-NaN-False),
        # we fill it with the value of these certainties.
        # However there are edge cases that have uncertainty periods of multiple days (I can't find the VIN but I'm sure you can ;-) )
        # Interestingly enough the charge_energy_added variable does not get forwarded that far and gets reset to zero.
        # This would create outliers in our charge SoH estimation as we estimate the energy_gained as the diff between the last(0) and first value of charge_energy_added.
        # So we set a maximal uncertainty period duration over which we don't fill it.
        tss["nan_date"] = tss["date"].mask(tss["nan_charging"].isna())
        tss[["ffill_charging", "ffill_date"]] = tss.groupby("vin", observed=True)[["nan_charging", "nan_date"]].ffill()
        tss[["bfill_charging", "bfill_date"]] = tss.groupby("vin", observed=True)[["nan_charging", "nan_date"]].bfill()
        nan_period_duration:Series = tss.eval("bfill_date - ffill_date")
        fill_unknown_period = tss.eval("ffill_charging.eq(bfill_charging) & @nan_period_duration.le(@MAX_CHARGE_TD)")
        tss["nan_charging"] = tss["nan_charging"].mask(fill_unknown_period, tss["ffill_charging"])
        # As mentioned before, the SoC oscillates at [charge_limit_soc - ~3%, charge_limit_soc] so we set these periods to NaN as well.
        tss["nan_charging"] = tss["nan_charging"].mask(tss["soc"] >= (tss["charge_limit_soc"] - 3))
        # Then we separate the Series into two, more explicit, columns.
        tss["in_charge"] = tss.eval("nan_charging.notna() & nan_charging")
        tss["in_discharge"] = tss.eval("nan_charging.notna() & ~nan_charging")
        return tss.drop(columns=["nan_charging", "ffill_charging", "bfill_charging", "ffill_date", "bfill_date"])

    def compute_charge_n_discharge_masks_bis(self, tss:DF) -> DF:
        """Alternative masks based on the sign of the SoC variation between known soc rows.

        `in_charge` is true where the trend is 1 and `in_discharge` where it is -1.
        Rows without a known soc take the trend, soc and odometer of the next known
        row of the same vin.
        """
        self.logger.debug("Computing tesla specific charge and discharge masks")

        tss_na = tss.dropna(subset=['soc']).copy()

        tss_na['soc_diff'] = tss_na.groupby('vin', observed=True)['soc'].diff()

        tss_na['trend'] = tss_na['soc_diff'].apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0)

        #tss_na['trend_change'] = tss_na.groupby('vin', observed=True)['trend'].transform(lambda x: x != x.shift())
        tss = tss.merge(tss_na[["soc", "date", "vin", 'soc_diff', 'trend']],
                        on=["soc", "date", "vin"], how="left")
        # Calling bfill(inplace=True) on a column selection fills a temporary copy and
        # leaves `tss` untouched, so the result is assigned back explicitly.
        # It is done per vin so that a vehicle never takes the values of the next one.
        filled_cols = ["trend", "soc", "odometer"]
        tss[filled_cols] = tss.groupby("vin", observed=True)[filled_cols].bfill()
        tss["in_charge"] = tss.eval('trend==1')
        tss["in_discharge"] = tss.eval('trend==-1')
        return tss

    def compute_enenergy_added(self, tss:DF) -> DF:
        """Write `charge_energy_added`: the DC counter where it is set and positive, the AC counter elsewhere."""
        tss['charge_energy_added'] = tss['dc_charge_energy_added'].where(
            tss['dc_charge_energy_added'].notnull() &
            (tss['dc_charge_energy_added'] > 0),
            tss['ac_charge_energy_added'])
        return tss

    # def compute_charge_idx(self, tss:DF) -> DF:
    #     self.logger.debug("Computing tesla specific charge index.")
    #     if self.make == 'tesla-fleet-telemetry':
    #         tss = tss.pipe(self.compute_enenergy_added)
    #     tss_grp = tss.groupby("vin", observed=False)
    #     tss["charge_energy_added"] = tss_grp["charge_energy_added"].ffill()
    #     energy_added_over_time = tss_grp['charge_energy_added'].diff().div(tss["sec_time_diff"].values)
    #     # charge_energy_added is cummulative and forward filled, 
    #     # We check that the charge_energy_added decreases too fast to make sure that  correctly indentify two charging periods before and after a gap as two separate charging periods.
    #     new_charge_mask = energy_added_over_time.lt(MIN_POWER_LOSS, fill_value=0) 
    #     # For the same reason, we ensure that there are no gaps bigger than MAX_CHARGE_TD in between to rows of the same charging period.
    #     new_charge_mask |= tss["time_diff"].gt(MAX_CHARGE_TD) 
    #     # And of course we also check that there is no change of status. 
    #     new_charge_mask |= (~tss_grp["in_charge"].shift().bfill() & tss["in_charge"]) 
    #     tss["in_charge_idx"] = new_charge_mask.groupby(tss["vin"], observed=True).cumsum()
    #     print(tss["in_charge_idx"].count() / len(tss))
    #     tss["in_charge_idx"] = tss["in_charge_idx"].fillna(-1).astype("uint16")
    #     return tss

    def compute_charge_idx_bis(self, tss):
        """Number the periods of constant SoC trend of each vehicle in `in_charge_idx`.

        Only the rows with a known `soc` are used to detect the periods. A new period
        starts when the smoothed SoC trend changes after having been stable for two
        rows, or when more than 60 minutes separate two consecutive rows.

        Every step is computed per vin: the rolling mean, the forward fills and the
        shifts never mix the rows of two different vehicles.

        Returns:
            `tss` left-merged (on soc, date, vin) with the columns `soc_diff`,
            `in_charge_idx`, `trend`, `prev_trend`, `prev_prev_trend` and
            `trend_change`; `odometer` and `in_charge_idx` are forward filled per vin.
        """
        if self.make == 'tesla-fleet-telemetry':
            tss = tss.pipe(self.compute_enenergy_added)
        tss_na = tss.dropna(subset=['soc']).copy()
        tss_na['soc_diff'] = tss_na.groupby('vin', observed=True)['soc'].diff()
        # Smooth the SoC variations inside each vehicle only; a frame-wide rolling
        # window would include the last rows of the previous vehicle.
        tss_na['soc_diff_rolling'] = (
            tss_na.groupby('vin', observed=True)['soc_diff']
            .transform(lambda soc_diff: soc_diff.rolling(window=5, min_periods=1).mean())
        )
        # Determine the trend: 1 when the SoC rises, -1 when it falls, unknown otherwise.
        tss_na['trend'] = tss_na['soc_diff_rolling'].apply(lambda x: 1 if x > 0 else -1 if x < 0 else np.nan)
        # An unknown trend takes the previous known trend of the same vehicle.
        tss_na['trend'] = tss_na.groupby('vin', observed=True)['trend'].ffill()

        trend_by_vin = tss_na.groupby('vin', observed=True)['trend']
        tss_na['prev_trend'] = trend_by_vin.shift(1)
        tss_na['prev_prev_trend'] = trend_by_vin.shift(2)
        # More than 60 minutes between two consecutive rows of a vehicle.
        time_gap = tss_na.groupby('vin', observed=True)['date'].diff() > pd.Timedelta(minutes=60)
        # TODO: separate charge_idx and discharge_idx.
        trend_flipped = (
            tss_na['trend'].ne(tss_na['prev_trend'])
            & tss_na['prev_trend'].eq(tss_na['prev_prev_trend'])
        )
        # The first two rows of a vehicle have no complete trend history: never start a period there.
        first_two_rows = tss_na.groupby('vin', observed=True).cumcount() < 2
        tss_na['trend_change'] = (trend_flipped | time_gap) & ~first_two_rows

        # Compute the charge id.
        tss_na['in_charge_idx'] = tss_na.groupby('vin', observed=True)['trend_change'].cumsum()
        tss = tss.merge(
            tss_na[["soc", "date", "vin", 'soc_diff', 'in_charge_idx', 'trend', 'prev_trend', 'prev_prev_trend', 'trend_change']],
            on=["soc", "date", "vin"],
            how="left",
        )
        filled_cols = ["odometer", "in_charge_idx"]
        tss[filled_cols] = tss.groupby("vin", observed=True)[filled_cols].ffill()
        return tss


In [None]:
processed_tss = TeslaProcessedTimeSeries("tesla-fleet-telemetry", force_update=True)
processed_tss['in_charge_idx'] = processed_tss['in_charge_idx'].astype(str)


### Raw results

In [None]:
from core.stats_utils import series_start_end_diff

In [None]:
# One row per (vin, in_charge_idx) period: aggregates of the period, then the derived
# energy added, soh estimate and charge level columns.
# NOTE(review): `charging_power` is a plain sum and `energy_added` uses np.maximum, so both
# are NaN as soon as either the AC or the DC input is NaN for a period — confirm that
# the two inputs are always set, or that NaN is the wanted result.
# NOTE(review): the thresholds 8 and 45 of the level columns are presumably in kW — confirm.
raw_results = (processed_tss.groupby(["vin", "in_charge_idx"], observed=True, as_index=False).agg(
            ac_energy_added_min=pd.NamedAgg("ac_charge_energy_added", "min"),
            dc_energy_added_min=pd.NamedAgg("dc_charge_energy_added", "min"),
            ac_energy_added_end=pd.NamedAgg("ac_charge_energy_added", "last"),
            dc_energy_added_end=pd.NamedAgg("dc_charge_energy_added", "last"),
            soc_diff=pd.NamedAgg("soc", series_start_end_diff),
            inside_temp=pd.NamedAgg("inside_temp", "mean"),
            net_capacity=pd.NamedAgg("net_capacity", "first"),
            range=pd.NamedAgg("range", "first"),
            odometer=pd.NamedAgg("odometer", "first"),
            version=pd.NamedAgg("version", "first"),
            size=pd.NamedAgg("soc", "size"),
            model=pd.NamedAgg("model", "first"),
            date=pd.NamedAgg("date", "first"),
            ac_charging_power=pd.NamedAgg("ac_charging_power", "median"),
            dc_charging_power=pd.NamedAgg("dc_charging_power", "median"),
            tesla_code=pd.NamedAgg("tesla_code", "first"),
        )
        .eval("charging_power = ac_charging_power + dc_charging_power")
        # Energy added over the period: last value minus minimal value of each counter.
        .eval("ac_energy_added = ac_energy_added_end  - ac_energy_added_min")
        .eval("dc_energy_added = dc_energy_added_end  - dc_energy_added_min")
        .assign(energy_added=lambda df: np.maximum(df["ac_energy_added"], df["dc_energy_added"]))
        # soh: energy added divided by the energy the soc gain represents at net capacity.
        .eval("soh = energy_added / (soc_diff / 100.0 * net_capacity)")
        # The soc gain (as a fraction) is attributed to one level according to the charging power.
        .eval("level_1 = soc_diff * (charging_power < 8) / 100")
        .eval("level_2 = soc_diff * (charging_power.between(8, 45)) / 100")
        .eval("level_3 = soc_diff * (charging_power > 45) / 100")
        .sort_values(["tesla_code", "vin", "date"]))

In [None]:
25.788389 / (40.029564 / 100 * 60)

In [None]:
raw_results[(raw_results['vin']=="LRWYGCFS6PC992837")][['soh', 'odometer', 'soc_diff', "energy_added"]]

In [None]:
# To start again from the results that are already stored:
#raw_results_origin = get_results_origin(force_update=False)

### Processed results

In [None]:
from transform.processed_results.main import *

In [None]:
# One pandas eval expression per minimal soc gain: each key is
# "tesla-fleet-telemetry-<threshold>" and its expression masks `soh` with a condition
# on `soc_diff` (above the threshold) and on `soh` (between 0.75 and 1.05).
# NOTE(review): the condition has no parentheses around `soc_diff > N`; it relies on the
# operator precedence of the pandas eval parser — confirm it is evaluated as intended.
SOH_FILTER_EVAL = {
    f"tesla-fleet-telemetry-{min_soc_diff}": f"soh = soh.where(soc_diff > {min_soc_diff} & soh.between(0.75, 1.05))"
    for min_soc_diff in (30, 25, 20, 15, 8)
}


In [None]:
def get_processed_results(brand:str) -> DF:
    """Build the processed results of `brand` from the global `raw_results`.

    Args:
        brand: key of SOH_FILTER_EVAL selecting the soh filter expression to apply.

    Returns:
        The results sorted by vin and date, with `soh` and `odometer` forward then
        backward filled inside each vin.
    """
    logger.info(f"{'Processing ' + brand + ' results.':=^{50}}")
    # Some raw estimations may have inf values, which would make
    # mask_out_outliers_by_interquartile_range and force_monotonic_decrease fail,
    # so they are replaced by NaNs.
    finite_soh = raw_results["soh"].replace([np.inf, -np.inf], np.nan)
    aggregated = (
        raw_results
        .assign(soh=finite_soh)
        .sort_values(["vin", "date"])
        .pipe(make_charge_levels_presentable)
        .eval(SOH_FILTER_EVAL[brand])
        .pipe(agg_results_by_update_frequency)
    )
    results = (
        aggregated
        .groupby('vin', observed=True)
        .apply(make_soh_presentable_per_vehicle, include_groups=False)
        .reset_index(level=0)
        #.pipe(filter_results_by_lines_bounds, VALID_SOH_POINTS_LINE_BOUNDS, logger=logger)
        .sort_values(["vin", "date"])
    )
    for filled_col in ("soh", "odometer"):
        results[filled_col] = results.groupby("vin", observed=True)[filled_col].ffill()
        results[filled_col] = results.groupby("vin", observed=True)[filled_col].bfill()
    return results

In [None]:
# Processed results for each soh filter of SOH_FILTER_EVAL; the suffix of a variable
# is the soc_diff threshold used in the filter expression of its key.
processed_results_30 = get_processed_results('tesla-fleet-telemetry-30')
processed_results_25 = get_processed_results('tesla-fleet-telemetry-25')
processed_results_20 = get_processed_results('tesla-fleet-telemetry-20')
processed_results_15 = get_processed_results('tesla-fleet-telemetry-15')
processed_results_8 = get_processed_results('tesla-fleet-telemetry-8')

In [None]:
px.scatter(raw_results[(raw_results['soh'] >.7) &(raw_results['soh'] < 1.05)].dropna(subset='soh'), x='odometer', y='soh', color='vin')

In [None]:
px.scatter(processed_results_30[(processed_results_30['soh'] > .75) &(processed_results_30['soh'] < 1.05)].dropna(subset='soh'), x='odometer', y='soh', color='vin')