# Tesla-specific time series processing
The goal of this notebook is to demonstrate the implementation of time series processing steps that are specific to Tesla.

## Setup

### Imports

In [None]:
import plotly.express as px

from pandas.api.types import CategoricalDtype

from core.pandas_utils import *
from transform.processed_tss.config import IN_CHARGE_CHARGING_STATUS_VALS, IN_DISCHARGE_CHARGING_STATUS_VALS
from transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries
from transform.raw_results.tesla_results import get_results

### Data extraction

In [None]:
# Make sure the local cache directory exists (-p: no error if it is already there).
! mkdir -p data_cache

In [None]:
tss = TeslaProcessedTimeSeries() # Full set of processed Tesla time series
# Lighter alternative: swap in the line below to load a cached subset from data_cache instead
# (useful when the full set does not fit comfortably in memory).
#tss = pd.read_parquet("data_cache/tesla_sub_tss.parquet")

In [None]:
# Store the identifier and the raw charging status as categoricals.
tss = tss.astype({col: CategoricalDtype() for col in ("vin", "charging_status")})

## Segmentation and indexing

In [None]:
# Share of rows per raw charging status; dropna=False keeps missing statuses as their own bucket.
tss["charging_status"].value_counts(normalize=True, dropna=False)

In [None]:
def compute_charge_n_discharge_masks(tss:DF) -> DF:
    """Add the ``in_charge`` and ``in_discharge`` boolean columns to ``tss``.

    The columns are written on ``tss`` itself, which is also returned.

    Steps:
    1. Build a tri-state series from ``charging_status``: True when the status is in
       ``IN_CHARGE_CHARGING_STATUS_VALS``, False when it is in
       ``IN_DISCHARGE_CHARGING_STATUS_VALS`` (applied second), missing otherwise.
    2. Per ``vin``, where the forward-filled and backward-filled states agree,
       use that state.
    3. Blank out the state on rows where ``soc`` is within 3 points of (or above)
       ``charge_limit_soc``.
    4. ``in_charge`` / ``in_discharge`` are True only where the state is known and
       is respectively True / False.
    """
    raw_status = tss["charging_status"]
    status_is_charge = raw_status.isin(IN_CHARGE_CHARGING_STATUS_VALS)
    status_is_discharge = raw_status.isin(IN_DISCHARGE_CHARGING_STATUS_VALS)

    state = Series(pd.NA, index=tss.index, dtype="boolean")
    state = state.mask(status_is_charge, True)
    state = state.mask(status_is_discharge, False)

    # Compare the nearest known state before and after each row, within a vehicle.
    state_by_vin = state.groupby(tss["vin"], observed=True)
    previous_known = state_by_vin.ffill()
    next_known = state_by_vin.bfill()
    state = state.mask(previous_known.eq(next_known), previous_known)

    # mask() without a replacement value turns the selected rows into missing values.
    near_charge_limit = tss["soc"] >= (tss["charge_limit_soc"] - 3)
    state = state.mask(near_charge_limit)

    state_is_known = state.notna()
    tss["in_charge"] = state_is_known & state
    tss["in_discharge"] = state_is_known & ~state
    return tss

# A power_loss below this threshold starts a new charge index.
MIN_POWER_LOSS = -0.001
# A time gap between consecutive samples longer than this starts a new charge index.
MAX_CHARGE_TD = TD(days=1)

def compute_charge_idx(tss:DF) -> DF:
    """Number the charging periods of each vehicle.

    Writes the following columns on ``tss`` itself and returns it:

    - ``charge_energy_added``: forward-filled within each ``vin``.
    - ``power_loss``: per-``vin`` difference of the forward-filled
      ``charge_energy_added`` divided by ``sec_time_diff``.
    - ``in_charge_idx``: per-``vin`` running count of "new charge" rows.

    A row counts as a "new charge" when any of these holds:

    - ``power_loss`` is below ``MIN_POWER_LOSS``;
    - ``time_diff`` is greater than ``MAX_CHARGE_TD``;
    - ``in_charge`` is True while the previous row of the same ``vin`` was not.

    The first row of a vehicle has no previous row; it is treated as coming from a
    "not charging" state, so a vehicle whose data starts in charge gets index 1 on
    its first row and ``in_charge_idx`` never contains missing values because of it.

    Expects the columns ``vin``, ``charge_energy_added``, ``sec_time_diff``,
    ``time_diff`` and ``in_charge`` to be present.
    """
    tss_grp = tss.groupby("vin", observed=False)

    energy_added = tss_grp["charge_energy_added"].ffill()
    tss["charge_energy_added"] = energy_added
    # Diff the forward-filled series explicitly rather than re-selecting the column
    # through the group-by object created before the column was overwritten.
    energy_delta = energy_added.groupby(tss["vin"], observed=True).diff()
    tss["power_loss"] = energy_delta.div(tss["sec_time_diff"].values)

    # fill_value=False keeps the shifted column free of missing values: without it the
    # first row of each vin is <NA>, which propagates through ~, &, |= and cumsum.
    previous_in_charge = tss_grp["in_charge"].shift(fill_value=False)
    charge_started = ~previous_in_charge & tss["in_charge"]

    new_charge_mask = tss["power_loss"].lt(MIN_POWER_LOSS, fill_value=0)
    new_charge_mask |= tss["time_diff"].gt(MAX_CHARGE_TD)
    new_charge_mask |= charge_started
    tss["in_charge_idx"] = new_charge_mask.groupby(tss["vin"], observed=True).cumsum()
    return tss

def compute_status_col(tss:DF) -> DF:
    """Add a textual ``status`` column to ``tss`` (written on ``tss``, which is returned).

    - ``in_charge`` True  -> ``"charging"``
    - ``in_charge`` False -> ``"moving"`` when the odometer increased since the previous
      row of the same ``vin``, otherwise ``"idle_discharging"``
    - ``in_charge`` missing -> ``"unknown"``
    """
    labels = {True: "charging", False:"discharging", pd.NA:"unknown"}
    base_status = tss["in_charge"].map(labels)

    # Per-vehicle odometer change; the first row of each vin has no previous value.
    odometer_delta = tss.groupby("vin", observed=False)["odometer"].diff()
    not_charging_status = np.where(odometer_delta > 0, "moving", "idle_discharging")

    # Missing in_charge values are filled with True here, so they keep their base label.
    is_not_charging = tss["in_charge"].eq(False, fill_value=True)
    tss["status"] = base_status.mask(is_not_charging, not_charging_status)
    return tss

def trim_leading_n_trailing_soc_off_masks(tss:DF, masks:list[str]) -> DF:
    """Add ``trimmed_<mask>`` columns that drop the flat SOC ends of each period.

    For every name in ``masks``, periods are the groups of (``vin``, ``<mask>_idx``).
    Within a period, the first and last SOC values observed where ``<mask>`` is True
    are looked up, and ``trimmed_<mask>`` is True on rows where ``<mask>`` is True and
    ``soc`` differs from both of them.

    Columns are written on ``tss`` itself, which is returned. Besides
    ``trimmed_<mask>``, the helper columns ``trailing_soc`` and ``leading_soc`` are
    (over)written for each mask, so only the values of the last mask remain.
    An empty ``masks`` list returns ``tss`` untouched.

    Args:
        tss: frame with ``vin``, ``soc`` and, per mask, ``<mask>`` and ``<mask>_idx``.
        masks: names of the boolean mask columns to trim.
    """
    for mask in masks:
        # SOC restricted to rows where the mask holds; the other rows become missing so
        # that "first"/"last" only pick values from inside the masked period.
        masked_soc = tss["soc"].where(tss[mask])
        soc_grp = masked_soc.groupby([tss["vin"], tss[mask + "_idx"]], observed=True)
        first_soc = soc_grp.transform("first")
        last_soc = soc_grp.transform("last")
        # NOTE(review): "trailing_soc" holds the FIRST value of the period and
        # "leading_soc" the LAST one; the labels look swapped. Contents are kept as they
        # were to avoid breaking downstream readers -- confirm before renaming.
        tss["trailing_soc"] = first_soc
        tss["leading_soc"] = last_soc
        tss[f"trimmed_{mask}"] = tss[mask] & (tss["soc"] != first_soc) & (tss["soc"] != last_soc)
    return tss

In [None]:
# Run the segmentation steps in dependency order: each step adds columns that the
# following steps read, so the order of the .pipe calls matters.
tss = (
    tss
    .pipe(compute_charge_n_discharge_masks)  # adds in_charge / in_discharge
    .pipe(compute_charge_idx)  # adds power_loss and in_charge_idx (reads in_charge)
    .pipe(compute_status_col)  # adds status (reads in_charge and odometer)
    .pipe(trim_leading_n_trailing_soc_off_masks, ["in_charge"])  # adds trimmed_in_charge (reads in_charge_idx)
)

Thibault's vehicle: an example of a vehicle that does not always charge up to 100%.

In [None]:
# SOC over time for a single vehicle, coloured by its raw charging status
# (missing statuses are shown as 'unknown').
px.scatter(
    tss.query("vin == 'LRW3E7FA5LC098336'").eval("charging_status_str = charging_status.astype('string').fillna('unknown')"),
    x="date",
    y="soc",
    color="charging_status_str",
    # symbol="",
    hover_data=["odometer", "charging_status_str"]
).update_layout(showlegend=True)

Usually, very long charging periods are caused by some edge-case scenario that prevents our pipeline from correctly segmenting/separating multiple periods.

In [None]:
# One row per (vin, charge index): first and last timestamp of the rows labelled
# 'charging', plus the elapsed time between them, shortest periods first.
charge_lengths = (
    tss
    .query("status == 'charging'")
    .groupby(["vin", "in_charge_idx"], observed=True)
    .agg(start_date=("date", "first"), end_date=("date", "last"))
    .assign(duration=lambda periods: periods["end_date"] - periods["start_date"])
    .sort_values(by="duration")
)
charge_lengths

In [None]:
# Charging periods lasting between 15 and 20 hours (both bounds included).
charge_lengths.loc[lambda periods: periods["duration"].between(TD(hours=15), TD(hours=20))]

In [None]:
# Distribution of charging-period durations in hours (log-scaled counts).
px.histogram(
    charge_lengths.assign(
        hours_duration=lambda periods: periods["duration"].dt.total_seconds().div(3600)
    ),
    x="hours_duration",
    log_y=True,
)

In [None]:
# Per-signal view of one vehicle: one facet row each for soc, charge_limit_soc,
# charge_energy_added and power_loss, coloured by the in_charge mask.
# update_yaxes(matches=None) leaves the y-axis of each facet independent.
px.scatter(
    (
        tss
        .query("vin == 'XP7YGCEKXPB198770'")
        .eval("charging_status = charging_status.astype('string').fillna('unknown')")
        .eval("status = status.astype('string').fillna('unknown')")
        .melt(["date", "status", "in_charge_idx", "charging_status", "in_charge"], ["soc", "charge_limit_soc", "charge_energy_added", "power_loss"])
    ),
    x="date",
    y="value",
    facet_row="variable",
    color="in_charge",
    # symbol="in_charge_idx",
    hover_data=["in_charge_idx", "charging_status"],
    height=650,
).update_yaxes(matches=None)

The same goes for vehicles that have a lot of charges; sometimes this is due to an error on our end.

In [None]:
# Number of distinct charge indices per vehicle among in-charge rows, fewest first.
charge_counts = (
    tss
    .query("in_charge")
    .groupby("vin", observed=True)["in_charge_idx"]
    .nunique()
    .sort_values()
)
charge_counts

## Charging up to 60 and oscillating

vin: `5YJ3E7EB1KF334219`

In [None]:
# SOC and power_loss of one vehicle (one facet row each), coloured by the computed
# status, with one marker symbol per charge index.
# NOTE(review): the markdown above names vin 5YJ3E7EB1KF334219, but this cell plots
# XP7YGCEK5PB114421 -- confirm which vehicle is intended.
px.scatter(
    (
        tss
        .query("vin == 'XP7YGCEK5PB114421'")
        .eval("charging_status = charging_status.astype('string').fillna('unknown')")
        .eval("status = status.astype('string').fillna('unknown')")
        .melt(["date", "status", "in_charge_idx", "charging_status"], ["soc", "power_loss"])
    ),
    x="date",
    y="value",
    facet_row="variable",
    color="status",
    symbol="in_charge_idx",
    hover_data=["in_charge_idx", "charging_status"],
    height=650,
).update_yaxes(matches=None)