# Tesla specific time series processing
The goal of this notebook is to demonstrate the implementation of time series processing steps that are specific to Tesla.

## Setup

### Imports

In [None]:
import plotly.express as px

from pandas.api.types import CategoricalDtype

from core.pandas_utils import *
from transform.processed_tss.config import IN_CHARGE_CHARGING_STATUS_VALS, IN_DISCHARGE_CHARGING_STATUS_VALS
from transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries
from transform.raw_results.tesla_results import get_results

### Data extraction

In [None]:
# Create the local cache directory; -p makes this a no-op when it already exists.
! mkdir -p data_cache

In [None]:
# Load the time series from the local parquet cache.
# NOTE(review): this read fails unless data_cache/tesla_sub_tss.parquet already exists.
# The trailing TeslaProcessedTimeSeries() is presumably the uncached source — confirm, and
# consider guarding the read with an existence check that rebuilds the cache.
tss = pd.read_parquet("data_cache/tesla_sub_tss.parquet") #TeslaProcessedTimeSeries()

In [None]:
# Cast the vehicle identifier and charging status columns to categorical dtype.
categorical_columns = ("vin", "charging_status")
tss = tss.astype({column: CategoricalDtype() for column in categorical_columns})

## Segmentation and indexing

In [None]:
def compute_charge_n_discharge_masks(tss:DF) -> DF:
    """Add boolean `in_charge` and `in_discharge` columns derived from `charging_status`.

    Rows whose `charging_status` is in IN_CHARGE_CHARGING_STATUS_VALS are flagged as charging
    and rows in IN_DISCHARGE_CHARGING_STATUS_VALS as discharging (the discharge list is applied
    last, so it wins for a value present in both). All other rows start as unknown (NA) and
    take the forward-filled state of their vin when the forward- and backward-filled states
    agree. Finally, rows with `soc` >= 98 are reset to unknown.

    Args:
        tss: Frame with `vin`, `charging_status` and `soc` columns. The per-vin fills follow
            row order, so rows are presumably expected to be sorted by date within each vin.
            The frame is modified in place.

    Returns:
        The same frame with `in_charge` and `in_discharge` added; both are False on unknown rows.
    """
    vin = tss["vin"]
    raw_status = tss["charging_status"]
    charging = Series(pd.NA, index=tss.index, dtype="boolean")
    charging = charging.mask(raw_status.isin(IN_CHARGE_CHARGING_STATUS_VALS), True)
    charging = charging.mask(raw_status.isin(IN_DISCHARGE_CHARGING_STATUS_VALS), False)
    # Nearest known state before / after each row, per vehicle.
    previous_state = charging.groupby(vin, observed=True).ffill()
    next_state = charging.groupby(vin, observed=True).bfill()
    # NOTE(review): rows where only one of the two fills is known yield NA in the condition;
    # how Series.mask treats those decides the state at the edges of a vin — verify.
    charging = charging.mask(previous_state.eq(next_state), previous_state)
    # Drop the state for (almost) full batteries.
    charging = charging.mask(tss["soc"] >= 98)
    is_known = charging.notna()
    tss["in_charge"] = is_known & charging
    tss["in_discharge"] = is_known & ~charging
    return tss

def compute_charge_idx(tss:DF, min_power_loss:float=0.0001) -> DF:
    """Number the charging periods of each vehicle.

    `charge_energy_added` is forward-filled per vin, then its row-to-row difference is divided
    by `sec_time_diff` and stored in the `power_loss` column. A new charging period starts on
    an `in_charge` row where that rate is below `min_power_loss` (a missing rate counts as 0)
    or where `time_diff` exceeds one day. `in_charge_idx` is the per-vin cumulative count of
    those starts, so rows before the first start of a vin get 0.

    Args:
        tss: Frame with `vin`, `charge_energy_added`, `sec_time_diff`, `time_diff` and
            `in_charge` columns, presumably sorted by date within each vin. Modified in place.
        min_power_loss: Rate threshold under which an `in_charge` row opens a new period.

    Returns:
        The same frame with `charge_energy_added` forward-filled and the `power_loss` and
        `in_charge_idx` columns added.
    """
    tss["charge_energy_added"] = tss.groupby("vin", observed=False)["charge_energy_added"].ffill()
    # Group again after the assignment so the diff is explicitly taken on the filled values.
    energy_step = tss.groupby("vin", observed=False)["charge_energy_added"].diff()
    power_loss = energy_step.div(tss["sec_time_diff"].values)
    # Store the computed series itself so the column is numeric and can be plotted.
    tss["power_loss"] = power_loss
    starts_new_charge = power_loss.lt(min_power_loss, fill_value=0) | tss["time_diff"].gt(TD(days=1))
    new_charge_mask = tss["in_charge"] & starts_new_charge
    tss["in_charge_idx"] = new_charge_mask.groupby(tss["vin"], observed=True).cumsum()
    return tss

def compute_status_col(tss:DF) -> DF:
    """Add a string `status` column summarising what the vehicle is doing on each row.

    Rows with `in_charge` True are labelled "charging". Rows with `in_charge` False are
    labelled "moving" when `odometer` increased relative to the previous row of the same vin,
    and "idle_discharging" otherwise (including the first row of a vin, whose diff is missing).
    Missing `in_charge` values are filled with True before the comparison with False, so they
    are never relabelled and keep whatever the mapping gives them (intended: "unknown").

    Args:
        tss: Frame with `vin`, `in_charge` and `odometer` columns. Modified in place.

    Returns:
        The same frame with the `status` column added.
    """
    not_charging = tss["in_charge"].eq(False, fill_value=True)
    odometer_step = tss.groupby("vin", observed=False)["odometer"].diff()
    discharge_label = np.where(odometer_step > 0, "moving", "idle_discharging")
    base_label = tss["in_charge"].map({True: "charging", False:"discharging", pd.NA:"unknown"})
    tss["status"] = base_label.mask(not_charging, discharge_label)
    return tss

def trim_leading_n_trailing_soc_off_masks(tss:DF, masks:list[str]) -> DF:
    """Trim the flat SoC head and tail off each period of the given masks.

    For every mask name `m`, rows are grouped by (`vin`, `m_idx`). Within each group the first
    and last SoC observed while `m` is True are taken as the leading and trailing SoC.
    `trimmed_m` is True on rows where `m` is True and `soc` differs from both.

    Args:
        tss: Frame with `vin` and `soc` columns plus, for each mask `m`, a boolean column `m`
            and an index column `m_idx`. Modified in place.
        masks: Names of the boolean mask columns to trim. An empty list leaves `tss` untouched.

    Returns:
        The same frame with `trailing_soc`, `leading_soc` (both from the last mask processed)
        and one `trimmed_<mask>` column per mask.
    """
    for mask in masks:
        # SoC restricted to rows inside the mask, so "first"/"last" skip rows outside it.
        # Kept as a local series rather than a temporary column on the caller's frame.
        naned_soc = tss["soc"].where(tss[mask])
        soc_grp = naned_soc.groupby([tss["vin"], tss[mask + "_idx"]], observed=True)
        leading_soc = soc_grp.transform("first")
        trailing_soc = soc_grp.transform("last")
        tss["trailing_soc"] = trailing_soc
        tss["leading_soc"] = leading_soc
        tss[f"trimmed_{mask}"] = tss[mask] & (tss["soc"] != leading_soc) & (tss["soc"] != trailing_soc)
    return tss

In [None]:
# The per-vin ffill / diff / cumsum steps below rely on rows being in chronological order.
tss = tss.sort_values(by=["vin", "date"])
tss = compute_charge_n_discharge_masks(tss)
tss = compute_charge_idx(tss)
tss = compute_status_col(tss)
tss = trim_leading_n_trailing_soc_off_masks(tss, ["in_charge"])

Visualization of the VIN that had the highest number of charges in the previous implementation.

In [None]:
TARGET_VIN = "LRW3E7FA4MC314534"
target_vin_tss = tss.query("vin == @TARGET_VIN")
# SoC over time for the selected vehicle, coloured by the computed status.
px.scatter(target_vin_tss, x="date", y="soc", color="status", hover_data=["odometer"])

Thibault's vehicle.

In [None]:
THIBAULT_VIN = "5YJ3E7EB7KF474436"
# Colour shows the computed status, marker symbol the raw charging_status, for comparison.
thibault_fig = px.scatter(
    tss.query("vin == @THIBAULT_VIN"),
    x="date",
    y="soc",
    color="status",
    symbol="charging_status",
    hover_data=["odometer", "charging_status"],
)
thibault_fig.update_layout(showlegend=True)

In [None]:
# Columns kept as identifiers when melting soc and power_loss into one long "value" column.
melt_id_vars = ['odometer', "charging_status", "date", "in_charge_str", "in_charge_idx", "trimmed_in_charge_str", "leading_soc", "trailing_soc", "status"]
inspected_tss = (
    tss
    .query("vin == '5YJ3E7EA6LF558840'")
    .eval("in_charge_str = in_charge.astype('string').fillna('unknown')")
    .eval("trimmed_in_charge_str = trimmed_in_charge.astype('string').fillna('unknown')")
    .eval("in_charge_idx = in_charge_idx.fillna(-1)")
    .melt(id_vars=melt_id_vars, value_vars=["soc", "power_loss"])
)
# One facet row per melted variable, each with its own y-axis range.
px.scatter(
    inspected_tss,
    x='date',
    y='value',
    facet_row="variable",
    color="status",
    symbol="in_charge_idx",
    color_continuous_scale="Rainbow",
    hover_data=["charging_status", "leading_soc", "trailing_soc", "in_charge_idx"],
).update_yaxes(matches=None)

Visualization of masking with 4 random vins.

In [None]:
# Fixed seed so the same four vehicles are drawn on every Restart & Run All;
# change the seed to inspect a different sample.
VIN_SAMPLE_SEED = 42
# NOTE(review): assumes uniques_as_series returns a pandas Series, whose sample() accepts random_state.
vin_samples = tss["vin"].pipe(uniques_as_series).sample(n=4, random_state=VIN_SAMPLE_SEED)
px.scatter(
    tss.query("vin in @vin_samples"),
    x="date",
    y="soc",
    color="status",
    symbol="in_charge_idx",
    facet_row="vin",
    hover_data=["odometer", "charging_status"],
    height=750,
).update_yaxes(matches=None)

Usually, very long charging periods are caused by some edge-case scenario that prevents our pipeline from correctly segmenting/separating multiple periods.

In [None]:
# First and last timestamp of every (vin, charging period), ordered by ascending duration.
charging_rows = tss.query("status == 'charging'")
charge_lengths = (
    charging_rows
    .groupby(["vin", "in_charge_idx"], observed=True)
    .agg(start_date=("date", "first"), end_date=("date", "last"))
    .assign(duration=lambda periods: periods["end_date"] - periods["start_date"])
    .sort_values(by="duration")
)

In [None]:
# Sorted by ascending duration, so the longest charging periods are at the bottom.
charge_lengths

In [None]:
# charge_lengths is sorted by ascending duration, so the last 7 rows are the longest periods.
# The same vin can appear more than once among them.
charge_period_vins = charge_lengths.index.get_level_values(0)
LONG_CHARGES_VINS = charge_period_vins[-7:]
long_charges_tss = tss.query("vin in @LONG_CHARGES_VINS")
px.scatter(
    long_charges_tss,
    x="date",
    y="soc",
    color="status",
    symbol="in_charge_idx",
    facet_row="vin",
    hover_data=["odometer"],
    height=750,
).update_yaxes(matches=None)

The same goes for vehicles that have a lot of charges; sometimes this is due to an error on our end.

In [None]:
# Number of distinct charging periods per vehicle, in ascending order.
charge_counts = (
    tss
    .query("in_charge")
    .groupby("vin", observed=True)["in_charge_idx"]
    .nunique()
    .sort_values()
)
charge_counts