## Setup

### Import

In [None]:
import plotly.express as px

from core.pandas_utils import *
from core.plt_utils import plt_3d_df
from transform.processed_tss.ProcessedTimeSeries import ProcessedTimeSeries

### Data Extraction

In [None]:
# Load the cached long-format signal dump: one row per
# (vehicle, timestamp, signal name) with its value in `signal_value`.
# Only the columns needed downstream are read; identifiers and labels are
# forced to the pandas "string" dtype and both date-like columns are parsed.
raw_tss = pd.read_csv(
    "./data_cache/ituran_response.csv",
    usecols=[
        "dataran_id",
        "signal_time",
        "vehicle_make",
        "vehicle_model",
        "signal_name",
        "signal_value",
        "year_of_manufacture",
    ],
    dtype={
        "dataran_id": "string",
        "vehicle_make": "string",
        "vehicle_model": "string",
        "signal_name": "string",
    },
    parse_dates=["signal_time", "year_of_manufacture"],
)
raw_tss

In [None]:
# Number of rows per signal name, most frequent first; missing names are counted too.
raw_tss["signal_name"].value_counts(dropna=False)

In [None]:
# Number of rows per vehicle identifier, most frequent first.
raw_tss["dataran_id"].value_counts()

### Preprocessing

In [None]:
# Columns that identify one observation in the raw long-format data; they are
# used as the pivot index when signals are spread into columns.
INDEX_COLS = [
    "year_of_manufacture",
    "vehicle_make",
    "vehicle_model",
    "dataran_id",
    "signal_time",
]

# Raw column / signal names -> short names used in the rest of the notebook.
COLUMNS_NAMES_MAP = {
    # Signal names (become columns after the pivot).
    "Electric Data - Battery Status Of Charge - 2334": "soc",
    "Electric Data - Charging AC Mode - 2227": "charging_ac_mode",
    "Electric Data - Charging Current - 232": "charging_current",
    "Electric Data - Charging DC Mode - 9629": "charging_dc_mode",
    "Electric Data - Charging Voltage - 7C": "charging_voltage",
    "Electric Data - Ready Switch Open - 2015": "switch_open",
    "Electric Data - Time Remaining for Charge - 2291": "time_remaining_for_charge",
    "Electric Data - Vehicle Range Of Battery - 2229": "estimated_range",
    # Identifier / metadata columns.
    "signal_time": "date",
    "dataran_id": "vehicle_id",
    "vehicle_make": "make",
}

# Target dtypes, keyed by the renamed column names.
# NOTE(review): these are applied with `astype(..., errors="ignore")`, so a
# cast that fails (e.g. "int" on a column holding NaN) is skipped silently,
# and casting NaN to "bool" yields True — verify the resulting dtypes/values.
DTYPES = {
    # Identifiers.
    "date": "datetime64[ns]",
    "vehicle_id": "string",
    # Flags.
    "switch_open": "bool",
    "charging_ac_mode": "bool",
    "charging_dc_mode": "bool",
    # Measurements.
    "time_remaining_for_charge": "int",
    "soc": "float32",
    "charging_current": "float32",
    "charging_voltage": "float32",
    "estimated_range": "float32",
}

In [None]:
class HighFreqProcecssedTimeSeries(ProcessedTimeSeries):
    """Processed time series built from the module-level `raw_tss` frame.

    `run` pivots the long-format signals into one column per signal, renames
    and casts them, then derives charge/discharge masks, power/energy
    variables and forward-filled copies of selected signals. Helpers not
    defined here (`compute_date_vars`, `compute_idx_from_masks`,
    `trim_leading_n_trailing_soc_off_masks`, `compute_cum_var`) come from
    `ProcessedTimeSeries`.
    """

    def run(self) -> DF:
        """Build the processed frame from `raw_tss`.

        Returns:
            The wide frame, ordered by (`self.id_col`, "date") with a fresh
            0..n-1 index, after all derivation steps have been piped through.
        """
        return (
            raw_tss
            # pivot() refuses duplicated (index, column) pairs, so keep the first.
            .drop_duplicates(INDEX_COLS + ["signal_name"], keep="first")
            .pivot(index=INDEX_COLS, columns="signal_name", values="signal_value")
            .reset_index()
            .rename(columns=COLUMNS_NAMES_MAP, errors="ignore")
            # NOTE(review): errors="ignore" silently skips any cast that fails;
            # check the resulting dtypes if a downstream step misbehaves.
            .astype(DTYPES, errors="ignore")
            # ignore_index=True: without it the rows keep their pre-sort labels,
            # and any later step that assigns a freshly 0..n-1 indexed result
            # back onto the frame would be aligned against the wrong rows.
            .sort_values(by=[self.id_col, "date"], ascending=True, ignore_index=True)
            .pipe(self.compute_date_vars)
            .pipe(self.compute_charge_n_discharge_masks)
            .pipe(self.compute_current_vars)
            .pipe(self.compute_idx_from_masks, masks=["in_charge", "in_discharge"])
            .pipe(self.trim_leading_n_trailing_soc_off_masks, masks=["in_charge", "in_discharge"])
            .pipe(self.compute_idx_from_masks, masks=["trimmed_in_charge", "trimmed_in_discharge"])
            .pipe(self.ffill_vars, vars=["estimated_range", "charging_voltage", "charging_current", "time_remaining_for_charge","soc"])
        )

    def compute_charge_n_discharge_masks(self, tss:DF) -> DF:
        """Add boolean `in_charge` / `in_discharge` columns derived from SoC trend.

        The frame is modified in place and returned. It must be ordered by
        (`self.id_col`, "date"), as `run` guarantees, because the rolling
        result is written back by position.

        Args:
            tss: Wide frame holding at least `self.id_col`, "date" and "soc".

        Returns:
            The same frame with "soc_ffilled", "soc_diff",
            "soc_diff_rolled_mean", "in_charge" and "in_discharge" added.
        """
        tss["soc_ffilled"] = tss.groupby(self.id_col)["soc"].ffill()
        tss["soc_diff"] = tss.groupby(self.id_col)["soc_ffilled"].diff()
        # Reduce the difference to its sign: +1 / -1, and NaN where the
        # difference is zero (0 / 0) or missing.
        tss["soc_diff"] /= tss["soc_diff"].abs()
        rolled_sign = (
            tss
            .groupby(self.id_col)
            .rolling(window=pd.Timedelta(minutes=20), on="date")
            ["soc_diff"]
            .mean()
        )
        # The rolling result comes back grouped by id and, within each group,
        # in row order, i.e. in the same order as the sorted frame. Assign it
        # positionally so the outcome does not depend on the frame's index
        # labels (resetting the result's index and aligning on labels only
        # works when the frame itself carries a clean 0..n-1 index).
        tss["soc_diff_rolled_mean"] = rolled_sign.to_numpy()
        # Group again now that the new columns exist on the frame.
        tss_grp = tss.groupby(self.id_col)
        soc_diff_ffilled = tss_grp["soc_diff_rolled_mean"].ffill()
        soc_diff_bfilled = tss_grp["soc_diff_rolled_mean"].bfill()
        # A row is in (dis)charge only when both the previous and the next
        # known trend agree on the sign.
        tss["in_charge"] = soc_diff_ffilled.gt(0, fill_value=False) & soc_diff_bfilled.gt(0, fill_value=False)
        tss["in_discharge"] = soc_diff_ffilled.lt(0, fill_value=False) & soc_diff_bfilled.lt(0, fill_value=False)
        return tss

    def compute_current_vars(self, tss:DF) -> DF:
        """Add power columns and their cumulative counterparts.

        "power" is `charging_current * charging_voltage`; it is split into
        "charging_power" (kept only where `in_charge` is true) and "power"
        (kept only where `in_charge` is false). Both are then handed to the
        parent's `compute_cum_var`, which fills "cum_energy_added" and
        "cum_energy_spent" — presumably a cumulative integration; confirm
        against `ProcessedTimeSeries`.

        Args:
            tss: Frame holding "charging_current", "charging_voltage" and
                "in_charge". Modified in place.

        Returns:
            The frame returned by the last `compute_cum_var` call.
        """
        tss["power"] = tss.eval("charging_current * charging_voltage")
        tss["charging_power"] = tss["power"].mask(~tss["in_charge"], pd.NA)
        tss["power"] = tss["power"].mask(tss["in_charge"], pd.NA)
        tss = self.compute_cum_var(tss, var_col="charging_power", cum_var_col="cum_energy_added")
        tss = self.compute_cum_var(tss, var_col="power", cum_var_col="cum_energy_spent")
        return tss

    def ffill_vars(self, tss:DF, vars:list[str]) -> DF:
        """Add a per-vehicle forward-filled copy of each requested column.

        Args:
            tss: Frame to extend. Modified in place.
            vars: Column names; each yields a new "ffilled_<name>" column.

        Returns:
            The same frame with the "ffilled_*" columns added.
        """
        tss_grp = tss.groupby(self.id_col)
        self.logger.info("ffilling vars")
        for var in vars:
            tss[f"ffilled_{var}"] = tss_grp[var].ffill()
        return tss

In [None]:
# Build the processed time series for the "ituran" make, forcing a recompute.
tss = HighFreqProcecssedTimeSeries(
    make="ituran",
    id_col="vehicle_id",
    force_update=True,
    log_level="ERROR",
)
sanity_check(tss)
# Total deep memory footprint of the frame, in MiB.
display(tss.memory_usage(deep=True).sum() / 1024**2)

## EDA

### Charging points SoH

In [None]:
def compute_first_charge_soc(tss:DF) -> DF:
    """Attach to every row the first SoC value of its charge session.

    A session is a ("vehicle_id", "trimmed_in_charge_idx") pair. The input
    frame is modified in place (a "first_charge_soc" column is added) and
    the very same object is returned.
    """
    soc_per_session = tss.groupby(["vehicle_id", "trimmed_in_charge_idx"])["soc"]
    tss["first_charge_soc"] = soc_per_session.transform("first")
    return tss

# One row per (vehicle, charge session, SoC value) taken from the rows flagged
# `trimmed_in_charge`, with per-group energy, charging-mode and electrical
# summaries.
# Note: `.pipe(compute_first_charge_soc)` adds "first_charge_soc" to `tss`
# itself, since that helper mutates the frame it receives.
charging_points:DF = (
    tss
    .pipe(compute_first_charge_soc)
    .query("trimmed_in_charge")
    .groupby(["vehicle_id", "trimmed_in_charge_idx", "soc"])
    .agg(
        energy_added_at_start=pd.NamedAgg(column="cum_energy_added", aggfunc="first"),
        energy_added_at_end=pd.NamedAgg(column="cum_energy_added", aggfunc="last"),
        # Overwritten by the first eval below; kept so the column keeps its position.
        energy_added=pd.NamedAgg(column="cum_energy_added", aggfunc=series_start_end_diff),
        ac_mode_mean=pd.NamedAgg(column="charging_ac_mode", aggfunc="mean"),
        dc_mode_mean=pd.NamedAgg(column="charging_dc_mode", aggfunc="mean"),
        current=pd.NamedAgg(column="ffilled_charging_current", aggfunc="median"),
        voltage=pd.NamedAgg(column="ffilled_charging_voltage", aggfunc="median"),
        estimated_range=pd.NamedAgg(column="ffilled_estimated_range", aggfunc="median"),
        time_remaining_for_charge=pd.NamedAgg(column="ffilled_time_remaining_for_charge", aggfunc="median"),
        model=pd.NamedAgg(column="vehicle_model", aggfunc="first"),
        first_charge_soc=pd.NamedAgg(column="first_charge_soc", aggfunc="first"),
    )
    .reset_index()
    .eval("energy_added=energy_added_at_end - energy_added_at_start")
    # SoC gained since the first SoC value of the session.
    .eval("soc_added = soc - first_charge_soc")
    # Computed once; the original chain repeated this exact step a second time.
    .eval("power = current * voltage")
    # A group counts as AC / DC when more than 30% of its rows carry that flag.
    .eval("in_ac = ac_mode_mean > 0.3")
    .eval("in_dc = dc_mode_mean > 0.3")
)

charging_points

In [None]:
# Correlation of every numeric column with `energy_added`, strongest positive first.
(
    charging_points
    .corr(numeric_only=True)
    .sort_values(by="energy_added", ascending=False)
    ["energy_added"]
)

In [None]:
# 3D scatter of energy added against median voltage and current, coloured by
# the share of AC-mode rows. Only groups with voltage above 300 and with all
# three plotted values present are shown.
plt_3d_df(
    (
        charging_points
        .query("voltage > 300")
        .dropna(subset=["voltage", "current", "energy_added"])
    ),
    x='voltage',
    y="current",
    z="energy_added",
    color="ac_mode_mean",
    opacity=0.25,
    size=3,
    width=1500,
    height=1000,
    log_z=True,
)

### Per charge SoH

In [None]:
# One row per (vehicle, charge session) taken from the rows flagged
# `trimmed_in_charge`. Named aggregations use the (column, aggfunc) tuple form.
charges = (
    tss
    # Timedelta between each sample and the parsed year of manufacture.
    .eval("age = date - year_of_manufacture")
    .query("trimmed_in_charge")
    .groupby(["vehicle_id", "trimmed_in_charge_idx"])
    .agg(
        energy_added_at_start=("cum_energy_added", "first"),
        energy_added_at_end=("cum_energy_added", "last"),
        energy_added=("cum_energy_added", series_start_end_diff),
        ac_mode_mean=("charging_ac_mode", "mean"),
        dc_mode_mean=("charging_dc_mode", "mean"),
        current=("ffilled_charging_current", "median"),
        voltage=("ffilled_charging_voltage", "median"),
        estimated_range=("ffilled_estimated_range", "median"),
        time_remaining_for_charge=("ffilled_time_remaining_for_charge", "median"),
        model=("vehicle_model", "first"),
        age=("age", "first"),
    )
    .reset_index()
    # Whole years, counting 365 days per year.
    .eval("age_in_years = age.dt.days // 365")
)
charges

In [None]:
# Show the dtype of every column of the per-charge aggregate.
charges.dtypes

In [None]:
# Summary statistics of charging voltage and current for each vehicle model.
(
    tss
    .groupby("vehicle_model")
    [["charging_voltage", "charging_current"]]
    .describe()
)

In [None]:
# Number of distinct vehicles for each vehicle model.
tss.groupby("vehicle_model")["vehicle_id"].nunique()

In [None]:
# Distribution of energy added per charge, by vehicle age in whole years;
# charges with no positive energy added are left out.
(
    charges
    .query("energy_added > 0")
    .groupby("age_in_years")
    ["energy_added"]
    .describe()
)

In [None]:
# Box plot of energy added per charge against vehicle age, one colour per
# model; charges with no positive energy added are left out.
px.box(
    data_frame=charges.query("energy_added > 0"),
    x="age_in_years",
    y="energy_added",
    color="model",
    width=1500,
    height=1000,
)