# Ituran preliminary EDA
The goal of this notebook is to examine the data provided by Ituran.  
We want to know what columns we will need for the POC and their respective quality (frequency, error margin, ...).  

## Setup

### Import

In [None]:
import plotly.express as px

from core.pandas_utils import *
from core.time_series_processing import compute_cum_energy, compute_charging_n_discharging

### Data Extraction

In [None]:
# Load the raw Ituran export from the local cache; `signal_time` is parsed as datetimes.
# NOTE(review): `pd` is not imported explicitly in this notebook — presumably it is
# re-exported by `from core.pandas_utils import *`; confirm.
raw_tss = pd.read_csv("./data_cache/ituran_response.csv", parse_dates=["signal_time"])
raw_tss

In [None]:
# Number of rows per signal, most frequent first (the default ordering); missing names are counted too.
raw_tss["signal_name"].value_counts(dropna=False)

In [None]:
# Number of rows per vehicle, most frequent first (the default ordering).
raw_tss["dataran_id"].value_counts()

In [None]:
# Restrict the exploration to the four vehicles that have the most rows.
rows_per_vehicle = raw_tss["dataran_id"].value_counts()
test_vins = rows_per_vehicle.index[:4].astype("string")

# Compare identifiers as strings on both sides of the membership test.
raw_tss_str_ids = raw_tss.astype({"dataran_id": "string"})
test_raw_tss = raw_tss_str_ids.query("dataran_id in @test_vins")
test_raw_tss

The data we are interested in resides in the columns `signal_name` and `LocTime_utc`.  
We will first perform a split to obtain the variable names and values in two corresponding columns.  
Then, we will pivot those columns to get the data into a useful format.  

In [None]:
# Key columns of the raw long-format export.
# NOTE(review): not referenced by the other visible cells — confirm whether it is still needed.
INDEX_COLS = [
    'dataran_id',
    'signal_time',
]

# Raw signal / column names -> short snake_case names used in the rest of the notebook.
COLUMNS_NAMES_MAP = {
    "Electric Data - Battery Status Of Charge - 2334": "soc",
    "Electric Data - Charging AC Mode - 2227": "charging_ac_mode",
    "Electric Data - Charging Current - 232": "charging_current",
    "Electric Data - Charging DC Mode - 9629": "charging_dc_mode",
    "Electric Data - Charging Voltage - 7C": "charging_voltage",
    "Electric Data - Ready Switch Open - 2015": "switch_open",
    "Electric Data - Time Remaining for Charge - 2291": "time_remaining_for_charge",
    "Electric Data - Vehicle Range Of Battery - 2229": "estimated_range",
    "signal_time": "date",
    "dataran_id": "vehicle_id",
    "vehicle_make": "make",
}

# Target dtypes, keyed by the renamed columns.
# Pivoting signals into columns can leave missing cells, so flags and counters use
# pandas' nullable dtypes:
#   - plain "bool" would turn every missing value into True;
#   - plain "int" cannot represent a missing value at all, so the cast would fail.
DTYPES = {
    "date": "datetime64[ns]",
    "vehicle_id": "string",
    #"make": "string",
    "soc": "float",
    "charging_ac_mode": "boolean",
    "charging_current": "float",
    "charging_dc_mode": "boolean",
    "charging_voltage": "float",
    "switch_open": "boolean",
    "time_remaining_for_charge": "Int64",
    "estimated_range": "float",
}

In [None]:
# NOTE(review): `tss` is only assigned by the pivot/pipeline cell further down, so on a
# fresh kernel (Restart & Run All) this cell raises NameError. Run it after that cell,
# or move it below the pipeline.
tss.dtypes

In [None]:
def compute_charge_n_discharge(tss:DF) -> DF:
    """Flag charging / discharging rows from the direction of the SoC variations.

    The frame is modified in place and also returned (so it can be used with `.pipe`).

    Preconditions:
        `tss` must be sorted by `vehicle_id` then `date`: the rolling statistic is
        computed per vehicle and written back positionally, in group order.

    Columns read: `vehicle_id`, `date`, `soc`.
    Columns written:
        soc_ffilled          -- `soc` forward-filled within each vehicle.
        soc_diff             -- direction of the SoC variation: +1, -1, or NaN when unchanged/unknown.
        soc_diff_rolled_mean -- mean of `soc_diff` over the trailing 20 minutes, per vehicle.
        in_charge            -- True where the closest known rolled means before and after are both > 0.
        in_discharge         -- True where the closest known rolled means before and after are both < 0.
        in_charge_idx        -- running count of `in_charge` rows per vehicle.
    """
    tss["soc_ffilled"] = tss.groupby("vehicle_id")["soc"].ffill()
    tss["soc_diff"] = tss.groupby("vehicle_id")["soc_ffilled"].diff()
    # Keep only the direction of the variation; a zero variation gives 0/0 = NaN and is
    # therefore ignored by the rolling mean below.
    tss["soc_diff"] /= tss["soc_diff"].abs()
    # The groupby-rolling result carries its own index, so it cannot be aligned on the
    # labels of `tss`. Assign by position instead: with the frame sorted by
    # (vehicle_id, date) the result rows come out in exactly the row order of `tss`,
    # whatever the frame's index is.
    tss["soc_diff_rolled_mean"] = (
        tss
        .groupby("vehicle_id")
        .rolling(window=pd.Timedelta(minutes=20), on="date")
        ["soc_diff"]
        .mean()
        .to_numpy()
    )
    soc_diff_ffilled = tss.groupby("vehicle_id")["soc_diff_rolled_mean"].ffill()
    soc_diff_bfilled = tss.groupby("vehicle_id")["soc_diff_rolled_mean"].bfill()
    # Comparisons against NaN are False, so rows with no known trend on one side are
    # flagged neither as charge nor as discharge.
    tss["in_charge"] = soc_diff_ffilled.gt(0) & soc_diff_bfilled.gt(0)
    tss["in_discharge"] = soc_diff_ffilled.lt(0) & soc_diff_bfilled.lt(0)
    # NOTE(review): this increments on every charging row, so it is a running row count
    # rather than a per-charging-session identifier — confirm that this is intended.
    tss["in_charge_idx"] = tss.groupby("vehicle_id")["in_charge"].cumsum()
    #tss["in_charge_perf"] = 
    #tss["in_discharge_idx"] = tss.groupby("vehicle_id")["in_discharge"].cumsum()
    return tss

def compute_current_vars(tss:DF) -> DF:
    """Derive power and cumulated-energy columns from charging current and voltage.

    Writes `charging_power` (current * voltage, kept only on `in_charge` rows) and
    `power` (the same product, kept only on the other rows), then delegates to the
    project helper `compute_cum_energy` to build `cum_energy_added` and
    `cum_energy_spent`. Both cumulated columns are finally shifted so that each
    vehicle starts from its own first value.

    Requires the `in_charge` flag to be present already. The exact integration done by
    `compute_cum_energy` is not visible from this notebook.
    """
    electrical_power = tss.eval("charging_current * charging_voltage")
    is_charging = tss["in_charge"]
    # Same product, split into two mutually exclusive series.
    tss["charging_power"] = electrical_power.mask(~is_charging, pd.NA)
    tss["power"] = electrical_power.mask(is_charging, pd.NA)

    tss = compute_cum_energy(tss, power_col="charging_power", cum_energy_col="cum_energy_added")
    tss = compute_cum_energy(tss, power_col="power", cum_energy_col="cum_energy_spent")

    # Re-base each vehicle's cumulated energy on its first value.
    for energy_col in ("cum_energy_added", "cum_energy_spent"):
        tss[energy_col] -= tss.groupby("vehicle_id")[energy_col].transform("first")
    return tss

# Long -> wide: one row per (vehicle, timestamp, make, model), one column per signal.
wide_tss = (
    test_raw_tss
    .pivot_table(
        values="signal_value",
        index=['dataran_id', 'signal_time', 'vehicle_make', 'vehicle_model'],
        columns="signal_name",
        #dropna=False,
    )
    .reset_index()
)

# Harmonise the names, order each vehicle chronologically and apply the target dtypes
# (columns whose conversion fails are left untouched).
typed_tss = (
    wide_tss
    .rename(columns=COLUMNS_NAMES_MAP, errors="ignore")
    .sort_values(by=["vehicle_id", "date"], ascending=True)
    .astype(DTYPES, errors="ignore")
)

# Derive the charge/discharge flags first: the power/energy step relies on `in_charge`.
tss:DF = compute_current_vars(compute_charge_n_discharge(typed_tss))
tss

In [None]:
# `sanity_check` is not defined in this notebook; presumably it comes from the
# `core.pandas_utils` star import — TODO confirm what it reports.
sanity_check(tss)

## EDA

### Data sparsity

In [None]:
# Columns to draw, one figure per column; the commented entries are currently disabled.
COLS_TO_PLOT = [
    "cum_energy_added",
    "cum_energy_spent",
    #"charging_ac_mode",
    #"charging_current",
    #"charging_dc_mode",
    #"charging_voltage",
    #"time_remaining_for_charge",
]
for col in COLS_TO_PLOT:
    # Only keep the rows where both the timestamp and the plotted value are known.
    plottable = tss.dropna(subset=["date", col], how="any").set_index("vehicle_id", drop=False)
    # One facet row (and one colour) per vehicle.
    fig = px.line(plottable, facet_row="vehicle_id", x="date", y=col, color="vehicle_id")
    fig.update_layout(height=1000)
    fig.show()

In [None]:
# Column dtypes of the raw long-format export, as loaded by `pd.read_csv`.
raw_tss.dtypes

In [None]:
# SoC over time for a single vehicle, coloured by the detected charging flag.
vehicle_soc = tss.query("vehicle_id == '666112423'").dropna(subset=["date", "soc"])
px.scatter(vehicle_soc, x="date", y="soc", color="in_charge", hover_data="soc_diff_rolled_mean")

In [None]:
# Power outside of the detected charging periods, for a single vehicle.
vehicle_power = tss.query("vehicle_id == '666112423'").dropna(subset=["date", "power"])
px.scatter(vehicle_power, x="date", y="power")

In [None]:
# Summary statistics of the two electrical measurements.
electrical_cols = ["charging_voltage", "charging_current"]
tss[electrical_cols].describe()

In [None]:
# Power during the detected charging periods, for a single vehicle.
vehicle_charging_power = tss.query("vehicle_id == '666112423'").dropna(subset=["date", "charging_power"])
px.scatter(vehicle_charging_power, x="date", y="charging_power")

In [None]:
# Cumulated energy added while charging, for a single vehicle.
# Rows are filtered on the plotted column (as in the other single-vehicle plots), so the
# line is drawn from every row that has a value to plot.
px.line(
    tss.query("vehicle_id == '666112423'").dropna(subset=["date", "cum_energy_added"]),
    x="date",
    y="cum_energy_added",
)

In [None]:
# Number of distinct vehicles per model; `tss` only holds the test vehicles selected earlier.
tss.groupby("vehicle_model")["vehicle_id"].nunique()

## Conclusion
We can see that while the date range in the time series is 6 months, there are only 2 days' worth of data.  
Given the variables at hand, we *could* implement an SoH estimation similar to the one we used in watea.  
For that we would need more data and ideally the temperature.  
If we don't have the temperature, we would need to check how the models handle heat differentials (do they use a heater to compensate for low temperatures? Is the battery simply not affected by the temperature?).