# Infer results for vehicle with missing data
We need to make a BIB report for a Tesla for which we don't have an API Bearer.  
The last data that we have from it dates back to the 27th of November 2024.  
We also don't have the tesla_code for this vehicle.  
The vin of the vehicle is `5YJ3E7EB7KF474436`.

Ultimately, we need to estimate the current SoH, the odometer, and the ratio of charge levels.  

## Setup

### Imports

In [None]:
import plotly.express as px

from core.pandas_utils import *
from core.stats_utils import lr_params_as_series
from core.caching_utils import cache_result
from transform.raw_results.config import LEVEL_1_MAX_POWER, LEVEL_2_MAX_POWER
from transform.processed_results.config import UPDATE_FREQUENCY
from transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries

### Data extraction

In [None]:
# Columns requested from the processed Tesla time series; the list is passed
# as `use_cols` by `get_results`. The declaration order is kept as is.
USE_COLS = [
    "vin", "trimmed_in_charge_idx", "trimmed_in_charge",
    "charge_energy_added", "soc", "inside_temp", "capacity",
    "odometer", "model", "date", "tesla_code",
    "battery_heater", "charging_power", "version",
]

@cache_result("data_cache/raw_tesla_results.parquet", "local_storage")
def get_results() -> DF:
    """Build one row per charge (``vin`` x ``trimmed_in_charge_idx``).

    Rows of the processed Tesla time series flagged ``trimmed_in_charge`` are
    grouped per charge and reduced to a handful of aggregates. A raw SoH is
    then derived as ``energy_added / (soc_diff / 100 * capacity)``, together
    with ``fixed_soh_min_end``, a corrected variant that only differs from
    ``soh`` for the ``MTY13`` tesla_code.

    NOTE(review): the ``cache_result`` decorator presumably persists the frame
    to the given parquet path -- confirm in ``core.caching_utils``.

    Returns:
        DF: per-charge results sorted by ``tesla_code``, ``vin`` and ``date``.
    """
    # Output column -> (source column, aggregation); the tuple form is
    # equivalent to pd.NamedAgg.
    charge_aggregations = {
        "energy_added_min": ("charge_energy_added", "min"),
        "energy_added_end": ("charge_energy_added", "last"),
        "soc_min": ("soc", "min"),
        "soc_end": ("soc", "last"),
        "inside_temp": ("inside_temp", "mean"),
        "capacity": ("capacity", "first"),
        "odometer": ("odometer", "first"),
        "version": ("version", "first"),
        "size": ("soc", "size"),
        "model": ("model", "first"),
        "date": ("date", "first"),
        "charging_power": ("charging_power", "median"),
        "tesla_code": ("tesla_code", "first"),
    }
    # Derived columns, evaluated in order since each may use the previous ones.
    # NOTE: a `soc_diff > 40 & soh.between(0.75, 1.05)` filter and the
    # replacement of `soh` by `fixed_soh_min_end` are intentionally not applied.
    derived_columns = (
        "soc_diff = soc_end - soc_min",
        "energy_added = energy_added_end - energy_added_min",
        "soh = energy_added / (soc_diff / 100.0 * capacity)",
        "bottom_soh = soh.between(0.75, 0.9)",
        "fixed_soh_min_end = soh.mask(tesla_code == 'MTY13', soh / 0.96)",
        "fixed_soh_min_end = fixed_soh_min_end.mask(bottom_soh & tesla_code == 'MTY13', fixed_soh_min_end + 0.08)",
    )

    in_charge_rows = TeslaProcessedTimeSeries("tesla", use_cols=USE_COLS).query("trimmed_in_charge")
    per_charge = (
        in_charge_rows
        .groupby(["vin", "trimmed_in_charge_idx"])
        .agg(**charge_aggregations)
        .reset_index()
    )
    for expression in derived_columns:
        per_charge = per_charge.eval(expression)

    return per_charge.sort_values(["tesla_code", "vin", "date"])


In [None]:
# Per-charge aggregates returned by `get_results`
# (one row per vin / trimmed_in_charge_idx).
results = get_results()

In [None]:
# VIN of the vehicle this notebook infers results for.
TARGET_VIN = "5YJ3E7EB7KF474436"

# Boolean flag used by the following cells to select the target vehicle's charges.
results["target_vin"] = results["vin"] == TARGET_VIN

In [None]:
# Charges of the target vehicle; reuses the `target_vin` flag instead of
# repeating the VIN literal, like the rest of the notebook does.
results.query("target_vin")

## Inferring results

### Inferring the odometer

In [None]:
# Odometer of the target vehicle at each charge date, with an OLS trendline.
target_vehicle_charges = results.query("target_vin")
px.scatter(target_vehicle_charges, x="date", y="odometer", trendline="ols")

In [None]:
# Current time expressed as integer seconds, i.e. the same encoding as the
# `int_date` regressor used for the odometer fit.
# NOTE(review): `pd.Timestamp.now()` is timezone-naive local time -- confirm
# this matches the timezone convention of the `date` column.
now_as_series = Series([pd.Timestamp.now()])
now_in_seconds = now_as_series.dt.as_unit("s").astype("int")
TARGET_DATE = now_in_seconds[0]
TARGET_DATE

In [None]:
# Fit odometer against the charge date encoded as integer seconds.
# Only the first two of the six values returned by `lr_params_as_series`
# are used here.
target_odometer_history = (
    results
    .query("target_vin")
    .eval("int_date = date.dt.as_unit('s').astype('int')")
)
slope, intercept, _, _, _, _ = lr_params_as_series(
    target_odometer_history,
    "int_date",
    "odometer",
)
slope, intercept

In [None]:
# Extrapolate the fitted odometer trend to TARGET_DATE.
# Uses the `slope` / `intercept` returned by the regression rather than
# hand-copied, rounded literals, so the estimate keeps full precision and
# stays in sync whenever the underlying data (and thus the fit) changes.
ESTIMATED_ODOMETER = TARGET_DATE * slope + intercept
ESTIMATED_ODOMETER

## Inferring the tesla_code
We need to find the tesla_code in order to infer the capacity, which in turn is needed to infer the SoH.  
According to a few VIN decoders, this is an all-wheel-drive 2019 Model 3.  
This only leaves the MT310 tesla_code as an option, which has a 75 kWh capacity.  

In [None]:
# Force the capacity of the target vehicle to 75.0, then recompute the SoH
# of every charge from the updated capacity column.
results = results.eval("capacity = capacity.mask(target_vin, 75.0)")
results = results.eval("soh = energy_added / (soc_diff / 100.0 * capacity)")

In [None]:
# SoH against odometer for the target vehicle only (capacity set to 75.0),
# keeping the charges where both values are available.
target_soh_points = (
    results
    .query("target_vin")
    .eval("capacity = 75.0")
    .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
    .dropna(subset=["soh", "odometer"])
)
px.scatter(target_soh_points, x="odometer", y="soh", trendline="ols")

We can see that the SoH values are surprisingly high for an odometer range of 83.5 to 90 thousand kilometers.  
Let's take a look at the description of the charges to see why that is.

In [None]:
# Summary statistics of the target vehicle's charges.
results.query("target_vin").describe()

In [None]:
# Median SoH over the charges of the target vehicle, with its capacity
# forced to 75.0 before the SoH is recomputed.
target_charge_soh = (
    results
    .query("target_vin")
    .eval("capacity = capacity.mask(target_vin, 75.0)")
    .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
)["soh"]
target_charge_soh.median()

### Comparing SoH to the rest of the MT310s

In [None]:
# SoH against odometer for every MT310 vehicle plus the target vehicle
# (whose capacity is forced to 75.0); points are colored by `target_vin`.
mt310_and_target_points = (
    results
    .query("tesla_code == 'MT310' | target_vin")
    .eval("capacity = capacity.mask(target_vin, 75.0)")
    .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
    .dropna(subset=["soh", "odometer"])
)
px.scatter(
    mt310_and_target_points,
    x="odometer",
    y="soh",
    color="target_vin",
    trendline="ols",
    opacity=0.4,
)

### Computing raw results

In [None]:
# Results are bucketed into 7-day periods.
# NOTE: this rebinds the UPDATE_FREQUENCY name imported from
# transform.processed_results.config for the rest of the notebook.
UPDATE_FREQUENCY = pd.Timedelta(days=7)


def agg_results_by_update_frequency(results: DF) -> DF:
    """Aggregate per-charge results per vehicle and per update period.

    Each ``date`` is floored to a multiple of ``UPDATE_FREQUENCY``, stripped
    of its timezone and reduced to a calendar date; rows are then grouped by
    ``vin`` and that floored date.

    The input frame is not modified: the floored dates are written to a copy
    (previously the caller's ``date`` column was overwritten in place).

    Args:
        results: per-charge results with at least the ``vin``, ``date``,
            ``odometer``, ``soh``, ``model`` and ``version`` columns. The
            ``level_1`` / ``level_2`` / ``level_3`` columns are optional and
            count as 0 when absent.

    Returns:
        DF: one row per (``vin``, period) with the last odometer, the median
        SoH, the first model/version and the summed level columns.
    """
    period_start = (
        pd.to_datetime(results["date"], format="mixed")
        .dt.floor(UPDATE_FREQUENCY)
        .dt.tz_localize(None)
        .dt.date
        .astype("datetime64[ns]")
    )
    return (
        results
        # `assign` works on a copy, so the caller's frame keeps its raw dates.
        .assign(
            date=period_start,
            # Level columns default to 0 when they don't exist.
            level_1=results.get("level_1", 0),
            level_2=results.get("level_2", 0),
            level_3=results.get("level_3", 0),
        )
        .groupby(["vin", "date"])
        .agg(
            odometer=pd.NamedAgg("odometer", "last"),
            soh=pd.NamedAgg("soh", "median"),
            model=pd.NamedAgg("model", "first"),
            version=pd.NamedAgg("version", "first"),
            level_1=pd.NamedAgg("level_1", "sum"),
            level_2=pd.NamedAgg("level_2", "sum"),
            level_3=pd.NamedAgg("level_3", "sum"),
        )
        .reset_index()
    )

# Split the SoC gained by each charge (as a fraction, hence the / 100) into
# three level columns according to the charging power:
#   level_1: power below LEVEL_1_MAX_POWER
#   level_2: power between LEVEL_1_MAX_POWER and LEVEL_2_MAX_POWER (inclusive)
#   level_3: power above LEVEL_2_MAX_POWER
results: DF = results.eval("level_1 = soc_diff * (charging_power < @LEVEL_1_MAX_POWER) / 100")
results = results.eval("level_2 = soc_diff * (charging_power.between(@LEVEL_1_MAX_POWER, @LEVEL_2_MAX_POWER)) / 100")
results = results.eval("level_3 = soc_diff * (charging_power > @LEVEL_2_MAX_POWER) / 100")

# Keep the target vehicle only and blank out SoH values outside [0.9, 1.05]
# (they become NaN), then aggregate per update period.
filtered_target_results = (
    results
    .query("target_vin")
    .eval("soh = soh.where(soh.between(0.9, 1.05))")
)
agg_target_results: DF = agg_results_by_update_frequency(filtered_target_results)
agg_target_results

In [None]:
# Total of each charge-level column over all aggregated periods of the
# target vehicle (raw sums, not yet normalized into ratios).
agg_target_results[["level_1", "level_2", "level_3"]].sum()

## Conclusion
Estimated odometer: 97.3 thousand km  
SoH: 99%  
charge_level_1_percentage: 0%  
charge_level_2_percentage: 100%  
charge_level_3_percentage: 0%  