# Tesla fleet info bug check

There seem to be discrepancies between the fleet info and the processed time series (tss) data.
This notebook keeps track of those discrepancies.

In [None]:
# Make sure the local `data_cache` directory exists before anything is written to it
# (the parquet cache paths and CSV exports in this notebook all live under it).
# `-p` keeps the command from failing when the directory is already there.
! mkdir -p data_cache

In [None]:
import plotly.express as px

from core.sql_utils import *
from core.pandas_utils import *
from core.plt_utils import *
from core.caching_utils import cache_result
from transform.fleet_info.main import fleet_info
from transform.processed_tss.ProcessedTimeSeries import ProcessedTimeSeries

In [None]:
@cache_result("data_cache/tesla_tss.parquet", on='local_storage')
def get_tss() -> DF:
    """Return the processed time series for the "tesla" make.

    The result goes through the `cache_result` decorator, configured with the
    path `data_cache/tesla_tss.parquet` on `local_storage` (presumably the
    frame is persisted there and reused on later calls -- confirm in
    `core.caching_utils`).

    NOTE(review): the annotation says `DF` while the body builds a
    `ProcessedTimeSeries`; presumably that class is DataFrame-like -- confirm.
    """
    tesla_tss = ProcessedTimeSeries("tesla")
    return tesla_tss

@cache_result("data_cache/tesla_charges.parquet", on='local_storage')
def get_charges() -> DF:
    """Return the per-charge aggregation of the Tesla time series.

    Loads the time series with `get_tss()` and hands it to `compute_charges`.
    The result goes through the `cache_result` decorator, configured with the
    path `data_cache/tesla_charges.parquet` on `local_storage`.
    """
    tesla_tss = get_tss()
    return tesla_tss.pipe(compute_charges)

def compute_charges(tss:DF) -> DF:
    """Aggregate a time series frame into one row per charging session.

    Steps:
      1. Drop from `tss` every column that `fleet_info` also provides (all of
         its columns except `vin`) and re-attach them with a left merge on
         `vin`, so the fleet attributes come from the current `fleet_info`.
      2. Keep only the rows where `trimmed_in_charge` is true.
      3. Group by (`vin`, `trimmed_in_charge_idx`) and aggregate each group.
      4. Add the derived columns `soh` and `model_version`.

    Args:
        tss: Time series frame. It must contain `vin`, `trimmed_in_charge`,
            `trimmed_in_charge_idx` and the signal columns aggregated below;
            `capacity`, `model`, `version` and `tesla_code` must be available
            either from `tss` or from `fleet_info` after the merge.

    Returns:
        A frame with `vin` and `trimmed_in_charge_idx` as regular columns plus
        the aggregated columns, `soh` and `model_version`.

    Notes:
        * `soh` is `energy_added / (soc_diff / 100.0 * capacity)`; the division
          by 100.0 suggests `soc` is expressed in percent -- confirm.
        * `series_start_end_diff` comes from a project import; presumably it
          returns the difference between the last and first value -- confirm.
        * NOTE(review): groups whose `soc_diff` is 0 are not filtered out
          before the `soh` division.
    """
    # Every fleet_info column except the join key: these get refreshed by the merge.
    fleet_columns = fleet_info.drop(columns=["vin"]).columns
    return (
        tss
        # errors="ignore": a (possibly cached) tss that lacks some fleet_info
        # column must not raise KeyError here -- the merge below adds it anyway.
        .drop(columns=fleet_columns, errors="ignore")
        .merge(fleet_info, on="vin", how="left")
        .query("trimmed_in_charge")
        .groupby(["vin", "trimmed_in_charge_idx"])
        .agg(
            energy_added=pd.NamedAgg("charge_energy_added", series_start_end_diff),
            soc_diff=pd.NamedAgg("soc", series_start_end_diff),
            soc_start=pd.NamedAgg("soc", "first"),
            soc_end=pd.NamedAgg("soc", "last"),
            inside_temp=pd.NamedAgg("inside_temp", "mean"),
            outside_temp=pd.NamedAgg("outside_temp", "mean"),
            capacity=pd.NamedAgg("capacity", "first"),
            odometer=pd.NamedAgg("odometer", "first"),
            fast_charger_type=pd.NamedAgg("fast_charger_type", "first"),
            # Number of time series rows in the charging session.
            size=pd.NamedAgg("soc", "size"),
            model=pd.NamedAgg("model", "first"),
            version=pd.NamedAgg("version", "first"),
            date=pd.NamedAgg("date", "first"),
            charge_rate=pd.NamedAgg("charge_rate", "median"),
            fast_charger_present=pd.NamedAgg("fast_charger_present", "median"),
            charge_current_request=pd.NamedAgg("charge_current_request", "median"),
            tesla_code=pd.NamedAgg("tesla_code", "first"),
            battery_heater=pd.NamedAgg("battery_heater", "median"),
        )
        # Turn the group keys back into ordinary columns.
        .reset_index(drop=False)
        .eval("soh = energy_added / (soc_diff / 100.0 * capacity)")
        .eval("model_version = model + version")
    )

In [None]:
# Compare VINs using the per-charge table: it is far smaller than the full tss.
charges = get_charges()
all_vins_in_charges = charges["vin"].drop_duplicates()

# Tesla rows of fleet_info that actually carry a VIN.
tesla_fleet_info_vins = fleet_info.query("make == 'tesla' & vin.notna()")["vin"]

# Membership masks, computed once and reused for the three partitions below.
fleet_vin_has_charges = tesla_fleet_info_vins.isin(all_vins_in_charges)
charge_vin_in_fleet_info = all_vins_in_charges.isin(tesla_fleet_info_vins)

vins_in_fleet_info_and_charges = tesla_fleet_info_vins.loc[fleet_vin_has_charges]
vins_in_charges_not_in_fleet_info = all_vins_in_charges.loc[~charge_vin_in_fleet_info]
vins_in_fleet_info_not_in_charges = tesla_fleet_info_vins.loc[~fleet_vin_has_charges]

# Show the size of each partition.
display(f"vins_in_fleet_info_and_charges: {len(vins_in_fleet_info_and_charges)}")
display(f"vins_in_charges_not_in_fleet_info: {len(vins_in_charges_not_in_fleet_info)}")
display(f"vins_in_fleet_info_not_in_charges: {len(vins_in_fleet_info_not_in_charges)}")

# Dump each partition to its own CSV in the cache directory (same order as above).
vin_csv_exports = {
    "data_cache/vins_in_fleet_info_and_charges.csv": vins_in_fleet_info_and_charges,
    "data_cache/vins_in_charges_not_in_fleet_info.csv": vins_in_charges_not_in_fleet_info,
    "data_cache/vins_in_fleet_info_not_in_charges.csv": vins_in_fleet_info_not_in_charges,
}
for csv_path, vin_series in vin_csv_exports.items():
    vin_series.to_csv(csv_path, index=False)