In [None]:
import plotly.express as px

from core.stats_utils import *
from core.sql_utils import *
from core.pandas_utils import *
from core.plt_utils import *
from core.caching_utils import cache_result
from transform.fleet_info.main import fleet_info
from transform.processed_tss.ProcessedTimeSeries import ProcessedTimeSeries
from transform.results.main import process_results

In [None]:
# Load the cached charges parquet, pass it through left_merge with fleet_info
# (presumably attaching the listed fleet columns by vin -- confirm in
# core.pandas_utils), then keep rows with soc_diff above 20 and soh within
# [0.75, 1.0] (bounds inclusive).
charges = left_merge(
    pd.read_parquet("data_cache/tesla_charges.parquet"),
    fleet_info,
    "vin",
    "vin",
    src_dest_cols=["capacity", "tesla_code", "fleet", "fleet_name"],
).query("soc_diff > 20 & soh.between(0.75, 1.0)")

In [None]:
# Mean soh over the filtered charges; used below to re-centre the corrected values.
mean_soh = charges.loc[:, "soh"].mean()
# Fit parameters of soh against inside_temp; the result is indexed by
# 'slope' and 'intercept' in the next cell.
inside_temp_soh_lr = charges.pipe(lr_params_as_series, "inside_temp", "soh")
inside_temp_soh_lr

In [None]:
# soh_offset_pred: value of the fitted line at each row's inside_temp.
# offseted_soh: soh with that prediction subtracted and the overall mean added back.
charges = charges.eval(
    "soh_offset_pred = inside_temp * @inside_temp_soh_lr['slope'] + @inside_temp_soh_lr['intercept']"
).eval(
    "offseted_soh = soh - soh_offset_pred + @mean_soh"
)

# Corrected soh against inside_temp, coloured by fleet (missing fleet names are
# shown as 'Unknown'), with one OLS trendline over all points.
px.scatter(
    data_frame=charges.eval("fleet_name = fleet_name.fillna('Unknown')"),
    y="offseted_soh",
    x="inside_temp",
    color="fleet_name",
    trendline_scope="overall",
    trendline="ols",
)

In [None]:
def agg_soh_per_vin(charges:DF) -> DF:
    """Collapse charge-level rows into one summary row per vehicle (``vin``).

    For every ``vin`` the result holds the median ``soh``, the ``odometer`` and
    ``date`` of the most recent charge, and the first non-null ``fleet_name``.
    The aggregated frame is then passed through ``left_merge`` against
    ``fleet_info`` for ``fleet_name`` (presumably refreshing that column from
    the fleet table -- confirm in core.pandas_utils).

    Rows are ordered by ``date`` before grouping because the ``"last"`` and
    ``"first"`` aggregations are positional: without an explicit order they
    would return whichever row happens to come last/first in the input.

    Args:
        charges: Frame with at least the columns ``vin``, ``soh``,
            ``odometer``, ``fleet_name`` and ``date``.

    Returns:
        A frame with one row per ``vin`` and the columns ``vin``, ``soh``,
        ``odometer``, ``fleet_name`` and ``date``, as returned by
        ``left_merge``.
    """
    return (
        charges
        # Stable sort keeps the input order among rows sharing the same date.
        .sort_values("date", kind="stable")
        .groupby("vin")
        .agg(
            soh=("soh", "median"),
            odometer=("odometer", "last"),
            fleet_name=("fleet_name", "first"),
            date=("date", "last"),
        )
        .reset_index()
        .pipe(left_merge, fleet_info, "vin", "vin", src_dest_cols=["fleet_name"])
    )
# Per-vehicle summary of the raw charges; the 'Ayvens' rows are exported to CSV
# with floats written to two decimals.
raw_soh_per_vin = charges.pipe(agg_soh_per_vin)
(
    raw_soh_per_vin
    .query("fleet_name == 'Ayvens'")
    .to_csv("data_cache/raw_soh_per_vin.csv", index=False, float_format="%.2f")
)

In [None]:
# Raw per-vehicle soh against odometer, with one OLS trendline over all points.
px.scatter(
    data_frame=raw_soh_per_vin,
    y="soh",
    x="odometer",
    trendline_scope="overall",
    trendline="ols",
)

In [None]:
# Inspect the column dtypes that process_results produces for the charges frame.
charges.pipe(process_results).dtypes

In [None]:
# Same per-vehicle aggregation as for the raw charges, applied to the output of
# process_results. fleet_name is merged in first because agg_soh_per_vin
# aggregates that column (presumably process_results does not carry it -- confirm).
processed_soh_per_vin = agg_soh_per_vin(
    left_merge(
        process_results(charges),
        fleet_info,
        "vin",
        "vin",
        src_dest_cols=["fleet_name"],
    )
)
# Export the 'Ayvens' rows with floats written to two decimals.
(
    processed_soh_per_vin
    .query("fleet_name == 'Ayvens'")
    .to_csv("data_cache/processed_soh_per_vin.csv", index=False, float_format="%.2f")
)
# Processed per-vehicle soh against odometer, with one OLS trendline over all points.
px.scatter(
    data_frame=processed_soh_per_vin,
    y="soh",
    x="odometer",
    trendline_scope="overall",
    trendline="ols",
)

In [None]:
# Reference measurements read with explicit dtypes; VIN and 'SoH Readout' are
# renamed to vin / ground_truth_soh so they line up with the per-vehicle frames.
ground_truth = pd.read_csv(
    "data_cache/ground_truth.csv",
    dtype={
        "VIN": "string",
        "Brand (FlashTest)": "string",
        "Model Group (FlashTest)": "string",
        "Score Aviloo": "int64",
        "SoH Readout": "float64",
        "BIB SOH": "float64",
        "Mileage": "float64",
    },
).rename({"VIN": "vin", "SoH Readout": "ground_truth_soh"}, axis="columns")

In [None]:
# Keep only vehicles that have a reference measurement, attach ground_truth_soh,
# divide it by 100 (presumably percent -> fraction, matching soh -- confirm),
# then compute the signed and absolute residuals of the processed estimate.
processed_soh_per_vin_with_ground_truth = (
    left_merge(
        processed_soh_per_vin.query("vin in @ground_truth.vin"),
        ground_truth,
        "vin",
        "vin",
        src_dest_cols=["ground_truth_soh"],
    )
    .eval("ground_truth_soh = ground_truth_soh / 100.0")
    .eval("residual_soh = soh - ground_truth_soh")
    .assign(abs_residual_soh=lambda matched: matched["residual_soh"].abs())
)
# Show the matched rows, followed by their summary statistics.
display(
    processed_soh_per_vin_with_ground_truth,
    processed_soh_per_vin_with_ground_truth.describe(),
)

In [None]:
# Same comparison against the reference measurements, for the raw (unprocessed)
# per-vehicle estimates: keep vehicles with a reference value, attach
# ground_truth_soh, divide it by 100 (presumably percent -> fraction -- confirm),
# then compute the signed and absolute residuals.
raw_soh_per_vin_soh_per_vin_with_ground_truth = (
    left_merge(
        raw_soh_per_vin.query("vin in @ground_truth.vin"),
        ground_truth,
        "vin",
        "vin",
        src_dest_cols=["ground_truth_soh"],
    )
    .eval("ground_truth_soh = ground_truth_soh / 100.0")
    .eval("residual_soh = soh - ground_truth_soh")
    .assign(abs_residual_soh=lambda matched: matched["residual_soh"].abs())
)
# Show the matched rows, followed by their summary statistics.
display(
    raw_soh_per_vin_soh_per_vin_with_ground_truth,
    raw_soh_per_vin_soh_per_vin_with_ground_truth.describe(),
)