# SOH estimation EDA
The goal of this notebook is to find a way (or at least a lead) to compute the SoH of Tesla vehicles.  
We are using the data received from the personal API, not to be confused with the fleet telematics API, which is what we will eventually (one day, maybe, I hope...) use.

## Setup

### Imports

In [None]:
from core.pandas_utils import *
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, PolynomialFeatures, StandardScaler

from transform.fleet_info.tesla_fleet_info import get_fleet_info
from transform.fleet_info.ayvens_fleet_info import fleet_info as ayvens_fleet_info
from transform.raw_tss.tesla_raw_tss import get_raw_tss
from transform.processed_tss.tesla_processed_tss import ProcessedTimeSeries
from transform.tesla.tesla_config import *
from core.pandas_utils import floor_to, uniques_as_series, series_start_end_diff
from core.plt_utils import plt_3d_df

### Data extraction

In [None]:
# Stack the Ayvens fleet table on top of the Tesla one and keep only the rows whose make is 'tesla'.
fleet_info = (
    pd.concat((ayvens_fleet_info, get_fleet_info()))
    .query("make == 'tesla'")
)
# Temporarily lift the column cap so no column is elided in the rendered tables.
with pd.option_context('display.max_columns', None):
    display(fleet_info)
    # sanity_check is a project helper; presumably a per-column summary — TODO confirm.
    display(sanity_check(fleet_info))


In [None]:
# Look up the fleet entry of one specific vehicle by its VIN.
fleet_info.query("vin == '5YJ3E7EB1LF765211'")

In [None]:
# Load the raw time series and join it with the fleet table on the VIN.
# left_merge is a project helper; presumably it copies the "model" and "version"
# columns of fleet_info onto the matching rows — TODO confirm.
raw_tss = left_merge(
    get_raw_tss(force_update=False),
    fleet_info,
    left_on="vin",
    right_on=["vin"],
    src_dest_cols=["model", "version"],
)
# Show every column of the merged frame and of its sanity check.
with pd.option_context('display.max_columns', None):
    display(raw_tss)
    display(sanity_check(raw_tss))

## Raw time Series analysis

### Visualization
Let's view some time series to check that everything seems normal.

In [None]:
# Only a handful of vehicles are plotted so that each facet stays readable.
N_VINS_TO_PLOT = 4
# Fixed seed: without it a different set of vehicles is drawn on every run,
# so the figures (and the comments written about them) are not reproducible.
VIN_SAMPLING_SEED = 42

unique_vins = uniques_as_series(raw_tss["vin"])
vins = unique_vins.sample(
    # Never request more VINs than exist; sample() raises otherwise.
    n=min(N_VINS_TO_PLOT, len(unique_vins)),
    random_state=VIN_SAMPLING_SEED,
)
# Index by VIN (keeping the column for faceting) to select the rows of the sampled vehicles.
raw_tss_to_plot = raw_tss.set_index("vin", drop=False).loc[vins]
fig = px.scatter(raw_tss_to_plot, x="readable_date", y="battery_level", facet_col="vin", facet_col_wrap=1)
fig.update_layout(height=1000)

In [None]:
# Power over time, one facet row per sampled vehicle.
fig = px.scatter(
    raw_tss_to_plot,
    x="readable_date",
    y="power",
    facet_col="vin",
    facet_col_wrap=1,
)
# update_layout returns the figure, so it is still the cell's displayed value.
fig.update_layout(height=1000)

In [None]:
# Charger power over time, one facet row per sampled vehicle.
fig = px.scatter(
    raw_tss_to_plot,
    x="readable_date",
    y="charger_power",
    facet_col="vin",
    facet_col_wrap=1,
)
# update_layout returns the figure, so it is still the cell's displayed value.
fig.update_layout(height=1000)

We can see that the data is there but that it is fairly sparse.  

### Dataset skewness


Let's check the skewness of our dataset across models to avoid bad surprises:

In [None]:
# Number of raw rows recorded per vehicle, most represented vehicle first.
vins_stats = raw_tss["vin"].value_counts().sort_values(ascending=False).to_frame()

# Number of raw rows per model. This relies on raw_tss carrying a "model" column
# (expected from the merge with fleet_info when raw_tss is built — confirm if that merge changes).
# rename_axis/reset_index(name=...) give the same column names whatever the pandas version.
rows_per_model = (
    raw_tss["model"]
    .value_counts()
    .rename_axis("model")
    .reset_index(name="count")
)
# Render the distribution so that the conclusion drawn below is backed by an actual output.
px.pie(rows_per_model, values="count", names="model", title="Raw rows per model")

The number of rows per model is very skewed.  
We will try to implement a solution that handles all models, but this might end up being possible only for the most common ones.

## Raw ts processing

In [None]:
# Build the processed time series used by every cell below.
# NOTE(review): force_update=False presumably reuses a previously computed result — TODO confirm.
tss:DF = ProcessedTimeSeries(force_update=False)

In [None]:
# Isolate the processed time series of a single vehicle for manual inspection.
ts = tss.query("vin == '5YJ3E7EB1KF334219'")
# Optional plots of that vehicle, currently disabled:
#px.line(ts, x="date", y="power").show()
#px.line(ts, x="date", y="cum_energy")
#px.line(ts, x="date", y="soc", markers=True)

## Energy distribution
We will try to implement an SoH estimation similar to the one we used for watea.  

### Discharge energy distribution
For now we will focus on only the most common model.  

In [None]:
# List the available columns of the processed time series, one name per line.
print("\n".join(map(str, tss.columns)))

In [None]:
# Bin width used to floor the median power of each charging point.
POWER_FLOORING = 3


def _first_mode(values):
    """Return one most-frequent value of ``values``, or ``pd.NA`` when there is none.

    ``Series.mode`` returns every tied value (and nothing at all for an all-NaN group),
    which is not a scalar and therefore not a valid aggregation result. Keeping the
    first mode always yields exactly one value per group.
    """
    modes = values.mode()
    return modes.iloc[0] if len(modes) else pd.NA


# One row per (vehicle, charge, floored SoC) for Model 3 Rear-Wheel Drive vehicles,
# restricted to the rows flagged by in_charge_perf_mask.
charging_points:DF = (
    tss
    # Difference the cumulative counter within each vehicle: a frame-wide diff() would give
    # the first row of every VIN the difference with the previous vehicle's last row.
    # NOTE(review): assumes rows are in chronological order within a VIN — confirm.
    .assign(energy_added=lambda df: df.groupby("vin")["charge_energy_added"].diff())
    .query("model == 'Model 3' & version == 'Rear-Wheel Drive' & in_charge_perf_mask")
    .groupby(["vin", "in_charge_perf_idx", "floored_soc"])
    .agg(
        # Charger / charge-session descriptors: median for numeric columns, mode for state-like ones.
        charge_current_request=pd.NamedAgg("charge_current_request", "median"),
        charge_current_request_max=pd.NamedAgg("charge_current_request_max", "median"),
        charge_enable_request=pd.NamedAgg("charge_enable_request", _first_mode),
        charge_energy_added=pd.NamedAgg("charge_energy_added", "median"),
        charge_limit_soc=pd.NamedAgg("charge_limit_soc", "median"),
        charge_limit_soc_max=pd.NamedAgg("charge_limit_soc_max", "median"),
        charge_limit_soc_min=pd.NamedAgg("charge_limit_soc_min", "median"),
        charge_limit_soc_std=pd.NamedAgg("charge_limit_soc_std", "median"),
        charge_port_cold_weather_mode=pd.NamedAgg("charge_port_cold_weather_mode", _first_mode),
        charge_rate=pd.NamedAgg("charge_rate", "median"),
        charger_actual_current=pd.NamedAgg("charger_actual_current", "median"),
        charger_pilot_current=pd.NamedAgg("charger_pilot_current", "median"),
        charger_power=pd.NamedAgg("charger_power", "median"),
        charger_voltage=pd.NamedAgg("charger_voltage", "median"),
        charging_state=pd.NamedAgg("charging_state", _first_mode),
        fast_charger_present=pd.NamedAgg("fast_charger_present", _first_mode),
        fast_charger_type=pd.NamedAgg("fast_charger_type", _first_mode),

        odometer=pd.NamedAgg("ffilled_odometer", "mean"),
        # Total of the per-row increments computed above, i.e. energy added within this group.
        energy_added=pd.NamedAgg("energy_added", "sum"),
        power=pd.NamedAgg("power", "median"),
        battery_heater=pd.NamedAgg("battery_heater", _first_mode),
        # NOTE(review): these source columns are spelled "ffiled_" whereas the odometer one is
        # "ffilled_" — confirm against the processed time series column names.
        inside_temp=pd.NamedAgg("ffiled_inside_temp", "median"),
        outside_temp=pd.NamedAgg("ffiled_outside_temp", "median"),
        # Result of the project helper series_start_end_diff on the group's dates, in seconds.
        sec_duration=pd.NamedAgg("date", lambda s: series_start_end_diff(s).total_seconds()),
        date=pd.NamedAgg("date", "first"),
        soc=pd.NamedAgg("floored_soc", "mean"),
        # Number of raw rows that were folded into this charging point.
        size=pd.NamedAgg("floored_soc", "size"),
    )
    .reset_index()
    # Sign convention: positive when it is warmer outside than inside.
    .eval("inside_to_outside_temp = outside_temp - inside_temp")
    # Built inside the chain so the frame is complete as soon as it is bound to its name.
    .assign(floored_power=lambda df: floor_to(df["power"], POWER_FLOORING))
)


In [None]:
# Pairwise correlations between the numeric columns of the charging points.
numeric_charging_points = charging_points.select_dtypes(include='number')
corr = numeric_charging_points.corr()
display(corr)
# Rank the columns by the sum of their absolute correlations, largest first.
total_abs_corr = corr.abs().sum()
total_abs_corr.sort_values(ascending=False)

In [None]:
# Render the project's sanity check of the aggregated charging points.
display(sanity_check(charging_points))

In [None]:
# Keep only the charging points whose added energy lies strictly between 0 and 10.
# Stricter filter tried previously:
#   "energy_added > 0.5 & energy_added < 10 & inside_temp > 17 & inside_temp < 35 & fast_charger_type == 'Combo'"
plausible_charging_points = charging_points.query("energy_added < 10 & energy_added > 0")

# 3D view: charger power and duration against energy added, coloured by inside temperature.
plt_3d_df(
    df=plausible_charging_points,
    x="charger_power",
    y="sec_duration",
    z="energy_added",
    color="inside_temp",
    colorscale="Rainbow",
    width=1700,
    height=900,
)

In [None]:
# Median energy added for each floored power bin.
energy_added_median_over_power = (
    charging_points
    .groupby("floored_power")["energy_added"]
    .median()
    .reset_index()
)
px.line(energy_added_median_over_power, x="floored_power", y="energy_added")

In [None]:
# Keep only the bins whose floored power lies strictly between -100 and -POWER_FLOORING.
power_window = f"floored_power < -{POWER_FLOORING} & floored_power > -100"
energy_added_median_over_power = energy_added_median_over_power.query(power_window)
# Anchor the y axis to the x axis with a 1:1 ratio so both use the same scale.
(
    px.line(energy_added_median_over_power, x="floored_power", y="energy_added")
    .update_layout(yaxis_scaleanchor="x", yaxis_scaleratio=1)
)

In [None]:
# Degree of the polynomial modelling energy added as a function of floored power.
POLYNOMIAL_DEGREE = 10

# Charging points used for the fit: floored power strictly between -100 and -POWER_FLOORING,
# energy added strictly between 0 and 10, ordered by floored power so the fitted curve
# can be drawn as a line.
charging_points_to_fit = (
    charging_points
    .query(f"floored_power < -{POWER_FLOORING} & floored_power > -100 & energy_added < 10 & energy_added > 0")
    .sort_values("floored_power")
)

POLYNOMIAL_LINEAR_REGRESSION_PIPELINE = Pipeline([
    # Accept 1-D inputs by turning them into a single-feature column.
    ('reshape', FunctionTransformer(lambda x: x.reshape(-1, 1))),
    # Standardise before the polynomial expansion: raw powers close to -100 raised to the
    # 10th power reach ~1e20, which makes the least-squares problem badly conditioned.
    # Polynomials are closed under affine rescaling of x, so the model family is unchanged.
    ('scale', StandardScaler()),
    ('poly_features', PolynomialFeatures(degree=POLYNOMIAL_DEGREE)),
    ('regressor', LinearRegression())
])

# Same feature matrix for fitting and for predicting the expected energy of each point.
floored_power_feature = charging_points_to_fit["floored_power"].values.reshape(-1, 1)
soh_estimator = POLYNOMIAL_LINEAR_REGRESSION_PIPELINE.fit(
    X=floored_power_feature,
    y=charging_points_to_fit["energy_added"].values,
)
charging_points_to_fit["expected_energy_added"] = soh_estimator.predict(X=floored_power_feature)

# Observed points (raw power) with the fitted curve (floored power) on top.
# charging_points_to_fit already satisfies the filter above, so it is not re-applied here.
px.scatter(
    charging_points_to_fit,
    x="power",
    y="energy_added",
    color="vin",
).add_trace(px.line(charging_points_to_fit, y="expected_energy_added", x="floored_power").data[0])

In [None]:
# SoH in percent: energy actually added relative to the energy the fitted model expects.
measured_over_expected = charging_points_to_fit["energy_added"].div(
    charging_points_to_fit["expected_energy_added"]
)
charging_points_to_fit["soh"] = measured_over_expected.mul(100)

# One dot per charging point, to see how the estimate evolves with mileage.
px.scatter(charging_points_to_fit, x="odometer", y="soh", color="vin")

In [None]:
# Collapse the charging points into one row per (vehicle, charge): median SoH and the
# odometer of the group's last row (rows follow the order of charging_points_to_fit,
# which is sorted by floored power, not by date).
charges = (
    charging_points_to_fit
    .groupby(["vin", "in_charge_perf_idx"])
    .agg({
        "soh": "median",
        "odometer": "last",
    })
    .reset_index()
)
# A bare `charges` expression is only rendered when it is the last statement of the cell,
# so the table has to be displayed explicitly here.
display(charges)
px.scatter(
    charges,
    x="odometer",
    y="soh",
    color="vin",
)