In [None]:
from pandas import DataFrame as DF
from pandas import Series
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from core.plt_utils import plt_charge_energy_data
from watea.watea_perfs import fleet_wise_perfs_of_watea
from watea.watea_constants import *
from watea.processed_watea_ts import processed_ts_of
from watea.watea_fleet_info import fleet_info_df
from watea.watea_perfs import energy_soh_perfs_of, fleet_perfs

In [None]:
# Legacy code: pandas `query` expression selecting the charging points of the
# most common charge regime. It is passed to `.query(...)` wherever the
# charging points are filtered in this notebook.
MOST_COMMON_CHARGE_REGIME_QUERY = (
    "energy_added > 300"
    " & sec_duration < 900"
    " & temp < 35"
    " & mean_voltage < 400"
    " & mean_current > -40"
)


In [None]:
def compute_charge_energy_median(charge_energy_points_df: DF) -> pd.Series:
    """Return smoothed medians of ``energy_added`` for the common charge regime.

    Steps:
      1. Keep only the rows matching ``MOST_COMMON_CHARGE_REGIME_QUERY``.
      2. Take the median of ``energy_added`` for every distinct combination of
         the first three index levels.
      3. Within every (level 0, level 1) group, smooth those medians with a
         centered rolling median over 4 points (``min_periods=1`` so the edges
         are kept).

    Args:
        charge_energy_points_df: Charging points with at least three index
            levels and the columns used by the regime query.
            NOTE(review): levels look like (odometer, temperature, soc) given
            how the result is used later in the notebook - confirm.

    Returns:
        Series of smoothed medians indexed by the first three index levels.
    """
    regime_energy = charge_energy_points_df.query(MOST_COMMON_CHARGE_REGIME_QUERY)["energy_added"]
    per_point_median = regime_energy.groupby(level=[0, 1, 2]).median()
    smoothed = (
        per_point_median
        .groupby(level=[0, 1])
        .rolling(4, center=True, min_periods=1)
        .median()
    )
    # The grouped rolling prepends the two group keys to the index; drop those
    # duplicates so the result is indexed like `per_point_median`.
    return smoothed.dropna().droplevel([0, 1])

def fit_poly_lr_to_charge_dist_xs(dist_median_xs: DF) -> Series:
    """Fit the charge-energy model to one group of medians and evaluate it on ``SOC_RANGE``.

    ``CHARGE_ENERGY_POINTS_TO_DIST_MODEL`` is fitted with the values of index
    level 2 as inputs and the medians as targets, then evaluated on every
    point of ``SOC_RANGE``. The predictions are clipped to the [300, 500]
    interval.

    Args:
        dist_median_xs: Medians of one group, with at least three index levels.
            NOTE(review): level 2 is presumably the state of charge - confirm.

    Returns:
        Series of clipped predictions indexed by ``SOC_RANGE``.
    """
    soc_points = dist_median_xs.index.get_level_values(2).values
    # The shared model object is fitted and then used immediately.
    fitted_model = CHARGE_ENERGY_POINTS_TO_DIST_MODEL.fit(soc_points, dist_median_xs.values)
    predicted_energy = fitted_model.predict(SOC_RANGE).squeeze()
    return Series(predicted_energy, index=SOC_RANGE).clip(lower=300, upper=500)


# Smoothed `energy_added` medians of the whole fleet's charging points.
fleet_energy_medians = compute_charge_energy_median(
    fleet_wise_perfs_of_watea["charging_points"]
)
# One fitted charge-energy curve per (level 0, level 1) group of medians.
fleet_energy_dist_fits = fleet_energy_medians.groupby(level=[0, 1]).apply(
    fit_poly_lr_to_charge_dist_xs
)

In [None]:


# Plot the regime-filtered charging points against the fitted curves, restricted
# to the groups whose level-0 key is 0 (presumably the lowest odometer bucket -
# confirm). To plot every group instead, drop the two `.xs(0, drop_level=False)`
# selections.
#
# The charging points are read from `fleet_wise_perfs_of_watea`, the same source
# used to compute `fleet_energy_medians`, so this cell runs on a fresh kernel
# without relying on a variable that no cell defines.
plt_charge_energy_data(
    fleet_wise_perfs_of_watea["charging_points"]
    .query(MOST_COMMON_CHARGE_REGIME_QUERY)
    .xs(0, drop_level=False),
    fleet_energy_dist_fits.xs(0, drop_level=False),
    plt.figure(figsize=(20, 40)),
)

### Use the fitted distribution as the reference of shape
Here, we consider the best distribution fit to be the one computed from the charging dataset with the least variance and the widest SoC range coverage.  
Most datasets have about the same SoC range coverage, but the 25°C dataset has the least variance, so that is the distribution we are going to use.  
The distributions (even the 25°C one) will be represented as an intercept added to the mean-centered fit of the 25°C dataset.

This method allows us to get a representation of the charge energy distribution from very few data points.  
It does, however, assume that only the center of the distribution changes, not its shape.   
This may or may not turn out to be true...

In [None]:
# These definitions are used by the later cells of this notebook (`dist_shape`,
# `best_dist_fit`, `compute_intercept_from_dist_shape_and_points` and
# `fleet_charge_dist_intercept`), so they must be executed for a
# Restart & Run All to succeed.

# Reference fit: the curve fitted for the group keyed (0, 25.0).
best_dist_fit = fleet_energy_dist_fits.xs((0, 25.0))
# NOTE(review): despite its name this is the minimum of the reference fit, not
# its mean, while the text above talks about a mean-centered fit - confirm intent.
best_dist_shape_mean = best_dist_fit.min()
# Shape of the reference distribution, shifted so that its lowest point is 0.
dist_shape = best_dist_fit - best_dist_shape_mean


def compute_intercept_from_dist_shape_and_points(median: Series, dist_shape: Series) -> float:
    """Estimate the offset between a group of medians and the reference shape.

    Args:
        median: Medians of one group; index level 2 is rounded to the nearest
            0.5 and looked up in ``dist_shape``.
        dist_shape: Reference shape, indexed by the values the rounded level-2
            keys must match (assumes a 0.5 step - TODO confirm against SOC_RANGE).

    Returns:
        Median of the differences between the medians and the reference shape
        at the matching positions.

    Raises:
        KeyError: If a rounded level-2 value is missing from ``dist_shape``.
    """
    median = median.drop_duplicates()
    # Snap the level-2 keys to the nearest 0.5 before the lookup.
    idx = (median.index.get_level_values(2) * 2).round() / 2
    return np.median(median.values - dist_shape.loc[idx])


# One intercept per (level 0, level 1) group of fleet medians.
fleet_charge_dist_intercept = (
    fleet_energy_medians
    .groupby(level=[0, 1])
    .apply(compute_intercept_from_dist_shape_and_points, dist_shape)
)

# Sanity checks on the intercepts provided by `fleet_perfs`.
print("is_monotonic_increasing over temp:")
display(fleet_perfs["intercepts"].groupby(level=0).is_monotonic_increasing)
print("is_monotonic_decreasing over odo:")
display(fleet_perfs["intercepts"].groupby(level=1).is_monotonic_decreasing)

In [None]:
# Show the intercepts as a table with one column per level-0 key.
fleet_perfs["intercepts"].unstack(0)

### Recompute distributions
This time as the sum of their intercept with the best fit and the fit itself

In [None]:

# Rebuild one distribution per (level 0, level 1) group by adding the group's
# intercept to the reference shape; the result keeps the index of `dist_shape`.
fleet_dist = fleet_charge_dist_intercept.groupby(level=[0, 1]).apply(
    lambda intercept: Series(
        (dist_shape + intercept.iat[0]).values,
        index=dist_shape.index,
    )
)


In [None]:

# Plot the regime-filtered charging points of the whole fleet against the
# distributions rebuilt from the reference shape and the per-group intercepts.
#
# The charging points are read from `fleet_wise_perfs_of_watea`, the same source
# used to compute `fleet_energy_medians`, so this cell runs on a fresh kernel
# without relying on a variable that no cell defines.
plt_charge_energy_data(
    fleet_wise_perfs_of_watea["charging_points"].query(MOST_COMMON_CHARGE_REGIME_QUERY),
    fleet_dist,
    plt.figure(figsize=(20, 40)),
)

In [None]:
# plt_charge_energy_data(
#     fleet_charge_energy_points_df.query(MOST_COMMON_CHARGE_REGIME_QUERY).xs(0, drop_level=False),
#     fleet_dist.xs(0, drop_level=False),
#     plt.figure(figsize=(20, 40))
# )

In [None]:
# plt_charge_energy_data(
#     fleet_charge_energy_points_df.query(MOST_COMMON_CHARGE_REGIME_QUERY).xs(20, level=1, drop_level=False),
#     fleet_dist.xs(20, level=1, drop_level=False),
#     plt.figure(figsize=(20, 40))
# )

### Compute single vehicle intercepts

In [None]:
# ID of a vehicle that represented an edge case in the first implementation of
# energy soh. Alternative kept from the original: pick the vehicle with the
# largest `max_odo` among those with power during charge, i.e.
#   biggest_odo_idx = fleet_info_df.query("has_power_during_charge")["max_odo"].argmax()
#   vehicle_id = fleet_info_df.query("has_power_during_charge")["id"].iat[biggest_odo_idx]
# Named `vehicle_id` rather than `id` so the builtin `id()` is not shadowed for
# the rest of the kernel session.
vehicle_id = "kdh372"
vehicle_df = processed_ts_of(vehicle_id)

# NOTE(review): `compute_perfs` is not among the explicit imports of this
# notebook; presumably it comes from the star import or is an older name of a
# `watea.watea_perfs` helper - confirm.
vehicle_charge_energy_points_df: DF = (
    compute_perfs(vehicle_df, vehicle_id, force_update=True)
    ["charging_points"]
    .query(MOST_COMMON_CHARGE_REGIME_QUERY)
)

vehicle_energy_medians = compute_charge_energy_median(vehicle_charge_energy_points_df)

# One intercept per (level 0, level 1) group, measured against the fleet's
# reference shape (`dist_shape` must have been defined by an earlier cell).
vehicle_charge_dist_intercept = (
    vehicle_energy_medians
    .groupby(level=[0, 1])
    .apply(compute_intercept_from_dist_shape_and_points, dist_shape)
)

# Vehicle distributions: reference shape shifted by each group's intercept,
# indexed like `dist_shape` (same construction as the fleet distributions).
vehicle_dist = (
    vehicle_charge_dist_intercept
    .groupby(level=[0, 1])
    .apply(lambda intercept: Series((dist_shape + intercept.iat[0]).values, index=dist_shape.index))
)

# The points were already filtered with the regime query above, so they are
# plotted as they are.
plt_charge_energy_data(
    vehicle_charge_energy_points_df,
    vehicle_dist,
    plt.figure(figsize=(20, 40)),
)

### Compute soh from ratio of vehicle and default intercepts

In [None]:
# Reference intercepts: the fleet intercepts of the groups whose level-0 key is 0.
default_intercepts = fleet_charge_dist_intercept.xs(0)

# State of health in percent: ratio of the vehicle intercepts to the reference
# intercepts (aligned on index level 1), then the median per level-0 key.
intercept_ratio_pct = vehicle_charge_dist_intercept.div(default_intercepts, level=1) * 100
soh = intercept_ratio_pct.groupby(level=0).median()

display(default_intercepts, vehicle_charge_dist_intercept)
soh