# Opening statement

In [None]:
# Create the local cache directory if it does not exist yet (-p makes this a no-op otherwise);
# the cells below write parquet and HTML files under data_cache/.
! mkdir -p data_cache

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import linregress
from pandas import Series
from pandas import DataFrame as DF
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, FunctionTransformer
from sklearn.pipeline import Pipeline


from core.caching_utils import cache_result
from core.pandas_utils import floor_to
from core.plt_utils import plt_3d_df
from transform.watea.soh_estimation import get_processed_cluster, get_soh_per_charges
from transform.watea.watea_processed_tss import get_processed_tss
from transform.watea.watea_fleet_info import fleet_info
from transform.watea.watea_config import POLYNOMIAL_LINEAR_REGRESSION_PIPELINE

import plotly.io as pio
# Set the default plotly renderer used when fig.show() is called without an explicit renderer.
# "+" combines two renderers: the plotly mimetype one and the classic-notebook one.
pio.renderers.default = "plotly_mimetype+notebook"

In [None]:
# Load the two project datasets used throughout the notebook.
# NOTE(review): their schemas are not visible here. Later cells read the columns
# soc / temperature / energy_added from processed_cluster and id / soh / odometer from charges.
processed_cluster = get_processed_cluster()
charges = get_soh_per_charges()

In [None]:
@cache_result("data_cache/most_common_ts.parquet", on="local_storage")
def get_most_common_ts() -> DF:
    """Return the rows of the processed time series belonging to the most frequent ``id``.

    The frame returned by ``get_processed_tss()`` is scanned for the ``id``
    value with the highest row count, and only the rows carrying that id are
    returned.

    The function is wrapped by ``cache_result`` with a parquet path under
    ``data_cache/``; the caching behaviour itself is defined in
    ``core.caching_utils`` and is not visible from this notebook.

    Returns:
        The subset of the processed time series whose ``id`` equals the most
        frequent one.

    Raises:
        IndexError: if the processed time series has no rows, since there is
            then no most frequent id to pick.
    """
    tss = get_processed_tss()
    # value_counts() sorts by descending count by default, so the first index
    # entry is the most frequent id.
    most_common_id = tss["id"].value_counts().index[0]
    # Compare values directly rather than interpolating the id into a query
    # string: the string form breaks on ids containing a quote and matches
    # nothing when the id column is not string-typed.
    return tss[tss["id"] == most_common_id]


ts = get_most_common_ts()

In [None]:
@cache_result("data_cache/{id}.parquet", on="local_storage", path_params=["id"])
def get_ts(id: str) -> DF:
    """Return the rows of the processed time series for one ``id``.

    Args:
        id: Value to look up in the ``id`` column of the frame returned by
            ``get_processed_tss()``. The parameter name shadows the ``id``
            builtin but must stay as is: the decorator refers to it through
            ``path_params=["id"]`` and the ``{id}`` placeholder in the path.

    Returns:
        The subset of the processed time series whose ``id`` column equals
        ``id`` (empty if no row matches).
    """
    tss = get_processed_tss()
    # Direct comparison instead of an f-string query: safe for ids containing
    # quotes and correct when the column is not string-typed.
    return tss[tss["id"] == id]


# Pick the first fleet entry whose has_power_during_charge flag is False
# (raises IndexError if there is none) and load its time series.
no_power_id = fleet_info.query("~has_power_during_charge")["id"].iat[0]
no_power_ts = get_ts(no_power_id)

In [None]:
# One row per vehicle id: mean SoH over its charges and the highest odometer reading seen.
soh_per_vehicle = (
    charges
    .groupby("id")
    .agg(
        soh=("soh", "mean"),
        odometer=("odometer", "max"),
    )
    .reset_index()
)

## Time series

In [None]:
# State of charge and power of the most common time series, sharing the date
# axis; power is drawn against a secondary y-axis on the right.
soc_trace = go.Scatter(
    x=ts["date"],
    y=ts["soc"],
    name="State of Charge",
    yaxis="y1",
)
power_trace = go.Scatter(
    x=ts["date"],
    y=ts["power"],
    name="Power",
    yaxis="y2",
)

fig = go.Figure(data=[soc_trace, power_trace])
fig.update_layout(
    title="State of Charge and Power over Time",
    xaxis_title="Date",
    yaxis=dict(title="State of Charge (%)"),
    # overlaying="y" stacks the second axis on top of the first one.
    yaxis2=dict(title="Power", overlaying="y", side="right"),
)

# Export a standalone HTML copy, then render inline.
fig.write_html("data_cache/most_common_ts.html")
fig.show()

## SoH results

In [None]:
# SoH per charge against odometer, coloured by vehicle id, with a single OLS
# trendline fitted over all points (trendline_scope="overall").
# NOTE(review): the 2.5 offset and the [90, 100] clamp are presentation
# adjustments whose rationale is not documented in this notebook.
adjusted_charge_soh = (charges["soh"] - 2.5).clip(lower=90, upper=100)
fig = px.scatter(
    charges.assign(soh=adjusted_charge_soh),
    x="odometer",
    y="soh",
    color="id",
    trendline="ols",
    trendline_scope="overall",
)
fig.write_html("data_cache/sohs_per_charge.html")
fig.show()

In [None]:
# Mean SoH per vehicle against its maximum odometer, with one OLS trendline
# over all vehicles. Points are intentionally not coloured by id here.
# NOTE(review): the 1.5 offset is a presentation adjustment whose rationale is
# not documented in this notebook (the per-charge plot uses 2.5).
adjusted_vehicle_soh = soh_per_vehicle["soh"] - 1.5
fig = px.scatter(
    soh_per_vehicle.assign(soh=adjusted_vehicle_soh),
    x="odometer",
    y="soh",
    trendline="ols",
    trendline_scope="overall",
)
fig.write_html("data_cache/sohs_per_vehicle.html")
fig.show()

## Energy consumption over SoC and temperature

In [None]:
# Bucket temperatures with floor_to(..., 5) — presumably multiples of 5, see
# core.pandas_utils. The column is added to processed_cluster in place.
processed_cluster["floored_temperature"] = floor_to(processed_cluster["temperature"], 5)

# Keep only rows with a temperature strictly between 0 and 30.
temperature = processed_cluster["temperature"]
within_range = processed_cluster[(temperature < 30) & (temperature > 0)]

# Median energy added for every (soc, temperature bucket) pair.
median_energy = within_range.groupby(["soc", "floored_temperature"])[["energy_added"]].median()

# Warmest bucket first, then increasing soc inside each bucket.
dist_to_plot = median_energy.reset_index().sort_values(
    by=["floored_temperature", "soc"],
    ascending=[False, True],
)

fig = px.line(
    data_frame=dist_to_plot,
    x="soc",
    y="energy_added",
    color="floored_temperature",
    color_discrete_sequence=px.colors.sequential.Rainbow,
)
fig.write_html("data_cache/energy_consumption_per_soc_and_temperature.html")
fig.show()

In [None]:
# 👉 Slide type of this cell: Fragment

# Distribution of the per-soc median energy added, one box per temperature bucket.
fig = px.box(
    data_frame=dist_to_plot,
    x="floored_temperature",
    y="energy_added",
    color="floored_temperature",
    color_discrete_sequence=px.colors.sequential.Rainbow,
)
fig.write_html("data_cache/energy_consumption_per_soc_and_temperature_boxplot.html")
fig.show()

In [None]:
# NOTE(review): this rebinds POLYNOMIAL_LINEAR_REGRESSION_PIPELINE, which is
# also imported from transform.watea.watea_config at the top of the notebook,
# and none of the visible cells use it — confirm the local copy is still needed.
POLYNOMIAL_LINEAR_REGRESSION_PIPELINE = Pipeline(
    steps=[
        # Turn the 1-D input into a single-feature column before expansion.
        ("reshape", FunctionTransformer(lambda x: x.reshape(-1, 1))),
        ("poly_features", PolynomialFeatures(degree=10)),
        ("regressor", LinearRegression()),
    ]
)

# Wide table: one row per soc, one column per temperature bucket.
energy_by_soc_per_temp = pd.pivot_table(
    dist_to_plot,
    index="soc",
    columns=["floored_temperature"],
    values="energy_added",
)
# Despite its name this is the per-soc *median* across temperature buckets.
mean_energy_added = energy_by_soc_per_temp.median(axis=1)
# Express every bucket as a deviation from that per-soc reference, then go
# back to long format; the unnamed value column ends up labelled 0.
energy_by_soc_per_temp = (
    energy_by_soc_per_temp
    .sub(mean_energy_added, axis=0)
    .unstack()
    .reset_index()
)
energy_by_soc_per_temp

In [None]:
# Deviation of energy added from the per-soc median, one line per temperature
# bucket. Column 0 is the unnamed value column left by unstack().reset_index().
fig = px.line(
    data_frame=energy_by_soc_per_temp,
    x="soc",
    y=0,
    color="floored_temperature",
    color_discrete_sequence=px.colors.sequential.Rainbow,
)
# HTML export is disabled for this figure. NOTE(review): the path that used to
# be commented out here, data_cache/energy_consumption_per_soc_and_temperature.html,
# is already written by the raw energy line plot — pick a distinct name before re-enabling.
fig.show()

In [None]:
# Spread of the deviation from the per-soc median, one box per temperature
# bucket. Column 0 is the unnamed value column left by unstack().reset_index().
fig = px.box(
    data_frame=energy_by_soc_per_temp,
    x="floored_temperature",
    y=0,
    color="floored_temperature",
    color_discrete_sequence=px.colors.sequential.Rainbow,
)
# HTML export is disabled for this figure. NOTE(review): the path that used to
# be commented out here, data_cache/energy_consumption_per_soc_and_temperature.html,
# is already written by the raw energy line plot — pick a distinct name before re-enabling.
fig.show()

In [None]:
# ! jupyter nbconvert watea_presentation.ipynb --to slides --no-prompt --TagRemovePreprocessor.remove_input_tags={\"to_remove\"} --post serve  --no-input