In [None]:
import logging

import plotly.express as px
import pandas as pd
from pandas import DataFrame as DF
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
import numpy as np

from core.plt_utils import plt_3d_df
from transform.watea.watea_constants import *
from transform.watea.energy_distribution import *
from transform.watea.raw_watea_ts import raw_ts_it

# Configure the root logger so INFO-level (and above) records are emitted.
logging.basicConfig(level=logging.INFO)


## Setup

In [None]:
# Load the preprocessed charging points.
# NOTE(review): the commented-out flags presumably force a recomputation of the
# cached data -- confirm against get_preprocessed_charging_points.
charging_points = get_preprocessed_charging_points(
    # force_update=True, 
    # force_update_extraction=True
)
# Keep only the main charging regime, apply the energy / current / SoH windows,
# and add the derived ratio columns used by the plots below.
cluster:DF = (
    charging_points
    .query(f"cluster_idx == {MAIN_CHARGING_REGIME_CLUSTER_IDX}")
    .query("energy_added > 320 & energy_added < 490")
    .query("current < 27.5 & current > 5.8")
    # estimate_soh presumably adds the `soh` column filtered on just below -- TODO confirm.
    .pipe(estimate_soh)
    .query("soh >= 93 & soh <= 105")
    .eval("range_by_soc = estimated_range / soc")
    # Divide by voltage (not soc): dividing by soc only duplicated range_by_soc.
    .eval("range_by_voltage = estimated_range / voltage")
    .eval("soc_by_voltage = soc / voltage")
    .eval("power = current * voltage")
)
# Angle in degrees of atan2(estimated_range, voltage) for every row.
cluster["range_voltage_angle"] = np.degrees(np.arctan2(cluster["estimated_range"], cluster["voltage"]))
# Median SoH of each charge, broadcast back onto every row of that charge.
cluster["charge_soh"] = cluster.groupby("charge_id")['soh'].transform("median")

In [None]:
# Voltage vs. estimated range for the filtered cluster, coloured by temperature,
# drawn on a fixed 600x600 (square) canvas with autosizing disabled.
# Optional: pass xaxis_scaleanchor="y" to the layout to give both axes the same scale.
px.scatter(
    cluster,
    x="voltage",
    y="estimated_range",
    opacity=0.4,
    color="temperature",
    color_continuous_scale="Inferno",
).update_layout(autosize=False, width=600, height=600)


In [None]:
# Voltage vs. SoC, restricted to the rows whose id is 'bob432'.
px.scatter(
    cluster.query("id == 'bob432'"),
    x="voltage",
    y="soc",
)

In [None]:
# soc/voltage ratio vs. estimated range for id 'bob432' at currents above 18,
# coloured by power (current * voltage).
px.scatter(
    cluster.query("id == 'bob432' & current > 18"),
    x="soc_by_voltage",
    y="estimated_range",
    color="power",
    color_continuous_scale="Rainbow",
)

In [None]:
# Range-per-volt over time for id 'bob432'; range_by_voltage is (re)computed here
# as estimated_range / voltage right before plotting.
px.scatter(
    cluster
    .query("id == 'bob432'")
    .eval("range_by_voltage = estimated_range / voltage"),
    x="date",
    y="range_by_voltage",
)

In [None]:
# 3D view of soc / estimated_range / soh for the whole cluster, coloured by temperature.
plt_3d_df(
    cluster,
    x="soc",
    y="estimated_range",
    z="soh",
    opacity=0.7,
    color="temperature",
    colorscale="Inferno",
)

In [None]:
# Collapse the cluster to one row per (id, charge_id) pair.
# The aggregation mapping is assembled group by group; dict key order (and
# therefore output column order) is the same as listing every column by hand.
cluster_charges = cluster.groupby(["id", "charge_id"]).agg(
    {
        # Median-aggregated measurements.
        **dict.fromkeys(
            [
                "odometer",
                "energy_added",
                "voltage",
                "current",
                "temperature",
                "sec_duration",
                "date",
                "soc",
                "soh",
            ],
            "median",
        ),
        # Mean-aggregated range columns.
        **dict.fromkeys(["estimated_range", "estimated_range_diff"], "mean"),
        "range_by_soc": "median",
        # Debugging: most frequent identifier values within each group.
        **dict.fromkeys(["id", "charge_idx", "charge_id"], pd.Series.mode),
    }
)

In [None]:
# Display the column index of the cluster frame (inspection only).
cluster.columns

In [None]:
def compute_umap_features(df:DF, target_feature:str, n_components=UMAP_N_COMPONENTS, features=UMAP_INPUT_FEATURE_COLS, n_neighbours=120) -> DF:
    """Standard-scale `features`, embed them with UMAP, and prepend the embedding to `df`.

    Args:
        df: Input frame; must contain every column in `features` plus `target_feature`.
        target_feature: Column passed as `y` to the pipeline's `fit_transform`
            (presumably used by UMAP as a supervision target -- TODO confirm).
        n_components: Number of UMAP output dimensions, i.e. the number of
            `umap_feature_<i>` columns produced.
        features: Columns of `df` fed (as a raw array) to the scaler and reducer.
        n_neighbours: Forwarded to `umap.UMAP` as `n_neighbors`.

    Returns:
        A new frame whose first columns are `umap_feature_0 .. umap_feature_{n_components-1}`,
        followed by the columns of `df` with its index reset. Any of those
        `umap_feature_<i>` columns already present in `df` are dropped first.
    """
    # umap is imported lazily because the import is slow (because of tensor flow).
    import umap
    # Imported explicitly so the function does not rely on these names leaking
    # into the notebook namespace through the star imports.
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import FunctionTransformer

    umap_feature_cols = [f"umap_feature_{i}" for i in range(n_components)]
    # Drop stale embedding columns so re-running on an already-embedded frame
    # does not produce duplicate column names.
    umap_feature_cols_to_drop = [col for col in umap_feature_cols if col in df.columns]
    df = df.drop(columns=umap_feature_cols_to_drop)

    pipeline = Pipeline([
        ('standar_scalar', StandardScaler()),
        ('reducer', umap.UMAP(n_components=n_components, verbose=True, n_neighbors=n_neighbours, random_state=UMAP_RANDOM_STATE)),
        # Wrap the raw embedding array into a frame with named columns.
        ('to_df', FunctionTransformer(lambda X: DF(X, columns=umap_feature_cols))),
        # Index is reset so the embedding (RangeIndex) lines up row by row with df.
        ('concat_with_og_df', FunctionTransformer(lambda X: pd.concat((X, df.reset_index(drop=True)), axis="columns"))),
    ])
    return pipeline.fit_transform(
        X=df[features].values,
        y=df[target_feature],
    )

# Embed the cluster rows with charge_soh as the target feature, then write the
# resulting frame to a parquet file (path is relative to the working directory).
cluster = compute_umap_features(
    cluster,
    "charge_soh",
    features=["estimated_range", "soc", "temperature", "odometer"],
)
cluster.to_parquet("cluster_with_umap_estimated_range.parquet")

In [None]:
# Recompute the UMAP embedding on the cluster frame (no parquet write here).
cluster = cluster.pipe(
    compute_umap_features,
    "charge_soh",
    features=["estimated_range", "soc", "temperature", "odometer"],
)

In [None]:
# 3D view of the first three UMAP components, coloured by the per-charge SoH.
plt_3d_df(
    cluster,
    x="umap_feature_0",
    y="umap_feature_1",
    z="umap_feature_2",
    opacity=0.7,
    color="charge_soh",
    colorscale="Inferno",
)
# Last expression of the cell: show the columns now present on the frame.
cluster.columns