### Imports

In [None]:
import logging

import plotly.express as px
import pandas as pd
from pandas import DataFrame as DF
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
import numpy as np
import umap

from core.plt_utils import plt_3d_df, basic_fig_update
from watea.watea_constants import *
from watea.processed_watea_ts import processed_ts_of, processed_ts_it
from watea.watea_fleet_info import fleet_info_df
from watea.energy_distribution import *

# Configure the root logger so that records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)


## Setup

In [None]:
logging.basicConfig(level=logging.INFO)

# Build the working dataframe in three steps, innermost call first:
#   1. load the raw fleet charging points,
#   2. clean them,
#   3. add the regime separation feature.
charging_points = compute_regime_seperation_feature(
    clean_charging_points(
        get_raw_fleet_charging_points()
    )
)

### UMAP dimensionality reduction

In [None]:
# Default number of dimensions of the UMAP embedding.
N_COMPONENTS = 3
# Default input columns that are scaled and fed to the UMAP reducer.
FEATURE_COLS = [
    "current",
    "voltage",
    "regime_seperation_feature",
    "temperature",
    "soc",
]

def dimensionality_reduction(df: DF, n_components=N_COMPONENTS, features=FEATURE_COLS, n_neighbours=120) -> DF:
    """Embed the rows of ``df`` with UMAP and return the embedding joined to ``df``.

    The ``features`` columns are standardized with ``StandardScaler`` and then
    reduced to ``n_components`` dimensions with ``umap.UMAP``. The
    ``energy_added`` column is passed as ``y`` to the pipeline's
    ``fit_transform`` (UMAP accepts a target for supervised reduction).

    Args:
        df: Input dataframe. Must contain every column in ``features`` as well
            as an ``energy_added`` column.
        n_components: Number of embedding dimensions to produce.
        features: Names of the columns used as input to the reduction.
        n_neighbours: Forwarded to ``umap.UMAP`` as ``n_neighbors``.

    Returns:
        A new dataframe whose first ``n_components`` columns are
        ``umap_feature_0`` ... ``umap_feature_{n_components - 1}``, followed by
        the columns of ``df`` (minus any pre-existing ``umap_feature_*``
        columns). The row index is reset to a fresh ``RangeIndex``.
    """
    # Imported locally so the function does not rely on a star import
    # re-exporting these sklearn names.
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import FunctionTransformer

    features = list(features)
    # Name the output columns after the *requested* number of components, not
    # the module-level default; otherwise any n_components != N_COMPONENTS
    # fails when the embedding is wrapped into a dataframe.
    umap_feature_cols = [f"umap_feature_{i}" for i in range(n_components)]

    # Drop every embedding column left over from a previous run, including
    # ones beyond the current n_components, so stale coordinates cannot leak
    # into later prefix-based column selections.
    stale_cols = [col for col in df.columns if str(col).startswith("umap_feature_")]
    df = df.drop(columns=stale_cols).reset_index(drop=True)
    logging.info("Column dtypes before dimensionality reduction:\n%s", df.dtypes)

    def _attach_embedding(embedding: DF) -> DF:
        # Both frames carry a fresh RangeIndex, so rows align positionally.
        return pd.concat((embedding, df), axis="columns")

    pipeline = Pipeline([
        ('standar_scalar', StandardScaler()),
        ('reducer', umap.UMAP(n_components=n_components, verbose=True, n_neighbors=n_neighbours)),
        ('to_df', FunctionTransformer(lambda X: DF(X, columns=umap_feature_cols))),
        ('concat_og_df', FunctionTransformer(_attach_embedding)),
    ])
    return pipeline.fit_transform(
        X=df[features].values,
        y=df["energy_added"],
    )

In [None]:
# Replace the working dataframe with its UMAP-augmented version, using a
# neighbourhood size of 300 instead of the function's default.
charging_points = dimensionality_reduction(
    df=charging_points,
    n_neighbours=300,
)

In [None]:
# 3D view of the three UMAP coordinates, with `energy_added` as the color.
umap_axes = ("umap_feature_0", "umap_feature_1", "umap_feature_2")
plt_3d_df(
    charging_points,
    *umap_axes,
    color="energy_added",
    colorscale="Rainbow",
    size=2.5,
)

## Charge regime clustering

In [None]:
# Select every column whose name contains the UMAP embedding prefix.
umap_feature_cols = charging_points.filter(like="umap_feature_").columns

# Cluster the points in embedding space with DBSCAN and store the label that
# the fitted estimator assigned to each row.
dbscan = DBSCAN(eps=0.5, min_samples=5, metric='euclidean', n_jobs=-1)
dbscan.fit(charging_points[umap_feature_cols])
charging_points['cluster_idx'] = dbscan.labels_

In [None]:
# Same 3D UMAP view as before, this time with `cluster_idx` as the color.
umap_axes = ("umap_feature_0", "umap_feature_1", "umap_feature_2")
plt_3d_df(
    charging_points,
    *umap_axes,
    color="cluster_idx",
    colorscale="Rainbow",
    size=2.5,
)

In [None]:
# Restrict the cluster view to the rows selected by the `is_default_100_soh` query.
default_soh_points = charging_points.query("is_default_100_soh")
plt_3d_df(
    default_soh_points,
    "umap_feature_0",
    "umap_feature_1",
    "umap_feature_2",
    color="cluster_idx",
    colorscale="Rainbow",
    size=2.5,
)

In [None]:
# Plot cluster index against state of charge and energy added, colored by cluster.
plt_3d_df(
    charging_points,
    "cluster_idx",
    "soc",
    "energy_added",
    color="cluster_idx",
    colorscale="Rainbow",
    size=2.5,
)

In [None]:
# Store an integer copy of the `is_default_100_soh` flag for use as the plot color.
default_soh_flag = charging_points["is_default_100_soh"]
charging_points["is_default_100_soh_int"] = default_soh_flag.astype(int)

# Inspect cluster 5 only: temperature vs voltage vs energy added.
cluster_5_points = charging_points.query("cluster_idx == 5 ")
plt_3d_df(
    cluster_5_points,
    "temperature",
    "voltage",
    "energy_added",
    color="is_default_100_soh_int",
    colorscale="Bluered",
    size=2.5,
)