### Imports

In [None]:
import logging

import plotly.express as px
from core.plt_utils import basic_fig_update
import pandas as pd
from pandas import DataFrame as DF
import matplotlib.pyplot as plt
import umap
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

from core.plt_utils import plt_3d_df
from watea.watea_constants import *
from watea.processed_watea_ts import processed_ts_of, processed_ts_it
from watea.watea_fleet_info import fleet_info_df
from watea.energy_distribution import *

logging.basicConfig(level=logging.INFO)


## Setup

In [None]:
# Compute the charging points of every (id, time series) pair yielded by
# processed_ts_it("has_power_during_charge").
# A comprehension keeps the loop variables local: the original `for id, ts`
# loop rebound the builtin `id` (and leaked `ts`) in the notebook namespace.
fleet_energy_points_lst = [
    compute_charging_points(series, series_id)
    for series_id, series in processed_ts_it("has_power_during_charge")
]


In [None]:
# Stack the per-series charging points into one frame, indexed by (and sorted
# on) `soc` while keeping `soc` available as a regular column.
raw_fleet_charging_points = pd.concat(fleet_energy_points_lst, ignore_index=True)
raw_fleet_charging_points = raw_fleet_charging_points.set_index("soc", drop=False).sort_index()
display(raw_fleet_charging_points)

# Per-column summary statistics, plus the share of rows that are non-null.
describe = raw_fleet_charging_points.describe().T.assign(
    notna_ratio=lambda stats: stats["count"] / len(raw_fleet_charging_points)
)
display(describe)
raw_fleet_charging_points.loc[:, "energy_added"].plot.hist()

In [None]:
# Keep only complete rows whose energy_added lies strictly between 100 and 502,
# and flag the points recorded with an odometer reading of at most 3000.
fleet_charging_points:DF = (
    raw_fleet_charging_points
    .dropna(how="any")
    .query("energy_added < 502 & energy_added > 100")
    .eval("is_100_default_soh = odometer <= 3000")
)
# Fixed seed so the 40 % sample is identical on every Restart & Run All.
down_sampled_fleet_charging_points = fleet_charging_points.sample(frac=0.4, random_state=0)

## 3D EDA of the dataset through 3D scatter plots

In [None]:
# voltage / soc / energy_added scatter, coloured by temperature.
plt_3d_df(
    fleet_charging_points,
    "voltage",
    "soc",
    "energy_added",
    color="temperature",
    colorscale="Bluered",
    size=2.5,
)

In [None]:
# voltage / soc / temperature scatter, coloured by energy_added.
plt_3d_df(
    fleet_charging_points,
    "voltage",
    "soc",
    "temperature",
    color="energy_added",
    colorscale="Bluered",
    size=2.5,
)

In [None]:
# voltage / soc / current scatter, coloured by energy_added.
plt_3d_df(
    fleet_charging_points,
    "voltage",
    "soc",
    "current",
    color="energy_added",
    colorscale="Rainbow",
    size=2.5,
)

## Charging regime separation feature

In [None]:
# Fit a smooth lower envelope of voltage as a function of soc and plot it over
# the raw points. The fitted curve (`fitted_shape_series`) is indexed by SOC_RANGE.
fleet_voltage_by_soc = (
    fleet_charging_points
    .loc[:, ["voltage", "soc"]]
)
# NOTE(review): despite its name, `median` holds a rolling *minimum*: two
# successive passes of an 80-row centred window, the first one rolling on "soc".
# NOTE(review): the frame carries "soc" both as index and as column at this
# point; the trailing reset_index() presumably only works because the first
# rolling pass does not re-emit the "soc" column — confirm before refactoring.
median = (
    fleet_voltage_by_soc
    .drop_duplicates()
    .rolling(80, center=True, on="soc")
    .min()
    .rolling(80, center=True)
    .min()
    .dropna()
    .reset_index()
)
# One figure, three layers: raw points (scatter), rolling-min envelope (red),
# and further down the degree-4 polynomial fit (green).
fig, ax = plt.subplots(figsize=(15, 9))
fleet_voltage_by_soc.plot.scatter("soc", "voltage", s=0.35, ax=ax)
median.plot.line(x="soc", y="voltage", color="red", ax=ax)
# Degree-4 polynomial regression of voltage on soc; the first step reshapes the
# 1-D input into a single-feature column.
# NOTE(review): Pipeline, FunctionTransformer, PolynomialFeatures,
# LinearRegression, Series and SOC_RANGE are not imported explicitly in the
# visible import cell; presumably they come from one of the `import *` lines —
# confirm. This assignment may also shadow a star-imported constant of the
# same name.
CHARGE_ENERGY_POINTS_TO_DIST_MODEL = Pipeline([
    ('reshape', FunctionTransformer(lambda x: x.reshape(-1, 1))),
    ('poly_features', PolynomialFeatures(degree=4)),
    ('regressor', LinearRegression())
])

# Fit on the envelope, then evaluate the polynomial on every value of SOC_RANGE.
fitted_shape_data = (
    CHARGE_ENERGY_POINTS_TO_DIST_MODEL
    .fit(median["soc"].values, median["voltage"].values)
    .predict(SOC_RANGE)
    .squeeze()
)
# Fitted curve as a Series indexed by soc, so it can be looked up by soc value.
fitted_shape_series = (
    Series(data=fitted_shape_data, index=pd.Index(SOC_RANGE, name="soc"))
    # .sub(min(fitted_shape_data))
)
fitted_shape_series.plot.line(ax=ax, color="green")
plt.show()

In [None]:
# Look up the fitted voltage curve at each point's soc and store the distance
# of the measured voltage above it as `soc_voltage_feature`.
# NOTE(review): the looked-up Series is indexed by soc (with repeated labels);
# `assign` presumably only accepts it because that index matches the frame's
# own soc index exactly — confirm before changing the index of either side.
fleet_charging_points:DF = (
    fleet_charging_points
    .assign(min_voltage=fitted_shape_series.loc[fleet_voltage_by_soc["soc"]])
    .eval("soc_voltage_feature = voltage - min_voltage")
)
# Fixed seed so the 40 % sample is identical on every Restart & Run All.
down_sampled_fleet_charging_points = (
    fleet_charging_points
    .sample(frac=0.4, random_state=0)
    .dropna(how="any")
)
plt_3d_df(fleet_charging_points, "temperature", "soc", "current", color="energy_added", colorscale="Rainbow", size=2.5)

In [None]:
import plotly.graph_objects as go

def plt_charges(df, x, y, z, color, colorscale='Bluered', opacity=0.8):
    """Draw one 3D line per charge and show the resulting figure.

    Rows of ``df`` are grouped by ``charge_id``; each group is ordered by
    ``date`` and added as a single ``Scatter3d`` line trace whose colour
    follows the ``color`` column.

    Parameters:
        df: frame holding ``charge_id`` and ``date`` plus the columns named by
            ``x``, ``y``, ``z`` and ``color``.
        x, y, z: column names used for the three axes.
        color: column name whose values colour each line.
        colorscale: colour scale applied to the line colours.
        opacity: opacity of every trace.

    Returns:
        None. The figure is passed through ``basic_fig_update`` and shown.
    """
    fig = go.Figure()

    # The group key itself is not needed, only the rows of each charge.
    for _, charge_df in df.groupby('charge_id'):
        ordered = charge_df.sort_values("date")
        line_style = dict(color=ordered[color], colorscale=colorscale)
        trace = go.Scatter3d(
            x=ordered[x],
            y=ordered[y],
            z=ordered[z],
            mode='lines',
            line=line_style,
            opacity=opacity,
        )
        fig.add_trace(trace)

    fig = basic_fig_update(fig, x, y, z)
    fig.show()
    


In [None]:
# Per-charge lines in (soc_voltage_feature, soc, current) space, coloured by energy_added.
plt_charges(
    down_sampled_fleet_charging_points,
    "soc_voltage_feature",
    "soc",
    "current",
    "energy_added",
)

In [None]:
# soc / current / energy_added scatter, coloured by the soc_voltage_feature.
plt_3d_df(
    fleet_charging_points,
    "soc",
    "current",
    "energy_added",
    color="soc_voltage_feature",
    colorscale="Bluered",
    size=2.5,
)

## Most common charging regime

In [None]:
# Flag the points with soc_voltage_feature below 7.5 and current below 22.
fleet_charging_points:DF = fleet_charging_points.eval(
    "in_most_common_regime = soc_voltage_feature < 7.5 & current < 22"
)

In [None]:
# Restrict to the flagged regime, to energy_added of at least 300 and to
# temperatures strictly between 0 and 30, then index by (charge_id, date).
most_common_charging_points_cleaned = (
    fleet_charging_points
    .query("in_most_common_regime")
    .query("energy_added >= 300")
    .query("temperature > 0 & temperature < 30")
    .drop(columns="in_most_common_regime")
    .set_index(["charge_id", "date"], drop=False)
    .sort_index()
)
display(most_common_charging_points_cleaned["temperature"].plot.hist())
display(most_common_charging_points_cleaned)

In [None]:
# Charges with the largest number of points in the cleaned subset.
print(
    most_common_charging_points_cleaned["charge_id"]
    .value_counts()
    .sort_values(ascending=False)
    .head()
)

# Inspect a single charge, selected on the charge_id level of the index.
most_common_charging_points_cleaned_subset = most_common_charging_points_cleaned.xs(
    "mcf122_3", level="charge_id"
)

display(most_common_charging_points_cleaned_subset)

fig = px.line_3d(
    most_common_charging_points_cleaned_subset,
    x="voltage",
    y="current",
    z="energy_added",
    line_group="charge_id",
)
fig = basic_fig_update(fig, "voltage", "current", "energy_added")
fig.show()

In [None]:
# voltage / current / energy_added scatter of the cleaned subset, coloured by temperature.
plt_3d_df(
    most_common_charging_points_cleaned,
    "voltage",
    "current",
    "energy_added",
    color="temperature",
    colorscale="Bluered",
    size=2.5,
)

## Dimensionality reduction

In [None]:
# Default number of UMAP output dimensions and default input feature columns.
N_COMPONENTS = 3
FEATURE_COLS = [
    "current",
    "voltage",
    "soc_voltage_feature",
    "temperature",
    "soc",
]

def dimensionality_reduction(
    df:DF=down_sampled_fleet_charging_points,
    n_components=N_COMPONENTS,
    features=FEATURE_COLS,
    n_neighbours=120,
    random_state=None,
) -> DF:
    """Standardise ``features``, embed them with UMAP and append the original columns.

    Parameters:
        df: frame holding the ``features`` columns and ``energy_added``.
            NOTE: the default is bound when this cell runs, so it is whatever
            ``down_sampled_fleet_charging_points`` referred to at that moment,
            not its value at call time.
        n_components: number of UMAP output dimensions.
        features: names of the columns fed to the scaler and to UMAP.
        n_neighbours: forwarded to UMAP as ``n_neighbors``.
        random_state: forwarded to UMAP. The default ``None`` keeps the
            previous unseeded behaviour; pass an int for a reproducible
            embedding.

    Returns:
        A frame with columns ``umap_feature_0`` .. ``umap_feature_{n-1}``
        followed by every column of ``df``, on a fresh 0..n-1 index.
    """
    # Both are loop-invariant: build them once instead of inside the lambdas.
    umap_columns = [f"umap_feature_{i}" for i in range(n_components)]
    original_rows = df.reset_index(drop=True)
    reducer = umap.UMAP(
        n_components=n_components,
        verbose=True,
        n_neighbors=n_neighbours,
        random_state=random_state,
    )
    # NOTE(review): `y` is handed to every pipeline step, so UMAP presumably
    # runs in its supervised mode with energy_added as target — confirm that
    # this is intended.
    return (
        Pipeline([
            ('standar_scalar', StandardScaler()),
            ('reducer', reducer),
            ('to_df', FunctionTransformer(lambda X: DF(X, columns=umap_columns))),
            ('concat_og_df', FunctionTransformer(lambda X: pd.concat((X, original_rows), axis="columns"))),
        ])
        .fit_transform(
            X=df[features].values,
            y=df["energy_added"],
        )
    )

In [None]:
# Fixed seed so the 75 % sample is identical on every Restart & Run All.
# NOTE(review): this rebinds fleet_charging_points to a sample of itself with
# extra umap_feature_* columns, so the cell is not idempotent: re-running it
# alone samples the sample again and appends the UMAP columns a second time.
fleet_charging_points = dimensionality_reduction(
    fleet_charging_points.sample(frac=0.75, random_state=0),
    n_neighbours=300,
)

In [None]:
# UMAP embedding, coloured by energy_added.
plt_3d_df(
    fleet_charging_points,
    "umap_feature_0",
    "umap_feature_1",
    "umap_feature_2",
    color="energy_added",
    colorscale="Rainbow",
    size=2.5,
)

In [None]:
# Numeric encodings of the is_100_default_soh flag (colour: 1/0, size: 10/5).
# NOTE(review): neither column is used by the plot below, which colours by
# odometer with a fixed size; a `.query("is_100_default_soh")` filter was
# previously left commented out here.
fleet_charging_points["is_100_default_soh_color"] = np.where(fleet_charging_points["is_100_default_soh"], 1, 0)
fleet_charging_points["is_100_default_soh_size"] = np.where(fleet_charging_points["is_100_default_soh"], 10, 5)

plt_3d_df(
    fleet_charging_points,
    "umap_feature_0",
    "umap_feature_1",
    "umap_feature_2",
    color="odometer",
    colorscale="Rainbow",
    size=2.5,
)

In [None]:
# UMAP embedding restricted to the is_100_default_soh points, coloured by energy_added.
plt_3d_df(
    fleet_charging_points.query("is_100_default_soh"),
    "umap_feature_0",
    "umap_feature_1",
    "umap_feature_2",
    color="energy_added",
    colorscale="Rainbow",
    size=2.5,
)

## soh estimation

In [None]:
# Define the function for training on the train set and evaluating on the test set

# soh = energy_added as a percentage of the reference value returned by
# compute_100_soh_energy_added for the same frame.
fleet_charging_points:DF = fleet_charging_points.assign(
    default_100_soh_energy_added=compute_100_soh_energy_added(fleet_charging_points)
)
fleet_charging_points = fleet_charging_points.eval(
    "soh = 100 * energy_added / default_100_soh_energy_added"
)

fleet_charging_points.loc[:, ["default_100_soh_energy_added", "soh"]].describe()

### plotting

In [None]:
def plot_2d_line(df: pd.DataFrame, x_column: str, y_column: str, line_group_column: str, color: str = None, color_scale: str = None):
    """
    Builds and shows a 2D line plot using Plotly with optional color and color scale.

    Parameters:
        df (pd.DataFrame): The input DataFrame containing the data.
        x_column (str): The column name for the x-axis.
        y_column (str): The column name for the y-axis.
        line_group_column (str): The column name for grouping the lines.
        color (str, optional): The column name to use for the line color. Default is None.
        color_scale (str, optional): Name of a palette in ``px.colors.qualitative`` or,
            failing that, in ``px.colors.sequential`` (e.g. 'Viridis'). Only used when
            ``color`` is given. Default is None, which selects the qualitative 'Plotly' palette.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        AttributeError: If ``color_scale`` names no palette in either module.
    """
    line_kwargs = dict(x=x_column, y=y_column, line_group=line_group_column)

    if color:
        if color_scale:
            # Qualitative palettes take precedence; sequential scales are a
            # fallback so that names such as 'Viridis' are accepted as well.
            palette = getattr(px.colors.qualitative, color_scale, None)
            if palette is None:
                palette = getattr(px.colors.sequential, color_scale)
        else:
            palette = px.colors.qualitative.Plotly
        line_kwargs.update(color=color, color_discrete_sequence=palette)

    fig = px.line(df, **line_kwargs)

    # Title, axis labels and legend title derived from the column names.
    fig.update_layout(
        title=f'2D Line Plot of {y_column} vs {x_column} Grouped by {line_group_column}',
        xaxis_title=x_column,
        yaxis_title=y_column,
        legend_title=line_group_column if not color else color
    )

    # Show the plot
    fig.show()



In [None]:
# One row per charge: the median of every numeric/date column and the mode of
# the identifier columns (kept for debugging), ordered by median date.
agg_fleet_charging_points = (
    fleet_charging_points
    .groupby("charge_id")
    .agg({
        **dict.fromkeys(
            [
                "odometer",
                "energy_added",
                "voltage",
                "current",
                "temperature",
                "sec_duration",
                "date",
                "soc",
                "min_voltage",
                "soc_voltage_feature",
                "default_100_soh_energy_added",
                "soh",
            ],
            "median",
        ),
        # Debugging
        "id": pd.Series.mode,
        "charge_idx": pd.Series.mode,
        "charge_id": pd.Series.mode,
    })
    .sort_values(by="date")
)
px.line(
    agg_fleet_charging_points,
    x='odometer',
    y='soh',
    color='id',
    symbol="id",
    markers=False,
)

In [None]:
# Distribution of the per-charge soh.
agg_fleet_charging_points.loc[:, "soh"].plot.hist(bins=20)

In [None]:
# Charges with soh strictly between 75 and 120, ordered by odometer, with the
# variance of soh over a centred 10-row window.
most_common_agg_fleet_charging_points = (
    agg_fleet_charging_points
    .sort_values(by="odometer")
    .query("soh > 75 & soh < 120")
    .assign(
        soh_variance=lambda charges: (
            charges
            .rolling(window=10, center=True, on="odometer")["soh"]
            .var()
        )
    )
)

In [None]:
import plotly.graph_objects as go
import plotly.express as px

# Give every distinct 'id' one colour, cycling through the qualitative Plotly palette.
color_map = dict(
    (category, px.colors.qualitative.Plotly[position % len(px.colors.qualitative.Plotly)])
    for position, category in enumerate(most_common_agg_fleet_charging_points['id'].unique())
)

# Markers: soh against odometer, one colour per id.
scatter_trace = go.Scatter(
    x=most_common_agg_fleet_charging_points['odometer'],
    y=most_common_agg_fleet_charging_points['soh'],
    mode='markers',
    marker=dict(color=most_common_agg_fleet_charging_points['id'].map(color_map).tolist()),
    name='Scatter plot',
)

# Line: rolling soh variance against odometer.
line_trace = go.Scatter(
    x=most_common_agg_fleet_charging_points['odometer'],
    y=most_common_agg_fleet_charging_points['soh_variance'],
    mode='lines',
    name='Line plot',
)

# Both traces share one figure.
fig = go.Figure()
fig.add_traces([scatter_trace, line_trace])

fig.update_layout(
    title="Scatter and Line Plot Combined",
    xaxis_title="Odometer",
    yaxis_title="Value",
)

fig.show()

In [None]:
# Distribution of the rolling soh variance, restricted to values below 30.
(
    most_common_agg_fleet_charging_points
    .query("soh_variance < 30")
    .loc[:, "soh_variance"]
    .plot.hist(bins=15)
)

In [None]:

# Same per-charge aggregation as `agg_fleet_charging_points` (already sorted by
# date), re-indexed by id. It is reused here instead of repeating the whole
# groupby/agg specification a second time.
agg_ffleet_charging_points = agg_fleet_charging_points.set_index("id", drop=False)

# Ids to inspect individually.
IDS = ['mjh312', 'put122', 'xwi432', 'jko652', 'kdh372', 'psk292',
       'niv132', 'qyw992', 'sxp242', 'nap622', 'jpv002', 'xzg662',
       'zqi822', 'oyi432', 'qkw942', 'yhq352', 'vob662', 'egn052',]
# NOTE(review): despite its name this frame is built from the un-aggregated
# `fleet_charging_points`, restricted to IDS and to soh in (90, 104].
downsampled_agg_ffleet_charging_points = (
    fleet_charging_points
    .set_index("id", drop=False)
    .sort_values("date")
    .loc[IDS]
    .query("soh > 90 & soh <= 104")
)
# downsampled_agg_ffleet_charging_points["soh"].plot.hist()
# fig = px.line_3d(downsampled_agg_ffleet_charging_points.loc['mjh312'], x='odometer', y="date", z='soh', markers=True)
# basic_fig_update(fig, x='odometer', y="date", z='soh')
# A list selector always yields a DataFrame; the scalar form `.loc['mjh312']`
# returns a Series when exactly one row matches.
plt_3d_df(
    downsampled_agg_ffleet_charging_points.loc[['mjh312']],
    x='odometer',
    y="soh",
    z='temperature',
    color="temperature",
    colorscale="Rainbow",
)