## Import

In [None]:
import logging

import plotly.express as px
from core.plt_utils import basic_fig_update
import pandas as pd
from pandas import DataFrame as DF
import matplotlib.pyplot as plt
# import umap
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import make_scorer

from core.plt_utils import plt_3d_df, plot_2d_line
from watea.watea_constants import *
from watea.watea_fleet_info import fleet_info_df
from watea.energy_distribution import *

## Setup

In [None]:
logging.basicConfig(level=logging.INFO)

# Build the working frame in three steps (innermost call runs first):
# extract the raw fleet charging points, clean them, then compute the
# regime-separation feature.
charging_points = compute_regime_seperation_feature(
    clean_charging_points(
        extract_raw_fleet_charging_points()
    )
)

## Estimator implementation

In [None]:
def neg_median_absolute_difference(y_true, y_pred):
    """
    Return the negated median of the element-wise absolute error.

    The value is negated so that larger (closer to zero) means better, which
    is the convention expected from a score as opposed to a loss.

    Parameters:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        The negative median of ``|y_true - y_pred|``.
    """
    # Coerce to arrays so the subtraction is positional: plain lists would
    # raise, and two pandas Series would be aligned on their index labels
    # instead of being compared element by element.
    true_values = np.asarray(y_true)
    predicted_values = np.asarray(y_pred)
    abs_diff = np.abs(true_values - predicted_values)
    return -np.median(abs_diff)

# The metric is already negated, so it is registered as "greater is better"
# and the scorer must not flip its sign again.
neg_median_abs_diff_scorer = make_scorer(
    neg_median_absolute_difference,
    greater_is_better=True,
)

# Define the function for KNN cross-validation
def knn_cross_validation(df: pd.DataFrame, input_columns: list, target_column: str, n_neighbors: int = 4, cv_splits: int = 5):
    """
    Cross-validate a K-Nearest Neighbors regressor on columns of ``df``.

    Every fold is scored with ``neg_median_abs_diff_scorer``. Folds come from
    a shuffled ``KFold`` with a fixed seed (42), so repeated calls on the same
    data use the same splits.

    Parameters:
        df (pd.DataFrame): Frame holding both the feature and target columns.
        input_columns (list): Names of the columns used as features.
        target_column (str): Name of the column to predict.
        n_neighbors (int): Neighbor count for the regressor. Default is 4.
        cv_splits (int): Number of cross-validation folds. Default is 5.

    Returns:
        dict: ``'median_score'`` holds the mean of the per-fold scores and
        ``'std_score'`` holds their standard deviation.
    """
    features = df[input_columns].values
    target = df[target_column].values

    fold_scores = cross_val_score(
        KNeighborsRegressor(n_neighbors=n_neighbors),
        features,
        target,
        cv=KFold(n_splits=cv_splits, shuffle=True, random_state=42),
        scoring=neg_median_abs_diff_scorer,
        verbose=True,
    )

    # NOTE: despite its name, 'median_score' is the mean over the folds; the
    # "median" refers to the per-fold metric.
    return {
        'median_score': fold_scores.mean(),
        'std_score': fold_scores.std(),
    }


# Baseline: cross-validate on every charging point.
knn_cross_validation(
    df=charging_points,
    input_columns=SOH_ESTIMATION_FEATURES,
    target_column="energy_added",
)

In [None]:
# Same evaluation, restricted to the rows flagged by `is_default_100_soh`.
knn_cross_validation(
    df=charging_points.query("is_default_100_soh"),
    input_columns=SOH_ESTIMATION_FEATURES,
    target_column="energy_added",
)

## SOH estimation

In [None]:
# Add the reference `default_100_soh_energy_added` column, then express the
# measured `energy_added` as a percentage of that reference.
charging_points: DF = estimate_default_100_soh_energy_added(
    charging_points,
    n_neighbors=100,
).eval("soh = 100 * energy_added / default_100_soh_energy_added")

In [None]:
# Aggregate the charging points with the project helper (presumably one row
# per charge -- confirm against its implementation).
charges = agg_charging_points_over_charges(charging_points)

# SoH against odometer, one line per id; rows are ordered by odometer first so
# each line is drawn left to right.
px.line(
    data_frame=charges.sort_values(by="odometer"),
    x='odometer',
    y='soh',
    color='id',
    symbol="id",
    markers=False,
)

In [None]:
# Same data as unconnected points, colored by id.
px.scatter(
    data_frame=charges,
    x='odometer',
    y='soh',
    color='id',
)

In [None]:
# Order by odometer and keep only the rows with 75 < soh < 120.
charges = charges.sort_values("odometer").query("soh > 75 & soh < 120")

# Centered 10-row rolling variance of soh, computed over the sorted and
# filtered rows (all ids together).
charges = charges.assign(
    soh_variance=charges.rolling(on="odometer", window=10, center=True)["soh"].var()
)

In [None]:
import plotly.graph_objects as go
import plotly.express as px

def plt_soh_and_soh_variance(df):
    """
    Show one figure with SoH markers and the SoH-variance line over odometer.

    Reads the 'odometer', 'soh', 'soh_variance' and 'id' columns of ``df``.
    Markers are colored per distinct 'id' value, wrapping around the Plotly
    qualitative palette when there are more ids than colors. The figure is
    displayed; nothing is returned.
    """
    palette = px.colors.qualitative.Plotly

    # One palette color per distinct id, in order of first appearance
    id_to_color = {}
    for position, id_label in enumerate(df['id'].unique()):
        id_to_color[id_label] = palette[position % len(palette)]

    point_colors = [id_to_color[id_label] for id_label in df['id']]

    # SoH values as individual markers
    soh_points = go.Scatter(
        x=df['odometer'],
        y=df['soh'],
        mode='markers',
        marker={'color': point_colors},
        name='Scatter plot',
    )

    # Rolling SoH variance as a continuous line on the same axes
    variance_curve = go.Scatter(
        x=df['odometer'],
        y=df['soh_variance'],
        mode='lines',
        name='Line plot',
    )

    fig = go.Figure(data=[soh_points, variance_curve])
    fig.update_layout(
        title="Scatter and Line Plot Combined",
        xaxis_title="Odometer",
        yaxis_title="Value",
    )
    fig.show()
    
# Render the combined SoH / SoH-variance figure for the filtered charges.
plt_soh_and_soh_variance(df=charges)

In [None]:
# Distribution of the rolling variance, ignoring values of 30 and above.
(
    charges
    .query("soh_variance < 30")
    ["soh_variance"]
    .plot.hist(bins=15)
)

In [None]:
# List the ids that have at least one charge beyond 3000 on the odometer.
display(
    charges.query("odometer > 3000")["id"].unique()
)

# SoH over time for one hard-coded id.
px.scatter(
    data_frame=charges.query("id == 'bob432'"),
    x='date',
    y='soh',
    color='id',
)

In [None]:
# 3D view of the rows flagged by `is_default_100_soh`: current, soc and
# energy_added, colored by temperature.
plt_3d_df(
    charging_points.query("is_default_100_soh"),
    "current",
    "soc",
    "energy_added",
    color="temperature",
    colorscale="Rainbow",
)