## SISO Support Vector Classification (SVC)

**In order to see the results you should start the MLflow ui separately**:
1. Open a prompt/terminal and navigate to the path of this project
2. Activate the virtual environment:  
    (Windows: ```.venv\eis_data_analytics\Scripts\activate```,  
    Linux/Mac: ```.venv/eis_data_analytics/bin/activate```)
3. Now start MLflow with ```mlflow server --port 1234``` (consider adding options such as ```--workers=16 --gunicorn-opts='--timeout 600'```)
4. Open [http://127.0.0.1:1234](http://127.0.0.1:1234) in your browser

In [None]:
%matplotlib widget 

from modules import dataset_manipulation as dm
from modules import eisplot as eisplot
from modules.eisplot import plt
from modules.eisplot import mpl

import numpy as np
import pandas as pd

from functools import partial
import hyperopt
import mlflow
import shapely
from sklearn.svm import SVC

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt

from sklearn.metrics import confusion_matrix
from modules.eisplot import sns

## if you have installed latex and want to use it for plots, uncomment the following 3 lines
# mpl.rcParams.update({"text.usetex": True,'savefig.format':'pdf'})
# mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
# mpl.rc('text.latex', preamble=r'\usepackage{underscore}')

## save figures e.g. with:
# plot_name = "custom_3D_plot"
# plt.savefig(r"./figures/" + name_of_this_run + "_" + plot_name + ".pdf")
# plt.savefig(r"./figures/" + name_of_this_run + "_" + plot_name + ".png", dpi=600)

### Load Data

In [None]:
# Name of the data set; it selects both the EIS data file and its key lookup file.
name_of_this_run = "example_data"

# EIS data set
destination_filepath = f"./data/eis_datasets/{name_of_this_run}.parquet"
df = pd.read_parquet(destination_filepath)

# Key lookup table that belongs to the same data set
destination_filepath = f"./data/key_lookup/key_lookup_{name_of_this_run}.parquet"
key_lookup_df = pd.read_parquet(destination_filepath)

### Define Input Parameters

In [None]:
# Can be any of the following:
# show every column name of the data set; head(0) returns the header without rows,
# and lifting the column limit keeps pandas from truncating the list.
pd.set_option("display.max_columns", None)
df.head(0)

In [None]:
# # Example of one frequency
# input_parameters = []
# frequency = 0.01
# input_parameters += [
#     key_lookup_df["EIS_Z_abs"].loc[
#         np.argmin(np.abs(key_lookup_df["frequency"].values - frequency))
#     ]
# ]
# # Give it a Name (filename friendly)
# input_parameters_name = "Z_abs_0-01hz"

In [None]:
# # Example of one frequency
# input_parameters = []
# frequency = 1
# input_parameters += [
#     key_lookup_df["EIS_Z_abs"].loc[
#         np.argmin(np.abs(key_lookup_df["frequency"].values - frequency))
#     ]
# ]
# # Give it a Name (filename friendly)
# input_parameters_name = "Z_abs_1hz"

In [None]:
# Example of one frequency: pick the EIS_Z_abs key whose frequency is closest
# to the requested one.
frequency = 100
# np.argmin returns a *position* in the frequency array, so the matching entry
# has to be selected positionally with .iloc. Label-based .loc only gives the
# same result while the lookup table carries a default 0..n-1 index.
nearest_frequency_position = np.argmin(
    np.abs(key_lookup_df["frequency"].values - frequency)
)
input_parameters = [key_lookup_df["EIS_Z_abs"].iloc[nearest_frequency_position]]
# Give it a Name (filename friendly)
input_parameters_name = "Z_abs_100hz"

### Define Output Parameters

In [None]:
# output_parameters = []
# output_parameters += ["SOC"]
# # Give it a Name (filename friendly)
# output_parameters_name = "SOC"

In [None]:
# output_parameters = []
# output_parameters += ["SOH"]
# # Give it a Name (filename friendly)
# output_parameters_name = "SOH"

In [None]:
# Column(s) of the data set that serve as classification target
output_parameters = ["Temperature"]
# Give it a Name (filename friendly)
output_parameters_name = "Temperature"

### Split up the data for training

In [None]:
# Label that is handed to dm.get_set for the test intervals
test_labels = ["Temperature"]

# convert to training arrays
data_set = dm.get_set(
    df,
    output_parameters,
    feature_keys=input_parameters,
    label_for_test_intervals=test_labels,
    label_name=output_parameters_name,
    validation_split=0.2,
    test_split=0.1,
)

# Unpack the (x, y) pair of every subset and convert everything to float32
(x_train, y_train, x_validation, y_validation, x_test, y_test) = (
    np.float32(values)
    for subset in ("train", "validation", "test")
    for values in data_set[subset]
)

### Scale the data

In [None]:
# Settings of the Arrhenius correction, stored as attributes on the dm module.
# NOTE(review): arrhenius_b / arrhenius_c are presumably the coefficients used by
# dm.arrhenius_correction - confirm against modules/dataset_manipulation.
dm.use_arrhenius_correction = True
dm.use_arrhenius_correction_with_factor = False
dm.arrhenius_b = -15.47
dm.arrhenius_c = 1.30
# Optional round-trip check (correction followed by its inverse):
# [dm.arrhenius_correction_inverse(dm.arrhenius_correction(i))
#  for i in [0.01, 0.1, 1, 10]]

In [None]:
# Apply the Arrhenius correction to the features of all three subsets (if enabled)
if dm.use_arrhenius_correction:
    x_train, x_validation, x_test = (
        dm.arrhenius_correction(features)
        for features in (x_train, x_validation, x_test)
    )

In [None]:
# Select Scaler to use (Only one!)
dm.use_min_max_scaler = True
dm.use_standard_scaler = False
dm.scale_y_data = False  # not supported for SVC
# Optional round-trip checks of both scalers (scaling followed by its inverse):
# [dm.inverse_min_max_scaler(dm.min_max_scaler(i, dm.x_min, dm.x_max), dm.x_min, dm.x_max)
#  for i in [0.01, 0.1, 1, 10]]
# [dm.inverse_standard_scaler(dm.standard_scaler(i, dm.x_mean, dm.x_std), dm.x_mean, dm.x_std)
#  for i in [0.01, 0.1, 1, 10]]
# Standard Scaler: statistics are computed on the training data only
dm.x_mean = np.mean(x_train)
dm.x_std = np.std(x_train)
dm.y_mean = np.mean(y_train)
dm.y_std = np.std(y_train)
# Min Max scaler: value range of the training features
dm.x_min = np.min(x_train)
dm.x_max = np.max(x_train)

# NOTE(review): fixed 0..1 range for y - presumably a neutral placeholder because
# the targets are not scaled (scale_y_data is False); confirm.
dm.y_min = np.array(0, dtype=np.float32)
dm.y_max = np.array(1, dtype=np.float32)

In [None]:
# Pick the active scaler together with the two statistics it needs
if dm.use_min_max_scaler:
    scaler, scaler_args = dm.min_max_scaler, (dm.x_min, dm.x_max)
elif dm.use_standard_scaler:
    scaler, scaler_args = dm.standard_scaler, (dm.x_mean, dm.x_std)
else:
    scaler, scaler_args = None, ()

# Scale the features of all three subsets with the training statistics
if scaler is not None:
    x_train, x_validation, x_test = (
        scaler(features, *scaler_args)
        for features in (x_train, x_validation, x_test)
    )

### Create Classes, e.g. by quantization

In [None]:
# Quantization grid per output quantity, given as the (minimum, delta) pair
# that is passed on to dm.quantize_data.
quantization_settings = {
    "Temperature": (-25, 5),
    "SOC": (0, 5),
    "SOH": (0, 5),
}

# Fail with a clear message for an unknown output instead of running into a
# NameError (or silently reusing the values of an earlier notebook run).
try:
    discretize_minimum, discretize_delta = quantization_settings[
        output_parameters_name
    ]
except KeyError:
    raise ValueError(
        f"No quantization settings defined for output '{output_parameters_name}'. "
        f"Known outputs: {sorted(quantization_settings)}"
    ) from None

# Turn the continuous targets of every subset into classes
y_train = dm.quantize_data(y_train, discretize_minimum, discretize_delta)
y_validation = dm.quantize_data(y_validation, discretize_minimum, discretize_delta)
y_test = dm.quantize_data(y_test, discretize_minimum, discretize_delta)

### Classification

In [None]:
# Everything is logged in mlflow, create a new experiment
# (name pattern: <run>_SISO_SVC_<input>_<output>)
experiment_name = "_".join(
    (
        name_of_this_run,
        "SISO_SVC",
        input_parameters_name,
        output_parameters_name,
    )
)
mlflow_exp = mlflow.set_experiment(experiment_name=experiment_name)

In [None]:
# Define an objective to be minimized


def objective(params, experiment_id):
    """Train one SVC with the given settings and log the run to MLflow.

    Intended as objective for ``hyperopt.fmin`` (with ``experiment_id`` bound
    beforehand). The function reads the prepared ``x_*``/``y_*`` arrays from
    the notebook namespace, writes the preprocessing settings of this trial
    back to the ``dm`` module and logs parameters and error metrics of the
    fitted model.

    Args:
        params: Settings of this trial; keys that are missing fall back to the
            defaults defined at the top of the function.
        experiment_id: ID of the MLflow experiment the run is stored in.

    Returns:
        dict with the keys ``"loss"`` (largest RMSE of the train, validation
        and test set) and ``"status"`` (``hyperopt.STATUS_OK``).
    """
    # Defaults first, then overwrite them with the settings of this trial
    merged_params = {
        "log_model": False,
        "plot_fit": False,
        "plot_diag": False,
        "log_plot_type": "png",
        "kernel": "rbf",
        "C": 1,
        "tol": 0.001,
        "coef0": 0.0,
        "gamma": "auto",
        "degree": 3,
        "use_arrhenius_correction": dm.use_arrhenius_correction,
        "use_arrhenius_correction_with_factor": dm.use_arrhenius_correction_with_factor,
        "use_min_max_scaler": dm.use_min_max_scaler,
        "use_standard_scaler": dm.use_standard_scaler,
        "use_scale_y_data": dm.scale_y_data,
        "x_mean": dm.x_mean,
        "x_std": dm.x_std,
        "y_mean": dm.y_mean,
        "y_std": dm.y_std,
        "x_min": dm.x_min,
        "x_max": dm.x_max,
        "y_min": dm.y_min,
        "y_max": dm.y_max,
    }
    merged_params.update(params)

    # Write the preprocessing settings of this trial back to the dm module.
    # Each pair is (attribute on dm, key in merged_params).
    dm_settings = (
        ("use_arrhenius_correction", "use_arrhenius_correction"),
        (
            "use_arrhenius_correction_with_factor",
            "use_arrhenius_correction_with_factor",
        ),
        ("use_min_max_scaler", "use_min_max_scaler"),
        ("use_standard_scaler", "use_standard_scaler"),
        ("scale_y_data", "use_scale_y_data"),
        ("x_mean", "x_mean"),
        ("x_std", "x_std"),
        ("y_mean", "y_mean"),
        ("y_std", "y_std"),
        ("x_min", "x_min"),
        ("x_max", "x_max"),
        ("y_min", "y_min"),
        ("y_max", "y_max"),
    )
    for dm_attribute, param_key in dm_settings:
        setattr(dm, dm_attribute, merged_params[param_key])

    # Hyperparameters that are both logged and handed to the SVC constructor
    svc_keys = ("kernel", "C", "tol", "gamma", "degree", "coef0")

    with mlflow.start_run(experiment_id=experiment_id, run_name="SVC"):
        if merged_params["log_model"]:
            mlflow.sklearn.autolog()

        for svc_key in svc_keys:
            mlflow.log_param(svc_key, merged_params[svc_key])

        preprocessing_flags = (
            ("arrhenius_correction", dm.use_arrhenius_correction),
            (
                "arrhenius_correction_with_factor",
                dm.use_arrhenius_correction_with_factor,
            ),
            ("min_max_scaler", dm.use_min_max_scaler),
            ("standard_scaler", dm.use_standard_scaler),
            ("scale_y_data", dm.scale_y_data),
        )
        for flag_name, flag_value in preprocessing_flags:
            mlflow.log_param(flag_name, flag_value)

        model = SVC(
            **{svc_key: merged_params[svc_key] for svc_key in svc_keys},
            cache_size=4000,
            max_iter=1000000,
            probability=False,
        )

        model.fit(x_train, y_train.ravel())
        # The return value is not used here.
        # NOTE(review): with autologging enabled MLflow may record this call - confirm.
        model.score(x_validation, y_validation.ravel())

        # Evaluate every error measure on every subset; the dict keeps the
        # order "all subsets of one measure, then the next measure".
        subsets = (
            ("train", x_train, y_train),
            ("validation", x_validation, y_validation),
            ("test", x_test, y_test),
        )
        error_measures = (
            ("maxae", dm.evaluate_max_abs_error),
            ("mse", dm.evaluate_mse),
            ("rmse", dm.evaluate_rmse),
        )
        errors = {
            f"{subset_name}_{measure_name}_temp": evaluate(model, x_subset, y_subset)
            for measure_name, evaluate in error_measures
            for subset_name, x_subset, y_subset in subsets
        }
        for metric_name, metric_value in errors.items():
            mlflow.log_metric(metric_name, metric_value)

        train_rmse_temp = errors["train_rmse_temp"]
        validation_rmse_temp = errors["validation_rmse_temp"]
        test_rmse_temp = errors["test_rmse_temp"]

        if merged_params["plot_diag"]:
            dm.plot_diag_during_fitting(
                model,
                name_of_this_run,
                output_parameters,
                x_test,
                x_train,
                x_validation,
                data_set,
                train_rmse_temp,
                validation_rmse_temp,
                test_rmse_temp,
                merged_params,
            )

        if merged_params["plot_fit"]:
            dm.plot_fit_during_fitting(
                model,
                name_of_this_run,
                input_parameters,
                output_parameters,
                x_train,
                x_validation,
                x_test,
                y_train,
                y_validation,
                y_test,
                train_rmse_temp,
                validation_rmse_temp,
                test_rmse_temp,
                merged_params,
            )

        # Size of the support vector array compared to the training features
        n_support_values = np.prod(np.shape(model.support_vectors_))
        n_input_values = np.prod(np.shape(x_train))
        mlflow.log_metric("support_vectors", n_support_values)
        mlflow.log_metric(
            "support_vectors_percent", n_support_values / n_input_values * 100
        )
        mlflow.log_metric("support_vectors_not_null", n_support_values != 0)

        # Spread and worst case of the RMSE over the three subsets
        rmse_values = [train_rmse_temp, validation_rmse_temp, test_rmse_temp]
        rmse_std = np.std(rmse_values)
        rmse_max = np.max(rmse_values)
        mlflow.log_metric("std_rmse", rmse_std)
        mlflow.log_metric("max_rmse", rmse_max)
        mlflow.log_metric("std_times_max_rmse", rmse_std * rmse_max)

        # fmin() minimizes the objective: the worst RMSE of the three subsets
        weighted_fit_result = rmse_max
        mlflow.log_metric("weighted_fit_result", weighted_fit_result)

    return {"loss": weighted_fit_result, "status": hyperopt.STATUS_OK}

In [None]:
# Define the search space


def _fixed(label, value):
    """Wrap a constant as a hyperopt choice with a single option."""
    return hyperopt.hp.choice(label, [value])


search_space = hyperopt.hp.choice(
    "SVC",
    [
        {
            # logging and plotting switches of the objective
            "log_model": _fixed("log_model", True),
            "plot_fit": _fixed("plot_fit", True),
            "plot_diag": _fixed("plot_diag", True),
            "log_plot_type": _fixed("log_plot_type", "svg"),
            # hyperparameters that are sampled log-uniformly
            "gamma": hyperopt.hp.loguniform("gamma", np.log(0.001), np.log(10)),
            "tol": hyperopt.hp.loguniform("tol", np.log(1e-5), np.log(100)),
            "C": hyperopt.hp.loguniform("C", np.log(0.01), np.log(100)),
            "kernel": _fixed("kernel", "rbf"),
            # preprocessing settings, passed through unchanged
            "use_arrhenius_correction": _fixed(
                "use_arrhenius_correction", dm.use_arrhenius_correction
            ),
            "use_arrhenius_correction_with_factor": _fixed(
                "use_arrhenius_correction_with_factor",
                dm.use_arrhenius_correction_with_factor,
            ),
            "use_min_max_scaler": _fixed("use_min_max_scaler", dm.use_min_max_scaler),
            "use_standard_scaler": _fixed(
                "use_standard_scaler", dm.use_standard_scaler
            ),
            "use_scale_y_data": _fixed("use_scale_y_data", dm.scale_y_data),
            "x_mean": _fixed("x_mean", dm.x_mean),
            "x_std": _fixed("x_std", dm.x_std),
            "y_mean": _fixed("y_mean", dm.y_mean),
            "y_std": _fixed("y_std", dm.y_std),
            "x_min": _fixed("x_min", dm.x_min),
            "x_max": _fixed("x_max", dm.x_max),
            "y_min": _fixed("y_min", dm.y_min),
            "y_max": _fixed("y_max", dm.y_max),
        }
    ],
)

In [None]:
# Choose an optimization type

# Alternative: hyperopt's TPE suggestion algorithm
# algo=hyperopt.tpe.suggest
# Used here: random search
algo = hyperopt.rand.suggest

#### Fit the model, you can track it in mlflow: [http://127.0.0.1:1234](http://127.0.0.1:1234)

In [None]:
# Stop criteria of the search: time limit and number of evaluations
timout_in_minutes = 24 * 60
max_evals = 10

# Bind the experiment id up front; the remaining argument of the objective
# is the parameter set of the trial.
objective_for_experiment = partial(objective, experiment_id=mlflow_exp.experiment_id)

# if java is installed (only recommended under linux or wsl)
# import pyspark
# spark_trails = hyperopt.SparkTrials(parallelism=16)
# best_result = hyperopt.fmin(
#     fn=objective_for_experiment,
#     space=search_space,
#     algo=algo,
#     max_evals=max_evals,
#     timeout=timout_in_minutes * 60,
#     trials=spark_trails,
# )
# if java is not available
best_result = hyperopt.fmin(
    fn=objective_for_experiment,
    space=search_space,
    algo=algo,
    max_evals=max_evals,
    timeout=timout_in_minutes * 60,  # the limit is handed over in seconds
)

# Translate the result of fmin back into the actual parameter values
best_params = hyperopt.space_eval(search_space, best_result)
print(best_params)

### Select the best Model and validate

In [None]:
# Data set used for the evaluation (by default the one used for training)
name_of_this_run_eval = name_of_this_run

destination_filepath = f"./data/eis_datasets/{name_of_this_run_eval}.parquet"
df_eval = pd.read_parquet(destination_filepath)

In [None]:
# change if needed
# (by default the evaluation reuses the parameters of the training)
input_parameters_eval, input_parameters_name_eval = (
    input_parameters,
    input_parameters_name,
)
output_parameters_eval, output_parameters_name_eval = (
    output_parameters,
    output_parameters_name,
)

test_labels_eval = test_labels

In [None]:
# Split the evaluation data with the same split sizes as the training data
data_set_eval = dm.get_set(
    df_eval,
    output_parameters_eval,
    feature_keys=input_parameters_eval,
    label_for_test_intervals=test_labels_eval,
    label_name=output_parameters_name_eval,
    validation_split=0.2,
    test_split=0.1,
)

# Unpack the (x, y) pair of every subset and convert everything to float32
(
    x_train_eval,
    y_train_eval,
    x_validation_eval,
    y_validation_eval,
    x_test_eval,
    y_test_eval,
) = (
    np.float32(values)
    for subset in ("train", "validation", "test")
    for values in data_set_eval[subset]
)

In [None]:
# Quantize the evaluation targets with the settings used for the training targets
y_train_eval, y_validation_eval, y_test_eval = (
    dm.quantize_data(targets, discretize_minimum, discretize_delta)
    for targets in (y_train_eval, y_validation_eval, y_test_eval)
)

In [None]:
# Repeat the preprocessing of the training features on the evaluation features:
# Arrhenius correction first (if enabled) ...
if dm.use_arrhenius_correction:
    x_train_eval, x_validation_eval, x_test_eval = (
        dm.arrhenius_correction(features)
        for features in (x_train_eval, x_validation_eval, x_test_eval)
    )

# ... then the selected scaler with the statistics stored on dm
if dm.use_min_max_scaler:
    scaler_eval, scaler_eval_args = dm.min_max_scaler, (dm.x_min, dm.x_max)
elif dm.use_standard_scaler:
    scaler_eval, scaler_eval_args = dm.standard_scaler, (dm.x_mean, dm.x_std)
else:
    scaler_eval, scaler_eval_args = None, ()

if scaler_eval is not None:
    x_train_eval, x_validation_eval, x_test_eval = (
        scaler_eval(features, *scaler_eval_args)
        for features in (x_train_eval, x_validation_eval, x_test_eval)
    )

Open [http://127.0.0.1:1234](http://127.0.0.1:1234) to select a fitted model. If you click on it, you can extract the run ID. It could look like this: "ad26474e8c324f84906c9fc501928cae"

In [None]:
# you can choose a specific model
# logged_model = 'ad26474e8c324f84906c9fc501928cae'
# or just load the best model
client = mlflow.tracking.MlflowClient()

# Look the experiment up once and fail with a clear message if it is missing
experiment_eval = mlflow.get_experiment_by_name(experiment_name)
if experiment_eval is None:
    raise RuntimeError(f"MLflow experiment '{experiment_name}' does not exist.")

# Best run = smallest max_rmse among the runs that have support vectors
runs = client.search_runs(
    experiment_ids=[experiment_eval.experiment_id],
    order_by=["metrics.max_rmse"],
    filter_string="metrics.support_vectors_not_null = 1",
    max_results=1,
)
if not runs:
    # without this check the lookup below ends in a bare IndexError
    raise RuntimeError(
        f"No run with support vectors found in experiment '{experiment_name}'. "
        "Run the hyperparameter search first or set logged_model manually."
    )
logged_model = runs[0].info.run_id

# Load model as a Sklearn.
run_eval = mlflow.get_run(logged_model)
loaded_model = mlflow.sklearn.load_model(run_eval.info.artifact_uri + "/model/")

# RMSE of the loaded model on the three evaluation subsets
train_rmse_temp = dm.evaluate_rmse(loaded_model, x_train_eval, y_train_eval)
print("Train RMSE: " + str(train_rmse_temp))
validation_rmse_temp = dm.evaluate_rmse(
    loaded_model, x_validation_eval, y_validation_eval
)
print("Validation RMSE: " + str(validation_rmse_temp))
test_rmse_temp = dm.evaluate_rmse(loaded_model, x_test_eval, y_test_eval)
print("Test RMSE: " + str(test_rmse_temp))

# Name used for exported files: <experiment name>_<experiment id>_<run id>
unique_model_name = (
    experiment_name + "_" + experiment_eval.experiment_id + "_" + logged_model
)

### Plot Results

In [None]:
# Parity plot of the loaded model: predicted class over actual class
fig, ax = plt.subplots(1, 1, figsize=(10 * eisplot.cm, 10 * eisplot.cm))

# Unique values of the first index level of every subset
cell_list_train = list(set(data_set_eval["df_train"].index.get_level_values(0)))
cell_list_validation = list(
    set(data_set_eval["df_validation"].index.get_level_values(0))
)
cell_list_test = list(set(data_set_eval["df_test"].index.get_level_values(0)))

# Predictions of the loaded model as flat arrays
y_pred_train_eval = loaded_model.predict(x_train_eval).ravel()
y_pred_validation_eval = loaded_model.predict(x_validation_eval).ravel()
y_pred_test_eval = loaded_model.predict(x_test_eval).ravel()

y_train_plot = y_train_eval
y_validation_plot = y_validation_eval
y_test_plot = y_test_eval

# Set up the axes from the *evaluation* data set: every other input of this
# section is an *_eval variable, so the training-time data set must not be
# used here once a different evaluation file has been selected.
fig, ax = eisplot.setup_scatter(
    data_set_eval,
    test_rmse_temp,
    title=False,
    legend=False,
    fig=fig,
    ax=ax,
    ax_xlabel=False,
    ax_ylabel=False,
    subplots_adjust=True,
    add_trendline=True,
    label="",
)

# One entry per subset: (legend label, marker, color name, actual, predicted)
parity_series = (
    ("train", ".", "petrol", y_train_plot, y_pred_train_eval),
    ("validation", "1", "turqoise", y_validation_plot, y_pred_validation_eval),
    ("test", "2", "blue", y_test_plot, y_pred_test_eval),
)
for _, series_marker, series_color, y_actual, y_predicted in parity_series:
    ax.plot(
        y_actual,
        y_predicted,
        series_marker,
        color=eisplot.rwth_colors.colors[(series_color, 100)],
        alpha=0.5,
    )

ax.set_xlabel("actual: Output")
ax.set_ylabel("predicted: Output")

# Manual legend: the ideal line plus one marker per subset
legend_elements = [
    mpl.lines.Line2D(
        [0], [0], color=eisplot.rwth_colors.colors[("green", 100)], label="ideal"
    )
]
legend_elements += [
    mpl.lines.Line2D(
        [0],
        [0],
        marker=series_marker,
        linestyle="",
        color=eisplot.rwth_colors.colors[(series_color, 100)],
        label=series_label,
        alpha=0.5,
    )
    for series_label, series_marker, series_color, _, _ in parity_series
]
ax.legend(handles=legend_elements, loc="best", scatterpoints=1, prop={"size": 8})
fig.subplots_adjust(bottom=0.14, left=0.19)

In [None]:
# Model output over the raw input value, together with the samples of all subsets
fig, ax = plt.subplots(1, 1, figsize=(10 * eisplot.cm, 10 * eisplot.cm))


def _undo_preprocessing(features):
    """Invert the active scaler and afterwards the Arrhenius correction."""
    if dm.use_min_max_scaler:
        features = dm.inverse_min_max_scaler(features, dm.x_min, dm.x_max)
    elif dm.use_standard_scaler:
        features = dm.inverse_standard_scaler(features, dm.x_mean, dm.x_std)
    if dm.use_arrhenius_correction:
        features = dm.arrhenius_correction_inverse(features)
    return features


# Range of the preprocessed features over all three subsets
x_all_eval = np.concatenate((x_train_eval, x_validation_eval, x_test_eval))
x_low_eval = np.min(x_all_eval)
x_high_eval = np.max(x_all_eval)

# Back in raw units the two ends are taken in swapped order when the Arrhenius
# correction is active: the largest corrected value becomes the lower end.
if dm.use_arrhenius_correction:
    x_min_plot = _undo_preprocessing(x_high_eval)
    x_max_plot = _undo_preprocessing(x_low_eval)
else:
    x_min_plot = _undo_preprocessing(x_low_eval)
    x_max_plot = _undo_preprocessing(x_high_eval)

# Widen the plotted range on both sides
x_min_plot = x_min_plot * 0.8
x_max_plot = x_max_plot * 1.2

# Raw input values for the curve, as column vector
x_plot = np.linspace(x_min_plot, x_max_plot, 1000, dtype=np.float32)[:, None]
x_plot_arrhenius = x_plot

# Model input for the curve: same preprocessing as for the training features
if dm.use_arrhenius_correction:
    x_plot = dm.arrhenius_correction(x_plot)
if dm.use_min_max_scaler:
    x_plot = dm.min_max_scaler(x_plot, dm.x_min, dm.x_max)
elif dm.use_standard_scaler:
    x_plot = dm.standard_scaler(x_plot, dm.x_mean, dm.x_std)

# Samples back in raw units for the scatter plots
x_train_arrhenius = _undo_preprocessing(x_train_eval)
x_validation_arrhenius = _undo_preprocessing(x_validation_eval)
x_test_arrhenius = _undo_preprocessing(x_test_eval)

y_svr = loaded_model.predict(x_plot)

ax.plot(
    x_plot_arrhenius,
    y_svr,
    lw=2,
    label="Regression",
    color=eisplot.rwth_colors.colors[("bordeaux", 100)],
)

# One entry per subset: (legend label, marker, color name, raw inputs, targets)
fit_series = (
    ("train", ".", "petrol", x_train_arrhenius, y_train_plot),
    ("validation", "1", "turqoise", x_validation_arrhenius, y_validation_plot),
    ("test", "2", "blue", x_test_arrhenius, y_test_plot),
)
for series_label, series_marker, series_color, x_samples, y_samples in fit_series:
    ax.scatter(
        x_samples,
        y_samples,
        marker=series_marker,
        label=series_label,
        color=eisplot.rwth_colors.colors[(series_color, 100)],
        alpha=0.5,
    )

ax.set_ylabel("Output Value")
ax.set_xlabel("Input Value")
ax.grid()

ax.legend(loc="best", scatterpoints=1, prop={"size": 8})
fig.subplots_adjust(bottom=0.14, left=0.19)

In [None]:
# Row-normalized confusion matrices of the train, validation and test subset
fig, axes = plt.subplots(
    3,
    1,
    figsize=(7 * eisplot.cm, 16 * eisplot.cm),
    sharex=True,
    sharey=True,
    layout="compressed",
)

# Common color scale of all panels
vmin = 0
vmax = 1
norm = mpl.colors.Normalize(vmin, vmax)

# Every class that occurs as actual or predicted value in any subset, so that
# all three matrices share the same rows and columns
classes = np.unique(
    np.concatenate(
        [
            class_values.ravel()
            for class_values in (
                y_train_eval,
                y_pred_train_eval,
                y_validation_eval,
                y_pred_validation_eval,
                y_test_eval,
                y_pred_test_eval,
            )
        ]
    )
)

normalized_confusion = partial(confusion_matrix, normalize="true", labels=classes)
cm_train = normalized_confusion(y_train_eval, y_pred_train_eval)
cm_validation = normalized_confusion(y_validation_eval, y_pred_validation_eval)
cm_test = normalized_confusion(y_test_eval, y_pred_test_eval)

panels = (("Train", cm_train), ("Validation", cm_validation), ("Test", cm_test))
last_panel_idx = len(panels) - 1

for panel_idx, (panel_title, panel_cm) in enumerate(panels):
    is_last_panel = panel_idx == last_panel_idx
    axes[panel_idx] = sns.heatmap(
        panel_cm,
        cmap="turbo",
        ax=axes[panel_idx],
        cbar=False,
        vmin=vmin,
        vmax=vmax,
        square=True,
        annot=panel_cm,
        center=0.5,
        yticklabels=classes,
        xticklabels=classes,
    )  # ,fmt='.1f')
    axes[panel_idx].tick_params(axis="y", rotation=0)
    if is_last_panel:
        # only the bottom panel carries the x tick labels and the x label
        axes[panel_idx].tick_params(axis="x", rotation=90)
    axes[panel_idx].set_ylabel("True label")
    axes[panel_idx].title.set_text(panel_title)
    axes[panel_idx].set_xlabel("Predicted label" if is_last_panel else "")

# One shared colorbar above the panels
cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.turbo)
cmap.set_array([])
cbar = fig.colorbar(cmap, ax=axes, location="top")

### Convert, Export, Test and Validate with ONNX

In [None]:
# Convert the loaded model to ONNX and write it to disk.
# The input width is taken from the model that is exported (n_features_in_ is
# set by scikit-learn during fit) instead of from the training array, so the
# export does not depend on the training cells having been run in this session.
input_type = [("float_input", FloatTensorType([None, loaded_model.n_features_in_]))]
onnx_filename = "microcontroller_eis_network/onnx_export/" + unique_model_name + ".onnx"
onx = convert_sklearn(loaded_model, initial_types=input_type)
with open(onnx_filename, "wb") as f:
    f.write(onx.SerializeToString())

In [None]:
# Run the exported model with ONNX Runtime on the CPU
sess = rt.InferenceSession(onnx_filename, providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run([label_name], {input_name: x_test_eval})[0]
y_test_eval_ref = y_test_eval.copy()

# Difference between the ONNX predictions and the test labels
diff = pred_onx.ravel() - y_test_eval_ref.ravel()
max_abs_diff = np.max(np.abs(diff))
mean_diff = np.mean(diff)
std_diff = np.std(diff)
rmse_diff = np.sqrt(np.mean((diff) ** 2))
for diff_statistic in (max_abs_diff, mean_diff, std_diff, rmse_diff):
    print(diff_statistic)

In [None]:
# Feed both models with the same float32 test features
x_test_eval_float32 = x_test_eval.astype(np.float32)

pred_sklearn = loaded_model.predict(x_test_eval_float32)
onnx_outputs = sess.run([label_name], {input_name: x_test_eval_float32})
pred_onx = onnx_outputs[0].ravel()

# Differences between the scikit-learn and the ONNX predictions
diff = pred_sklearn.ravel() - pred_onx.ravel()
max_abs_diff = np.max(np.abs(diff))
mean_diff = np.mean(diff)
rmse_diff = np.sqrt(np.mean(diff**2))
print("Max difference between scikit-learn and ONNX predictions:", max_abs_diff)
print("Mean difference between scikit-learn and ONNX predictions:", mean_diff)
print("RMSE of differences:", rmse_diff)

### Testing the SVC on the µC is not yet implemented

### Further comparison of different fits

In [None]:
# Collect all runs of the current experiment in one DataFrame
experiment_id = mlflow_exp.experiment_id

# mlflow.search_runs documents a *list* of experiment IDs, so the ID is wrapped
# instead of being passed as a bare string. Only experiments that MLflow still
# lists are queried. The results are concatenated directly rather than onto an
# empty placeholder frame.
run_frames = [
    mlflow.search_runs(experiment_ids=[exp.experiment_id])
    for exp in mlflow.search_experiments()
    if exp.experiment_id == experiment_id
]
experiment_list = pd.concat(run_frames) if run_frames else pd.DataFrame()

experiment_list = experiment_list.reset_index(drop=True)
experiment_list.head()

In [None]:
# Column labels of the comparison table, in the order of scatter_values below
scatter_columns = [
    "Train MAXAE in K",
    "Validation MAXAE in K",
    "Test MAXAE in K",
    "Train MSE in K^2",
    "Validation MSE in K^2",
    "Test MSE in K^2",
    "Train RMSE in K",
    "Validation RMSE in K",
    "Test RMSE in K",
    "Kernel",
    "C",
    "Tolerance",
    "Gamma",
    "Degree",
    "Coef0",
]
# Error metrics as logged; the numeric hyperparameters are converted to float
# with astype, the kernel stays a string.
scatter_values = [
    experiment_list["metrics.train_maxae_temp"].values,
    experiment_list["metrics.validation_maxae_temp"].values,
    experiment_list["metrics.test_maxae_temp"].values,
    experiment_list["metrics.train_mse_temp"].values,
    experiment_list["metrics.validation_mse_temp"].values,
    experiment_list["metrics.test_mse_temp"].values,
    experiment_list["metrics.train_rmse_temp"].values,
    experiment_list["metrics.validation_rmse_temp"].values,
    experiment_list["metrics.test_rmse_temp"].values,
    experiment_list["params.kernel"].values,
    experiment_list["params.C"].values.astype(np.float64),
    experiment_list["params.tol"].values.astype(np.float64),
    experiment_list["params.gamma"].values.astype(np.float64),
    experiment_list["params.degree"].values.astype(np.float64),
    experiment_list["params.coef0"].values.astype(np.float64),
]
# Build the frame column by column. Stacking the arrays with np.transpose
# first would merge the string column and the float columns into one object
# array, so every column would lose its numeric dtype (which correlation and
# plotting rely on).
df_experiment = pd.DataFrame(dict(zip(scatter_columns, scatter_values)))

In [None]:
# limit the error if necessary:
# keep only runs whose test RMSE is available and not larger than 10
test_rmse_column = df_experiment["Test RMSE in K"]
df_experiment = df_experiment[test_rmse_column.notna() & ~(test_rmse_column > 10)]

In [None]:
# Pairwise correlation between the error metrics and the hyperparameters;
# the "Kernel" column holds the kernel name and is left out.
corr = df_experiment.drop(columns=["Kernel"]).corr()
# Color the table cells by their value
corr.style.background_gradient(cmap="turbo")

In [None]:
# Reuse the name of the selected model if that cell has been run; otherwise
# fall back to a name on experiment level.
try:
    print(unique_model_name)
except NameError:
    # Only the "name not defined yet" case is handled here; any other error
    # (e.g. an interrupt) must not be swallowed.
    unique_model_name = (
        experiment_name
        + "_"
        + mlflow.get_experiment_by_name(experiment_name).experiment_id
    )

In [None]:
# Store the table of all runs next to the MLflow data
destination_filepath = f"./mlruns/{unique_model_name}.parquet"
experiment_list.to_parquet(destination_filepath, index=True, compression="gzip")

In [None]:
# Hyperparameter value over RMSE for all runs, one panel per hyperparameter
fig, axs = plt.subplots(2, 2, figsize=(12 * eisplot.cm, 12 * eisplot.cm), sharex=True)

# The empty first entry reserves the upper left panel, which is removed below
variables = ["", "C", "Tolerance", "Gamma"]

# One entry per subset: (prefix of the RMSE column, legend label, color name)
rmse_series = (
    ("Train", "train", "petrol"),
    ("Validation", "validation", "turqoise"),
    ("Test", "test", "blue"),
)

for variable_idx, variable in enumerate(variables):
    if variable_idx == 0:
        continue
    # position in the 2x2 grid; plot_column is used as first (row) index of axs
    plot_column, plot_row = divmod(variable_idx, 2)
    panel = axs[plot_column, plot_row]

    concave_hull_ratio = 0.25

    min_error = np.min(
        [
            df_experiment[f"{column_prefix} RMSE in K"].values
            for column_prefix, _, _ in rmse_series
        ]
    )

    for column_prefix, _, series_color in rmse_series:
        rmse_values = df_experiment[f"{column_prefix} RMSE in K"].values
        variable_values = df_experiment[variable].values
        points = np.vstack((rmse_values, variable_values)).T
        panel.scatter(
            rmse_values,
            variable_values,
            c=eisplot.rwth_colors.colors[(series_color, 100)],
            alpha=0.1,
            marker=".",
        )
        # Optional: filled concave hull around the points of this subset
        # points_hull = np.exp(
        #     np.array(
        #         shapely.concave_hull(
        #             shapely.MultiPoint(np.log(points)), ratio=concave_hull_ratio
        #         ).exterior.coords
        #     )
        # )
        # panel.fill(
        #     points_hull[:, 0],
        #     points_hull[:, 1],
        #     color=eisplot.rwth_colors.colors[(series_color, 100)],
        #     alpha=0.5,
        # )

    panel.set_ylabel(variable)
    panel.set_yscale("log")
    panel.grid()


# Manual legend: one line per subset plus the marker line of the selected run
legend_elements = [
    mpl.lines.Line2D(
        [0],
        [0],
        marker="",
        linestyle="-",
        color=eisplot.rwth_colors.colors[(series_color, 100)],
        label=series_label,
        alpha=0.5,
    )
    for _, series_label, series_color in rmse_series
]
legend_elements.append(
    mpl.lines.Line2D(
        [0],
        [0],
        marker="",
        linestyle=":",
        color=eisplot.rwth_colors.colors[("darkred", 100)],
        label="selected value",
        alpha=1.0,
    )
)
fig.legend(
    handles=legend_elements,
    loc="upper center",
    scatterpoints=1,
    ncol=4,
)


axs[1, 0].set_xlabel("RMSE in K")
axs[1, 1].set_xlabel("RMSE in K")
fig.tight_layout()

# Horizontal extent of the "selected value" lines: x limits shrunk by 1 per side
x_limits = axs[1, 1].get_xlim()
x_values = np.array([x_limits[0] + 1, x_limits[1] - 1])

axs[0, 0].remove()

# Mark the hyperparameters of the selected run in the matching panel
selected_panels = (
    (axs[0, 1], "C"),
    (axs[1, 0], "tol"),
    (axs[1, 1], "gamma"),
)
for selected_panel, param_name in selected_panels:
    selected_value = float(run_eval.data.params[param_name])
    selected_panel.plot(
        x_values,
        [selected_value, selected_value],
        linestyle=":",
        color=eisplot.rwth_colors.colors[("darkred", 100)],
    )

fig.subplots_adjust(top=0.9)