In [None]:
from pathlib import Path

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Absolute locations of the CV result tables and of the figure output folder,
# both resolved from paths relative to the current working directory.
results_dir, fig_dir = (
    Path(relative).resolve() for relative in ("../data/results", "../figures")
)

# Analysis of the Logistic Regression Results

In [None]:
# Load the logistic-regression CV results; the regularization parameter column
# is read as strings.
lr_results = pd.read_csv(
    results_dir / "logistic_regression_cv_results.csv",
    dtype={"param_logreg__regularization": str},
)

# Drop the `param_<step>__` prefixes so the hyperparameter columns can be
# referred to by their bare names (e.g. "regularization", "lam").
for step_prefix in ("param_logreg__", "param_win_selector__", "param_pca__"):
    lr_results.columns = lr_results.columns.str.replace(
        step_prefix, "", regex=False
    )

# Rows without a lambda get the placeholder "NA" so they still form a hue level.
lr_results["lam"] = lr_results["lam"].fillna("NA")

# Missing regularization is labelled "None"; l1/l2 are upper-cased for display.
lr_results["regularization"] = (
    lr_results["regularization"].fillna("None").replace({"l1": "L1", "l2": "L2"})
)

In [None]:
# Apply seaborn's "darkgrid" theme to the figures drawn in the cells below.
sns.set_theme(style="darkgrid")

In [None]:
# Lambda levels that receive a viridis colour. Assumes these are the only
# non-missing values in the `lam` column, since seaborn needs a palette entry
# for every hue level -- TODO confirm against the CV grid.
lam_levels = np.array([0.01, 0.1, 1.0, 10.0])

# Logarithmic normalisation (a Normalize instance, despite the `cmap` name):
# 0.01 maps to 0, 10 maps to 1, and every decade is an equal step in between.
cmap = mcolors.LogNorm(vmin=0.01, vmax=10)

# Sample the colormap at the lambda levels themselves. Sampling at linearly
# spaced points (0.01, 3.34, 6.67, 10) would squeeze the three largest levels
# into the top sixth of viridis and make them nearly indistinguishable.
colors = plt.cm.viridis(cmap(lam_levels))

# Map each hue level to its colour; rows without a lambda ("NA") are drawn grey.
hue_dict = {
    0.01: colors[0],
    0.1: colors[1],
    1.0: colors[2],
    10: colors[3],
    "NA": "grey",
}

# CV accuracy per regularization method, split by lambda.
ax = sns.boxplot(
    y="mean_test_score",
    data=lr_results,
    x="regularization",
    hue="lam",
    palette=hue_dict,
)
# ax.set_title("Logistic Regression\nCV Mean Test Accuracy by Regularization and Lambda")
ax.set_ylabel("CV Mean Test Accuracy")
ax.set_xlabel("Regularization Method")
_ = ax.legend(loc="lower left", title="Lambda")
plt.savefig(
    fig_dir / "lr_accuracy_regularization_lam.png", dpi=600, bbox_inches="tight"
)

In [None]:
# Mean fit time for each tolerance value, one box per regularization method.
ax = sns.boxplot(
    data=lr_results,
    x="tol",
    y="mean_fit_time",
    hue="regularization",
)
ax.set(xlabel="Loss Change Tolerance", ylabel="CV Mean Fit Time (s)")
ax.legend(title="Regularization")
# The boxplot was drawn on the current figure, so saving through the axes'
# figure writes the same plot.
ax.figure.savefig(fig_dir / "lr_fit_time_tolerance.png", dpi=600, bbox_inches="tight")

In [None]:
# Mean test accuracy for each tolerance value, one box per regularization method.
# NOTE: this figure is only displayed; it is not written to fig_dir.
ax = sns.boxplot(y="mean_test_score", data=lr_results, x="tol", hue="regularization")
# The y-axis shows mean_test_score, so label it as accuracy (not fit time).
ax.set_ylabel("CV Mean Test Accuracy")
ax.set_xlabel("Loss Change Tolerance")
# Assign the result so the cell does not echo the Legend repr.
_ = ax.legend(title="Regularization")

In [None]:
# Mean test accuracy for each window size, using every row of lr_results.
ax = sns.boxplot(data=lr_results, x="win_size", y="mean_test_score")
ax.set(xlabel="Window Size (N_FFT)", ylabel="CV Mean Test Accuracy")
# The boxplot was drawn on the current figure, so saving through the axes'
# figure writes the same plot.
ax.figure.savefig(
    fig_dir / "lr_accuracy_win_size_uniform.png", dpi=600, bbox_inches="tight"
)

In [None]:
# Mean test accuracy for each window size, one box per regularization method.
ax = sns.boxplot(
    data=lr_results,
    x="win_size",
    y="mean_test_score",
    hue="regularization",
)
ax.set(xlabel="Window Size (N_FFT)", ylabel="CV Mean Test Accuracy")
ax.legend(loc="lower left", title="Regularization")
# The boxplot was drawn on the current figure, so saving through the axes'
# figure writes the same plot.
ax.figure.savefig(
    fig_dir / "lr_accuracy_win_size_by_regularization.png",
    dpi=600,
    bbox_inches="tight",
)

In [None]:
# Mean test accuracy for each number of PCA components, using every row of
# lr_results.
# NOTE: this figure is only displayed; it has no hue legend and is not written
# to fig_dir.
ax = sns.boxplot(
    data=lr_results,
    x="n_components",
    y="mean_test_score",
)
ax.set_ylabel("CV Mean Test Accuracy")
ax.set_xlabel("PCA Components")