In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Grid-search result tables, one CSV per model variant.
# Each path is kept next to the read that consumes it.
file_svdpp = "gs_results/2025-05-12-08-31-29_SVDpp_grid_search.csv"
results_svdpp = pd.read_csv(file_svdpp)

file_svdppag = "gs_results/2025-05-11-11-01-11_SVDppAG_grid_search.csv"
results_svdppag = pd.read_csv(file_svdppag)

**Plot all grid-search results against the embedding dimension (number of factors)**

In [None]:
# Scatter every grid-search row of both models: validation RMSE on the y-axis,
# number of factors on a logarithmic x-axis.
fig, ax = plt.subplots(figsize=(6, 3))

for runs, model_name in ((results_svdpp, "SVDpp"), (results_svdppag, "SVDppAG")):
    ax.scatter(runs["n_factors"], runs["validation_rmse"], label=model_name, alpha=0.5)

# Horizontal reference line shown as "Baseline 6" in the legend.
ax.axhline(0.857, color="red", linestyle="--", label="Baseline 6")

ax.set_xscale("log")
ax.set_title("Validationset RMSE vs Number of Factors")
ax.set_xlabel("Number of Factors")
ax.set_ylabel("RMSE")
ax.legend()
ax.grid()

**Plot the best grid-search result per embedding dimension (for each method)**

In [None]:
# For each model keep, per n_factors value, the single grid-search row with the
# lowest validation RMSE, then draw those minima as one line per model.
fig, ax = plt.subplots(figsize=(6, 3))

# Rows without a validation RMSE are dropped before the group-wise idxmin: a
# group made up only of NaN would otherwise yield a NaN label or raise
# (depending on the pandas version) and break the .loc lookup below.
# When no metric is missing the selection is unchanged.
svdpp_scored = results_svdpp.dropna(subset=["validation_rmse"])
results_svdpp_best = svdpp_scored.loc[
    svdpp_scored.groupby("n_factors")["validation_rmse"].idxmin()
]
svdppag_scored = results_svdppag.dropna(subset=["validation_rmse"])
results_svdppag_best = svdppag_scored.loc[
    svdppag_scored.groupby("n_factors")["validation_rmse"].idxmin()
]

# groupby sorts its keys, so the selected rows arrive ordered by n_factors and
# the connecting line runs left to right.
for best_rows, model_name in (
    (results_svdpp_best, "SVDpp"),
    (results_svdppag_best, "SVDppAG"),
):
    ax.plot(
        best_rows["n_factors"],
        best_rows["validation_rmse"],
        label=model_name,
        marker="o",
        alpha=0.5,
    )

# Horizontal reference line shown as "Baseline 6" in the legend.
ax.axhline(0.857, color="red", linestyle="--", label="Baseline 6")

ax.set_xscale("log")
ax.set_title("Validation RMSE vs Number of Factors (Best)")
ax.set_xlabel("Number of Factors")
ax.set_ylabel("RMSE")
ax.legend()
ax.grid()

## Evaluate the best grid-search configuration per embedding dimension (for each method) on the test set

In [None]:
# Test-set results of the best configurations, one CSV per model variant.
# NOTE: this rebinds file_svdpp / file_svdppag / results_svdpp / results_svdppag,
# which the first load cell also assigns. After running this cell, the
# validation plots above will read these test tables unless the first load cell
# is re-run.
file_svdpp = "gs_results/2025-05-13-08-56-45_SVDpp_best_configs_test_results.csv"
results_svdpp = pd.read_csv(file_svdpp)

file_svdppag = "gs_results/2025-05-13-08-56-45_SVDppAG_best_configs_test_results.csv"
results_svdppag = pd.read_csv(file_svdppag)

In [None]:
# For each model keep, per n_factors value, the row with the lowest test RMSE and
# draw those values as one line per model.
# NOTE(review): if a CSV holds more than one row per n_factors, taking the minimum
# of final_test_rmse selects on the test set — confirm there is exactly one row
# per n_factors, in which case the idxmin step only orders the rows.
fig, ax = plt.subplots(figsize=(6, 3))

# Rows without a test RMSE are dropped before the group-wise idxmin: a group
# made up only of NaN would otherwise yield a NaN label or raise (depending on
# the pandas version) and break the .loc lookup below.
# When no metric is missing the selection is unchanged.
svdpp_scored = results_svdpp.dropna(subset=["final_test_rmse"])
results_svdpp_best = svdpp_scored.loc[
    svdpp_scored.groupby("n_factors")["final_test_rmse"].idxmin()
]
svdppag_scored = results_svdppag.dropna(subset=["final_test_rmse"])
results_svdppag_best = svdppag_scored.loc[
    svdppag_scored.groupby("n_factors")["final_test_rmse"].idxmin()
]

# groupby sorts its keys, so the selected rows arrive ordered by n_factors and
# the connecting line runs left to right.
for best_rows, model_name in (
    (results_svdpp_best, "SVDpp"),
    (results_svdppag_best, "SVDppAG"),
):
    ax.plot(
        best_rows["n_factors"],
        best_rows["final_test_rmse"],
        label=model_name,
        marker="o",
        alpha=0.5,
    )

# Horizontal reference line shown as "Baseline 6" in the legend.
ax.axhline(0.857, color="red", linestyle="--", label="Baseline 6")

ax.set_xscale("log")
ax.set_title("Testset RMSE vs Number of Factors (Best)")
ax.set_xlabel("Number of Factors")
ax.set_ylabel("RMSE")
ax.legend()
ax.grid()