In [1]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, confidence_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.nearest_neighbours import CosineRecommender
from implicit.evaluation import train_test_split, precision_at_k, ndcg_at_k

import cornac


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories
sys.path.append(os.path.abspath(".."))

from utils.sparse import transform_dataframe_to_sparse

In [3]:
movielens_df = (
    pd.DataFrame(
        data=cornac.datasets.movielens.load_feedback(variant="100K"),
        columns=['user_id', 'item_id', 'target']
    )
    .loc[:, ['user_id', 'item_id', 'target']]
    .dropna()
)
movielens_df['user_id'].nunique(), movielens_df['item_id'].nunique(), movielens_df.shape[0]

(943, 1682, 100000)

In [4]:
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    movielens_df, row_field='user_id', col_field='item_id', data_field='target'
)


train_val_mat, test_mat = train_test_split(user_item_matrix, train_percentage=0.9, random_state=42)
train_mat, val_mat = train_test_split(train_val_mat, train_percentage=0.9, random_state=42)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (943, 1682), Val Shape: (943, 1682), Test Shape: (943, 1682)


In [None]:
results_folder = "results/movielens_100k_knn"
results_filename = "movielens_100k_knn_results.csv"

import time

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
) -> pd.DataFrame:
    results = []
    algorithms = {
        "KNN_k=20": lambda: CosineRecommender(K=20),
        "KNN_k=100": lambda: CosineRecommender(K=100),
    }
    strategies = [
        "no_weighting",
        "bm25",
        "tfidf",
        "log", 
        "confidence",
        "power",
        "normalized",
        "pmi",
        "robust_user_centric",
        "robust_user_centric_weight_v2",
        "sigmoid_propensity",
        "power_lift"
    ]
    if weighting_strategy not in strategies:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy

    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    AlgoFactory = algorithms[algorithm]

    print(f"Running optimization for {algo_name} with {strategy}...")

    def get_weighted_matrix(matrix, params):
        weighted = matrix.copy()
        if strategy == "bm25":
            weighted = bm25_weight(weighted, K1=params.get("bm25_k1"), B=params.get("bm25_b"))
        elif strategy == "confidence":
            weighted = confidence_weight(weighted, alpha=params.get("conf_alpha"))
        elif strategy == "power":
            weighted = power_weight(weighted, p=params.get("power_p"))
        elif strategy == "tfidf":
            weighted = tfidf_weight(weighted)
        elif strategy == "log":
            weighted = log_weight(weighted)
        elif strategy == "normalized":
            weighted = normalized_weight(weighted)
        elif strategy == "pmi":
            weighted = pmi_weight(weighted)
        elif strategy == "robust_user_centric":
            weighted = robust_user_centric_weight(weighted, scale_factor=params.get("scale_factor"))
        elif strategy == "sigmoid_propensity":
            weighted = sigmoid_propensity_weight(weighted, p=params.get("p"), beta=params.get("beta"))
        elif strategy == "power_lift":
            weighted = power_lift_weight(weighted, p=params.get("p"))
        elif strategy == "robust_user_centric_weight_v2":
            weighted = robust_user_centric_weight_v2(weighted, lower_q=params.get("lower_q"), upper_q=params.get("upper_q"))
        return weighted

    def objective(trial):
        params = {}
        # Suggest weighting strategy parameters
        if strategy == "bm25":
            params["bm25_k1"] = trial.suggest_float("bm25_k1", 0.1, 1000)
            params["bm25_b"] = trial.suggest_float("bm25_b", 0.0, 1.0)
        elif strategy == "confidence":
            params["conf_alpha"] = trial.suggest_float("conf_alpha", 1.0, 150.0)
        elif strategy == "power":
            params["power_p"] = trial.suggest_float("power_p", 0.1, 1.5)
        elif strategy == "robust_user_centric":
            params["scale_factor"] = trial.suggest_float("scale_factor", 0.1, 10.0)
        elif strategy == "robust_user_centric_weight_v2":
            params["lower_q"] = trial.suggest_float("lower_q", 5.0, 45.0)
            params["upper_q"] = trial.suggest_float("upper_q", 55.0, 95.0)
        elif strategy == "sigmoid_propensity":
            params["p"] = trial.suggest_float("p", 0.1, 5.0)
            params["beta"] = trial.suggest_float("beta", 0.0, 1.0)
        elif strategy == "power_lift":
            params["p"] = trial.suggest_float("p", 0.1, 1.5)
        weighted_train = get_weighted_matrix(train_mat, params)

        # Train Model
        model = AlgoFactory()
        model.fit(weighted_train, show_progress=False)

        # Evaluate on Validation Set
        return ndcg_at_k(model, train_mat, val_mat, K=20, show_progress=False)

    # Optimize only if strategy has parameters
    current_trials = n_trials if strategy in ["bm25", "confidence", "power", "robust_user_centric", "robust_user_centric_weight_v2", "sigmoid_propensity", "power_lift"] else 1
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=current_trials, n_jobs=-1)

    # --- Final Retraining & Testing ---
    # Use best params to weight the full train_val matrix
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    # Train Final Model
    final_model = AlgoFactory()
    
    start_time = time.time()
    final_model.fit(weighted_train_val, show_progress=False)
    end_time = time.time()
    
    # Evaluate on Test Set
    test_ndcg_10 = ndcg_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_precision_10 = precision_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_ndcg_20 = ndcg_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)
    test_precision_20 = precision_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results.append({
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": test_ndcg_10,
        "Test NDCG@20": test_ndcg_20,
        "Test Precision@10": test_precision_10,
        "Test Precision@20": test_precision_20,
        "Final Train Time (s)": end_time - start_time,
        "Best Params": best_params
    })

    if output_dir:
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        pd.DataFrame(results).to_csv(output_path, index=False)
    return pd.DataFrame(results)

In [6]:
if not os.path.exists(results_folder):
    os.makedirs(results_folder)

In [7]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:34,285] A new study created in memory with name: no-name-fcd06269-28bb-4b59-aeb0-11c8cd63c3f2
[I 2026-02-08 12:33:34,404] Trial 0 finished with value: 0.23373554820119585 and parameters: {}. Best is trial 0 with value: 0.23373554820119585.


Running optimization for KNN_k=20 with no_weighting...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,no_weighting,1,0.233736,0.267649,0.283587,0.287811,0.319398,0.021264,{}


In [8]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:34,735] A new study created in memory with name: no-name-bef18d5c-b9c7-4b9a-be63-965392f80877


Running optimization for KNN_k=20 with bm25...


[I 2026-02-08 12:33:35,764] Trial 1 finished with value: 0.24587195861968428 and parameters: {'bm25_k1': 753.1413517589766, 'bm25_b': 0.9228021905346769}. Best is trial 1 with value: 0.24587195861968428.
[I 2026-02-08 12:33:35,766] Trial 2 finished with value: 0.25074497533872836 and parameters: {'bm25_k1': 843.4301155557006, 'bm25_b': 0.7633158634919186}. Best is trial 2 with value: 0.25074497533872836.
[I 2026-02-08 12:33:35,786] Trial 0 finished with value: 0.2456440456384295 and parameters: {'bm25_k1': 932.0124068688167, 'bm25_b': 0.9046777790596461}. Best is trial 2 with value: 0.25074497533872836.
[I 2026-02-08 12:33:35,787] Trial 6 finished with value: 0.2459797129244248 and parameters: {'bm25_k1': 769.9126874757816, 'bm25_b': 0.8985004493217074}. Best is trial 2 with value: 0.25074497533872836.
[I 2026-02-08 12:33:35,789] Trial 4 finished with value: 0.24954791412715438 and parameters: {'bm25_k1': 993.759596486624, 'bm25_b': 0.6068562008400674}. Best is trial 2 with value: 0.25

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,bm25,20,0.250745,0.287349,0.300805,0.30381,0.328754,0.019425,"{'bm25_k1': 843.4301155557006, 'bm25_b': 0.763..."


In [9]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)


[I 2026-02-08 12:33:37,705] A new study created in memory with name: no-name-0c73b0b2-36b4-4360-b782-706cfeb4a222
[I 2026-02-08 12:33:37,809] Trial 0 finished with value: 0.2346100478722425 and parameters: {}. Best is trial 0 with value: 0.2346100478722425.


Running optimization for KNN_k=20 with tfidf...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,tfidf,1,0.23461,0.26798,0.283676,0.286492,0.317622,0.023715,{}


In [10]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:38,139] A new study created in memory with name: no-name-88e85151-36ab-463c-a223-2e5c8f51a7a6
[I 2026-02-08 12:33:38,234] Trial 0 finished with value: 0.23424453370163362 and parameters: {}. Best is trial 0 with value: 0.23424453370163362.


Running optimization for KNN_k=20 with log...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,log,1,0.234245,0.268971,0.283892,0.288141,0.317622,0.020064,{}


In [11]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:38,560] A new study created in memory with name: no-name-31dddd65-6956-496d-9f00-1e1941e94090


Running optimization for KNN_k=20 with confidence...


[I 2026-02-08 12:33:39,591] Trial 3 finished with value: 0.23393133299607183 and parameters: {'conf_alpha': 140.83374492820664}. Best is trial 3 with value: 0.23393133299607183.
[I 2026-02-08 12:33:39,598] Trial 0 finished with value: 0.2342022748590951 and parameters: {'conf_alpha': 23.13588596543061}. Best is trial 0 with value: 0.2342022748590951.
[I 2026-02-08 12:33:39,607] Trial 4 finished with value: 0.23426176252606515 and parameters: {'conf_alpha': 82.44129092521709}. Best is trial 4 with value: 0.23426176252606515.
[I 2026-02-08 12:33:39,607] Trial 1 finished with value: 0.2343300574748102 and parameters: {'conf_alpha': 70.35546636930405}. Best is trial 1 with value: 0.2343300574748102.
[I 2026-02-08 12:33:39,611] Trial 6 finished with value: 0.2342809281609162 and parameters: {'conf_alpha': 31.350555981050736}. Best is trial 1 with value: 0.2343300574748102.
[I 2026-02-08 12:33:39,614] Trial 2 finished with value: 0.23393133299607183 and parameters: {'conf_alpha': 126.2179682

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,confidence,20,0.234762,0.268589,0.283542,0.288141,0.317504,0.019218,{'conf_alpha': 55.13187607675897}


In [12]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:41,483] A new study created in memory with name: no-name-33e977c2-0f7a-45af-8cec-c360507a8773


Running optimization for KNN_k=20 with power...


[I 2026-02-08 12:33:42,501] Trial 2 finished with value: 0.23250684703426572 and parameters: {'power_p': 0.1301253311960954}. Best is trial 2 with value: 0.23250684703426572.
[I 2026-02-08 12:33:42,506] Trial 4 finished with value: 0.23325571086696603 and parameters: {'power_p': 0.312984077993017}. Best is trial 4 with value: 0.23325571086696603.
[I 2026-02-08 12:33:42,519] Trial 0 finished with value: 0.23273683257979705 and parameters: {'power_p': 0.2331303662939092}. Best is trial 4 with value: 0.23325571086696603.
[I 2026-02-08 12:33:42,531] Trial 6 finished with value: 0.2335647425748064 and parameters: {'power_p': 0.44627778690183406}. Best is trial 6 with value: 0.2335647425748064.
[I 2026-02-08 12:33:42,536] Trial 7 finished with value: 0.23385295725710806 and parameters: {'power_p': 1.1351209594146094}. Best is trial 7 with value: 0.23385295725710806.
[I 2026-02-08 12:33:42,541] Trial 5 finished with value: 0.23308203547579356 and parameters: {'power_p': 0.9597855703389053}. B

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,power,20,0.234443,0.268403,0.283895,0.287317,0.318096,0.019508,{'power_p': 0.507006817593925}


In [13]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:44,427] A new study created in memory with name: no-name-c3b44ff3-d9da-4471-a8e9-0ab59f8b7953
[I 2026-02-08 12:33:44,523] Trial 0 finished with value: 0.24713364294316273 and parameters: {}. Best is trial 0 with value: 0.24713364294316273.


Running optimization for KNN_k=20 with normalized...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,normalized,1,0.247134,0.283596,0.298867,0.301666,0.331715,0.019569,{}


In [14]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:44,852] A new study created in memory with name: no-name-61dc4f7c-d549-4640-ac70-da3d07427978
[I 2026-02-08 12:33:44,958] Trial 0 finished with value: 0.24875852135711207 and parameters: {}. Best is trial 0 with value: 0.24875852135711207.


Running optimization for KNN_k=20 with pmi...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,pmi,1,0.248759,0.284989,0.301401,0.304965,0.332662,0.019414,{}


In [15]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:45,290] A new study created in memory with name: no-name-caeeddc8-629b-4a54-9bad-af15db7f1d9a


Running optimization for KNN_k=20 with robust_user_centric...


[I 2026-02-08 12:33:46,316] Trial 0 finished with value: 0.22979552292240762 and parameters: {'scale_factor': 2.6717703265899693}. Best is trial 0 with value: 0.22979552292240762.
[I 2026-02-08 12:33:46,343] Trial 2 finished with value: 0.22979552292240762 and parameters: {'scale_factor': 6.0433796177207695}. Best is trial 0 with value: 0.22979552292240762.
[I 2026-02-08 12:33:46,350] Trial 5 finished with value: 0.22979552292240762 and parameters: {'scale_factor': 9.689203893575725}. Best is trial 0 with value: 0.22979552292240762.
[I 2026-02-08 12:33:46,354] Trial 1 finished with value: 0.22979552292240762 and parameters: {'scale_factor': 5.830402440225622}. Best is trial 0 with value: 0.22979552292240762.
[I 2026-02-08 12:33:46,355] Trial 3 finished with value: 0.22979552292240762 and parameters: {'scale_factor': 7.938306041212785}. Best is trial 0 with value: 0.22979552292240762.
[I 2026-02-08 12:33:46,359] Trial 6 finished with value: 0.22979552292240762 and parameters: {'scale_fa

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,robust_user_centric,20,0.229796,0.270866,0.285502,0.289461,0.318806,0.019487,{'scale_factor': 2.6717703265899693}


In [16]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:48,241] A new study created in memory with name: no-name-bc1f240d-c8c1-4f8e-8352-dd274825721f


Running optimization for KNN_k=20 with robust_user_centric_weight_v2...


[I 2026-02-08 12:33:49,248] Trial 0 finished with value: 0.23169477695735444 and parameters: {'lower_q': 35.61981499132288, 'upper_q': 55.37901032529377}. Best is trial 0 with value: 0.23169477695735444.
[I 2026-02-08 12:33:49,276] Trial 1 finished with value: 0.23524692198182948 and parameters: {'lower_q': 13.009637863954161, 'upper_q': 57.803594736337075}. Best is trial 1 with value: 0.23524692198182948.
[I 2026-02-08 12:33:49,282] Trial 3 finished with value: 0.23235930550199707 and parameters: {'lower_q': 24.56449508218757, 'upper_q': 88.08895234397025}. Best is trial 1 with value: 0.23524692198182948.
[I 2026-02-08 12:33:49,286] Trial 4 finished with value: 0.23178575113986377 and parameters: {'lower_q': 34.17081569458512, 'upper_q': 94.91400220004196}. Best is trial 1 with value: 0.23524692198182948.
[I 2026-02-08 12:33:49,292] Trial 5 finished with value: 0.23357431185959612 and parameters: {'lower_q': 15.534599302013223, 'upper_q': 78.10167595487607}. Best is trial 1 with value

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,robust_user_centric_weight_v2,20,0.235247,0.269902,0.284681,0.288471,0.318333,0.020085,"{'lower_q': 13.009637863954161, 'upper_q': 57...."


In [17]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:51,191] A new study created in memory with name: no-name-4a1bd885-d884-4f0b-b73a-c3804fb44ae7


Running optimization for KNN_k=20 with sigmoid_propensity...


[I 2026-02-08 12:33:52,211] Trial 2 finished with value: 0.2323271167169136 and parameters: {'p': 0.8267058875644404, 'beta': 0.16722672959869211}. Best is trial 2 with value: 0.2323271167169136.
[I 2026-02-08 12:33:52,218] Trial 1 finished with value: 0.23207096610061473 and parameters: {'p': 1.6802242171590813, 'beta': 0.38874473328550563}. Best is trial 2 with value: 0.2323271167169136.
[I 2026-02-08 12:33:52,233] Trial 6 finished with value: 0.23093274236876896 and parameters: {'p': 3.554905656149754, 'beta': 0.925340643516393}. Best is trial 2 with value: 0.2323271167169136.
[I 2026-02-08 12:33:52,239] Trial 0 finished with value: 0.22970018320572527 and parameters: {'p': 4.733388613450471, 'beta': 0.05004289864898248}. Best is trial 2 with value: 0.2323271167169136.
[I 2026-02-08 12:33:52,243] Trial 4 finished with value: 0.23260270175033237 and parameters: {'p': 1.9260302576609296, 'beta': 0.2967113312777291}. Best is trial 4 with value: 0.23260270175033237.
[I 2026-02-08 12:33:

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,sigmoid_propensity,20,0.234007,0.269108,0.28341,0.288966,0.317622,0.019647,"{'p': 1.0751860580401422, 'beta': 0.0450192037..."


In [18]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:54,120] A new study created in memory with name: no-name-5963d933-a2e9-485c-905b-32bc9a3717a6


Running optimization for KNN_k=20 with power_lift...


[I 2026-02-08 12:33:55,134] Trial 1 finished with value: 0.24408318079457034 and parameters: {'p': 0.9418135129337946}. Best is trial 1 with value: 0.24408318079457034.
[I 2026-02-08 12:33:55,137] Trial 7 finished with value: 0.2396472099492866 and parameters: {'p': 0.29579900914823354}. Best is trial 1 with value: 0.24408318079457034.
[I 2026-02-08 12:33:55,150] Trial 4 finished with value: 0.24645412010678655 and parameters: {'p': 0.874576236478479}. Best is trial 4 with value: 0.24645412010678655.
[I 2026-02-08 12:33:55,175] Trial 6 finished with value: 0.21785501082701297 and parameters: {'p': 1.3615887187722033}. Best is trial 4 with value: 0.24645412010678655.
[I 2026-02-08 12:33:55,178] Trial 2 finished with value: 0.24384528714217696 and parameters: {'p': 0.6414434332562752}. Best is trial 4 with value: 0.24645412010678655.
[I 2026-02-08 12:33:55,193] Trial 3 finished with value: 0.21677944840786428 and parameters: {'p': 1.399401441911709}. Best is trial 4 with value: 0.2464541

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,power_lift,20,0.246454,0.283755,0.29838,0.296388,0.324609,0.020371,{'p': 0.874576236478479}


In [19]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:57,053] A new study created in memory with name: no-name-c0dc67af-d273-46c3-bcd1-8ef8c84464a9
[I 2026-02-08 12:33:57,185] Trial 0 finished with value: 0.21864221775713796 and parameters: {}. Best is trial 0 with value: 0.21864221775713796.


Running optimization for KNN_k=100 with no_weighting...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,no_weighting,1,0.218642,0.254108,0.265007,0.263071,0.288015,0.031892,{}


In [20]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:33:57,639] A new study created in memory with name: no-name-864451fe-40b4-4126-8fd0-edbf4a5be30c


Running optimization for KNN_k=100 with bm25...


[I 2026-02-08 12:33:58,752] Trial 4 finished with value: 0.24271047563068338 and parameters: {'bm25_k1': 823.9427042303397, 'bm25_b': 0.5237890449915392}. Best is trial 4 with value: 0.24271047563068338.
[I 2026-02-08 12:33:58,787] Trial 3 finished with value: 0.24980451440448928 and parameters: {'bm25_k1': 757.9705877851912, 'bm25_b': 0.9669378381980822}. Best is trial 3 with value: 0.24980451440448928.
[I 2026-02-08 12:33:58,788] Trial 6 finished with value: 0.2453502969080504 and parameters: {'bm25_k1': 623.9581703553378, 'bm25_b': 0.6042356642714714}. Best is trial 3 with value: 0.24980451440448928.
[I 2026-02-08 12:33:58,792] Trial 5 finished with value: 0.24030166087341193 and parameters: {'bm25_k1': 450.9980713788888, 'bm25_b': 0.4683062841500395}. Best is trial 3 with value: 0.24980451440448928.
[I 2026-02-08 12:33:58,801] Trial 0 finished with value: 0.24452364635088927 and parameters: {'bm25_k1': 951.4134476125347, 'bm25_b': 0.5543700774733485}. Best is trial 3 with value: 0.

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,bm25,20,0.251625,0.293014,0.30985,0.30414,0.330886,0.028216,"{'bm25_k1': 338.7558204049336, 'bm25_b': 0.996..."


In [21]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)


[I 2026-02-08 12:34:00,977] A new study created in memory with name: no-name-7e9750c3-2cc9-473f-bb2f-80262d6543ae
[I 2026-02-08 12:34:01,111] Trial 0 finished with value: 0.2187007673634899 and parameters: {}. Best is trial 0 with value: 0.2187007673634899.


Running optimization for KNN_k=100 with tfidf...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,tfidf,1,0.218701,0.254346,0.265,0.262741,0.287305,0.042669,{}


In [22]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:01,578] A new study created in memory with name: no-name-972c0ac9-a1fe-4307-948d-a38f659bb65a
[I 2026-02-08 12:34:01,714] Trial 0 finished with value: 0.2189509309995027 and parameters: {}. Best is trial 0 with value: 0.2189509309995027.


Running optimization for KNN_k=100 with log...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,log,1,0.218951,0.253354,0.265168,0.261917,0.288134,0.02852,{}


In [23]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:02,165] A new study created in memory with name: no-name-dc8d7ed8-0c10-4c00-9c42-0f7376e72130


Running optimization for KNN_k=100 with confidence...


[I 2026-02-08 12:34:03,311] Trial 6 finished with value: 0.21915835470861375 and parameters: {'conf_alpha': 62.880175408668876}. Best is trial 6 with value: 0.21915835470861375.
[I 2026-02-08 12:34:03,312] Trial 5 finished with value: 0.21917784010281435 and parameters: {'conf_alpha': 49.063046157024544}. Best is trial 5 with value: 0.21917784010281435.
[I 2026-02-08 12:34:03,319] Trial 3 finished with value: 0.2190475167461944 and parameters: {'conf_alpha': 28.984433395167084}. Best is trial 5 with value: 0.21917784010281435.
[I 2026-02-08 12:34:03,326] Trial 0 finished with value: 0.21902047404426914 and parameters: {'conf_alpha': 119.72717385716399}. Best is trial 5 with value: 0.21917784010281435.
[I 2026-02-08 12:34:03,330] Trial 1 finished with value: 0.2187427386304948 and parameters: {'conf_alpha': 23.680018592150567}. Best is trial 5 with value: 0.21917784010281435.
[I 2026-02-08 12:34:03,336] Trial 4 finished with value: 0.21902047404426914 and parameters: {'conf_alpha': 128.

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,confidence,20,0.219214,0.253918,0.265348,0.262246,0.288252,0.029165,{'conf_alpha': 17.627894967793516}


In [24]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:05,505] A new study created in memory with name: no-name-7cf4b0bf-9b71-41f6-8962-08ad20fcadf0


Running optimization for KNN_k=100 with power...


[I 2026-02-08 12:34:06,648] Trial 0 finished with value: 0.21873522086040706 and parameters: {'power_p': 1.491984719269772}. Best is trial 0 with value: 0.21873522086040706.
[I 2026-02-08 12:34:06,653] Trial 5 finished with value: 0.21878062000263718 and parameters: {'power_p': 1.4694064337393604}. Best is trial 5 with value: 0.21878062000263718.
[I 2026-02-08 12:34:06,663] Trial 3 finished with value: 0.21826104257386972 and parameters: {'power_p': 0.15606884043970565}. Best is trial 5 with value: 0.21878062000263718.
[I 2026-02-08 12:34:06,667] Trial 1 finished with value: 0.21777198671718323 and parameters: {'power_p': 0.4223315829444123}. Best is trial 5 with value: 0.21878062000263718.
[I 2026-02-08 12:34:06,668] Trial 4 finished with value: 0.2188387469283711 and parameters: {'power_p': 0.2809019224146784}. Best is trial 4 with value: 0.2188387469283711.
[I 2026-02-08 12:34:06,668] Trial 6 finished with value: 0.21873768335030386 and parameters: {'power_p': 0.27014981398782123}. 

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,power,20,0.220329,0.254452,0.264948,0.263731,0.287778,0.04732,{'power_p': 0.6384267877860578}


In [25]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:09,039] A new study created in memory with name: no-name-1a3cac60-e103-467d-9d2c-f840a0bd4337
[I 2026-02-08 12:34:09,171] Trial 0 finished with value: 0.2416550544240349 and parameters: {}. Best is trial 0 with value: 0.2416550544240349.


Running optimization for KNN_k=100 with normalized...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,normalized,1,0.241655,0.279204,0.295025,0.287152,0.31324,0.027931,{}


In [26]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:09,629] A new study created in memory with name: no-name-52e44623-6fdd-40c9-8de5-24214e5be53e
[I 2026-02-08 12:34:09,763] Trial 0 finished with value: 0.2487289736907072 and parameters: {}. Best is trial 0 with value: 0.2487289736907072.


Running optimization for KNN_k=100 with pmi...




Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,pmi,1,0.248729,0.29483,0.309707,0.311397,0.335505,0.028297,{}


In [27]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:10,235] A new study created in memory with name: no-name-0edef15e-5ff6-4569-9fde-a87f7d49a468


Running optimization for KNN_k=100 with robust_user_centric...


[I 2026-02-08 12:34:11,374] Trial 0 finished with value: 0.21906530333303553 and parameters: {'scale_factor': 4.261308278397727}. Best is trial 0 with value: 0.21906530333303553.
[I 2026-02-08 12:34:11,394] Trial 6 finished with value: 0.21906530333303553 and parameters: {'scale_factor': 7.0128845547340255}. Best is trial 0 with value: 0.21906530333303553.
[I 2026-02-08 12:34:11,395] Trial 1 finished with value: 0.21906530333303553 and parameters: {'scale_factor': 0.6305344412709322}. Best is trial 0 with value: 0.21906530333303553.
[I 2026-02-08 12:34:11,402] Trial 7 finished with value: 0.21906530333303553 and parameters: {'scale_factor': 5.970719247168275}. Best is trial 0 with value: 0.21906530333303553.
[I 2026-02-08 12:34:11,410] Trial 4 finished with value: 0.21906530333303553 and parameters: {'scale_factor': 1.451401526101793}. Best is trial 0 with value: 0.21906530333303553.
[I 2026-02-08 12:34:11,415] Trial 5 finished with value: 0.21906530333303553 and parameters: {'scale_fa

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,robust_user_centric,20,0.219065,0.255533,0.265849,0.263236,0.286594,0.031745,{'scale_factor': 4.261308278397727}


In [28]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:13,619] A new study created in memory with name: no-name-441df084-6a3d-46d5-9687-a3d5402936c7


Running optimization for KNN_k=100 with robust_user_centric_weight_v2...


[I 2026-02-08 12:34:14,777] Trial 4 finished with value: 0.21963568018030985 and parameters: {'lower_q': 21.63852469842606, 'upper_q': 83.95386162456788}. Best is trial 4 with value: 0.21963568018030985.
[I 2026-02-08 12:34:14,780] Trial 0 finished with value: 0.22275212721923915 and parameters: {'lower_q': 12.938312281812854, 'upper_q': 62.26798419488069}. Best is trial 0 with value: 0.22275212721923915.
[I 2026-02-08 12:34:14,800] Trial 1 finished with value: 0.22082274688167136 and parameters: {'lower_q': 44.66024530997343, 'upper_q': 94.7184229336255}. Best is trial 0 with value: 0.22275212721923915.
[I 2026-02-08 12:34:14,801] Trial 2 finished with value: 0.22054650805458204 and parameters: {'lower_q': 34.81352469580665, 'upper_q': 59.921707570586726}. Best is trial 0 with value: 0.22275212721923915.
[I 2026-02-08 12:34:14,803] Trial 6 finished with value: 0.22043127259756168 and parameters: {'lower_q': 12.754948184240117, 'upper_q': 80.63564227035923}. Best is trial 0 with value:

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,robust_user_centric_weight_v2,20,0.223226,0.25759,0.267439,0.264556,0.286949,0.047773,"{'lower_q': 11.265479498276028, 'upper_q': 56...."


In [29]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:17,110] A new study created in memory with name: no-name-dbc1e77f-c04f-4d8a-b173-bc5a0ecf543d


Running optimization for KNN_k=100 with sigmoid_propensity...


[I 2026-02-08 12:34:18,300] Trial 4 finished with value: 0.2157339523527103 and parameters: {'p': 3.30884089406085, 'beta': 0.5941470513275641}. Best is trial 4 with value: 0.2157339523527103.
[I 2026-02-08 12:34:18,301] Trial 2 finished with value: 0.21502460851150315 and parameters: {'p': 3.8745428588031663, 'beta': 0.27771456326769706}. Best is trial 4 with value: 0.2157339523527103.
[I 2026-02-08 12:34:18,331] Trial 6 finished with value: 0.215086063404078 and parameters: {'p': 4.436286882141767, 'beta': 0.4235073175850246}. Best is trial 4 with value: 0.2157339523527103.
[I 2026-02-08 12:34:18,340] Trial 5 finished with value: 0.2185019279556022 and parameters: {'p': 0.508307166368734, 'beta': 0.3763394992303931}. Best is trial 5 with value: 0.2185019279556022.
[I 2026-02-08 12:34:18,350] Trial 0 finished with value: 0.216796249696129 and parameters: {'p': 2.591881622572627, 'beta': 0.41797751759150004}. Best is trial 5 with value: 0.2185019279556022.
[I 2026-02-08 12:34:18,359] T

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,sigmoid_propensity,20,0.219166,0.252285,0.261865,0.260762,0.281975,0.02931,"{'p': 0.2836829624877627, 'beta': 0.5821600768..."


In [30]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:34:20,652] A new study created in memory with name: no-name-af9e18b3-8d1b-4b37-a0e7-f4c1561fd8d7


Running optimization for KNN_k=100 with power_lift...


[I 2026-02-08 12:34:21,774] Trial 1 finished with value: 0.25273250585038576 and parameters: {'p': 1.1627526935071715}. Best is trial 1 with value: 0.25273250585038576.
[I 2026-02-08 12:34:21,796] Trial 7 finished with value: 0.2488646377035625 and parameters: {'p': 0.7897857108413825}. Best is trial 1 with value: 0.25273250585038576.
[I 2026-02-08 12:34:21,802] Trial 0 finished with value: 0.2449081191242222 and parameters: {'p': 0.710260197636714}. Best is trial 1 with value: 0.25273250585038576.
[I 2026-02-08 12:34:21,807] Trial 6 finished with value: 0.2526530239336184 and parameters: {'p': 1.3381368504167155}. Best is trial 1 with value: 0.25273250585038576.
[I 2026-02-08 12:34:21,813] Trial 4 finished with value: 0.24797590152688875 and parameters: {'p': 0.7986619620273337}. Best is trial 1 with value: 0.25273250585038576.
[I 2026-02-08 12:34:21,824] Trial 5 finished with value: 0.24464378898540878 and parameters: {'p': 0.7019375200966483}. Best is trial 1 with value: 0.252732505

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,power_lift,20,0.252733,0.297673,0.314228,0.307439,0.33432,0.028892,{'p': 1.1627526935071715}


In [31]:
import glob

all_results = []
# Match any CSV in the result folder
for f in glob.glob(f"{results_folder}/*.csv"):
    all_results.append(pd.read_csv(f))

if all_results:
    experiment_results = pd.concat(all_results)
    experiment_results = experiment_results.sort_values("Test NDCG@20", ascending=False)
    experiment_results.to_csv(results_filename, index=False)
else:
    print("No results found.")

experiment_results

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,power_lift,20,0.252733,0.297673,0.314228,0.307439,0.33432,0.028892,{'p': 1.1627526935071715}
0,KNN_k=100,bm25,20,0.251625,0.293014,0.30985,0.30414,0.330886,0.028216,"{'bm25_k1': 338.7558204049336, 'bm25_b': 0.996..."
0,KNN_k=100,pmi,1,0.248729,0.29483,0.309707,0.311397,0.335505,0.028297,{}
0,KNN_k=20,pmi,1,0.248759,0.284989,0.301401,0.304965,0.332662,0.019414,{}
0,KNN_k=20,bm25,20,0.250745,0.287349,0.300805,0.30381,0.328754,0.019425,"{'bm25_k1': 843.4301155557006, 'bm25_b': 0.763..."
0,KNN_k=20,normalized,1,0.247134,0.283596,0.298867,0.301666,0.331715,0.019569,{}
0,KNN_k=20,power_lift,20,0.246454,0.283755,0.29838,0.296388,0.324609,0.020371,{'p': 0.874576236478479}
0,KNN_k=100,normalized,1,0.241655,0.279204,0.295025,0.287152,0.31324,0.027931,{}
0,KNN_k=20,robust_user_centric,20,0.229796,0.270866,0.285502,0.289461,0.318806,0.019487,{'scale_factor': 2.6717703265899693}
0,KNN_k=20,robust_user_centric_weight_v2,20,0.235247,0.269902,0.284681,0.288471,0.318333,0.020085,"{'lower_q': 13.009637863954161, 'upper_q': 57...."
