In [None]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, confidence_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.nearest_neighbours import CosineRecommender
from implicit.evaluation import train_test_split, ranking_metrics_at_k

import cornac


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories
sys.path.append(os.path.abspath(".."))

from utils.sparse import transform_dataframe_to_sparse

In [3]:
movielens_df = (
    pd.DataFrame(
        data=cornac.datasets.movielens.load_feedback(variant="20M"),
        columns=['user_id', 'item_id', 'target']
    )
    .loc[:, ['user_id', 'item_id', 'target']]
    .dropna()
)
movielens_df['user_id'].nunique(), movielens_df['item_id'].nunique(), movielens_df.shape[0]

(138493, 26744, 20000263)

In [4]:
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    movielens_df, row_field='user_id', col_field='item_id', data_field='target'
)


train_val_mat, test_mat = train_test_split(user_item_matrix, train_percentage=0.9, random_state=42)
train_mat, val_mat = train_test_split(train_val_mat, train_percentage=0.9, random_state=42)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (138493, 26744), Val Shape: (138493, 26744), Test Shape: (138493, 26744)


In [None]:
results_folder = "results/movielens_20m_knn"
results_filename = "movielens_20m_knn_results.csv"

import time

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
) -> pd.DataFrame:
    results = []
    algorithms = {
        "KNN_k=20": lambda: CosineRecommender(K=20),
        "KNN_k=100": lambda: CosineRecommender(K=100),
    }
    strategies = [
        "no_weighting",
        "bm25",
        "tfidf",
        "log", 
        "confidence",
        "power",
        "normalized",
        "pmi",
        "robust_user_centric",
        "robust_user_centric_weight_v2",
        "sigmoid_propensity",
        "power_lift"
    ]
    if weighting_strategy not in strategies:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy

    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    AlgoFactory = algorithms[algorithm]

    print(f"Running optimization for {algo_name} with {strategy}...")

    def get_weighted_matrix(matrix, params):
        weighted = matrix.copy()
        if strategy == "bm25":
            weighted = bm25_weight(weighted, K1=params.get("bm25_k1"), B=params.get("bm25_b"))
        elif strategy == "confidence":
            weighted = confidence_weight(weighted, alpha=params.get("conf_alpha"))
        elif strategy == "power":
            weighted = power_weight(weighted, p=params.get("power_p"))
        elif strategy == "tfidf":
            weighted = tfidf_weight(weighted)
        elif strategy == "log":
            weighted = log_weight(weighted)
        elif strategy == "normalized":
            weighted = normalized_weight(weighted)
        elif strategy == "pmi":
            weighted = pmi_weight(weighted)
        elif strategy == "robust_user_centric":
            weighted = robust_user_centric_weight(weighted, scale_factor=params.get("scale_factor"))
        elif strategy == "sigmoid_propensity":
            weighted = sigmoid_propensity_weight(weighted, p=params.get("p"), beta=params.get("beta"))
        elif strategy == "power_lift":
            weighted = power_lift_weight(weighted, p=params.get("p"))
        elif strategy == "robust_user_centric_weight_v2":
            weighted = robust_user_centric_weight_v2(weighted, lower_q=params.get("lower_q"), upper_q=params.get("upper_q"))
        return weighted

    def objective(trial):
        params = {}
        # Suggest weighting strategy parameters
        if strategy == "bm25":
            params["bm25_k1"] = trial.suggest_float("bm25_k1", 0.1, 1000)
            params["bm25_b"] = trial.suggest_float("bm25_b", 0.0, 1.0)
        elif strategy == "confidence":
            params["conf_alpha"] = trial.suggest_float("conf_alpha", 1.0, 150.0)
        elif strategy == "power":
            params["power_p"] = trial.suggest_float("power_p", 0.1, 1.5)
        elif strategy == "robust_user_centric":
            params["scale_factor"] = trial.suggest_float("scale_factor", 0.1, 10.0)
        elif strategy == "robust_user_centric_weight_v2":
            params["lower_q"] = trial.suggest_float("lower_q", 5.0, 45.0)
            params["upper_q"] = trial.suggest_float("upper_q", 55.0, 95.0)
        elif strategy == "sigmoid_propensity":
            params["p"] = trial.suggest_float("p", 0.1, 5.0)
            params["beta"] = trial.suggest_float("beta", 0.0, 1.0)
        elif strategy == "power_lift":
            params["p"] = trial.suggest_float("p", 0.1, 1.5)
        weighted_train = get_weighted_matrix(train_mat, params)

        # Train Model
        model = AlgoFactory()
        model.fit(weighted_train, show_progress=False)

        # Evaluate on Validation Set
        return ranking_metrics_at_k(model, train_mat, val_mat, K=20, show_progress=False)['ndcg']

    # Optimize only if strategy has parameters
    current_trials = n_trials if strategy in ["bm25", "confidence", "power", "robust_user_centric", "robust_user_centric_weight_v2", "sigmoid_propensity", "power_lift"] else 1
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=current_trials, n_jobs=-1)

    # --- Final Retraining & Testing ---
    # Use best params to weight the full train_val matrix
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    # Train Final Model
    final_model = AlgoFactory()
    
    start_time = time.time()
    final_model.fit(weighted_train_val, show_progress=False)
    end_time = time.time()
    
    # Evaluate on Test Set
    metrics_at_10 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    metrics_at_20 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results.append({
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": metrics_at_10['ndcg'],
        "Test NDCG@20": metrics_at_20['ndcg'],
        "Test Precision@10": metrics_at_10['precision'],
        "Test Precision@20": metrics_at_20['precision'],
        "Final Train Time (s)": end_time - start_time,
        "Best Params": best_params
    })

    if output_dir:
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        pd.DataFrame(results).to_csv(output_path, index=False)
    return pd.DataFrame(results)

In [6]:
if not os.path.exists(results_folder):
    os.makedirs(results_folder)

In [7]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:47:09,475] A new study created in memory with name: no-name-5d6002c2-bc57-4b60-b890-41b4cb3bd255


Running optimization for KNN_k=20 with no_weighting...


[I 2026-02-08 12:47:25,965] Trial 0 finished with value: 0.20161055933157881 and parameters: {}. Best is trial 0 with value: 0.20161055933157881.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,no_weighting,1,0.201611,0.237085,0.246183,0.255755,0.278427,4.995014,{}


In [8]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:48:21,560] A new study created in memory with name: no-name-02cf61ed-c358-4d54-a8c3-caae0cd94888


Running optimization for KNN_k=20 with bm25...


[I 2026-02-08 12:51:06,953] Trial 6 finished with value: 0.20023386398454598 and parameters: {'bm25_k1': 647.2713673630415, 'bm25_b': 0.8352567704958077}. Best is trial 6 with value: 0.20023386398454598.
[I 2026-02-08 12:51:07,339] Trial 0 finished with value: 0.20098685120780632 and parameters: {'bm25_k1': 256.49896246327427, 'bm25_b': 0.8206597651475507}. Best is trial 0 with value: 0.20098685120780632.
[I 2026-02-08 12:51:07,625] Trial 3 finished with value: 0.20555104797536294 and parameters: {'bm25_k1': 663.4136874102792, 'bm25_b': 0.6732403510227791}. Best is trial 3 with value: 0.20555104797536294.
[I 2026-02-08 12:51:07,880] Trial 7 finished with value: 0.2046493179502178 and parameters: {'bm25_k1': 662.4213147669944, 'bm25_b': 0.6976435283917235}. Best is trial 3 with value: 0.20555104797536294.
[I 2026-02-08 12:51:08,282] Trial 5 finished with value: 0.20971264887806382 and parameters: {'bm25_k1': 347.6403750352989, 'bm25_b': 0.38668962210266455}. Best is trial 5 with value: 

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,bm25,20,0.209713,0.243675,0.254152,0.25761,0.278574,4.989317,"{'bm25_k1': 347.6403750352989, 'bm25_b': 0.386..."


In [9]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)


[I 2026-02-08 12:56:03,249] A new study created in memory with name: no-name-e2ac332b-eb1e-41d0-84ff-1f4d45b499b3


Running optimization for KNN_k=20 with tfidf...


[I 2026-02-08 12:56:20,296] Trial 0 finished with value: 0.2002632608959362 and parameters: {}. Best is trial 0 with value: 0.2002632608959362.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,tfidf,1,0.200263,0.234071,0.243943,0.252951,0.276569,5.020372,{}


In [10]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:57:15,335] A new study created in memory with name: no-name-235a9052-19af-43cb-9857-a7382462a885


Running optimization for KNN_k=20 with log...


[I 2026-02-08 12:57:32,041] Trial 0 finished with value: 0.2001506525897574 and parameters: {}. Best is trial 0 with value: 0.2001506525897574.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,log,1,0.200151,0.234212,0.24412,0.253176,0.277038,4.990729,{}


In [11]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 12:58:26,848] A new study created in memory with name: no-name-0b95f467-31ab-4462-bf72-1db2dc94b0fd


Running optimization for KNN_k=20 with confidence...


[I 2026-02-08 13:01:11,448] Trial 2 finished with value: 0.2001132522141757 and parameters: {'conf_alpha': 14.548242376416349}. Best is trial 2 with value: 0.2001132522141757.
[I 2026-02-08 13:01:11,593] Trial 7 finished with value: 0.20014897038686494 and parameters: {'conf_alpha': 99.45274932149086}. Best is trial 7 with value: 0.20014897038686494.
[I 2026-02-08 13:01:11,616] Trial 5 finished with value: 0.19992252088063983 and parameters: {'conf_alpha': 17.330253425151128}. Best is trial 7 with value: 0.20014897038686494.
[I 2026-02-08 13:01:11,694] Trial 1 finished with value: 0.2001541099135081 and parameters: {'conf_alpha': 111.80178165661334}. Best is trial 1 with value: 0.2001541099135081.
[I 2026-02-08 13:01:11,758] Trial 6 finished with value: 0.2001470314883997 and parameters: {'conf_alpha': 74.63701089558931}. Best is trial 1 with value: 0.2001541099135081.
[I 2026-02-08 13:01:11,800] Trial 3 finished with value: 0.19994082960064824 and parameters: {'conf_alpha': 20.9064454

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,confidence,20,0.200156,0.234223,0.244135,0.253179,0.277049,5.02361,{'conf_alpha': 149.6829166905879}


In [12]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 13:06:12,008] A new study created in memory with name: no-name-dc28021e-8e7d-40d9-bd8a-9572bea4c10b


Running optimization for KNN_k=20 with power...


[I 2026-02-08 13:08:57,375] Trial 5 finished with value: 0.19912080484331413 and parameters: {'power_p': 0.26344433914974974}. Best is trial 5 with value: 0.19912080484331413.
[I 2026-02-08 13:08:57,469] Trial 4 finished with value: 0.201032968752732 and parameters: {'power_p': 0.6604868304234328}. Best is trial 4 with value: 0.201032968752732.
[I 2026-02-08 13:08:57,495] Trial 1 finished with value: 0.20172462656013898 and parameters: {'power_p': 0.9122432052463034}. Best is trial 1 with value: 0.20172462656013898.
[I 2026-02-08 13:08:57,564] Trial 0 finished with value: 0.20330265922789031 and parameters: {'power_p': 1.4431310178733618}. Best is trial 0 with value: 0.20330265922789031.
[I 2026-02-08 13:08:57,583] Trial 2 finished with value: 0.19973116888019446 and parameters: {'power_p': 0.4129400247980911}. Best is trial 0 with value: 0.20330265922789031.
[I 2026-02-08 13:08:57,596] Trial 3 finished with value: 0.20213598597748483 and parameters: {'power_p': 1.1222018413436976}. Be

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,power,20,0.203449,0.239357,0.247413,0.257714,0.278771,4.999539,{'power_p': 1.4976312890997985}


In [13]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 13:13:53,582] A new study created in memory with name: no-name-7b8f7e14-1b06-4a25-ba93-8f56fad6134a


Running optimization for KNN_k=20 with normalized...


[I 2026-02-08 13:14:10,693] Trial 0 finished with value: 0.2052204174846674 and parameters: {}. Best is trial 0 with value: 0.2052204174846674.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,normalized,1,0.20522,0.236056,0.248234,0.24859,0.271035,4.985893,{}


In [14]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 13:15:06,315] A new study created in memory with name: no-name-071c5e45-5f0f-4b35-a26c-48e6985dd3bd


Running optimization for KNN_k=20 with pmi...


[I 2026-02-08 13:15:23,600] Trial 0 finished with value: 0.19956395412129016 and parameters: {}. Best is trial 0 with value: 0.19956395412129016.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,pmi,1,0.199564,0.220762,0.24034,0.234294,0.26742,4.975708,{}


In [15]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 13:16:19,231] A new study created in memory with name: no-name-dca57fff-98cb-49e0-a5f4-c54d38f976a3


Running optimization for KNN_k=20 with robust_user_centric...


[I 2026-02-08 13:19:07,393] Trial 3 finished with value: 0.20206039305367113 and parameters: {'scale_factor': 0.8239382008128486}. Best is trial 3 with value: 0.20206039305367113.
[I 2026-02-08 13:19:07,451] Trial 1 finished with value: 0.20206039305367113 and parameters: {'scale_factor': 6.173598120276049}. Best is trial 3 with value: 0.20206039305367113.
[I 2026-02-08 13:19:07,527] Trial 7 finished with value: 0.20206039305367113 and parameters: {'scale_factor': 4.620375348493361}. Best is trial 3 with value: 0.20206039305367113.
[I 2026-02-08 13:19:07,659] Trial 0 finished with value: 0.20206039305367113 and parameters: {'scale_factor': 9.28812543946419}. Best is trial 3 with value: 0.20206039305367113.
[I 2026-02-08 13:19:07,845] Trial 6 finished with value: 0.20206039305367113 and parameters: {'scale_factor': 7.520329001514316}. Best is trial 3 with value: 0.20206039305367113.
[I 2026-02-08 13:19:07,918] Trial 2 finished with value: 0.20206039305367113 and parameters: {'scale_fact

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,robust_user_centric,20,0.20206,0.23622,0.245205,0.254727,0.277244,5.000263,{'scale_factor': 0.8239382008128486}


In [16]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 13:24:09,427] A new study created in memory with name: no-name-8a570280-3408-4d7c-855f-1c2ca1d77e3e


Running optimization for KNN_k=20 with robust_user_centric_weight_v2...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-02-08 13:26:57,054] Trial 1 finished with value: 0.20016484605247456 and parameters: {'lower_q': 8.60986426536833, 'upper_q': 78.68101807153731}. Best is trial 1 with value: 0.20016484605247456.
[I 2026-02-08 13:26:57,247] Trial 3 finished with value: 0.20241422598577358 and parameters: {'lower_q': 42.11545993180976, 'upper_q': 87.14707406248831}. Best is trial 3 with value: 0.20241422598577358.
[I 2026-02-08 13:26:57,421] Trial 0 finished with value: 0.1997371011775993 and parameters: {'lower_q': 6.770680127117501, 'upper_q': 94.65665566563028}. Best is trial 3 with value: 0.20241422598577358.
[I 2026-02-08 13:26:57,448] Trial 5 finished with value: 0.20022729164284434 and parameters: {'lower_q': 11.967133820565783, 'upper_q': 77.05581096601682}. Best is trial 3 with value: 0.20241422598577358.
[I 2026-02-08 13:26:57,588] Trial 2 finished with value: 0.20216253417464697 and parameters: {'lower_q': 28.85288097541206, 'upper_q': 81.7979985

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,robust_user_centric_weight_v2,20,0.203032,0.238545,0.24651,0.257112,0.277674,4.990932,"{'lower_q': 43.164298457945705, 'upper_q': 73...."


In [17]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 13:32:00,331] A new study created in memory with name: no-name-1237a929-5337-43af-b96a-f0ca7502600f


Running optimization for KNN_k=20 with sigmoid_propensity...


[I 2026-02-08 13:34:45,505] Trial 1 finished with value: 0.20095772003261003 and parameters: {'p': 1.7372353956772113, 'beta': 0.6604026498691302}. Best is trial 1 with value: 0.20095772003261003.
[I 2026-02-08 13:34:45,688] Trial 7 finished with value: 0.20170577683425264 and parameters: {'p': 2.993233534804208, 'beta': 0.12058195657565163}. Best is trial 7 with value: 0.20170577683425264.
[I 2026-02-08 13:34:45,711] Trial 0 finished with value: 0.1995801054786797 and parameters: {'p': 0.7387591859580891, 'beta': 0.8006799912460979}. Best is trial 7 with value: 0.20170577683425264.
[I 2026-02-08 13:34:45,872] Trial 3 finished with value: 0.19923464710347014 and parameters: {'p': 0.6650227823308037, 'beta': 0.45075570493353545}. Best is trial 7 with value: 0.20170577683425264.
[I 2026-02-08 13:34:46,060] Trial 6 finished with value: 0.199995894207031 and parameters: {'p': 1.0318262982679787, 'beta': 0.3718391970021022}. Best is trial 7 with value: 0.20170577683425264.
[I 2026-02-08 13:

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,sigmoid_propensity,20,0.20275,0.239058,0.246281,0.257513,0.27679,4.996864,"{'p': 4.203486577428474, 'beta': 0.11796781174..."


In [18]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 13:39:41,631] A new study created in memory with name: no-name-21cfa2de-c7f8-4df3-b67e-742bce78059e


Running optimization for KNN_k=20 with power_lift...


[I 2026-02-08 13:42:26,990] Trial 3 finished with value: 0.17320096130367255 and parameters: {'p': 1.4184323915045427}. Best is trial 3 with value: 0.17320096130367255.
[I 2026-02-08 13:42:27,041] Trial 7 finished with value: 0.1760770972889358 and parameters: {'p': 1.330435660266574}. Best is trial 7 with value: 0.1760770972889358.
[I 2026-02-08 13:42:27,107] Trial 4 finished with value: 0.18172995293893404 and parameters: {'p': 1.170804075134214}. Best is trial 4 with value: 0.18172995293893404.
[I 2026-02-08 13:42:27,685] Trial 1 finished with value: 0.19410470135183533 and parameters: {'p': 0.8615730563464263}. Best is trial 1 with value: 0.19410470135183533.
[I 2026-02-08 13:42:27,700] Trial 5 finished with value: 0.19744758711974666 and parameters: {'p': 0.764220797990208}. Best is trial 5 with value: 0.19744758711974666.
[I 2026-02-08 13:42:27,842] Trial 0 finished with value: 0.20169473209990033 and parameters: {'p': 0.6145776487135453}. Best is trial 0 with value: 0.2016947320

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,power_lift,20,0.20543,0.239116,0.249684,0.254355,0.276716,5.005671,{'p': 0.2338520880679608}


In [19]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 13:47:22,201] A new study created in memory with name: no-name-460988fe-7fd9-443e-ba6d-4cde886593ef


Running optimization for KNN_k=100 with no_weighting...


[I 2026-02-08 13:47:45,995] Trial 0 finished with value: 0.18631361973579458 and parameters: {}. Best is trial 0 with value: 0.18631361973579458.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,no_weighting,1,0.186314,0.216885,0.225682,0.233052,0.254817,5.255841,{}


In [6]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:07:56,024] A new study created in memory with name: no-name-dc1358a1-16a6-44f6-84f8-8bdf5e8ed1a0


Running optimization for KNN_k=100 with bm25...


[I 2026-02-08 14:13:23,385] Trial 7 finished with value: 0.19723351392676033 and parameters: {'bm25_k1': 440.3971984878147, 'bm25_b': 0.11374092065079178}. Best is trial 7 with value: 0.19723351392676033.
[I 2026-02-08 14:13:24,016] Trial 5 finished with value: 0.20771635287287826 and parameters: {'bm25_k1': 578.2314680998584, 'bm25_b': 0.4090981916433001}. Best is trial 5 with value: 0.20771635287287826.
[I 2026-02-08 14:13:24,221] Trial 14 finished with value: 0.20526643506164596 and parameters: {'bm25_k1': 664.2714768650949, 'bm25_b': 0.30814110651858906}. Best is trial 5 with value: 0.20771635287287826.
[I 2026-02-08 14:13:24,641] Trial 4 finished with value: 0.19806044121155766 and parameters: {'bm25_k1': 195.7936161157222, 'bm25_b': 0.1283873977343687}. Best is trial 5 with value: 0.20771635287287826.
[I 2026-02-08 14:13:24,659] Trial 2 finished with value: 0.20641544328031985 and parameters: {'bm25_k1': 687.5940252315584, 'bm25_b': 0.35195515965564816}. Best is trial 5 with valu

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,bm25,20,0.21432,0.252056,0.25826,0.258544,0.268111,3.121121,"{'bm25_k1': 779.051787547782, 'bm25_b': 0.9510..."


In [7]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)


[I 2026-02-08 14:16:03,834] A new study created in memory with name: no-name-9b861949-14b7-4bc2-b30d-a0b5f242e845


Running optimization for KNN_k=100 with tfidf...


[I 2026-02-08 14:16:26,222] Trial 0 finished with value: 0.18473188064541968 and parameters: {}. Best is trial 0 with value: 0.18473188064541968.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,tfidf,1,0.184732,0.214796,0.224062,0.230453,0.25285,3.143268,{}


In [8]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:17:50,512] A new study created in memory with name: no-name-6208c844-0327-4fec-b15e-e8d31534c547


Running optimization for KNN_k=100 with log...


[I 2026-02-08 14:18:12,285] Trial 0 finished with value: 0.18470653258358183 and parameters: {}. Best is trial 0 with value: 0.18470653258358183.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,log,1,0.184707,0.214661,0.223856,0.230532,0.252831,3.174901,{}


In [9]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:19:33,073] A new study created in memory with name: no-name-3b22d2eb-6cfc-41f6-898a-c6fe7cdf9251


Running optimization for KNN_k=100 with confidence...


[I 2026-02-08 14:24:59,741] Trial 15 finished with value: 0.18466803984458127 and parameters: {'conf_alpha': 64.08497447924199}. Best is trial 15 with value: 0.18466803984458127.
[I 2026-02-08 14:24:59,758] Trial 2 finished with value: 0.18464379161330916 and parameters: {'conf_alpha': 15.348361451939823}. Best is trial 15 with value: 0.18466803984458127.
[I 2026-02-08 14:24:59,831] Trial 11 finished with value: 0.18467302701950922 and parameters: {'conf_alpha': 22.631274795554035}. Best is trial 11 with value: 0.18467302701950922.
[I 2026-02-08 14:25:00,350] Trial 10 finished with value: 0.18466570392056542 and parameters: {'conf_alpha': 31.615625512854315}. Best is trial 11 with value: 0.18467302701950922.
[I 2026-02-08 14:25:00,506] Trial 6 finished with value: 0.18465822635875379 and parameters: {'conf_alpha': 53.55389058157818}. Best is trial 11 with value: 0.18467302701950922.
[I 2026-02-08 14:25:00,571] Trial 9 finished with value: 0.18469315280527798 and parameters: {'conf_alph

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,confidence,20,0.184694,0.214683,0.223884,0.230546,0.252857,3.142528,{'conf_alpha': 147.1590520580694}


In [10]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:27:36,730] A new study created in memory with name: no-name-20f00479-a229-4c99-b0c7-6dd360797c21


Running optimization for KNN_k=100 with power...


[I 2026-02-08 14:33:07,213] Trial 7 finished with value: 0.18259257373758303 and parameters: {'power_p': 0.12375655361335314}. Best is trial 7 with value: 0.18259257373758303.
[I 2026-02-08 14:33:08,561] Trial 15 finished with value: 0.18520810757057524 and parameters: {'power_p': 0.6721726411410178}. Best is trial 15 with value: 0.18520810757057524.
[I 2026-02-08 14:33:08,664] Trial 10 finished with value: 0.18593815399102467 and parameters: {'power_p': 0.8765164553834203}. Best is trial 10 with value: 0.18593815399102467.
[I 2026-02-08 14:33:08,691] Trial 2 finished with value: 0.18392818718160495 and parameters: {'power_p': 0.31707949241102695}. Best is trial 10 with value: 0.18593815399102467.
[I 2026-02-08 14:33:08,702] Trial 13 finished with value: 0.18442002856279738 and parameters: {'power_p': 0.44493575704257415}. Best is trial 10 with value: 0.18593815399102467.
[I 2026-02-08 14:33:09,172] Trial 9 finished with value: 0.18572691309561207 and parameters: {'power_p': 0.80327038

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,power,20,0.187465,0.218603,0.226945,0.234914,0.255871,3.193265,{'power_p': 1.4466599452323632}


In [11]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:35:42,075] A new study created in memory with name: no-name-7bd1c263-7812-494b-b9f9-166c8e1d06f2


Running optimization for KNN_k=100 with normalized...


[I 2026-02-08 14:36:04,405] Trial 0 finished with value: 0.20795274700382838 and parameters: {}. Best is trial 0 with value: 0.20795274700382838.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,normalized,1,0.207953,0.24365,0.252219,0.255142,0.273428,3.12393,{}


In [12]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:37:27,704] A new study created in memory with name: no-name-a04d7d85-df40-4cdf-801f-d4fe3b16bc06


Running optimization for KNN_k=100 with pmi...


[I 2026-02-08 14:37:50,499] Trial 0 finished with value: 0.21437081043871326 and parameters: {}. Best is trial 0 with value: 0.21437081043871326.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,pmi,1,0.214371,0.254323,0.262021,0.266261,0.282417,3.138043,{}


In [13]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:39:13,845] A new study created in memory with name: no-name-94cc5ea0-c03b-4193-9731-78858f36419b


Running optimization for KNN_k=100 with robust_user_centric...


[I 2026-02-08 14:44:38,013] Trial 0 finished with value: 0.18539595383613522 and parameters: {'scale_factor': 0.5923510944912054}. Best is trial 0 with value: 0.18539595383613522.
[I 2026-02-08 14:44:38,983] Trial 12 finished with value: 0.18539595383613522 and parameters: {'scale_factor': 3.475573391664627}. Best is trial 0 with value: 0.18539595383613522.
[I 2026-02-08 14:44:40,820] Trial 7 finished with value: 0.18539595383613522 and parameters: {'scale_factor': 8.929207016705266}. Best is trial 0 with value: 0.18539595383613522.
[I 2026-02-08 14:44:40,832] Trial 13 finished with value: 0.18539595383613522 and parameters: {'scale_factor': 5.5531729916030335}. Best is trial 0 with value: 0.18539595383613522.
[I 2026-02-08 14:44:40,873] Trial 15 finished with value: 0.18539595383613522 and parameters: {'scale_factor': 3.9243483370259744}. Best is trial 0 with value: 0.18539595383613522.
[I 2026-02-08 14:44:40,889] Trial 2 finished with value: 0.18539595383613522 and parameters: {'scal

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,robust_user_centric,20,0.185396,0.215143,0.224695,0.231134,0.253666,3.132499,{'scale_factor': 0.5923510944912054}


In [14]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:47:14,407] A new study created in memory with name: no-name-4e149c96-6875-4808-a5e2-07b33fd05598


Running optimization for KNN_k=100 with robust_user_centric_weight_v2...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-02-08 14:52:38,507] Trial 14 finished with value: 0.18621238330457796 and parameters: {'lower_q': 33.785517838133885, 'upper_q': 60.72180902583462}. Best is trial 14 with value: 0.18621238330457796.
[I 2026-02-08 14:52:39,804] Trial 7 finished with value: 0.18458109913340615 and parameters: {'lower_q': 18.36006207248197, 'upper_q': 83.45274440274224}. Best is trial 14 with value: 0.18621238330457796.
[I 2026-02-08 14:52:40,059] Trial 3 finished with value: 0.18618863414227332 and parameters: {'lower_q': 31.28102161561497, 'upper_q': 65.90262979984409}. Best is trial 14 with value: 0.18621238330457796.
[I 2026-02-08 14:52:40,383] Trial 13 finished with value: 0.1851192114676171 and parameters: {'lower_q': 15.781127684990244, 'upper_q': 59.55349089529126}. Best is trial 14 with value: 0.18621238330457796.
[I 2026-02-08 14:52:40,428] Trial 4 finished with value: 0.18607238448192365 and parameters: {'lower_q': 34.17080870208767, 'upper_q': 71

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,robust_user_centric_weight_v2,20,0.186219,0.216339,0.225347,0.232847,0.254215,3.129948,"{'lower_q': 40.055265747850626, 'upper_q': 74...."


In [15]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 14:55:15,203] A new study created in memory with name: no-name-e7ea6ec4-cecd-47ef-a430-4edbbd10b4ab


Running optimization for KNN_k=100 with sigmoid_propensity...


[I 2026-02-08 15:00:37,924] Trial 6 finished with value: 0.18497433819864342 and parameters: {'p': 1.6618124658132982, 'beta': 0.6487584368594473}. Best is trial 6 with value: 0.18497433819864342.
[I 2026-02-08 15:00:38,611] Trial 10 finished with value: 0.1853744839648019 and parameters: {'p': 4.274972694430539, 'beta': 0.9394937386643094}. Best is trial 10 with value: 0.1853744839648019.
[I 2026-02-08 15:00:38,730] Trial 1 finished with value: 0.18309455700384927 and parameters: {'p': 0.38024587577613034, 'beta': 0.05707119928891202}. Best is trial 10 with value: 0.1853744839648019.
[I 2026-02-08 15:00:39,365] Trial 4 finished with value: 0.18556314195370627 and parameters: {'p': 2.1635710889465947, 'beta': 0.019036219321507675}. Best is trial 4 with value: 0.18556314195370627.
[I 2026-02-08 15:00:39,485] Trial 11 finished with value: 0.18542130656963632 and parameters: {'p': 4.254469877622275, 'beta': 0.33765416311588126}. Best is trial 4 with value: 0.18556314195370627.
[I 2026-02-

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,sigmoid_propensity,20,0.185785,0.216541,0.224543,0.232678,0.252898,3.132285,"{'p': 3.376903675920369, 'beta': 0.01728831377..."


In [16]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 15:03:12,855] A new study created in memory with name: no-name-a5f7a5fa-60c7-485c-a2e3-d06003046a68


Running optimization for KNN_k=100 with power_lift...


[I 2026-02-08 15:08:34,718] Trial 11 finished with value: 0.19785927688155897 and parameters: {'p': 0.254455739107415}. Best is trial 11 with value: 0.19785927688155897.
[I 2026-02-08 15:08:34,925] Trial 2 finished with value: 0.20264305276898636 and parameters: {'p': 0.3679544354227915}. Best is trial 2 with value: 0.20264305276898636.
[I 2026-02-08 15:08:35,483] Trial 7 finished with value: 0.2055798451917118 and parameters: {'p': 0.4412244928249359}. Best is trial 7 with value: 0.2055798451917118.
[I 2026-02-08 15:08:35,501] Trial 10 finished with value: 0.2063917133538829 and parameters: {'p': 0.4663026007134703}. Best is trial 10 with value: 0.2063917133538829.
[I 2026-02-08 15:08:35,812] Trial 13 finished with value: 0.19499315377338472 and parameters: {'p': 0.2006912188644832}. Best is trial 10 with value: 0.2063917133538829.
[I 2026-02-08 15:08:35,905] Trial 6 finished with value: 0.2053375743632391 and parameters: {'p': 0.4317232359423233}. Best is trial 10 with value: 0.20639

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,power_lift,20,0.214652,0.252595,0.258867,0.259424,0.269484,3.110194,{'p': 0.8730207954998535}


In [17]:
import glob

all_results = []
# Match any CSV in the result folder
for f in glob.glob(f"{results_folder}/*.csv"):
    all_results.append(pd.read_csv(f))

if all_results:
    experiment_results = pd.concat(all_results)
    experiment_results = experiment_results.sort_values("Test NDCG@20", ascending=False)
    experiment_results.to_csv(results_filename, index=False)
else:
    print("No results found.")

experiment_results

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,pmi,1,0.214371,0.254323,0.262021,0.266261,0.282417,3.138043,{}
0,KNN_k=100,power_lift,20,0.214652,0.252595,0.258867,0.259424,0.269484,3.110194,{'p': 0.8730207954998535}
0,KNN_k=100,bm25,20,0.21432,0.252056,0.25826,0.258544,0.268111,3.121121,"{'bm25_k1': 779.051787547782, 'bm25_b': 0.9510..."
0,KNN_k=20,bm25,20,0.209713,0.243675,0.254152,0.25761,0.278574,4.989317,"{'bm25_k1': 347.6403750352989, 'bm25_b': 0.386..."
0,KNN_k=100,normalized,1,0.207953,0.24365,0.252219,0.255142,0.273428,3.12393,{}
0,KNN_k=20,power_lift,20,0.20543,0.239116,0.249684,0.254355,0.276716,5.005671,{'p': 0.2338520880679608}
0,KNN_k=20,normalized,1,0.20522,0.236056,0.248234,0.24859,0.271035,4.985893,{}
0,KNN_k=20,power,20,0.203449,0.239357,0.247413,0.257714,0.278771,4.999539,{'power_p': 1.4976312890997985}
0,KNN_k=20,robust_user_centric_weight_v2,20,0.203032,0.238545,0.24651,0.257112,0.277674,4.990932,"{'lower_q': 43.164298457945705, 'upper_q': 73...."
0,KNN_k=20,sigmoid_propensity,20,0.20275,0.239058,0.246281,0.257513,0.27679,4.996864,"{'p': 4.203486577428474, 'beta': 0.11796781174..."
