In [1]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, confidence_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.nearest_neighbours import CosineRecommender
from implicit.evaluation import train_test_split, precision_at_k, ndcg_at_k

import cornac


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories
sys.path.append(os.path.abspath(".."))

from utils.sparse import transform_dataframe_to_sparse

In [3]:
lastfm_df = (
    pd.read_csv(
        "/home/coder/projects/rec-sys-research/data/lastfm-dataset-360K/usersha1-artmbid-artname-plays.tsv",
        sep="\t",
        header=None,
        usecols=[0, 2, 3],
        names=['user_id', 'item_id', 'play_count'],
    )
    .loc[:, ['user_id', 'item_id', 'play_count']]
    .dropna()
    .rename(columns={'play_count': 'target'})
)
lastfm_df['user_id'].nunique(), lastfm_df['item_id'].nunique(), lastfm_df.shape[0]

(358868, 292363, 17535451)

In [4]:
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    lastfm_df, row_field='user_id', col_field='item_id', data_field='target'
)


train_val_mat, test_mat = train_test_split(user_item_matrix, train_percentage=0.9, random_state=42)
train_mat, val_mat = train_test_split(train_val_mat, train_percentage=0.9, random_state=42)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (358868, 292363), Val Shape: (358868, 292363), Test Shape: (358868, 292363)


In [5]:
results_folder = "results/lastfm_360k_knn"
results_filename = "lastfm_360k_knn_results.csv"

import time

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
) -> pd.DataFrame:
    results = []
    algorithms = {
        "KNN_k=20": lambda: CosineRecommender(K=20),
        "KNN_k=100": lambda: CosineRecommender(K=100),
    }
    strategies = [
        "no_weighting",
        "bm25",
        "tfidf",
        "log", 
        "confidence",
        "power",
        "normalized",
        "pmi",
        "robust_user_centric",
        "robust_user_centric_weight_v2",
        "sigmoid_propensity",
        "power_lift"
    ]
    if weighting_strategy not in strategies:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy

    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    AlgoFactory = algorithms[algorithm]

    print(f"Running optimization for {algo_name} with {strategy}...")

    def get_weighted_matrix(matrix, params):
        weighted = matrix.copy()
        if strategy == "bm25":
            weighted = bm25_weight(weighted, K1=params.get("bm25_k1"), B=params.get("bm25_b"))
        elif strategy == "confidence":
            weighted = confidence_weight(weighted, alpha=params.get("conf_alpha"))
        elif strategy == "power":
            weighted = power_weight(weighted, p=params.get("power_p"))
        elif strategy == "tfidf":
            weighted = tfidf_weight(weighted)
        elif strategy == "log":
            weighted = log_weight(weighted)
        elif strategy == "normalized":
            weighted = normalized_weight(weighted)
        elif strategy == "pmi":
            weighted = pmi_weight(weighted)
        elif strategy == "robust_user_centric":
            weighted = robust_user_centric_weight(weighted, scale_factor=params.get("scale_factor"))
        elif strategy == "sigmoid_propensity":
            weighted = sigmoid_propensity_weight(weighted, p=params.get("p"), beta=params.get("beta"))
        elif strategy == "power_lift":
            weighted = power_lift_weight(weighted, p=params.get("p"))
        elif strategy == "robust_user_centric_weight_v2":
            weighted = robust_user_centric_weight_v2(weighted, lower_q=params.get("lower_q"), upper_q=params.get("upper_q"))
        return weighted

    def objective(trial):
        params = {}
        # Suggest weighting strategy parameters
        if strategy == "bm25":
            params["bm25_k1"] = trial.suggest_float("bm25_k1", 0.1, 1000)
            params["bm25_b"] = trial.suggest_float("bm25_b", 0.0, 1.0)
        elif strategy == "confidence":
            params["conf_alpha"] = trial.suggest_float("conf_alpha", 1.0, 150.0)
        elif strategy == "power":
            params["power_p"] = trial.suggest_float("power_p", 0.1, 1.5)
        elif strategy == "robust_user_centric":
            params["scale_factor"] = trial.suggest_float("scale_factor", 0.1, 10.0)
        elif strategy == "robust_user_centric_weight_v2":
            params["lower_q"] = trial.suggest_float("lower_q", 5.0, 45.0)
            params["upper_q"] = trial.suggest_float("upper_q", 55.0, 95.0)
        elif strategy == "sigmoid_propensity":
            params["p"] = trial.suggest_float("p", 0.1, 5.0)
            params["beta"] = trial.suggest_float("beta", 0.0, 1.0)
        elif strategy == "power_lift":
            params["p"] = trial.suggest_float("p", 0.1, 1.5)
        weighted_train = get_weighted_matrix(train_mat, params)

        # Train Model
        model = AlgoFactory()
        model.fit(weighted_train, show_progress=False)

        # Evaluate on Validation Set
        return ndcg_at_k(model, train_mat, val_mat, K=20, show_progress=False)

    # Optimize only if strategy has parameters
    current_trials = n_trials if strategy in ["bm25", "confidence", "power", "robust_user_centric", "robust_user_centric_weight_v2", "sigmoid_propensity", "power_lift"] else 1
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=current_trials, n_jobs=-1)

    # --- Final Retraining & Testing ---
    # Use best params to weight the full train_val matrix
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    # Train Final Model
    final_model = AlgoFactory()
    
    start_time = time.time()
    final_model.fit(weighted_train_val, show_progress=False)
    end_time = time.time()
    
    # Evaluate on Test Set
    test_ndcg_10 = ndcg_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_precision_10 = precision_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_ndcg_20 = ndcg_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)
    test_precision_20 = precision_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results.append({
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": test_ndcg_10,
        "Test NDCG@20": test_ndcg_20,
        "Test Precision@10": test_precision_10,
        "Test Precision@20": test_precision_20,
        "Final Train Time (s)": end_time - start_time,
        "Best Params": best_params
    })

    if output_dir:
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        pd.DataFrame(results).to_csv(output_path, index=False)
    return pd.DataFrame(results)

In [6]:
if not os.path.exists(results_folder):
    os.makedirs(results_folder)

In [7]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 15:18:01,569] A new study created in memory with name: no-name-2c94559d-6f73-46ef-8073-b9c4c084ec78


Running optimization for KNN_k=20 with no_weighting...


[I 2026-02-08 15:18:35,643] Trial 0 finished with value: 0.03891400901981175 and parameters: {}. Best is trial 0 with value: 0.03891400901981175.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,no_weighting,1,0.038914,0.04121,0.051573,0.044572,0.06936,3.971238,{}


In [8]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 15:20:44,551] A new study created in memory with name: no-name-a5a5f7aa-19ac-4c26-bd3c-0ed7e77b9f2b


Running optimization for KNN_k=20 with bm25...


[I 2026-02-08 15:32:27,678] Trial 13 finished with value: 0.14082970603183165 and parameters: {'bm25_k1': 36.511207278311026, 'bm25_b': 0.9352037266309827}. Best is trial 13 with value: 0.14082970603183165.
[I 2026-02-08 15:32:42,205] Trial 4 finished with value: 0.1380426872669633 and parameters: {'bm25_k1': 341.5973053569579, 'bm25_b': 0.6666467342282101}. Best is trial 13 with value: 0.14082970603183165.
[I 2026-02-08 15:32:42,846] Trial 2 finished with value: 0.13978728237701288 and parameters: {'bm25_k1': 142.33037790293804, 'bm25_b': 0.7179492615261626}. Best is trial 13 with value: 0.14082970603183165.
[I 2026-02-08 15:32:43,720] Trial 12 finished with value: 0.13929234499055623 and parameters: {'bm25_k1': 101.92527416505283, 'bm25_b': 0.4844542793797221}. Best is trial 13 with value: 0.14082970603183165.
[I 2026-02-08 15:32:45,410] Trial 15 finished with value: 0.13695958465159047 and parameters: {'bm25_k1': 182.741152325566, 'bm25_b': 0.16227066228140397}. Best is trial 13 wit

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,bm25,20,0.14083,0.132367,0.158774,0.142205,0.205453,4.060442,"{'bm25_k1': 36.511207278311026, 'bm25_b': 0.93..."


In [9]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)


[I 2026-02-08 15:37:40,166] A new study created in memory with name: no-name-780d4cbe-5897-4e2e-a685-0fa57ffd4641


Running optimization for KNN_k=20 with tfidf...


[I 2026-02-08 15:38:14,090] Trial 0 finished with value: 0.13026562279973605 and parameters: {}. Best is trial 0 with value: 0.13026562279973605.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,tfidf,1,0.130266,0.125231,0.149681,0.133798,0.19232,3.947442,{}


In [10]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 15:40:20,810] A new study created in memory with name: no-name-2c92fe68-cecf-4b30-890b-2f613ca53a46


Running optimization for KNN_k=20 with log...


[I 2026-02-08 15:40:54,456] Trial 0 finished with value: 0.13983063505203577 and parameters: {}. Best is trial 0 with value: 0.13983063505203577.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,log,1,0.139831,0.131588,0.157917,0.141482,0.204615,3.954885,{}


In [11]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 15:43:00,597] A new study created in memory with name: no-name-800471d5-3b77-4b62-ab65-f80c2b8a332d


Running optimization for KNN_k=20 with confidence...


[I 2026-02-08 15:54:46,441] Trial 0 finished with value: 0.13983053405120946 and parameters: {'conf_alpha': 54.16856872618617}. Best is trial 0 with value: 0.13983053405120946.
[I 2026-02-08 15:55:01,101] Trial 8 finished with value: 0.13983034891485416 and parameters: {'conf_alpha': 57.80900382957736}. Best is trial 0 with value: 0.13983053405120946.
[I 2026-02-08 15:55:03,373] Trial 5 finished with value: 0.1398287643600303 and parameters: {'conf_alpha': 26.001734582863545}. Best is trial 0 with value: 0.13983053405120946.
[I 2026-02-08 15:55:05,294] Trial 14 finished with value: 0.13982893589404005 and parameters: {'conf_alpha': 137.75796062468285}. Best is trial 0 with value: 0.13983053405120946.
[I 2026-02-08 15:55:05,520] Trial 4 finished with value: 0.139828953305032 and parameters: {'conf_alpha': 97.2823822855681}. Best is trial 0 with value: 0.13983053405120946.
[I 2026-02-08 15:55:05,925] Trial 1 finished with value: 0.13982857585777317 and parameters: {'conf_alpha': 118.1530

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,confidence,20,0.139855,0.131616,0.157951,0.141507,0.204655,3.953251,{'conf_alpha': 2.4608035937985364}


In [12]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 15:59:51,052] A new study created in memory with name: no-name-713b5d71-4f76-4092-a3b2-9a50e0273e66


Running optimization for KNN_k=20 with power...


[I 2026-02-08 16:11:36,444] Trial 5 finished with value: 0.14034922389626317 and parameters: {'power_p': 0.1744640710799922}. Best is trial 5 with value: 0.14034922389626317.
[I 2026-02-08 16:11:46,515] Trial 2 finished with value: 0.1378023106240934 and parameters: {'power_p': 0.33294066069403605}. Best is trial 5 with value: 0.14034922389626317.
[I 2026-02-08 16:11:48,307] Trial 1 finished with value: 0.13652505511764745 and parameters: {'power_p': 0.3780458348534691}. Best is trial 5 with value: 0.14034922389626317.
[I 2026-02-08 16:11:49,368] Trial 12 finished with value: 0.13600342142697625 and parameters: {'power_p': 0.3945268345720455}. Best is trial 5 with value: 0.14034922389626317.
[I 2026-02-08 16:11:49,724] Trial 3 finished with value: 0.13186130968753132 and parameters: {'power_p': 0.4754249712468033}. Best is trial 5 with value: 0.14034922389626317.
[I 2026-02-08 16:11:50,899] Trial 10 finished with value: 0.13530332481557034 and parameters: {'power_p': 0.4115231914786844

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,power,20,0.140349,0.131951,0.15836,0.141837,0.205156,3.939819,{'power_p': 0.1744640710799922}


In [13]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 16:16:49,485] A new study created in memory with name: no-name-1da399e1-80d3-447f-858a-fa5b6ff54d6d


Running optimization for KNN_k=20 with normalized...


[I 2026-02-08 16:17:23,386] Trial 0 finished with value: 0.12663819492114045 and parameters: {}. Best is trial 0 with value: 0.12663819492114045.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,normalized,1,0.126638,0.123498,0.146723,0.131563,0.187229,3.938752,{}


In [14]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 16:19:30,257] A new study created in memory with name: no-name-6fd22ac7-eba9-4c74-ac6c-019df0bae308


Running optimization for KNN_k=20 with pmi...


[I 2026-02-08 16:20:03,484] Trial 0 finished with value: 0.14177147690063713 and parameters: {}. Best is trial 0 with value: 0.14177147690063713.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,pmi,1,0.141771,0.133898,0.160193,0.142863,0.205846,3.933298,{}


In [15]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 16:22:09,571] A new study created in memory with name: no-name-a6c682f2-6469-4f87-942e-190078780494


Running optimization for KNN_k=20 with robust_user_centric...


[I 2026-02-08 16:33:30,398] Trial 7 finished with value: 0.14123867248426783 and parameters: {'scale_factor': 9.670932577665486}. Best is trial 7 with value: 0.14123867248426783.
[I 2026-02-08 16:33:39,426] Trial 0 finished with value: 0.14123867248426783 and parameters: {'scale_factor': 2.85699548096334}. Best is trial 7 with value: 0.14123867248426783.
[I 2026-02-08 16:33:42,433] Trial 14 finished with value: 0.14123867248426783 and parameters: {'scale_factor': 6.522003679146763}. Best is trial 7 with value: 0.14123867248426783.
[I 2026-02-08 16:33:52,496] Trial 5 finished with value: 0.14123867248426783 and parameters: {'scale_factor': 4.846854991747204}. Best is trial 7 with value: 0.14123867248426783.
[I 2026-02-08 16:34:02,769] Trial 10 finished with value: 0.14123867248426783 and parameters: {'scale_factor': 7.261142304008497}. Best is trial 7 with value: 0.14123867248426783.
[I 2026-02-08 16:34:03,494] Trial 6 finished with value: 0.14123867248426783 and parameters: {'scale_fac

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,robust_user_centric,20,0.141239,0.133655,0.159824,0.142834,0.205546,3.945003,{'scale_factor': 9.670932577665486}


In [16]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 16:39:00,711] A new study created in memory with name: no-name-e1425cc5-ac3c-4c06-8c63-20cd09de1751


Running optimization for KNN_k=20 with robust_user_centric_weight_v2...


  weights = 1 / (1 + np.exp(-z_scores))
  X.data = X.data / sqrt(bincount(X.row, X.data**2))[X.row]
[I 2026-02-08 16:50:27,106] Trial 14 finished with value: 0.14140699182371433 and parameters: {'lower_q': 31.141799940507532, 'upper_q': 88.66575743325232}. Best is trial 14 with value: 0.14140699182371433.
[I 2026-02-08 16:50:27,531] Trial 2 finished with value: 0.14143972305138172 and parameters: {'lower_q': 11.628513909476936, 'upper_q': 85.75305291070944}. Best is trial 2 with value: 0.14143972305138172.
[I 2026-02-08 16:50:34,953] Trial 0 finished with value: 0.14004037758438187 and parameters: {'lower_q': 33.45198076414229, 'upper_q': 60.59286726031687}. Best is trial 2 with value: 0.14143972305138172.
[I 2026-02-08 16:50:46,307] Trial 8 finished with value: 0.13978497028199707 and parameters: {'lower_q': 37.18360894420195, 'upper_q': 60.272240199787674}. Best is trial 2 with value: 0.14143972305138172.
[I 2026-02-08 16:50:46,599] Trial 6 finished with value: 0.1410781391578431 and

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,robust_user_centric_weight_v2,20,0.141447,0.133301,0.159698,0.142709,0.205955,3.931705,"{'lower_q': 16.371950743562238, 'upper_q': 93...."


In [17]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 16:55:45,164] A new study created in memory with name: no-name-33457e4f-1852-4fb0-bc82-a062d571b2e7


Running optimization for KNN_k=20 with sigmoid_propensity...


[I 2026-02-08 17:07:11,874] Trial 0 finished with value: 0.1350933432626162 and parameters: {'p': 3.176926429394425, 'beta': 0.5606464087302689}. Best is trial 0 with value: 0.1350933432626162.
[I 2026-02-08 17:07:30,690] Trial 3 finished with value: 0.14055722523115424 and parameters: {'p': 0.23486926907318534, 'beta': 0.7029733699531714}. Best is trial 3 with value: 0.14055722523115424.
[I 2026-02-08 17:07:32,411] Trial 9 finished with value: 0.1356641482302921 and parameters: {'p': 2.253042426904779, 'beta': 0.8071647019971318}. Best is trial 3 with value: 0.14055722523115424.
[I 2026-02-08 17:07:34,502] Trial 12 finished with value: 0.13549372787475944 and parameters: {'p': 2.463364217467416, 'beta': 0.579377529462625}. Best is trial 3 with value: 0.14055722523115424.
[I 2026-02-08 17:07:42,178] Trial 1 finished with value: 0.1354533600414651 and parameters: {'p': 2.5184893229046805, 'beta': 0.8057820716188214}. Best is trial 3 with value: 0.14055722523115424.
[I 2026-02-08 17:07:4

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,sigmoid_propensity,20,0.140671,0.131925,0.158438,0.141866,0.205342,3.93066,"{'p': 0.10746634456700904, 'beta': 0.956389357..."


In [18]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="KNN_k=20", n_trials=20, output_dir=results_folder)

[I 2026-02-08 17:12:38,090] A new study created in memory with name: no-name-cd1c88ed-d8f8-4b5f-8977-c84e08cc73d8


Running optimization for KNN_k=20 with power_lift...


[I 2026-02-08 17:23:47,527] Trial 6 finished with value: 0.14110993341275893 and parameters: {'p': 0.23125434477018258}. Best is trial 6 with value: 0.14110993341275893.
[I 2026-02-08 17:24:00,401] Trial 3 finished with value: 0.1408119379826941 and parameters: {'p': 0.3798652918927532}. Best is trial 6 with value: 0.14110993341275893.
[I 2026-02-08 17:24:18,421] Trial 4 finished with value: 0.13694456634009625 and parameters: {'p': 0.6132121625549513}. Best is trial 6 with value: 0.14110993341275893.
[I 2026-02-08 17:24:19,820] Trial 13 finished with value: 0.11498969388002128 and parameters: {'p': 0.9276974554365854}. Best is trial 6 with value: 0.14110993341275893.
[I 2026-02-08 17:24:22,345] Trial 8 finished with value: 0.12368333906866544 and parameters: {'p': 0.8479701361765336}. Best is trial 6 with value: 0.14110993341275893.
[I 2026-02-08 17:24:31,755] Trial 2 finished with value: 0.13993482138280075 and parameters: {'p': 0.4777204129775424}. Best is trial 6 with value: 0.1411

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=20,power_lift,20,0.141126,0.132959,0.159349,0.142517,0.205718,3.918554,{'p': 0.2222570062751384}


In [6]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 17:37:05,670] A new study created in memory with name: no-name-3771e6de-02de-4335-a213-a530aa45aa32


Running optimization for KNN_k=100 with no_weighting...


[I 2026-02-08 17:38:02,667] Trial 0 finished with value: 0.07758088530854262 and parameters: {}. Best is trial 0 with value: 0.07758088530854262.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,no_weighting,1,0.077581,0.078729,0.096624,0.085922,0.128789,5.186037,{}


In [7]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 17:41:46,531] A new study created in memory with name: no-name-513bb89c-588e-4c84-9f9f-09ba03f95c97


Running optimization for KNN_k=100 with bm25...


[I 2026-02-08 17:42:40,880] Trial 0 finished with value: 0.15340658907720658 and parameters: {'bm25_k1': 374.60266483547775, 'bm25_b': 0.9507143064099162}. Best is trial 0 with value: 0.15340658907720658.
[I 2026-02-08 17:43:35,925] Trial 1 finished with value: 0.15076125759389156 and parameters: {'bm25_k1': 732.0207424172239, 'bm25_b': 0.5986584841970366}. Best is trial 0 with value: 0.15340658907720658.
[I 2026-02-08 17:44:29,621] Trial 2 finished with value: 0.1492539331006084 and parameters: {'bm25_k1': 156.10303857839227, 'bm25_b': 0.15599452033620265}. Best is trial 0 with value: 0.15340658907720658.
[I 2026-02-08 17:45:23,048] Trial 3 finished with value: 0.15138847645238257 and parameters: {'bm25_k1': 58.17780380698264, 'bm25_b': 0.8661761457749352}. Best is trial 0 with value: 0.15340658907720658.
[I 2026-02-08 17:46:17,544] Trial 4 finished with value: 0.1516526561785031 and parameters: {'bm25_k1': 601.1549002420345, 'bm25_b': 0.7080725777960455}. Best is trial 0 with value: 

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,bm25,20,0.153775,0.14579,0.17287,0.155463,0.220398,5.143113,"{'bm25_k1': 334.8616967056139, 'bm25_b': 0.997..."


In [9]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)


[I 2026-02-08 18:03:57,146] A new study created in memory with name: no-name-53d1bc26-6de8-4a98-8844-fceb50b61810


Running optimization for KNN_k=100 with tfidf...


[I 2026-02-08 18:04:51,189] Trial 0 finished with value: 0.14737895840507367 and parameters: {}. Best is trial 0 with value: 0.14737895840507367.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,tfidf,1,0.147379,0.140411,0.166733,0.150136,0.213241,5.129857,{}


In [10]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 18:08:19,782] A new study created in memory with name: no-name-18bc6d33-bf23-4293-a730-2229159935ca


Running optimization for KNN_k=100 with log...


[I 2026-02-08 18:09:11,985] Trial 0 finished with value: 0.15042403688713454 and parameters: {}. Best is trial 0 with value: 0.15042403688713454.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,log,1,0.150424,0.141591,0.168363,0.151866,0.21604,5.116614,{}


In [11]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 18:12:32,014] A new study created in memory with name: no-name-a6aacde6-cdbf-4e00-9091-651f75138ba4


Running optimization for KNN_k=100 with confidence...


[I 2026-02-08 18:25:49,557] Trial 11 finished with value: 0.15042705608449175 and parameters: {'conf_alpha': 44.856713918386134}. Best is trial 11 with value: 0.15042705608449175.
[I 2026-02-08 18:25:49,715] Trial 0 finished with value: 0.15042531356564387 and parameters: {'conf_alpha': 131.03918960140268}. Best is trial 11 with value: 0.15042705608449175.
[I 2026-02-08 18:25:49,782] Trial 4 finished with value: 0.15042531356564387 and parameters: {'conf_alpha': 131.40615627200808}. Best is trial 11 with value: 0.15042705608449175.
[I 2026-02-08 18:25:49,834] Trial 1 finished with value: 0.15042453693304841 and parameters: {'conf_alpha': 63.35864425510818}. Best is trial 11 with value: 0.15042705608449175.
[I 2026-02-08 18:25:49,924] Trial 7 finished with value: 0.1504272518031876 and parameters: {'conf_alpha': 17.809098322178766}. Best is trial 7 with value: 0.1504272518031876.
[I 2026-02-08 18:25:50,211] Trial 14 finished with value: 0.15042997901666386 and parameters: {'conf_alpha':

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,confidence,20,0.150445,0.141578,0.16835,0.15186,0.216026,5.137966,{'conf_alpha': 3.4705476625545497}


In [None]:
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

In [13]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 18:51:52,816] A new study created in memory with name: no-name-6a99df57-3c33-45cc-8e74-d575a7f1615a


Running optimization for KNN_k=100 with normalized...


[I 2026-02-08 18:52:47,124] Trial 0 finished with value: 0.14827831649538598 and parameters: {}. Best is trial 0 with value: 0.14827831649538598.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,normalized,1,0.148278,0.142499,0.169008,0.152028,0.2156,5.067103,{}


In [14]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 18:56:18,537] A new study created in memory with name: no-name-db5d7456-f013-4fee-902e-7a34db2094bb


Running optimization for KNN_k=100 with pmi...


[I 2026-02-08 18:57:10,458] Trial 0 finished with value: 0.1538867715533792 and parameters: {}. Best is trial 0 with value: 0.1538867715533792.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,pmi,1,0.153887,0.14597,0.172831,0.155334,0.219784,5.093644,{}


In [None]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

In [None]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

In [6]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 19:41:18,894] A new study created in memory with name: no-name-5263a028-3716-4509-9ec0-e40fb2d1a628


Running optimization for KNN_k=100 with sigmoid_propensity...


[I 2026-02-08 19:54:34,768] Trial 4 finished with value: 0.15055162225074853 and parameters: {'p': 0.2569626793794393, 'beta': 0.9474990766919448}. Best is trial 4 with value: 0.15055162225074853.
[I 2026-02-08 19:54:36,587] Trial 11 finished with value: 0.14683697481816727 and parameters: {'p': 2.3527079916551643, 'beta': 0.024440585230778522}. Best is trial 4 with value: 0.15055162225074853.
[I 2026-02-08 19:54:37,617] Trial 15 finished with value: 0.14883185061247212 and parameters: {'p': 1.0294073666704442, 'beta': 0.971578026462169}. Best is trial 4 with value: 0.15055162225074853.
[I 2026-02-08 19:54:37,740] Trial 13 finished with value: 0.14650195229601348 and parameters: {'p': 2.7634961603168877, 'beta': 0.6364543759897934}. Best is trial 4 with value: 0.15055162225074853.
[I 2026-02-08 19:54:37,908] Trial 2 finished with value: 0.1478664189447198 and parameters: {'p': 1.5006378611326465, 'beta': 0.11238523782695264}. Best is trial 4 with value: 0.15055162225074853.
[I 2026-02-

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,sigmoid_propensity,20,0.150552,0.141705,0.168453,0.151952,0.216151,5.124545,"{'p': 0.2569626793794393, 'beta': 0.9474990766..."


In [7]:

run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="KNN_k=100", n_trials=20, output_dir=results_folder)

[I 2026-02-08 20:00:51,839] A new study created in memory with name: no-name-7f6053f6-2443-4d22-9e50-ccd1a0c26465


Running optimization for KNN_k=100 with power_lift...


[I 2026-02-08 20:13:38,015] Trial 13 finished with value: 0.15313939436119925 and parameters: {'p': 0.34771148101691296}. Best is trial 13 with value: 0.15313939436119925.
[I 2026-02-08 20:13:52,272] Trial 10 finished with value: 0.15137803705384506 and parameters: {'p': 0.12552298499386771}. Best is trial 13 with value: 0.15313939436119925.
[I 2026-02-08 20:13:56,498] Trial 8 finished with value: 0.15244584230414904 and parameters: {'p': 0.24977348325347373}. Best is trial 13 with value: 0.15313939436119925.
[I 2026-02-08 20:13:56,842] Trial 1 finished with value: 0.15253073218854007 and parameters: {'p': 0.2624251062312708}. Best is trial 13 with value: 0.15313939436119925.
[I 2026-02-08 20:13:56,889] Trial 12 finished with value: 0.15252941722786498 and parameters: {'p': 0.2625082018330802}. Best is trial 13 with value: 0.15313939436119925.
[I 2026-02-08 20:13:57,079] Trial 0 finished with value: 0.15242052409782988 and parameters: {'p': 0.2471248169495631}. Best is trial 13 with va

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,power_lift,20,0.153139,0.144863,0.171862,0.154642,0.219409,5.111422,{'p': 0.34771148101691296}


In [8]:
import glob

all_results = []
# Match any CSV in the result folder
for f in glob.glob(f"{results_folder}/*.csv"):
    all_results.append(pd.read_csv(f))

if all_results:
    experiment_results = pd.concat(all_results)
    experiment_results = experiment_results.sort_values("Test NDCG@20", ascending=False)
    experiment_results.to_csv(results_filename, index=False)
else:
    print("No results found.")

experiment_results

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,KNN_k=100,bm25,20,0.153775,0.14579,0.17287,0.155463,0.220398,5.143113,"{'bm25_k1': 334.8616967056139, 'bm25_b': 0.997..."
0,KNN_k=100,pmi,1,0.153887,0.14597,0.172831,0.155334,0.219784,5.093644,{}
0,KNN_k=100,robust_user_centric_weight_v2,20,0.153671,0.145439,0.17255,0.15542,0.220454,5.087087,"{'lower_q': 23.018757590675904, 'upper_q': 67...."
0,KNN_k=100,robust_user_centric,20,0.153605,0.145302,0.172379,0.155242,0.220212,5.064834,{'scale_factor': 7.238040786011438}
0,KNN_k=100,power_lift,20,0.153139,0.144863,0.171862,0.154642,0.219409,5.111422,{'p': 0.34771148101691296}
0,KNN_k=100,normalized,1,0.148278,0.142499,0.169008,0.152028,0.2156,5.067103,{}
0,KNN_k=100,power,20,0.150817,0.141993,0.168782,0.152198,0.216435,5.08411,{'power_p': 0.15519526328023617}
0,KNN_k=100,sigmoid_propensity,20,0.150552,0.141705,0.168453,0.151952,0.216151,5.124545,"{'p': 0.2569626793794393, 'beta': 0.9474990766..."
0,KNN_k=100,log,1,0.150424,0.141591,0.168363,0.151866,0.21604,5.116614,{}
0,KNN_k=100,confidence,20,0.150445,0.141578,0.16835,0.15186,0.216026,5.137966,{'conf_alpha': 3.4705476625545497}
