In [None]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, confidence_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, precision_at_k, ndcg_at_k

import cornac


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories.
# The membership check keeps the cell idempotent: re-running it does not pile up
# duplicate entries in sys.path.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from utils.sparse import transform_dataframe_to_sparse

In [3]:
# Load the MovieLens 100K feedback through cornac and put it into a three-column
# frame; rows containing missing values are dropped.
feedback_columns = ['user_id', 'item_id', 'target']
feedback_records = cornac.datasets.movielens.load_feedback(variant="100K")
movielens_df = pd.DataFrame(data=feedback_records, columns=feedback_columns)[feedback_columns].dropna()

# Quick sanity summary: (distinct users, distinct items, number of interactions).
movielens_df['user_id'].nunique(), movielens_df['item_id'].nunique(), movielens_df.shape[0]

(943, 1682, 100000)

In [4]:
# Turn the long-format feedback table into a sparse matrix with 'user_id' as the
# row field and 'item_id' as the column field; 'target' supplies the cell values.
# NOTE(review): user_mapping / item_mapping presumably translate raw ids to
# row / column positions -- confirm against utils.sparse.
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    movielens_df,
    row_field='user_id',
    col_field='item_id',
    data_field='target',
)

# Two nested splits that share the same settings: the first separates test_mat
# from train_val_mat, the second separates val_mat from train_mat.
split_options = {"train_percentage": 0.9, "random_state": 42}
train_val_mat, test_mat = train_test_split(user_item_matrix, **split_options)
train_mat, val_mat = train_test_split(train_val_mat, **split_options)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (943, 1682), Val Shape: (943, 1682), Test Shape: (943, 1682)


In [None]:
# Folder that receives one CSV per experiment run (passed as `output_dir` to
# run_hyperparameter_optimization and globbed again when results are aggregated).
results_folder = "results/movielens_100k"
# File name of the combined, sorted results table written by the aggregation cell.
results_filename = "movielens_100k_experiment_results.csv"

import time

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
) -> pd.DataFrame:
    """Tune one weighting strategy for one ALS configuration and score it on the test set.

    The weighting hyperparameters are searched with Optuna: each trial weights
    ``train_mat``, fits a fresh model on it and is scored by NDCG@20 against
    ``val_mat``. The best parameters are then used to weight ``train_val_mat``,
    a final model is fitted on it, and NDCG / precision at 10 and 20 are
    computed against ``test_mat``.

    Args:
        train_mat: Interactions used to fit the model inside each trial.
        val_mat: Held-out interactions that score each trial.
        train_val_mat: Interactions used to fit the final model.
        test_mat: Held-out interactions that score the final model.
        weighting_strategy: Name of the weighting scheme; must be one of the
            keys of the ``weighters`` table below.
        algorithm: Name of the model configuration; ``"ALS_factors=10"`` or
            ``"ALS_factors=100"``.
        n_trials: Number of Optuna trials for strategies that have tunable
            parameters. Strategies without parameters always run one trial.
        output_dir: If given (and non-empty), the one-row result table is also
            written to ``<output_dir>/<algorithm>_<weighting_strategy>_results.csv``.
            The directory is created when it does not exist yet.

    Returns:
        A one-row DataFrame with the run configuration, the best validation
        score, the test metrics, the final fit time and the best parameters.

    Raises:
        ValueError: If ``weighting_strategy`` or ``algorithm`` is not recognized.
    """
    # Model factories are lambdas so that every trial and the final fit get a
    # fresh, identically configured model.
    algorithms = {
        "ALS_factors=10": lambda: AlternatingLeastSquares(factors=10, random_state=42),
        "ALS_factors=100": lambda: AlternatingLeastSquares(factors=100, random_state=42),
    }

    # strategy name -> function(matrix, params) returning the weighted matrix.
    # Lambdas keep the lookup of the weighting functions lazy.
    weighters = {
        "no_weighting": lambda m, p: m,
        "bm25": lambda m, p: bm25_weight(m, K1=p.get("bm25_k1"), B=p.get("bm25_b")),
        "tfidf": lambda m, p: tfidf_weight(m),
        "log": lambda m, p: log_weight(m),
        "confidence": lambda m, p: confidence_weight(m, alpha=p.get("conf_alpha")),
        "power": lambda m, p: power_weight(m, p=p.get("power_p")),
        "normalized": lambda m, p: normalized_weight(m),
        "pmi": lambda m, p: pmi_weight(m),
        "robust_user_centric": lambda m, p: robust_user_centric_weight(
            m, scale_factor=p.get("scale_factor")
        ),
        "robust_user_centric_weight_v2": lambda m, p: robust_user_centric_weight_v2(
            m, lower_q=p.get("lower_q"), upper_q=p.get("upper_q")
        ),
        "sigmoid_propensity": lambda m, p: sigmoid_propensity_weight(
            m, p=p.get("p"), beta=p.get("beta")
        ),
        "power_lift": lambda m, p: power_lift_weight(m, p=p.get("p")),
    }

    # strategy name -> (parameter name, low, high) float ranges to search.
    # This is the single source of truth for "does this strategy need tuning";
    # strategies that are absent here have no tunable parameters.
    search_spaces = {
        "bm25": (("bm25_k1", 0.1, 1000), ("bm25_b", 0.0, 1.0)),
        "confidence": (("conf_alpha", 1.0, 150.0),),
        "power": (("power_p", 0.1, 1.5),),
        "robust_user_centric": (("scale_factor", 0.1, 10.0),),
        "robust_user_centric_weight_v2": (("lower_q", 5.0, 45.0), ("upper_q", 55.0, 95.0)),
        "sigmoid_propensity": (("p", 0.1, 5.0), ("beta", 0.0, 1.0)),
        "power_lift": (("p", 0.1, 1.5),),
    }

    if weighting_strategy not in weighters:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy

    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    AlgoFactory = algorithms[algorithm]
    param_space = search_spaces.get(strategy, ())

    print(f"Running optimization for {algo_name} with {strategy}...")

    def get_weighted_matrix(matrix, params):
        # Work on a copy so the caller's matrix is never handed to a weighter.
        return weighters[strategy](matrix.copy(), params)

    def objective(trial):
        # Suggest the weighting parameters for this trial (empty for
        # parameter-free strategies).
        params = {
            name: trial.suggest_float(name, low, high)
            for name, low, high in param_space
        }

        model = AlgoFactory()
        model.fit(get_weighted_matrix(train_mat, params), show_progress=False)

        # Score on the validation set; the unweighted train_mat is passed as
        # the training interactions for the metric.
        return ndcg_at_k(model, train_mat, val_mat, K=20, show_progress=False)

    # Optimize only if the strategy has parameters; otherwise a single trial
    # is enough to obtain the validation score.
    current_trials = n_trials if param_space else 1
    # NOTE(review): the study uses the default (unseeded) sampler and runs
    # trials in parallel (n_jobs=-1), so the sampled parameters will most
    # likely differ from run to run -- confirm whether that is acceptable.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=current_trials, n_jobs=-1)

    # --- Final Retraining & Testing ---
    # Use the best params to weight the full train_val matrix.
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    final_model = AlgoFactory()

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments.
    fit_started = time.perf_counter()
    final_model.fit(weighted_train_val, show_progress=False)
    fit_seconds = time.perf_counter() - fit_started

    # Evaluate on the test set.
    test_ndcg_10 = ndcg_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_precision_10 = precision_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_ndcg_20 = ndcg_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)
    test_precision_20 = precision_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results_df = pd.DataFrame([{
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": test_ndcg_10,
        "Test NDCG@20": test_ndcg_20,
        "Test Precision@10": test_precision_10,
        "Test Precision@20": test_precision_20,
        "Final Train Time (s)": fit_seconds,
        "Best Params": best_params,
    }])

    if output_dir:
        # Create the target folder on demand so a missing directory cannot
        # throw away the finished run at the very last step.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        results_df.to_csv(output_path, index=False)
    return results_df

In [None]:
# Create the output folder up front; exist_ok makes re-running the cell safe.
os.makedirs(results_folder, exist_ok=True)

# Experiment grid: every weighting strategy is run with both ALS configurations.
experiment_strategies = [
    "no_weighting",
    "bm25",
    "tfidf",
    "log",
    "confidence",
    "power",
    "normalized",
    "pmi",
    "robust_user_centric",
    "robust_user_centric_weight_v2",
    "sigmoid_propensity",
    "power_lift",
]
experiment_algorithms = ["ALS_factors=10", "ALS_factors=100"]

# Execution of all experiments, strategy by strategy. Each run writes its own
# CSV into results_folder (via output_dir), so nothing is displayed here.
for experiment_strategy in experiment_strategies:
    for experiment_algorithm in experiment_algorithms:
        run_hyperparameter_optimization(
            train_mat,
            val_mat,
            train_val_mat,
            test_mat,
            weighting_strategy=experiment_strategy,
            algorithm=experiment_algorithm,
            n_trials=20,
            output_dir=results_folder,
        )

[I 2026-01-29 21:35:49,832] A new study created in memory with name: no-name-768b3392-56c4-480e-897d-8c5116b7b2c9


Running optimization for ALS_factors=10 with no_weighting...


[I 2026-01-29 21:35:50,087] Trial 0 finished with value: 0.27244925287930244 and parameters: {}. Best is trial 0 with value: 0.27244925287930244.
[I 2026-01-29 21:35:50,128] A new study created in memory with name: no-name-f5db5fc9-e47a-4c01-a08c-57c7816623e8
[I 2026-01-29 21:35:50,249] Trial 0 finished with value: 0.2550847356281346 and parameters: {'bm25_k1': 392.9630961200425, 'bm25_b': 0.20702281961533286}. Best is trial 0 with value: 0.2550847356281346.
[I 2026-01-29 21:35:50,257] Trial 1 finished with value: 0.25914799539540434 and parameters: {'bm25_k1': 18.49902926772266, 'bm25_b': 0.7483218884291787}. Best is trial 1 with value: 0.25914799539540434.


Running optimization for ALS_factors=10 with bm25...


[I 2026-01-29 21:35:50,426] Trial 3 finished with value: 0.25441476754844666 and parameters: {'bm25_k1': 997.9611788813592, 'bm25_b': 0.6954266746749062}. Best is trial 1 with value: 0.25914799539540434.
[I 2026-01-29 21:35:50,441] Trial 2 finished with value: 0.2551876705787217 and parameters: {'bm25_k1': 382.11347123530504, 'bm25_b': 0.5964279163182016}. Best is trial 1 with value: 0.25914799539540434.
[I 2026-01-29 21:35:50,608] Trial 5 finished with value: 0.25478032825910524 and parameters: {'bm25_k1': 360.1119500875812, 'bm25_b': 0.48011264799868814}. Best is trial 1 with value: 0.25914799539540434.
[I 2026-01-29 21:35:50,633] Trial 4 finished with value: 0.254138725201777 and parameters: {'bm25_k1': 348.91783013800216, 'bm25_b': 0.2657594477163353}. Best is trial 1 with value: 0.25914799539540434.
[I 2026-01-29 21:35:50,727] Trial 6 finished with value: 0.25069574461272587 and parameters: {'bm25_k1': 687.0021561733502, 'bm25_b': 0.03312977932847816}. Best is trial 1 with value: 

Running optimization for ALS_factors=10 with tfidf...
Running optimization for ALS_factors=10 with log...


[I 2026-01-29 21:35:52,386] Trial 0 finished with value: 0.2748564171886328 and parameters: {}. Best is trial 0 with value: 0.2748564171886328.
[I 2026-01-29 21:35:52,472] A new study created in memory with name: no-name-37ff4c44-8ab5-46f6-907a-0bd78ee5228b


Running optimization for ALS_factors=10 with confidence...


[I 2026-01-29 21:35:52,679] Trial 3 finished with value: 0.09415663429745394 and parameters: {'conf_alpha': 61.53015979933998}. Best is trial 3 with value: 0.09415663429745394.
[I 2026-01-29 21:35:52,679] Trial 0 finished with value: 0.10500642340775568 and parameters: {'conf_alpha': 47.88322692730908}. Best is trial 0 with value: 0.10500642340775568.
[I 2026-01-29 21:35:52,852] Trial 2 finished with value: 0.24532508831293245 and parameters: {'conf_alpha': 2.420960109474791}. Best is trial 2 with value: 0.24532508831293245.
[I 2026-01-29 21:35:52,863] Trial 1 finished with value: 0.10052639050218767 and parameters: {'conf_alpha': 52.63691864172937}. Best is trial 2 with value: 0.24532508831293245.
[I 2026-01-29 21:35:53,047] Trial 4 finished with value: 0.06900719795424501 and parameters: {'conf_alpha': 124.52046796786814}. Best is trial 2 with value: 0.24532508831293245.
[I 2026-01-29 21:35:53,184] Trial 5 finished with value: 0.13148626914410907 and parameters: {'conf_alpha': 30.573

Running optimization for ALS_factors=10 with power...


[I 2026-01-29 21:35:54,794] Trial 1 finished with value: 0.2720824512833477 and parameters: {'power_p': 1.0900200199655208}. Best is trial 0 with value: 0.27596859825659364.
[I 2026-01-29 21:35:54,804] Trial 2 finished with value: 0.26905564381191116 and parameters: {'power_p': 1.2959640146802678}. Best is trial 0 with value: 0.27596859825659364.
[I 2026-01-29 21:35:54,813] Trial 3 finished with value: 0.2754136571799179 and parameters: {'power_p': 0.6659779423757933}. Best is trial 0 with value: 0.27596859825659364.
[I 2026-01-29 21:35:54,926] Trial 4 finished with value: 0.2687064054794161 and parameters: {'power_p': 1.386592060858165}. Best is trial 0 with value: 0.27596859825659364.
[I 2026-01-29 21:35:55,110] Trial 5 finished with value: 0.27653881536448477 and parameters: {'power_p': 0.5876105555196055}. Best is trial 5 with value: 0.27653881536448477.
[I 2026-01-29 21:35:55,117] Trial 7 finished with value: 0.2742886051719168 and parameters: {'power_p': 0.8423687900012309}. Best

Running optimization for ALS_factors=10 with normalized...
Running optimization for ALS_factors=10 with pmi...


[I 2026-01-29 21:35:56,746] A new study created in memory with name: no-name-32907444-5b40-4596-89b9-acdb243f8810


Running optimization for ALS_factors=10 with robust_user_centric...


[I 2026-01-29 21:35:57,816] Trial 0 finished with value: 0.2712777655851746 and parameters: {'scale_factor': 2.6444562181974063}. Best is trial 0 with value: 0.2712777655851746.
[I 2026-01-29 21:35:57,858] Trial 3 finished with value: 0.26281002642050155 and parameters: {'scale_factor': 1.1996727976998802}. Best is trial 0 with value: 0.2712777655851746.
[I 2026-01-29 21:35:58,014] Trial 2 finished with value: 0.2680459527436834 and parameters: {'scale_factor': 5.6423885014627375}. Best is trial 0 with value: 0.2712777655851746.
[I 2026-01-29 21:35:58,027] Trial 1 finished with value: 0.2659875698886756 and parameters: {'scale_factor': 8.014799310281573}. Best is trial 0 with value: 0.2712777655851746.
[I 2026-01-29 21:35:59,046] Trial 5 finished with value: 0.27123550961241494 and parameters: {'scale_factor': 2.6862717026586673}. Best is trial 0 with value: 0.2712777655851746.
[I 2026-01-29 21:35:59,057] Trial 7 finished with value: 0.2687460340676553 and parameters: {'scale_factor': 

Running optimization for ALS_factors=10 with robust_user_centric_weight_v2...


[I 2026-01-29 21:36:04,126] Trial 2 finished with value: 0.26064123626996744 and parameters: {'lower_q': 36.8932476986903, 'upper_q': 78.99836030844055}. Best is trial 2 with value: 0.26064123626996744.
[I 2026-01-29 21:36:04,143] Trial 1 finished with value: 0.2602171763377271 and parameters: {'lower_q': 14.651865144590257, 'upper_q': 67.67631586053496}. Best is trial 2 with value: 0.26064123626996744.
[I 2026-01-29 21:36:04,237] Trial 0 finished with value: 0.26187782354462424 and parameters: {'lower_q': 14.514990360593224, 'upper_q': 78.27859918211037}. Best is trial 0 with value: 0.26187782354462424.
[I 2026-01-29 21:36:04,345] Trial 3 finished with value: 0.2617475194277545 and parameters: {'lower_q': 15.336870119033525, 'upper_q': 75.9781621423968}. Best is trial 0 with value: 0.26187782354462424.
[I 2026-01-29 21:36:05,287] Trial 5 finished with value: 0.25916354264691255 and parameters: {'lower_q': 9.812835894182669, 'upper_q': 62.62008641796305}. Best is trial 0 with value: 0.

Running optimization for ALS_factors=10 with sigmoid_propensity...


[I 2026-01-29 21:36:09,385] Trial 5 finished with value: 0.2712850512090783 and parameters: {'p': 0.6712183816042632, 'beta': 0.5680663513485265}. Best is trial 1 with value: 0.27200828800145277.
[I 2026-01-29 21:36:09,576] Trial 4 finished with value: 0.2679033708916786 and parameters: {'p': 4.62774348222985, 'beta': 0.7233697712987768}. Best is trial 1 with value: 0.27200828800145277.
[I 2026-01-29 21:36:09,584] Trial 7 finished with value: 0.2680582659428842 and parameters: {'p': 2.4808171058513713, 'beta': 0.2782292085744993}. Best is trial 1 with value: 0.27200828800145277.
[I 2026-01-29 21:36:09,592] Trial 6 finished with value: 0.2722512431790095 and parameters: {'p': 2.5426792784636576, 'beta': 0.5896496071314119}. Best is trial 6 with value: 0.2722512431790095.
[I 2026-01-29 21:36:09,709] Trial 8 finished with value: 0.26290187607776305 and parameters: {'p': 4.80996985007589, 'beta': 0.06796408065284398}. Best is trial 6 with value: 0.2722512431790095.
[I 2026-01-29 21:36:09,8

Running optimization for ALS_factors=10 with power_lift...


[I 2026-01-29 21:36:11,164] Trial 1 finished with value: 0.27456912179253123 and parameters: {'p': 0.13902328182015336}. Best is trial 1 with value: 0.27456912179253123.
[I 2026-01-29 21:36:11,173] Trial 3 finished with value: 0.275096944280977 and parameters: {'p': 0.20298154336881027}. Best is trial 3 with value: 0.275096944280977.
[I 2026-01-29 21:36:11,341] Trial 5 finished with value: 0.2569837338043629 and parameters: {'p': 0.6177303041200047}. Best is trial 3 with value: 0.275096944280977.
[I 2026-01-29 21:36:11,355] Trial 4 finished with value: 0.16803302303037174 and parameters: {'p': 1.1233494065746912}. Best is trial 3 with value: 0.275096944280977.
[I 2026-01-29 21:36:11,521] Trial 6 finished with value: 0.12735551742295206 and parameters: {'p': 1.337033567573489}. Best is trial 3 with value: 0.275096944280977.
[I 2026-01-29 21:36:11,538] Trial 7 finished with value: 0.2582476670148731 and parameters: {'p': 0.6071344452533265}. Best is trial 3 with value: 0.275096944280977.

Running optimization for ALS_factors=100 with no_weighting...
Running optimization for ALS_factors=100 with bm25...


[I 2026-01-29 21:36:13,210] Trial 2 finished with value: 0.1962296204455114 and parameters: {'bm25_k1': 177.51490727488851, 'bm25_b': 0.895770186073001}. Best is trial 2 with value: 0.1962296204455114.
[I 2026-01-29 21:36:13,217] Trial 3 finished with value: 0.20410427851102889 and parameters: {'bm25_k1': 367.16800832826516, 'bm25_b': 0.42004196262833027}. Best is trial 3 with value: 0.20410427851102889.
[I 2026-01-29 21:36:13,227] Trial 0 finished with value: 0.20487104388630745 and parameters: {'bm25_k1': 250.5554320950748, 'bm25_b': 0.3791608995473471}. Best is trial 0 with value: 0.20487104388630745.
[I 2026-01-29 21:36:13,236] Trial 1 finished with value: 0.20503957534210954 and parameters: {'bm25_k1': 929.0896949855402, 'bm25_b': 0.29348730055408645}. Best is trial 1 with value: 0.20503957534210954.
[I 2026-01-29 21:36:13,492] Trial 4 finished with value: 0.19964041130355376 and parameters: {'bm25_k1': 928.3371651975382, 'bm25_b': 0.5902475768187397}. Best is trial 1 with value: 

Running optimization for ALS_factors=100 with tfidf...


[I 2026-01-29 21:36:15,136] Trial 0 finished with value: 0.21583692741900962 and parameters: {}. Best is trial 0 with value: 0.21583692741900962.
[I 2026-01-29 21:36:15,224] A new study created in memory with name: no-name-a033cd6e-c96a-4e60-a0f3-5dae3c02139e
[I 2026-01-29 21:36:15,304] Trial 0 finished with value: 0.21607614798573896 and parameters: {}. Best is trial 0 with value: 0.21607614798573896.
[I 2026-01-29 21:36:15,392] A new study created in memory with name: no-name-e11b2f4a-9918-4e48-8047-52dbb0371a09


Running optimization for ALS_factors=100 with log...
Running optimization for ALS_factors=100 with confidence...


[I 2026-01-29 21:36:15,650] Trial 0 finished with value: 0.16508622623033764 and parameters: {'conf_alpha': 63.3598069974504}. Best is trial 0 with value: 0.16508622623033764.
[I 2026-01-29 21:36:15,658] Trial 1 finished with value: 0.1468716472992936 and parameters: {'conf_alpha': 103.28303202469351}. Best is trial 0 with value: 0.16508622623033764.
[I 2026-01-29 21:36:15,669] Trial 2 finished with value: 0.13896739723668375 and parameters: {'conf_alpha': 133.81888500693643}. Best is trial 0 with value: 0.16508622623033764.
[I 2026-01-29 21:36:15,839] Trial 3 finished with value: 0.16836836160238874 and parameters: {'conf_alpha': 59.25325476044488}. Best is trial 3 with value: 0.16836836160238874.
[I 2026-01-29 21:36:16,077] Trial 5 finished with value: 0.15491059531207443 and parameters: {'conf_alpha': 87.10004008777787}. Best is trial 3 with value: 0.16836836160238874.
[I 2026-01-29 21:36:16,087] Trial 4 finished with value: 0.137698770496789 and parameters: {'conf_alpha': 137.44829

Running optimization for ALS_factors=100 with power...


[I 2026-01-29 21:36:18,077] Trial 0 finished with value: 0.21692548805057385 and parameters: {'power_p': 1.0227201923681022}. Best is trial 0 with value: 0.21692548805057385.
[I 2026-01-29 21:36:18,258] Trial 2 finished with value: 0.21762481316697546 and parameters: {'power_p': 1.2734523622342635}. Best is trial 2 with value: 0.21762481316697546.
[I 2026-01-29 21:36:18,273] Trial 1 finished with value: 0.2167945661889386 and parameters: {'power_p': 0.4506954565129444}. Best is trial 2 with value: 0.21762481316697546.
[I 2026-01-29 21:36:18,288] Trial 3 finished with value: 0.21386561831521178 and parameters: {'power_p': 0.2436862302325138}. Best is trial 2 with value: 0.21762481316697546.
[I 2026-01-29 21:36:18,559] Trial 4 finished with value: 0.21625676995017237 and parameters: {'power_p': 0.6092999040500637}. Best is trial 2 with value: 0.21762481316697546.
[I 2026-01-29 21:36:18,582] Trial 7 finished with value: 0.21610147325886578 and parameters: {'power_p': 0.51425691048556}. Be

Running optimization for ALS_factors=100 with normalized...
Running optimization for ALS_factors=100 with pmi...


[I 2026-01-29 21:36:20,342] Trial 0 finished with value: 0.19111085700305178 and parameters: {}. Best is trial 0 with value: 0.19111085700305178.
[I 2026-01-29 21:36:20,431] A new study created in memory with name: no-name-6d923ad1-7c01-48f1-aa90-4a55253a4c57


Running optimization for ALS_factors=100 with robust_user_centric...


[I 2026-01-29 21:36:21,562] Trial 3 finished with value: 0.20431562959598393 and parameters: {'scale_factor': 2.008183389769658}. Best is trial 3 with value: 0.20431562959598393.
[I 2026-01-29 21:36:21,570] Trial 1 finished with value: 0.21171537592866346 and parameters: {'scale_factor': 4.418970079933107}. Best is trial 1 with value: 0.21171537592866346.
[I 2026-01-29 21:36:21,680] Trial 2 finished with value: 0.18875410790747546 and parameters: {'scale_factor': 0.8736277486550135}. Best is trial 1 with value: 0.21171537592866346.
[I 2026-01-29 21:36:21,857] Trial 0 finished with value: 0.2085470139734176 and parameters: {'scale_factor': 2.394833802702738}. Best is trial 1 with value: 0.21171537592866346.
[I 2026-01-29 21:36:22,953] Trial 4 finished with value: 0.21108261153875335 and parameters: {'scale_factor': 4.895088359609936}. Best is trial 1 with value: 0.21171537592866346.
[I 2026-01-29 21:36:22,972] Trial 5 finished with value: 0.21145486635638536 and parameters: {'scale_fact

Running optimization for ALS_factors=100 with robust_user_centric_weight_v2...


[I 2026-01-29 21:36:28,486] Trial 3 finished with value: 0.19590423571447374 and parameters: {'lower_q': 30.069096915779863, 'upper_q': 83.53069777740404}. Best is trial 3 with value: 0.19590423571447374.
[I 2026-01-29 21:36:28,588] Trial 0 finished with value: 0.18670428669399255 and parameters: {'lower_q': 41.35173363165335, 'upper_q': 73.88283518818308}. Best is trial 3 with value: 0.19590423571447374.
[I 2026-01-29 21:36:28,608] Trial 2 finished with value: 0.19883123411864595 and parameters: {'lower_q': 19.523713314443093, 'upper_q': 84.42780094904067}. Best is trial 2 with value: 0.19883123411864595.
[I 2026-01-29 21:36:28,703] Trial 1 finished with value: 0.19056978491848084 and parameters: {'lower_q': 34.65764703263888, 'upper_q': 67.50116677912648}. Best is trial 2 with value: 0.19883123411864595.
[I 2026-01-29 21:36:29,774] Trial 4 finished with value: 0.19253017269186548 and parameters: {'lower_q': 11.825564859246981, 'upper_q': 55.66130916254886}. Best is trial 2 with value

Running optimization for ALS_factors=100 with sigmoid_propensity...


[I 2026-01-29 21:36:34,045] Trial 3 finished with value: 0.21978194131601553 and parameters: {'p': 1.267129304001736, 'beta': 0.9081640847987025}. Best is trial 3 with value: 0.21978194131601553.
[I 2026-01-29 21:36:34,062] Trial 1 finished with value: 0.21629484572171764 and parameters: {'p': 2.337177107420177, 'beta': 0.5505157521025191}. Best is trial 3 with value: 0.21978194131601553.
[I 2026-01-29 21:36:34,071] Trial 0 finished with value: 0.2184135609869795 and parameters: {'p': 1.4472188847245229, 'beta': 0.7282328023555357}. Best is trial 3 with value: 0.21978194131601553.
[I 2026-01-29 21:36:34,072] Trial 2 finished with value: 0.19951102407902513 and parameters: {'p': 1.754583261209572, 'beta': 0.05339150693109784}. Best is trial 3 with value: 0.21978194131601553.
[I 2026-01-29 21:36:34,326] Trial 4 finished with value: 0.2085639456465716 and parameters: {'p': 4.498423599870919, 'beta': 0.5092912850902908}. Best is trial 3 with value: 0.21978194131601553.
[I 2026-01-29 21:36:

Running optimization for ALS_factors=100 with power_lift...


[I 2026-01-29 21:36:35,905] Trial 0 finished with value: 0.19491544361222762 and parameters: {'p': 1.0028780566704256}. Best is trial 0 with value: 0.19491544361222762.
[I 2026-01-29 21:36:36,013] Trial 1 finished with value: 0.2124158633050557 and parameters: {'p': 0.253216302866053}. Best is trial 1 with value: 0.2124158633050557.
[I 2026-01-29 21:36:36,121] Trial 2 finished with value: 0.20738866968265482 and parameters: {'p': 0.4657381146829297}. Best is trial 1 with value: 0.2124158633050557.
[I 2026-01-29 21:36:36,131] Trial 3 finished with value: 0.19555704400855548 and parameters: {'p': 1.070329076645179}. Best is trial 1 with value: 0.2124158633050557.
[I 2026-01-29 21:36:36,375] Trial 5 finished with value: 0.18499684180119594 and parameters: {'p': 1.451454831506804}. Best is trial 1 with value: 0.2124158633050557.
[I 2026-01-29 21:36:36,478] Trial 4 finished with value: 0.19525502772577927 and parameters: {'p': 1.159018357297566}. Best is trial 1 with value: 0.21241586330505

Unnamed: 0,Algorithm,Strategy,Best Val NDCG@20,Test NDCG@20,Test Precision@20,Final Train Time (s),Best Params
11,ALS_factors=10,power_lift,0.275097,0.339081,0.373401,0.029398,{'p': 0.20298154336881027}
7,ALS_factors=10,pmi,0.265106,0.332565,0.358479,0.03041,{}
5,ALS_factors=10,power,0.276861,0.331789,0.372335,0.030452,{'power_p': 0.5504693184151779}
3,ALS_factors=10,log,0.274856,0.330926,0.370914,0.032584,{}
10,ALS_factors=10,sigmoid_propensity,0.273607,0.330089,0.370914,0.03045,"{'p': 1.40152528826798, 'beta': 0.568393012901..."
0,ALS_factors=10,no_weighting,0.272449,0.329265,0.36748,0.030377,{}
8,ALS_factors=10,robust_user_centric,0.272076,0.327652,0.367598,0.030441,{'scale_factor': 2.8983133438111786}
1,ALS_factors=10,bm25,0.260228,0.324415,0.358835,0.034649,"{'bm25_k1': 10.924518246974912, 'bm25_b': 0.47..."
2,ALS_factors=10,tfidf,0.258425,0.322167,0.356111,0.033605,{}
4,ALS_factors=10,confidence,0.260652,0.321578,0.355045,0.029398,{'conf_alpha': 1.1958441973755038}


In [None]:
import glob

# Read every CSV in the result folder. Sorting the paths makes the read order
# (and therefore the order of tied rows) independent of the filesystem.
result_frames = [
    pd.read_csv(csv_path)
    for csv_path in sorted(glob.glob(f"{results_folder}/*.csv"))
]

if result_frames:
    # Combine all runs, rank them by test NDCG@20 (best first) and give the
    # rows a clean 0..n-1 index instead of the repeated per-file indices.
    experiment_results = (
        pd.concat(result_frames, ignore_index=True)
        .sort_values("Test NDCG@20", ascending=False)
        .reset_index(drop=True)
    )
    experiment_results.to_csv(results_filename, index=False)
else:
    # Bind the name even when nothing was found, so the display line below
    # neither raises NameError on a fresh kernel nor shows a stale frame.
    experiment_results = pd.DataFrame()
    print("No results found.")

experiment_results