In [11]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, confidence_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, precision_at_k, ndcg_at_k

import cornac


In [12]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories
sys.path.append(os.path.abspath(".."))

from utils.sparse import transform_dataframe_to_sparse

In [13]:
movielens_df = (
    pd.DataFrame(
        data=cornac.datasets.movielens.load_feedback(variant="20M"),
        columns=['user_id', 'item_id', 'target']
    )
    .loc[:, ['user_id', 'item_id', 'target']]
    .dropna()
)
movielens_df['user_id'].nunique(), movielens_df['item_id'].nunique(), movielens_df.shape[0]

(138493, 26744, 20000263)

In [18]:
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    movielens_df, row_field='user_id', col_field='item_id', data_field='target'
)


train_val_mat, test_mat = train_test_split(user_item_matrix, train_percentage=0.9, random_state=42)
train_mat, val_mat = train_test_split(train_val_mat, train_percentage=0.9, random_state=42)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (138493, 26744), Val Shape: (138493, 26744), Test Shape: (138493, 26744)


In [None]:
results_folder = "results/movielens_20m"
results_filename = "movielens_20m_experiment_results.csv"

import time

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
) -> pd.DataFrame:
    results = []
    algorithms = {
        "ALS_factors=10": lambda: AlternatingLeastSquares(factors=10, random_state=42),
        "ALS_factors=100": lambda: AlternatingLeastSquares(factors=100, random_state=42),
    }
    strategies = [
        "no_weighting",
        "bm25",
        "tfidf",
        "log", 
        "confidence",
        "power",
        "normalized",
        "pmi",
        "robust_user_centric",
        "robust_user_centric_weight_v2",
        "sigmoid_propensity",
        "power_lift"
    ]
    if weighting_strategy not in strategies:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy

    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    AlgoFactory = algorithms[algorithm]

    print(f"Running optimization for {algo_name} with {strategy}...")

    def get_weighted_matrix(matrix, params):
        weighted = matrix.copy()
        if strategy == "bm25":
            weighted = bm25_weight(weighted, K1=params.get("bm25_k1"), B=params.get("bm25_b"))
        elif strategy == "confidence":
            weighted = confidence_weight(weighted, alpha=params.get("conf_alpha"))
        elif strategy == "power":
            weighted = power_weight(weighted, p=params.get("power_p"))
        elif strategy == "tfidf":
            weighted = tfidf_weight(weighted)
        elif strategy == "log":
            weighted = log_weight(weighted)
        elif strategy == "normalized":
            weighted = normalized_weight(weighted)
        elif strategy == "pmi":
            weighted = pmi_weight(weighted)
        elif strategy == "robust_user_centric":
            weighted = robust_user_centric_weight(weighted, scale_factor=params.get("scale_factor"))
        elif strategy == "sigmoid_propensity":
            weighted = sigmoid_propensity_weight(weighted, p=params.get("p"), beta=params.get("beta"))
        elif strategy == "power_lift":
            weighted = power_lift_weight(weighted, p=params.get("p"))
        elif strategy == "robust_user_centric_weight_v2":
            weighted = robust_user_centric_weight_v2(weighted, lower_q=params.get("lower_q"), upper_q=params.get("upper_q"))
        return weighted

    def objective(trial):
        params = {}
        # Suggest weighting strategy parameters
        if strategy == "bm25":
            params["bm25_k1"] = trial.suggest_float("bm25_k1", 0.1, 1000)
            params["bm25_b"] = trial.suggest_float("bm25_b", 0.0, 1.0)
        elif strategy == "confidence":
            params["conf_alpha"] = trial.suggest_float("conf_alpha", 1.0, 150.0)
        elif strategy == "power":
            params["power_p"] = trial.suggest_float("power_p", 0.1, 1.5)
        elif strategy == "robust_user_centric":
            params["scale_factor"] = trial.suggest_float("scale_factor", 0.1, 10.0)
        elif strategy == "robust_user_centric_weight_v2":
            params["lower_q"] = trial.suggest_float("lower_q", 5.0, 45.0)
            params["upper_q"] = trial.suggest_float("upper_q", 55.0, 95.0)
        elif strategy == "sigmoid_propensity":
            params["p"] = trial.suggest_float("p", 0.1, 5.0)
            params["beta"] = trial.suggest_float("beta", 0.0, 1.0)
        elif strategy == "power_lift":
            params["p"] = trial.suggest_float("p", 0.1, 1.5)
        weighted_train = get_weighted_matrix(train_mat, params)
        
        # Train Model
        model = AlgoFactory()
        model.fit(weighted_train, show_progress=False)

        # Evaluate on Validation Set
        return ndcg_at_k(model, train_mat, val_mat, K=20, show_progress=False)

    # Optimize only if strategy has parameters
    current_trials = n_trials if strategy in ["bm25", "confidence", "power", "robust_user_centric", "robust_user_centric_weight_v2", "sigmoid_propensity", "power_lift"] else 1
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=current_trials, n_jobs=-1)

    # --- Final Retraining & Testing ---
    # Use best params to weight the full train_val matrix
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    # Train Final Model
    final_model = AlgoFactory()
    
    start_time = time.time()
    final_model.fit(weighted_train_val, show_progress=False)
    end_time = time.time()
    
    # Evaluate on Test Set
    test_ndcg_10 = ndcg_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_precision_10 = precision_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_ndcg_20 = ndcg_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)
    test_precision_20 = precision_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results.append({
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": test_ndcg_10,
        "Test NDCG@20": test_ndcg_20,
        "Test Precision@10": test_precision_10,
        "Test Precision@20": test_precision_20,
        "Final Train Time (s)": end_time - start_time,
        "Best Params": best_params
    })

    if output_dir:
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        pd.DataFrame(results).to_csv(output_path, index=False)
    return pd.DataFrame(results)

In [None]:
if not os.path.exists(results_folder):
    os.makedirs(results_folder)

# Execution of all experiments
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)

[I 2026-01-29 21:47:01,417] A new study created in memory with name: no-name-95fa011a-625e-4a08-b999-5260cef9b94e


Running optimization for ALS_factors=10 with no_weighting...


[I 2026-01-29 21:47:05,852] Trial 0 finished with value: 0.2077362353361567 and parameters: {}. Best is trial 0 with value: 0.2077362353361567.
[I 2026-01-29 21:47:11,378] A new study created in memory with name: no-name-cd2f2006-c9d0-46b0-8413-1860250e32fa


Running optimization for ALS_factors=10 with bm25...


[I 2026-01-29 21:47:28,511] Trial 2 finished with value: 0.1993047029214265 and parameters: {'bm25_k1': 183.90837928197922, 'bm25_b': 0.15769347935927014}. Best is trial 2 with value: 0.1993047029214265.
[I 2026-01-29 21:47:28,536] Trial 3 finished with value: 0.20270874823184418 and parameters: {'bm25_k1': 358.4755935673906, 'bm25_b': 0.5242285788623163}. Best is trial 3 with value: 0.20270874823184418.
[I 2026-01-29 21:47:28,596] Trial 0 finished with value: 0.20292763343505626 and parameters: {'bm25_k1': 789.4936413291072, 'bm25_b': 0.6412531115079463}. Best is trial 0 with value: 0.20292763343505626.
[I 2026-01-29 21:47:28,634] Trial 1 finished with value: 0.2025130614083314 and parameters: {'bm25_k1': 679.7606494994739, 'bm25_b': 0.8189135357396745}. Best is trial 0 with value: 0.20292763343505626.
[I 2026-01-29 21:47:45,630] Trial 7 finished with value: 0.20008185011022017 and parameters: {'bm25_k1': 51.22215890719594, 'bm25_b': 0.18974311382739584}. Best is trial 0 with value: 0

Running optimization for ALS_factors=10 with tfidf...


[I 2026-01-29 21:48:48,407] Trial 0 finished with value: 0.19962967931942496 and parameters: {}. Best is trial 0 with value: 0.19962967931942496.
[I 2026-01-29 21:48:54,778] A new study created in memory with name: no-name-bcaed4ef-e4c6-45a9-bea8-cd28a27a8bf7


Running optimization for ALS_factors=10 with log...


[I 2026-01-29 21:48:59,637] Trial 0 finished with value: 0.2116911582939094 and parameters: {}. Best is trial 0 with value: 0.2116911582939094.
[I 2026-01-29 21:49:05,613] A new study created in memory with name: no-name-29daa16d-6abe-46d5-8c85-1a5942a08d1c


Running optimization for ALS_factors=10 with confidence...


[I 2026-01-29 21:49:22,524] Trial 2 finished with value: 0.07514420880710147 and parameters: {'conf_alpha': 143.47013903152353}. Best is trial 2 with value: 0.07514420880710147.
[I 2026-01-29 21:49:22,540] Trial 0 finished with value: 0.07580825342842462 and parameters: {'conf_alpha': 138.30547485592115}. Best is trial 0 with value: 0.07580825342842462.
[I 2026-01-29 21:49:22,551] Trial 1 finished with value: 0.16865573870362174 and parameters: {'conf_alpha': 8.431531047878433}. Best is trial 1 with value: 0.16865573870362174.
[I 2026-01-29 21:49:22,580] Trial 3 finished with value: 0.1631014495565294 and parameters: {'conf_alpha': 10.355930516357763}. Best is trial 1 with value: 0.16865573870362174.
[I 2026-01-29 21:49:39,660] Trial 5 finished with value: 0.09435249386596956 and parameters: {'conf_alpha': 62.986048360494124}. Best is trial 1 with value: 0.16865573870362174.
[I 2026-01-29 21:49:39,674] Trial 6 finished with value: 0.08864014947778437 and parameters: {'conf_alpha': 77.4

Running optimization for ALS_factors=10 with power...


[I 2026-01-29 21:50:54,089] Trial 1 finished with value: 0.2116771962322772 and parameters: {'power_p': 0.2003463817123427}. Best is trial 1 with value: 0.2116771962322772.
[I 2026-01-29 21:50:54,105] Trial 0 finished with value: 0.20639013524319988 and parameters: {'power_p': 1.1336831451039948}. Best is trial 1 with value: 0.2116771962322772.
[I 2026-01-29 21:50:54,118] Trial 3 finished with value: 0.21164760504526425 and parameters: {'power_p': 0.17337997937642396}. Best is trial 1 with value: 0.2116771962322772.
[I 2026-01-29 21:50:54,158] Trial 2 finished with value: 0.2105726983120653 and parameters: {'power_p': 0.6396764877250907}. Best is trial 1 with value: 0.2116771962322772.
[I 2026-01-29 21:51:10,818] Trial 4 finished with value: 0.2098685605220463 and parameters: {'power_p': 0.7440379120882871}. Best is trial 1 with value: 0.2116771962322772.
[I 2026-01-29 21:51:10,830] Trial 7 finished with value: 0.2028263184834304 and parameters: {'power_p': 1.4274401465664157}. Best is

Running optimization for ALS_factors=10 with normalized...


[I 2026-01-29 21:52:11,883] Trial 0 finished with value: 0.17004916262741435 and parameters: {}. Best is trial 0 with value: 0.17004916262741435.
[I 2026-01-29 21:52:18,057] A new study created in memory with name: no-name-746669e2-923c-4bd3-b489-4105089ff578


Running optimization for ALS_factors=10 with pmi...


[I 2026-01-29 21:52:23,254] Trial 0 finished with value: 0.208702257107758 and parameters: {}. Best is trial 0 with value: 0.208702257107758.
[I 2026-01-29 21:52:29,646] A new study created in memory with name: no-name-8207eddc-36e6-4496-b663-d82e5b4eb7dd


Running optimization for ALS_factors=10 with robust_user_centric...


[I 2026-01-29 21:55:05,227] Trial 1 finished with value: 0.20404377298813248 and parameters: {'scale_factor': 9.046489499262165}. Best is trial 1 with value: 0.20404377298813248.
[I 2026-01-29 21:55:05,285] Trial 2 finished with value: 0.20298807326153195 and parameters: {'scale_factor': 9.96341299103522}. Best is trial 1 with value: 0.20404377298813248.
[I 2026-01-29 21:55:05,337] Trial 0 finished with value: 0.20337327851480266 and parameters: {'scale_factor': 9.605410383849454}. Best is trial 1 with value: 0.20404377298813248.
[I 2026-01-29 21:55:05,359] Trial 3 finished with value: 0.20663539953709725 and parameters: {'scale_factor': 6.58245457008941}. Best is trial 3 with value: 0.20663539953709725.
[I 2026-01-29 21:57:41,456] Trial 5 finished with value: 0.20505188435315083 and parameters: {'scale_factor': 8.220603987468003}. Best is trial 3 with value: 0.20663539953709725.
[I 2026-01-29 21:57:41,469] Trial 4 finished with value: 0.20990195313574497 and parameters: {'scale_factor

Running optimization for ALS_factors=10 with robust_user_centric_weight_v2...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-01-29 22:08:33,939] Trial 0 finished with value: 0.20909554717305054 and parameters: {'lower_q': 20.82399887074351, 'upper_q': 80.8749837792601}. Best is trial 0 with value: 0.20909554717305054.
[I 2026-01-29 22:08:33,978] Trial 3 finished with value: 0.20717929035887145 and parameters: {'lower_q': 32.87711758046851, 'upper_q': 60.92732659721834}. Best is trial 0 with value: 0.20909554717305054.
[I 2026-01-29 22:08:34,021] Trial 2 finished with value: 0.20745865123746046 and parameters: {'lower_q': 36.14039609654337, 'upper_q': 71.59133738705216}. Best is trial 0 with value: 0.20909554717305054.
[I 2026-01-29 22:08:34,033] Trial 1 finished with value: 0.2087830507698217 and parameters: {'lower_q': 10.929731035593683, 'upper_q': 60.91864164273161}. Best is trial 0 with value: 0.20909554717305054.
[I 2026-01-29 22:11:08,720] Trial 5 finished with value: 0.2092613883616222 and parameters: {'lower_q': 30.08385409637985, 'upper_q': 91.32674216

Running optimization for ALS_factors=10 with sigmoid_propensity...


[I 2026-01-29 22:19:39,946] Trial 0 finished with value: 0.2091595992569129 and parameters: {'p': 4.132213813221398, 'beta': 0.4853896600431553}. Best is trial 0 with value: 0.2091595992569129.
[I 2026-01-29 22:19:39,958] Trial 2 finished with value: 0.20897774307604255 and parameters: {'p': 4.5145169785883725, 'beta': 0.48629334933675183}. Best is trial 0 with value: 0.2091595992569129.
[I 2026-01-29 22:19:39,969] Trial 3 finished with value: 0.21053055342295923 and parameters: {'p': 1.4288147076029356, 'beta': 0.03624775411897707}. Best is trial 3 with value: 0.21053055342295923.
[I 2026-01-29 22:19:39,993] Trial 1 finished with value: 0.20779683327039708 and parameters: {'p': 4.857710535549534, 'beta': 0.7922035126654461}. Best is trial 3 with value: 0.21053055342295923.
[I 2026-01-29 22:19:57,250] Trial 5 finished with value: 0.21003922981434978 and parameters: {'p': 2.702843375357526, 'beta': 0.3730759653550533}. Best is trial 3 with value: 0.21053055342295923.
[I 2026-01-29 22:19

Running optimization for ALS_factors=10 with power_lift...


[I 2026-01-29 22:21:13,060] Trial 0 finished with value: 0.2034677367747296 and parameters: {'p': 0.4741489204351135}. Best is trial 0 with value: 0.2034677367747296.
[I 2026-01-29 22:21:13,071] Trial 2 finished with value: 0.05753866822905031 and parameters: {'p': 1.4835891926472424}. Best is trial 0 with value: 0.2034677367747296.
[I 2026-01-29 22:21:13,083] Trial 1 finished with value: 0.1361205669746356 and parameters: {'p': 0.9704492849753256}. Best is trial 0 with value: 0.2034677367747296.
[I 2026-01-29 22:21:13,130] Trial 3 finished with value: 0.1704501003925382 and parameters: {'p': 0.8172539706336089}. Best is trial 0 with value: 0.2034677367747296.
[I 2026-01-29 22:21:30,380] Trial 7 finished with value: 0.1990893211959117 and parameters: {'p': 0.5528987417822829}. Best is trial 0 with value: 0.2034677367747296.
[I 2026-01-29 22:21:30,402] Trial 4 finished with value: 0.07853177714352037 and parameters: {'p': 1.3074510839517335}. Best is trial 0 with value: 0.20346773677472

Running optimization for ALS_factors=100 with no_weighting...


[I 2026-01-29 22:22:39,128] Trial 0 finished with value: 0.25279458876934735 and parameters: {}. Best is trial 0 with value: 0.25279458876934735.
[I 2026-01-29 22:22:51,376] A new study created in memory with name: no-name-da1e8848-e1eb-4b71-bf74-f4bf41d95e80


Running optimization for ALS_factors=100 with bm25...


[I 2026-01-29 22:23:32,948] Trial 0 finished with value: 0.24688265511370164 and parameters: {'bm25_k1': 820.9028163391041, 'bm25_b': 0.8430222868215274}. Best is trial 0 with value: 0.24688265511370164.
[I 2026-01-29 22:23:32,969] Trial 3 finished with value: 0.2496982086410271 and parameters: {'bm25_k1': 603.4862718354815, 'bm25_b': 0.4316816365523063}. Best is trial 3 with value: 0.2496982086410271.
[I 2026-01-29 22:23:32,980] Trial 2 finished with value: 0.2480508080728502 and parameters: {'bm25_k1': 610.6586558140339, 'bm25_b': 0.14398010110104498}. Best is trial 3 with value: 0.2496982086410271.
[I 2026-01-29 22:23:33,009] Trial 1 finished with value: 0.24968380915578195 and parameters: {'bm25_k1': 838.126827733848, 'bm25_b': 0.42363790202722607}. Best is trial 3 with value: 0.2496982086410271.
[I 2026-01-29 22:24:14,483] Trial 4 finished with value: 0.24912820606351424 and parameters: {'bm25_k1': 906.1182168953643, 'bm25_b': 0.24275703150056027}. Best is trial 3 with value: 0.24

Running optimization for ALS_factors=100 with tfidf...


[I 2026-01-29 22:26:43,327] Trial 0 finished with value: 0.25643860126145884 and parameters: {}. Best is trial 0 with value: 0.25643860126145884.
[I 2026-01-29 22:26:56,188] A new study created in memory with name: no-name-6003ef3d-d68e-4e07-8152-f553293525bf


Running optimization for ALS_factors=100 with log...


[I 2026-01-29 22:27:07,095] Trial 0 finished with value: 0.23192368355761553 and parameters: {}. Best is trial 0 with value: 0.23192368355761553.
[I 2026-01-29 22:27:19,813] A new study created in memory with name: no-name-30c85555-fc23-428d-9802-1ee142696e56


Running optimization for ALS_factors=100 with confidence...


[I 2026-01-29 22:28:01,176] Trial 0 finished with value: 0.1177009060495326 and parameters: {'conf_alpha': 62.854547909560964}. Best is trial 0 with value: 0.1177009060495326.
[I 2026-01-29 22:28:01,187] Trial 2 finished with value: 0.10435107443887977 and parameters: {'conf_alpha': 107.92163056046819}. Best is trial 0 with value: 0.1177009060495326.
[I 2026-01-29 22:28:01,220] Trial 1 finished with value: 0.11231278339163796 and parameters: {'conf_alpha': 77.3856615963707}. Best is trial 0 with value: 0.1177009060495326.
[I 2026-01-29 22:28:01,239] Trial 3 finished with value: 0.10548574786019664 and parameters: {'conf_alpha': 103.37496694173862}. Best is trial 0 with value: 0.1177009060495326.
[I 2026-01-29 22:28:42,498] Trial 4 finished with value: 0.11127633388605711 and parameters: {'conf_alpha': 80.77645616309456}. Best is trial 0 with value: 0.1177009060495326.
[I 2026-01-29 22:28:42,514] Trial 6 finished with value: 0.12291546055046756 and parameters: {'conf_alpha': 52.71972226

Running optimization for ALS_factors=100 with power...


[I 2026-01-29 22:31:40,540] Trial 3 finished with value: 0.22260233877538677 and parameters: {'power_p': 0.14092829997017842}. Best is trial 3 with value: 0.22260233877538677.
[I 2026-01-29 22:31:40,550] Trial 0 finished with value: 0.2411065140197115 and parameters: {'power_p': 0.5526919676677299}. Best is trial 0 with value: 0.2411065140197115.
[I 2026-01-29 22:31:40,578] Trial 2 finished with value: 0.2432868565175219 and parameters: {'power_p': 0.6160064799707675}. Best is trial 2 with value: 0.2432868565175219.
[I 2026-01-29 22:31:40,591] Trial 1 finished with value: 0.23238329183266979 and parameters: {'power_p': 0.33310264855366123}. Best is trial 2 with value: 0.2432868565175219.
[I 2026-01-29 22:32:21,580] Trial 5 finished with value: 0.24852647787113533 and parameters: {'power_p': 0.7999216324592945}. Best is trial 5 with value: 0.24852647787113533.
[I 2026-01-29 22:32:21,592] Trial 7 finished with value: 0.24471738596620185 and parameters: {'power_p': 0.6617091281266759}. Be

Running optimization for ALS_factors=100 with normalized...


[I 2026-01-29 22:34:48,607] Trial 0 finished with value: 0.11909927656735733 and parameters: {}. Best is trial 0 with value: 0.11909927656735733.
[I 2026-01-29 22:35:01,574] A new study created in memory with name: no-name-e799e490-8986-492f-9afe-33d92adff841


Running optimization for ALS_factors=100 with pmi...


[I 2026-01-29 22:35:12,864] Trial 0 finished with value: 0.25730942264788265 and parameters: {}. Best is trial 0 with value: 0.25730942264788265.
[I 2026-01-29 22:35:26,012] A new study created in memory with name: no-name-19dc73ca-f6cf-45fc-9bc8-26cd54d108d4


Running optimization for ALS_factors=100 with robust_user_centric...


[I 2026-01-29 22:38:26,196] Trial 0 finished with value: 0.2534015461961339 and parameters: {'scale_factor': 7.866797046618183}. Best is trial 0 with value: 0.2534015461961339.
[I 2026-01-29 22:38:26,246] Trial 3 finished with value: 0.24381826690549635 and parameters: {'scale_factor': 4.543857833787616}. Best is trial 0 with value: 0.2534015461961339.
[I 2026-01-29 22:38:26,292] Trial 1 finished with value: 0.2265780174767227 and parameters: {'scale_factor': 2.619244824475539}. Best is trial 0 with value: 0.2534015461961339.
[I 2026-01-29 22:38:26,292] Trial 2 finished with value: 0.21203618806156793 and parameters: {'scale_factor': 1.8239069232530183}. Best is trial 0 with value: 0.2534015461961339.
[I 2026-01-29 22:41:27,305] Trial 4 finished with value: 0.1874651245050027 and parameters: {'scale_factor': 0.94919316694241}. Best is trial 0 with value: 0.2534015461961339.
[I 2026-01-29 22:41:27,387] Trial 6 finished with value: 0.14321402785219572 and parameters: {'scale_factor': 0.2

Running optimization for ALS_factors=100 with robust_user_centric_weight_v2...


[I 2026-01-29 22:54:07,727] Trial 1 finished with value: 0.18909171389261734 and parameters: {'lower_q': 30.064375894919408, 'upper_q': 85.95432745321581}. Best is trial 1 with value: 0.18909171389261734.
[I 2026-01-29 22:54:07,775] Trial 0 finished with value: 0.18957167675548353 and parameters: {'lower_q': 31.106342313349774, 'upper_q': 75.92129671652776}. Best is trial 0 with value: 0.18957167675548353.
[I 2026-01-29 22:54:07,823] Trial 2 finished with value: 0.1897440380214301 and parameters: {'lower_q': 31.626160452983193, 'upper_q': 71.03443614695745}. Best is trial 2 with value: 0.1897440380214301.
[I 2026-01-29 22:54:07,838] Trial 3 finished with value: 0.18915713959974742 and parameters: {'lower_q': 22.15320436922202, 'upper_q': 72.93724575545102}. Best is trial 2 with value: 0.1897440380214301.
[I 2026-01-29 22:57:06,290] Trial 7 finished with value: 0.18832837139483544 and parameters: {'lower_q': 13.789815915594211, 'upper_q': 85.02134377716416}. Best is trial 2 with value: 

Running optimization for ALS_factors=100 with sigmoid_propensity...


[I 2026-01-29 23:07:25,076] Trial 3 finished with value: 0.21303492480230518 and parameters: {'p': 4.272882114962067, 'beta': 0.1486848421398217}. Best is trial 3 with value: 0.21303492480230518.
[I 2026-01-29 23:07:25,088] Trial 0 finished with value: 0.2265921550847041 and parameters: {'p': 0.8071933903223973, 'beta': 0.30326212277823505}. Best is trial 0 with value: 0.2265921550847041.
[I 2026-01-29 23:07:25,134] Trial 1 finished with value: 0.24265763773674653 and parameters: {'p': 3.890162015753506, 'beta': 0.509360797138456}. Best is trial 1 with value: 0.24265763773674653.
[I 2026-01-29 23:07:25,150] Trial 2 finished with value: 0.23181043133613768 and parameters: {'p': 3.908972684779729, 'beta': 0.3425623305739033}. Best is trial 1 with value: 0.24265763773674653.
[I 2026-01-29 23:08:06,680] Trial 4 finished with value: 0.24479076634587388 and parameters: {'p': 3.865086212830957, 'beta': 0.5538473518454491}. Best is trial 4 with value: 0.24479076634587388.
[I 2026-01-29 23:08:0

Running optimization for ALS_factors=100 with power_lift...


[I 2026-01-29 23:11:06,397] Trial 3 finished with value: 0.24200520786631688 and parameters: {'p': 0.5961303364462469}. Best is trial 3 with value: 0.24200520786631688.
[I 2026-01-29 23:11:06,441] Trial 1 finished with value: 0.26479639419525575 and parameters: {'p': 0.3957683875015411}. Best is trial 1 with value: 0.26479639419525575.
[I 2026-01-29 23:11:06,458] Trial 0 finished with value: 0.1305932824975531 and parameters: {'p': 1.4062261136728622}. Best is trial 1 with value: 0.26479639419525575.
[I 2026-01-29 23:11:06,489] Trial 2 finished with value: 0.1580469378195184 and parameters: {'p': 1.1836541061029249}. Best is trial 1 with value: 0.26479639419525575.
[I 2026-01-29 23:11:47,947] Trial 4 finished with value: 0.234959350056575 and parameters: {'p': 0.1068195224966865}. Best is trial 1 with value: 0.26479639419525575.
[I 2026-01-29 23:11:48,011] Trial 7 finished with value: 0.1338665700129183 and parameters: {'p': 1.3788569661864043}. Best is trial 1 with value: 0.2647963941

Unnamed: 0,Algorithm,Strategy,Best Val NDCG@20,Test NDCG@20,Test Precision@20,Final Train Time (s),Best Params
23,ALS_factors=100,power_lift,0.265945,0.337178,0.368855,10.815068,{'p': 0.35035165551041647}
22,ALS_factors=100,sigmoid_propensity,0.259962,0.333955,0.366206,10.79208,"{'p': 1.5152466760045404, 'beta': 0.9959842331..."
12,ALS_factors=100,no_weighting,0.252795,0.324987,0.357838,10.774874,{}
19,ALS_factors=100,pmi,0.257309,0.32391,0.346753,10.830424,{}
20,ALS_factors=100,robust_user_centric,0.254878,0.322906,0.354965,10.681851,{'scale_factor': 9.729574967756513}
17,ALS_factors=100,power,0.254827,0.321246,0.353753,10.822451,{'power_p': 1.305569938241399}
14,ALS_factors=100,tfidf,0.256439,0.318898,0.349704,10.811064,{}
16,ALS_factors=100,confidence,0.255824,0.317208,0.347325,10.843457,{'conf_alpha': 1.281009213528539}
13,ALS_factors=100,bm25,0.249887,0.309938,0.34477,10.824066,"{'bm25_k1': 172.20254865698615, 'bm25_b': 0.33..."
15,ALS_factors=100,log,0.231924,0.307046,0.342517,10.828904,{}


In [None]:
import glob

all_results = []
# Match any CSV in the result folder
for f in glob.glob(f"{results_folder}/*.csv"):
    all_results.append(pd.read_csv(f))

if all_results:
    experiment_results = pd.concat(all_results)
    experiment_results = experiment_results.sort_values("Test NDCG@20", ascending=False)
    experiment_results.to_csv(results_filename, index=False)
else:
    print("No results found.")

experiment_results