In [None]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, confidence_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, ranking_metrics_at_k

import cornac


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories.
# Guarded so that re-running this cell does not append the same entry again.
parent_dir = os.path.abspath("..")
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from utils.sparse import transform_dataframe_to_sparse

In [3]:
# Load the MovieLens feedback tuples (variant "20M") from cornac and drop rows
# with missing values. The frame is constructed with exactly these three
# columns, so no additional column re-selection is required.
movielens_df = pd.DataFrame(
    data=cornac.datasets.movielens.load_feedback(variant="20M"),
    columns=['user_id', 'item_id', 'target'],
).dropna()

# Sanity check: (distinct users, distinct items, number of interactions).
movielens_df['user_id'].nunique(), movielens_df['item_id'].nunique(), movielens_df.shape[0]

(138493, 26744, 20000263)

In [4]:
# Build the sparse interaction matrix: rows come from 'user_id', columns from
# 'item_id' and values from 'target' (per the keyword arguments below).
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    movielens_df,
    row_field='user_id',
    col_field='item_id',
    data_field='target',
)

# Two successive 90/10 splits with the same seed: first hold out the test set,
# then carve the validation set out of the remaining train+val interactions.
train_val_mat, test_mat = train_test_split(
    user_item_matrix, train_percentage=0.9, random_state=42
)
train_mat, val_mat = train_test_split(
    train_val_mat, train_percentage=0.9, random_state=42
)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (138493, 26744), Val Shape: (138493, 26744), Test Shape: (138493, 26744)


In [None]:
# Directory that receives the per-run result CSVs; it is passed as `output_dir`
# to the run_hyperparameter_optimization calls in the cells below.
results_folder = "results/movielens_20m_als"
# File name presumably intended for a combined results CSV; it is not referenced
# in the visible cells -- TODO confirm where (or whether) it is used.
results_filename = "movielens_20m_als_results.csv"

import time

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
    n_jobs: int = -1,
) -> pd.DataFrame:
    """Tune one weighting strategy for one ALS configuration and score it on the test split.

    The weighting parameters are searched with an Optuna TPE study that
    maximizes NDCG@20 on ``val_mat`` for a model fitted on the weighted
    ``train_mat``. The best parameters are then used to weight
    ``train_val_mat``, a fresh model is fitted on it, and ranking metrics at
    K=10 and K=20 are computed against ``test_mat``.

    Args:
        train_mat: Interactions used to fit the model during the search.
        val_mat: Held-out interactions scored during the search.
        train_val_mat: Interactions used to fit the final model.
        test_mat: Held-out interactions scored by the final model.
        weighting_strategy: Key of the weighting scheme (see ``search_spaces``).
        algorithm: Key of the ALS configuration (see ``algorithms``).
        n_trials: Number of Optuna trials for strategies that have tunable
            parameters; parameter-free strategies always run a single trial.
        output_dir: If truthy, the one-row result table is also written to
            ``<output_dir>/<algorithm>_<weighting_strategy>_results.csv``.
            The directory is created when it does not exist.
        n_jobs: Forwarded to ``study.optimize``. The default (-1) keeps the
            original parallel behaviour. NOTE: per the Optuna documentation,
            parallel optimization is inherently non-deterministic, so the
            seeded sampler only yields reproducible results with ``n_jobs=1``.

    Returns:
        A one-row DataFrame with the algorithm, strategy, trial count, best
        validation NDCG@20, test NDCG/precision at 10 and 20, the fit time of
        the final model in seconds, and the best parameters.

    Raises:
        ValueError: If ``weighting_strategy`` or ``algorithm`` is not recognized.
    """
    # Factories (not instances) so that every trial and the final fit get a fresh model.
    algorithms = {
        "ALS_factors=10": lambda: AlternatingLeastSquares(factors=10, regularization=10, random_state=42),
        "ALS_factors=100": lambda: AlternatingLeastSquares(factors=100, regularization=10, random_state=42),
    }
    # Single source of truth for the known strategies and their tunable parameters:
    #   strategy -> {weighting-function keyword: (Optuna parameter name, low, high)}
    # Insertion order is the order in which parameters are suggested to Optuna.
    search_spaces = {
        "no_weighting": {},
        "bm25": {"K1": ("bm25_k1", 0.1, 1000), "B": ("bm25_b", 0.0, 1.0)},
        "tfidf": {},
        "log": {},
        "confidence": {"alpha": ("conf_alpha", 1.0, 150.0)},
        "power": {"p": ("power_p", 0.1, 1.5)},
        "normalized": {},
        "pmi": {},
        "robust_user_centric": {"scale_factor": ("scale_factor", 0.1, 10.0)},
        "robust_user_centric_weight_v2": {"lower_q": ("lower_q", 5.0, 45.0), "upper_q": ("upper_q", 55.0, 95.0)},
        "sigmoid_propensity": {"p": ("p", 0.1, 5.0), "beta": ("beta", 0.0, 1.0)},
        "power_lift": {"p": ("p", 0.1, 1.5)},
    }
    if weighting_strategy not in search_spaces:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy

    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    AlgoFactory = algorithms[algorithm]

    print(f"Running optimization for {algo_name} with {strategy}...")

    search_space = search_spaces[strategy]
    # Weighting function per strategy; None means the matrix is used as-is.
    weight_functions = {
        "no_weighting": None,
        "bm25": bm25_weight,
        "tfidf": tfidf_weight,
        "log": log_weight,
        "confidence": confidence_weight,
        "power": power_weight,
        "normalized": normalized_weight,
        "pmi": pmi_weight,
        "robust_user_centric": robust_user_centric_weight,
        "robust_user_centric_weight_v2": robust_user_centric_weight_v2,
        "sigmoid_propensity": sigmoid_propensity_weight,
        "power_lift": power_lift_weight,
    }
    # Direct indexing: a strategy missing from this table fails loudly instead
    # of silently falling back to "no weighting".
    weight_fn = weight_functions[strategy]

    def get_weighted_matrix(matrix, params):
        """Return a weighted copy of ``matrix`` for the selected strategy."""
        weighted = matrix.copy()
        if weight_fn is None:
            return weighted
        # params[...] (not .get) so a missing parameter raises instead of passing None.
        weight_kwargs = {
            keyword: params[param_name]
            for keyword, (param_name, _low, _high) in search_space.items()
        }
        return weight_fn(weighted, **weight_kwargs)

    def objective(trial):
        """Fit on the weighted training matrix and return validation NDCG@20."""
        params = {
            param_name: trial.suggest_float(param_name, low, high)
            for param_name, low, high in search_space.values()
        }
        weighted_train = get_weighted_matrix(train_mat, params)

        # Train Model
        model = AlgoFactory()
        model.fit(weighted_train, show_progress=False)

        # Evaluate on Validation Set (train_mat is passed unweighted as the known interactions)
        return ranking_metrics_at_k(model, train_mat, val_mat, K=20, show_progress=False)['ndcg']

    # Optimize only if strategy has parameters
    current_trials = n_trials if search_space else 1
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=current_trials, n_jobs=n_jobs)

    # --- Final Retraining & Testing ---
    # Use best params to weight the full train_val matrix
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    # Train Final Model (perf_counter is monotonic, so the interval cannot be
    # distorted by system clock adjustments)
    final_model = AlgoFactory()
    fit_started = time.perf_counter()
    final_model.fit(weighted_train_val, show_progress=False)
    fit_seconds = time.perf_counter() - fit_started

    # Evaluate on Test Set
    metrics_at_10 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    metrics_at_20 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results_df = pd.DataFrame([{
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": metrics_at_10['ndcg'],
        "Test NDCG@20": metrics_at_20['ndcg'],
        "Test Precision@10": metrics_at_10['precision'],
        "Test Precision@20": metrics_at_20['precision'],
        "Final Train Time (s)": fit_seconds,
        "Best Params": best_params
    }])

    if output_dir:
        # Create the target directory on demand so a missing folder does not
        # discard a finished (and expensive) run at write time.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        results_df.to_csv(output_path, index=False)
    return results_df

In [6]:
# Create the results directory if it is missing; exist_ok=True avoids the
# separate existence check (and the race between the check and the creation).
os.makedirs(results_folder, exist_ok=True)

In [7]:
# Baseline: ALS with 10 factors on the unweighted matrix.
# "no_weighting" has no tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="no_weighting",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:14:40,268] A new study created in memory with name: no-name-4126d4aa-6ee7-452f-b048-b8410ee8af8d


Running optimization for ALS_factors=10 with no_weighting...


[I 2026-02-08 16:14:45,080] Trial 0 finished with value: 0.20777510234035793 and parameters: {}. Best is trial 0 with value: 0.20777510234035793.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,no_weighting,1,0.207775,0.244892,0.253794,0.266058,0.288887,4.169275,{}


In [8]:
# ALS with 10 factors, "bm25" weighting (tunes bm25_k1 and bm25_b).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="bm25",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:14:51,765] A new study created in memory with name: no-name-7a49c0f7-1d31-42db-89d1-adac0af3632a


Running optimization for ALS_factors=10 with bm25...


[I 2026-02-08 16:15:08,869] Trial 1 finished with value: 0.20057909359669332 and parameters: {'bm25_k1': 721.0510386104584, 'bm25_b': 0.23820893911473473}. Best is trial 1 with value: 0.20057909359669332.
[I 2026-02-08 16:15:08,905] Trial 2 finished with value: 0.20122551187536702 and parameters: {'bm25_k1': 953.4296962297083, 'bm25_b': 0.29357279678327775}. Best is trial 2 with value: 0.20122551187536702.
[I 2026-02-08 16:15:08,926] Trial 3 finished with value: 0.2028944400068316 and parameters: {'bm25_k1': 902.8883974485827, 'bm25_b': 0.5537459391930623}. Best is trial 3 with value: 0.2028944400068316.
[I 2026-02-08 16:15:08,958] Trial 0 finished with value: 0.2030599975977767 and parameters: {'bm25_k1': 177.45102708171413, 'bm25_b': 0.7563659598974011}. Best is trial 0 with value: 0.2030599975977767.
[I 2026-02-08 16:15:25,887] Trial 6 finished with value: 0.20009166424078775 and parameters: {'bm25_k1': 944.8187514360491, 'bm25_b': 0.20841479384189898}. Best is trial 0 with value: 0

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,bm25,20,0.207061,0.242754,0.253315,0.261605,0.285793,4.319055,"{'bm25_k1': 3.209536795934696, 'bm25_b': 0.887..."


In [9]:

# ALS with 10 factors, "tfidf" weighting.
# No tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="tfidf",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)


[I 2026-02-08 16:16:24,844] A new study created in memory with name: no-name-968358a4-10aa-497f-9295-acb72dd72634


Running optimization for ALS_factors=10 with tfidf...


[I 2026-02-08 16:16:29,893] Trial 0 finished with value: 0.19965452106232576 and parameters: {}. Best is trial 0 with value: 0.19965452106232576.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,tfidf,1,0.199655,0.233357,0.243834,0.255206,0.28026,4.313086,{}


In [10]:
# ALS with 10 factors, "log" weighting.
# No tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:16:37,324] A new study created in memory with name: no-name-093b6a4c-ed8e-4cb7-9fe9-2ce46d41f17a


Running optimization for ALS_factors=10 with log...


[I 2026-02-08 16:16:42,206] Trial 0 finished with value: 0.21173211232216507 and parameters: {}. Best is trial 0 with value: 0.21173211232216507.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,log,1,0.211732,0.253513,0.260189,0.273624,0.293923,4.282451,{}


In [11]:
# ALS with 10 factors, "confidence" weighting (tunes conf_alpha).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="confidence",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:16:49,446] A new study created in memory with name: no-name-05c44e89-80ee-4ff5-a323-6a7880ddb5b6


Running optimization for ALS_factors=10 with confidence...


[I 2026-02-08 16:17:06,355] Trial 1 finished with value: 0.1808635260352492 and parameters: {'conf_alpha': 4.962123493590843}. Best is trial 1 with value: 0.1808635260352492.
[I 2026-02-08 16:17:06,405] Trial 3 finished with value: 0.0800254732176324 and parameters: {'conf_alpha': 136.4963575208393}. Best is trial 1 with value: 0.1808635260352492.
[I 2026-02-08 16:17:06,415] Trial 0 finished with value: 0.0931623840229756 and parameters: {'conf_alpha': 67.48964691048211}. Best is trial 1 with value: 0.1808635260352492.
[I 2026-02-08 16:17:06,448] Trial 2 finished with value: 0.08591083753801172 and parameters: {'conf_alpha': 99.13222359309616}. Best is trial 1 with value: 0.1808635260352492.
[I 2026-02-08 16:17:23,235] Trial 4 finished with value: 0.09531224483438543 and parameters: {'conf_alpha': 61.00727361041262}. Best is trial 1 with value: 0.1808635260352492.
[I 2026-02-08 16:17:23,265] Trial 5 finished with value: 0.0801157378217363 and parameters: {'conf_alpha': 135.696412596345

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,confidence,20,0.199962,0.232492,0.243446,0.253675,0.278843,4.290487,{'conf_alpha': 1.3064571209932714}


In [12]:
# ALS with 10 factors, "power" weighting (tunes power_p).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:18:21,430] A new study created in memory with name: no-name-224c60dc-e695-4ef5-a843-c581384516aa


Running optimization for ALS_factors=10 with power...


[I 2026-02-08 16:18:38,050] Trial 2 finished with value: 0.20364212843397028 and parameters: {'power_p': 1.3688111019190021}. Best is trial 2 with value: 0.20364212843397028.
[I 2026-02-08 16:18:38,087] Trial 1 finished with value: 0.21168898499823288 and parameters: {'power_p': 0.2504568804688161}. Best is trial 1 with value: 0.21168898499823288.
[I 2026-02-08 16:18:38,100] Trial 3 finished with value: 0.21157053548807928 and parameters: {'power_p': 0.402180807816915}. Best is trial 1 with value: 0.21168898499823288.
[I 2026-02-08 16:18:38,124] Trial 0 finished with value: 0.20700722735306099 and parameters: {'power_p': 1.0774447903426676}. Best is trial 1 with value: 0.21168898499823288.
[I 2026-02-08 16:18:54,646] Trial 7 finished with value: 0.21159731022783723 and parameters: {'power_p': 0.39447740142510346}. Best is trial 1 with value: 0.21168898499823288.
[I 2026-02-08 16:18:54,662] Trial 4 finished with value: 0.21172622963077645 and parameters: {'power_p': 0.16402163617829885}

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,power,20,0.211734,0.253751,0.260302,0.273796,0.294154,4.306237,{'power_p': 0.16741867171308375}


In [13]:

# ALS with 10 factors, "normalized" weighting.
# No tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="normalized",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:19:51,726] A new study created in memory with name: no-name-e792aca4-739d-497d-9d11-083e51e009b7


Running optimization for ALS_factors=10 with normalized...


[I 2026-02-08 16:19:56,939] Trial 0 finished with value: 0.18366968945680442 and parameters: {}. Best is trial 0 with value: 0.18366968945680442.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,normalized,1,0.18367,0.227491,0.23306,0.235661,0.249124,4.312041,{}


In [14]:

# ALS with 10 factors, "pmi" weighting.
# No tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="pmi",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:20:04,564] A new study created in memory with name: no-name-58d80be3-74a3-4a8a-b403-abe591641e7e


Running optimization for ALS_factors=10 with pmi...


[I 2026-02-08 16:20:09,991] Trial 0 finished with value: 0.2085637322464122 and parameters: {}. Best is trial 0 with value: 0.2085637322464122.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,pmi,1,0.208564,0.245183,0.254129,0.260438,0.279934,4.280882,{}


In [15]:

# ALS with 10 factors, "robust_user_centric" weighting (tunes scale_factor).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:20:17,600] A new study created in memory with name: no-name-2bc968b0-ee44-4a66-8a49-027d0aaeb9e8


Running optimization for ALS_factors=10 with robust_user_centric...


[I 2026-02-08 16:20:36,898] Trial 1 finished with value: 0.20590105463868194 and parameters: {'scale_factor': 7.394405158273429}. Best is trial 1 with value: 0.20590105463868194.
[I 2026-02-08 16:20:36,909] Trial 0 finished with value: 0.20958630662080505 and parameters: {'scale_factor': 3.8382037674076233}. Best is trial 0 with value: 0.20958630662080505.
[I 2026-02-08 16:20:36,934] Trial 3 finished with value: 0.20623635155687922 and parameters: {'scale_factor': 7.02373413451053}. Best is trial 0 with value: 0.20958630662080505.
[I 2026-02-08 16:20:36,959] Trial 2 finished with value: 0.20889784885914467 and parameters: {'scale_factor': 4.35015920809455}. Best is trial 0 with value: 0.20958630662080505.
[I 2026-02-08 16:20:56,101] Trial 4 finished with value: 0.20410779867641127 and parameters: {'scale_factor': 9.036713713967576}. Best is trial 0 with value: 0.20958630662080505.
[I 2026-02-08 16:20:56,101] Trial 5 finished with value: 0.20562109019033822 and parameters: {'scale_facto

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,robust_user_centric,20,0.210395,0.252003,0.258369,0.272533,0.292143,4.172497,{'scale_factor': 2.691559032559339}


In [16]:

# ALS with 10 factors, "robust_user_centric_weight_v2" weighting (tunes lower_q and upper_q).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:22:02,315] A new study created in memory with name: no-name-6bc555fa-d848-46e0-8ac8-da10d16d4dc3


Running optimization for ALS_factors=10 with robust_user_centric_weight_v2...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-02-08 16:22:21,464] Trial 1 finished with value: 0.20827394072159308 and parameters: {'lower_q': 27.558108172612666, 'upper_q': 76.86273198474007}. Best is trial 1 with value: 0.20827394072159308.
[I 2026-02-08 16:22:21,476] Trial 0 finished with value: 0.20891769901382037 and parameters: {'lower_q': 9.090956921300224, 'upper_q': 64.02872464841074}. Best is trial 0 with value: 0.20891769901382037.
[I 2026-02-08 16:22:21,511] Trial 2 finished with value: 0.20852597151247135 and parameters: {'lower_q': 39.11612476991492, 'upper_q': 90.69052211327696}. Best is trial 0 with value: 0.20891769901382037.
[I 2026-02-08 16:22:21,525] Trial 3 finished with value: 0.20860378870712035 and parameters: {'lower_q': 28.77536044852485, 'upper_q': 81.98250039800318}. Best is trial 0 with value: 0.20891769901382037.
[I 2026-02-08 16:22:40,469] Trial 5 finished with value: 0.2090416884998402 and parameters: {'lower_q': 7.517284675714007, 'upper_q': 62.574945

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,robust_user_centric_weight_v2,20,0.210001,0.25275,0.258636,0.27288,0.29221,4.172177,"{'lower_q': 10.656734797354833, 'upper_q': 91...."


In [17]:

# ALS with 10 factors, "sigmoid_propensity" weighting (tunes p and beta).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:23:46,426] A new study created in memory with name: no-name-a928d282-1e39-422a-80ef-09145113d23c


Running optimization for ALS_factors=10 with sigmoid_propensity...


[I 2026-02-08 16:24:02,910] Trial 2 finished with value: 0.21006924939739172 and parameters: {'p': 2.37560938557613, 'beta': 0.11337236138566964}. Best is trial 2 with value: 0.21006924939739172.
[I 2026-02-08 16:24:02,931] Trial 3 finished with value: 0.20953380119017012 and parameters: {'p': 1.396909438895129, 'beta': 0.790018888335163}. Best is trial 2 with value: 0.21006924939739172.
[I 2026-02-08 16:24:02,950] Trial 1 finished with value: 0.21012693002605123 and parameters: {'p': 1.596388618084342, 'beta': 0.5644982860741996}. Best is trial 1 with value: 0.21012693002605123.
[I 2026-02-08 16:24:02,973] Trial 0 finished with value: 0.2102419153086589 and parameters: {'p': 1.627965997426612, 'beta': 0.5095181557239581}. Best is trial 0 with value: 0.2102419153086589.
[I 2026-02-08 16:24:19,703] Trial 4 finished with value: 0.21010119222009416 and parameters: {'p': 0.6793631098099873, 'beta': 0.5342670326680621}. Best is trial 0 with value: 0.2102419153086589.
[I 2026-02-08 16:24:19,

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,sigmoid_propensity,20,0.210827,0.252812,0.259172,0.27326,0.293364,4.195384,"{'p': 1.679626087296186, 'beta': 0.23159432280..."


In [18]:

# ALS with 10 factors, "power_lift" weighting (tunes p).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power_lift",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:25:16,360] A new study created in memory with name: no-name-a2b3b2f5-a652-4272-bf80-819f61788c31


Running optimization for ALS_factors=10 with power_lift...


[I 2026-02-08 16:25:32,965] Trial 0 finished with value: 0.12727317490916526 and parameters: {'p': 1.0032255619898982}. Best is trial 0 with value: 0.12727317490916526.
[I 2026-02-08 16:25:33,005] Trial 3 finished with value: 0.21238778627442542 and parameters: {'p': 0.20754008458361156}. Best is trial 3 with value: 0.21238778627442542.
[I 2026-02-08 16:25:33,006] Trial 2 finished with value: 0.09959062570725065 and parameters: {'p': 1.1423913175021991}. Best is trial 3 with value: 0.21238778627442542.
[I 2026-02-08 16:25:33,044] Trial 1 finished with value: 0.2102507990892509 and parameters: {'p': 0.3613455120557518}. Best is trial 3 with value: 0.21238778627442542.
[I 2026-02-08 16:25:49,617] Trial 5 finished with value: 0.18515315170522398 and parameters: {'p': 0.7226474457003674}. Best is trial 3 with value: 0.21238778627442542.
[I 2026-02-08 16:25:49,632] Trial 4 finished with value: 0.17793031107572546 and parameters: {'p': 0.775740860801753}. Best is trial 3 with value: 0.212387

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,power_lift,20,0.212416,0.254222,0.261111,0.273797,0.294311,4.190032,{'p': 0.15192901017346558}


In [19]:
# Baseline: ALS with 100 factors on the unweighted matrix.
# "no_weighting" has no tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="no_weighting",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:26:46,682] A new study created in memory with name: no-name-142c1b11-4d85-4ea0-99d7-310d1b935611


Running optimization for ALS_factors=100 with no_weighting...


[I 2026-02-08 16:26:57,110] Trial 0 finished with value: 0.25145416492132167 and parameters: {}. Best is trial 0 with value: 0.25145416492132167.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,no_weighting,1,0.251454,0.310603,0.323905,0.330475,0.357321,10.713596,{}


In [20]:
# ALS with 100 factors, "bm25" weighting (tunes bm25_k1 and bm25_b).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="bm25",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:27:10,639] A new study created in memory with name: no-name-f67791c9-3359-4dba-a93c-c5cbb2dddf34


Running optimization for ALS_factors=100 with bm25...


[I 2026-02-08 16:27:51,996] Trial 0 finished with value: 0.24991998562103546 and parameters: {'bm25_k1': 183.99774154380512, 'bm25_b': 0.6013100971566266}. Best is trial 0 with value: 0.24991998562103546.
[I 2026-02-08 16:27:52,020] Trial 3 finished with value: 0.25007650444369417 and parameters: {'bm25_k1': 355.0458481250353, 'bm25_b': 0.511122550175164}. Best is trial 3 with value: 0.25007650444369417.
[I 2026-02-08 16:27:52,058] Trial 1 finished with value: 0.24996820419750695 and parameters: {'bm25_k1': 441.12094349775805, 'bm25_b': 0.30948270514342346}. Best is trial 3 with value: 0.25007650444369417.
[I 2026-02-08 16:27:52,087] Trial 2 finished with value: 0.24690345822644919 and parameters: {'bm25_k1': 260.74041869088745, 'bm25_b': 0.060241088085782435}. Best is trial 3 with value: 0.25007650444369417.
[I 2026-02-08 16:28:33,364] Trial 7 finished with value: 0.24991995599204028 and parameters: {'bm25_k1': 453.8806013716688, 'bm25_b': 0.5384120861773252}. Best is trial 3 with val

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,bm25,20,0.256914,0.30654,0.322114,0.327671,0.357379,10.878613,"{'bm25_k1': 4.0389018496678375, 'bm25_b': 0.74..."


In [21]:

# ALS with 100 factors, "tfidf" weighting.
# No tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="tfidf",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)


[I 2026-02-08 16:30:51,938] A new study created in memory with name: no-name-9308e063-9f5e-43fb-893b-a6cce4c592f3


Running optimization for ALS_factors=100 with tfidf...


[I 2026-02-08 16:31:03,165] Trial 0 finished with value: 0.25647876048547885 and parameters: {}. Best is trial 0 with value: 0.25647876048547885.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,tfidf,1,0.256479,0.303988,0.319119,0.323076,0.350788,10.859503,{}


In [22]:
# ALS with 100 factors, "log" weighting.
# No tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:31:17,415] A new study created in memory with name: no-name-18d66aff-ba1f-466a-91b8-603c2c6642db


Running optimization for ALS_factors=100 with log...


[I 2026-02-08 16:31:28,320] Trial 0 finished with value: 0.23172382270897857 and parameters: {}. Best is trial 0 with value: 0.23172382270897857.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,log,1,0.231724,0.297131,0.30672,0.318096,0.342625,10.871518,{}


In [23]:
# ALS with 100 factors, "confidence" weighting (tunes conf_alpha).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="confidence",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:31:42,414] A new study created in memory with name: no-name-7a74e93b-1d95-474d-af9d-dc7328be12b6


Running optimization for ALS_factors=100 with confidence...


[I 2026-02-08 16:32:23,485] Trial 3 finished with value: 0.11245086410530176 and parameters: {'conf_alpha': 121.08002600167221}. Best is trial 3 with value: 0.11245086410530176.
[I 2026-02-08 16:32:23,509] Trial 1 finished with value: 0.1291680388700691 and parameters: {'conf_alpha': 50.50555981953654}. Best is trial 1 with value: 0.1291680388700691.
[I 2026-02-08 16:32:23,556] Trial 2 finished with value: 0.1155220161753846 and parameters: {'conf_alpha': 98.06348246135849}. Best is trial 1 with value: 0.1291680388700691.
[I 2026-02-08 16:32:23,556] Trial 0 finished with value: 0.11239617638246112 and parameters: {'conf_alpha': 122.15094311394951}. Best is trial 1 with value: 0.1291680388700691.
[I 2026-02-08 16:33:04,636] Trial 7 finished with value: 0.17922378830980992 and parameters: {'conf_alpha': 12.398847690917746}. Best is trial 7 with value: 0.17922378830980992.
[I 2026-02-08 16:33:04,653] Trial 4 finished with value: 0.22555647123287942 and parameters: {'conf_alpha': 4.1172811

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,confidence,20,0.241172,0.272446,0.294024,0.289287,0.321885,10.880188,{'conf_alpha': 2.5438808701236457}


In [24]:
# ALS with 100 factors, "power" weighting (tunes power_p).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:35:22,306] A new study created in memory with name: no-name-930294b7-a8c2-49fc-8a26-682f9afdc687


Running optimization for ALS_factors=100 with power...


[I 2026-02-08 16:36:03,167] Trial 0 finished with value: 0.2534599448811689 and parameters: {'power_p': 1.371802227802872}. Best is trial 0 with value: 0.2534599448811689.
[I 2026-02-08 16:36:03,185] Trial 1 finished with value: 0.2493180628003128 and parameters: {'power_p': 0.8817786315963544}. Best is trial 0 with value: 0.2534599448811689.
[I 2026-02-08 16:36:03,220] Trial 2 finished with value: 0.2345644504837556 and parameters: {'power_p': 0.39609417833234795}. Best is trial 0 with value: 0.2534599448811689.
[I 2026-02-08 16:36:03,220] Trial 3 finished with value: 0.2398704307712006 and parameters: {'power_p': 0.5465659949074971}. Best is trial 0 with value: 0.2534599448811689.
[I 2026-02-08 16:36:44,074] Trial 4 finished with value: 0.24884062523480338 and parameters: {'power_p': 0.8600167366431858}. Best is trial 0 with value: 0.2534599448811689.
[I 2026-02-08 16:36:44,111] Trial 7 finished with value: 0.25299905904952197 and parameters: {'power_p': 1.132478313593858}. Best is t

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power,20,0.253657,0.305791,0.321442,0.325289,0.354485,10.928963,{'power_p': 1.291457459532728}


In [25]:

# ALS with 100 factors, "normalized" weighting.
# No tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="normalized",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:39:01,024] A new study created in memory with name: no-name-c96c71d9-d3e8-4c8f-9b8e-9fe06b223e22


Running optimization for ALS_factors=100 with normalized...


[I 2026-02-08 16:39:12,153] Trial 0 finished with value: 0.1811017217581636 and parameters: {}. Best is trial 0 with value: 0.1811017217581636.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,normalized,1,0.181102,0.209195,0.228746,0.236683,0.276616,10.869391,{}


In [26]:

# ALS with 100 factors, "pmi" weighting.
# No tunable parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="pmi",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:39:26,458] A new study created in memory with name: no-name-69d7af4a-aaa7-4524-8eb4-bfef5b3c948d


Running optimization for ALS_factors=100 with pmi...


[I 2026-02-08 16:39:37,735] Trial 0 finished with value: 0.2596701233672352 and parameters: {}. Best is trial 0 with value: 0.2596701233672352.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,pmi,1,0.25967,0.313497,0.326333,0.326654,0.348204,10.849279,{}


In [27]:

# ALS with 100 factors, "robust_user_centric" weighting (tunes scale_factor).
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:39:52,238] A new study created in memory with name: no-name-361eae5a-b60e-4652-b72e-109e654eaab9


Running optimization for ALS_factors=100 with robust_user_centric...


[I 2026-02-08 16:40:35,629] Trial 2 finished with value: 0.2019173302664301 and parameters: {'scale_factor': 1.245972298822414}. Best is trial 2 with value: 0.2019173302664301.
[I 2026-02-08 16:40:35,648] Trial 0 finished with value: 0.2501423865330952 and parameters: {'scale_factor': 6.702566277353453}. Best is trial 0 with value: 0.2501423865330952.
[I 2026-02-08 16:40:35,668] Trial 3 finished with value: 0.2506963574131939 and parameters: {'scale_factor': 6.967547286869528}. Best is trial 3 with value: 0.2506963574131939.
[I 2026-02-08 16:40:35,711] Trial 1 finished with value: 0.20487356611363558 and parameters: {'scale_factor': 1.3650163540263303}. Best is trial 3 with value: 0.2506963574131939.
[I 2026-02-08 16:41:19,021] Trial 5 finished with value: 0.2523583778421002 and parameters: {'scale_factor': 8.095086065299189}. Best is trial 5 with value: 0.2523583778421002.
[I 2026-02-08 16:41:19,022] Trial 6 finished with value: 0.24061522523850742 and parameters: {'scale_factor': 4.1

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,robust_user_centric,20,0.25378,0.307494,0.322533,0.326799,0.355163,10.78088,{'scale_factor': 9.986142834550652}


In [28]:

# Evaluate the v2 robust user-centric weighting under the "ALS_factors=100"
# label. The trial log in the recorded output shows `lower_q` and `upper_q`
# being tuned.
# NOTE(review): this strategy key carries a `_weight` infix, unlike the sibling
# "robust_user_centric" key; it presumably has to match the dispatch inside
# run_hyperparameter_optimization, so it is kept verbatim -- confirm.
# NOTE(review): the recorded output also contains a warning fragment pointing
# at `1 / (1 + np.exp(-z_scores))`; presumably a NumPy overflow warning raised
# by the weighting code -- worth confirming.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:43:44,834] A new study created in memory with name: no-name-3dbbf7aa-3d13-4b28-b777-6ec3afb95509


Running optimization for ALS_factors=100 with robust_user_centric_weight_v2...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-02-08 16:44:28,083] Trial 0 finished with value: 0.19491332173568188 and parameters: {'lower_q': 21.091717401760533, 'upper_q': 64.59750104551306}. Best is trial 0 with value: 0.19491332173568188.
[I 2026-02-08 16:44:28,084] Trial 1 finished with value: 0.19442413646153844 and parameters: {'lower_q': 18.81921545570537, 'upper_q': 74.02744852546438}. Best is trial 0 with value: 0.19491332173568188.
[I 2026-02-08 16:44:28,125] Trial 3 finished with value: 0.19484523605477244 and parameters: {'lower_q': 33.11174942893927, 'upper_q': 87.08494259241263}. Best is trial 0 with value: 0.19491332173568188.
[I 2026-02-08 16:44:28,157] Trial 2 finished with value: 0.19582773350630872 and parameters: {'lower_q': 39.10534871564269, 'upper_q': 76.56929732387293}. Best is trial 2 with value: 0.19582773350630872.
[I 2026-02-08 16:45:11,383] Trial 7 finished with value: 0.19509806873524932 and parameters: {'lower_q': 18.43028363925113, 'upper_q': 57.45777

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,robust_user_centric_weight_v2,20,0.196167,0.256162,0.261419,0.276897,0.297714,10.782338,"{'lower_q': 40.79956136878442, 'upper_q': 62.3..."


In [29]:

# Evaluate the "sigmoid_propensity" weighting strategy under the
# "ALS_factors=100" label. The trial log in the recorded output shows `p` and
# `beta` being tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:47:36,830] A new study created in memory with name: no-name-45b48046-77b6-451f-8578-b2baa9d86d60


Running optimization for ALS_factors=100 with sigmoid_propensity...


[I 2026-02-08 16:48:17,566] Trial 0 finished with value: 0.24834627289111916 and parameters: {'p': 3.4342133223515807, 'beta': 0.5729959526693575}. Best is trial 0 with value: 0.24834627289111916.
[I 2026-02-08 16:48:17,589] Trial 3 finished with value: 0.23260875460310396 and parameters: {'p': 3.5660855363718316, 'beta': 0.30517580087426466}. Best is trial 0 with value: 0.24834627289111916.
[I 2026-02-08 16:48:17,625] Trial 1 finished with value: 0.21342305139495635 and parameters: {'p': 1.1456363616790894, 'beta': 0.13760076360316364}. Best is trial 0 with value: 0.24834627289111916.
[I 2026-02-08 16:48:17,652] Trial 2 finished with value: 0.2595992220633739 and parameters: {'p': 2.356400728078377, 'beta': 0.9747562569783982}. Best is trial 2 with value: 0.2595992220633739.
[I 2026-02-08 16:48:58,436] Trial 4 finished with value: 0.24330743755090758 and parameters: {'p': 0.8628490418682672, 'beta': 0.4395768059773739}. Best is trial 2 with value: 0.2595992220633739.
[I 2026-02-08 16:

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,sigmoid_propensity,20,0.260333,0.323685,0.33386,0.343054,0.366211,10.776389,"{'p': 2.1061539761683687, 'beta': 0.9996379410..."


In [30]:

# Evaluate the "power_lift" weighting strategy under the "ALS_factors=100"
# label. The trial log in the recorded output shows a single tuned parameter,
# `p`.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power_lift",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 16:51:14,706] A new study created in memory with name: no-name-628c422b-8d20-481b-876d-eb2e6f4c59a9


Running optimization for ALS_factors=100 with power_lift...


[I 2026-02-08 16:51:55,524] Trial 2 finished with value: 0.1814092102547835 and parameters: {'p': 0.9663073195487073}. Best is trial 2 with value: 0.1814092102547835.
[I 2026-02-08 16:51:55,538] Trial 1 finished with value: 0.18110414294734054 and parameters: {'p': 0.9681489467226551}. Best is trial 2 with value: 0.1814092102547835.
[I 2026-02-08 16:51:55,560] Trial 0 finished with value: 0.10110122464759959 and parameters: {'p': 1.4772749917694024}. Best is trial 2 with value: 0.1814092102547835.
[I 2026-02-08 16:51:55,597] Trial 3 finished with value: 0.15683867771020168 and parameters: {'p': 1.119918102598215}. Best is trial 2 with value: 0.1814092102547835.
[I 2026-02-08 16:52:36,439] Trial 7 finished with value: 0.22646479781909917 and parameters: {'p': 0.6891139487944734}. Best is trial 7 with value: 0.22646479781909917.
[I 2026-02-08 16:52:36,439] Trial 6 finished with value: 0.26613344844200554 and parameters: {'p': 0.34283417487987256}. Best is trial 6 with value: 0.2661334484

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power_lift,20,0.266133,0.325827,0.337535,0.34497,0.369349,10.776485,{'p': 0.34283417487987256}


In [31]:
import glob  # NOTE(review): ideally lives in the notebook's top import cell

# Gather every per-strategy results CSV from the results folder into one
# leaderboard, ranked by test NDCG@20 (best first), and persist it.
#
# glob returns paths in arbitrary, filesystem-dependent order; sorting them
# keeps the concatenation order (and therefore tie-breaking) reproducible.
result_paths = sorted(glob.glob(f"{results_folder}/*.csv"))
all_results = [pd.read_csv(path) for path in result_paths]

if all_results:
    experiment_results = (
        # ignore_index: each input frame carries its own index, so without it
        # the combined frame ends up with duplicate index labels.
        pd.concat(all_results, ignore_index=True)
        # mergesort is stable, so rows with equal scores keep file order.
        .sort_values("Test NDCG@20", ascending=False, kind="mergesort")
        # Renumber so the displayed index doubles as the rank (0 = best).
        .reset_index(drop=True)
    )
    experiment_results.to_csv(results_filename, index=False)
else:
    # Bind the name even when nothing was found; otherwise the display line
    # below raises NameError on a fresh kernel or shows a stale frame left
    # over from an earlier execution.
    experiment_results = pd.DataFrame()
    print("No results found.")

experiment_results

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power_lift,20,0.266133,0.325827,0.337535,0.34497,0.369349,10.776485,{'p': 0.34283417487987256}
0,ALS_factors=100,sigmoid_propensity,20,0.260333,0.323685,0.33386,0.343054,0.366211,10.776389,"{'p': 2.1061539761683687, 'beta': 0.9996379410..."
0,ALS_factors=100,pmi,1,0.25967,0.313497,0.326333,0.326654,0.348204,10.849279,{}
0,ALS_factors=100,no_weighting,1,0.251454,0.310603,0.323905,0.330475,0.357321,10.713596,{}
0,ALS_factors=100,robust_user_centric,20,0.25378,0.307494,0.322533,0.326799,0.355163,10.78088,{'scale_factor': 9.986142834550652}
0,ALS_factors=100,bm25,20,0.256914,0.30654,0.322114,0.327671,0.357379,10.878613,"{'bm25_k1': 4.0389018496678375, 'bm25_b': 0.74..."
0,ALS_factors=100,power,20,0.253657,0.305791,0.321442,0.325289,0.354485,10.928963,{'power_p': 1.291457459532728}
0,ALS_factors=100,tfidf,1,0.256479,0.303988,0.319119,0.323076,0.350788,10.859503,{}
0,ALS_factors=100,log,1,0.231724,0.297131,0.30672,0.318096,0.342625,10.871518,{}
0,ALS_factors=100,confidence,20,0.241172,0.272446,0.294024,0.289287,0.321885,10.880188,{'conf_alpha': 2.5438808701236457}
