In [None]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, confidence_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, ranking_metrics_at_k

import cornac


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories
sys.path.append(os.path.abspath(".."))

from utils.sparse import transform_dataframe_to_sparse

In [3]:
# Load the MovieLens 100K feedback records through cornac and label the three fields.
feedback_columns = ['user_id', 'item_id', 'target']
movielens_raw = pd.DataFrame(
    cornac.datasets.movielens.load_feedback(variant="100K"),
    columns=feedback_columns,
)

# Keep the three named columns and discard any row with a missing value.
movielens_df = movielens_raw[feedback_columns].dropna()

# Quick summary: distinct users, distinct items, number of interaction rows.
(
    movielens_df['user_id'].nunique(),
    movielens_df['item_id'].nunique(),
    len(movielens_df),
)

(943, 1682, 100000)

In [4]:
# Turn the interaction table into a matrix with users as rows, items as columns
# and `target` as the stored value; the helper also returns the id mappings.
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    movielens_df,
    row_field='user_id',
    col_field='item_id',
    data_field='target',
)

# Two-stage split with identical settings: first hold out the test part,
# then carve the validation part out of what remains.
split_kwargs = {"train_percentage": 0.9, "random_state": 42}
train_val_mat, test_mat = train_test_split(user_item_matrix, **split_kwargs)
train_mat, val_mat = train_test_split(train_val_mat, **split_kwargs)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (943, 1682), Val Shape: (943, 1682), Test Shape: (943, 1682)


In [None]:
# Directory that receives one CSV per (algorithm, strategy) run.
results_folder = "results/movielens_100k_als"
results_filename = "movielens_100k_als_results.csv"

import time
from typing import Optional

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: Optional[str] = None,
    n_jobs: int = 1,
) -> pd.DataFrame:
    """Tune one weighting strategy for one ALS configuration and score it on the test split.

    The weighting parameters (if the strategy has any) are searched with a seeded
    Optuna TPE study that maximizes validation NDCG@20. The best parameters are then
    used to weight ``train_val_mat``, a fresh model is fit on it, and ranking metrics
    at K=10 and K=20 are computed against ``test_mat``.

    Args:
        train_mat: Interactions the candidate models are fit on (after weighting);
            also passed unweighted as the train interactions to ``ranking_metrics_at_k``
            during validation.
        val_mat: Held-out interactions used to score each trial.
        train_val_mat: Interactions the final model is fit on (after weighting);
            also passed unweighted as the train interactions for the test evaluation.
        test_mat: Held-out interactions used for the final metrics.
        weighting_strategy: One of the keys of the search-space table below.
        algorithm: ``"ALS_factors=10"`` or ``"ALS_factors=100"``.
        n_trials: Number of Optuna trials for strategies that have tunable
            parameters; parameter-free strategies always run a single trial.
        output_dir: If given, the one-row result table is also written to
            ``<output_dir>/<algorithm>_<strategy>_results.csv``. The directory is
            created when missing.
        n_jobs: Number of parallel workers handed to ``study.optimize``. Defaults
            to 1 because the sampler is seeded and Optuna only reproduces seeded
            results when trials run sequentially; pass ``-1`` to use all cores.

    Returns:
        A single-row DataFrame with the algorithm, strategy, number of trials,
        best validation NDCG@20, test NDCG/precision at 10 and 20, the wall-clock
        time of the final fit and the best parameters.

    Raises:
        ValueError: If ``weighting_strategy`` or ``algorithm`` is not recognized,
            or if ``n_trials`` is smaller than 1.
    """
    # Factories (not instances) so every trial and the final fit get a fresh model.
    algorithms = {
        "ALS_factors=10": lambda: AlternatingLeastSquares(factors=10, regularization=10, random_state=42),
        "ALS_factors=100": lambda: AlternatingLeastSquares(factors=100, regularization=10, random_state=42),
    }

    # Single source of truth: strategy -> {parameter name: (low, high)}.
    # An empty mapping marks a parameter-free strategy. Insertion order is the
    # order in which parameters are suggested to the sampler.
    search_spaces = {
        "no_weighting": {},
        "bm25": {"bm25_k1": (0.1, 1000), "bm25_b": (0.0, 1.0)},
        "tfidf": {},
        "log": {},
        "confidence": {"conf_alpha": (1.0, 150.0)},
        "power": {"power_p": (0.1, 1.5)},
        "normalized": {},
        "pmi": {},
        "robust_user_centric": {"scale_factor": (0.1, 10.0)},
        "robust_user_centric_weight_v2": {"lower_q": (5.0, 45.0), "upper_q": (55.0, 95.0)},
        "sigmoid_propensity": {"p": (0.1, 5.0), "beta": (0.0, 1.0)},
        "power_lift": {"p": (0.1, 1.5)},
    }

    if weighting_strategy not in search_spaces:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy
    search_space = search_spaces[strategy]

    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    make_model = algorithms[algorithm]

    # Without at least one completed trial the study has no best value to report.
    if n_trials < 1:
        raise ValueError(f"n_trials must be at least 1, got {n_trials}.")

    print(f"Running optimization for {algo_name} with {strategy}...")

    def get_weighted_matrix(matrix, params):
        """Return a weighted copy of ``matrix`` for the selected strategy."""
        # Work on a copy so the caller's matrix is never touched by a weighting function.
        weighted = matrix.copy()
        if strategy == "bm25":
            weighted = bm25_weight(weighted, K1=params["bm25_k1"], B=params["bm25_b"])
        elif strategy == "confidence":
            weighted = confidence_weight(weighted, alpha=params["conf_alpha"])
        elif strategy == "power":
            weighted = power_weight(weighted, p=params["power_p"])
        elif strategy == "tfidf":
            weighted = tfidf_weight(weighted)
        elif strategy == "log":
            weighted = log_weight(weighted)
        elif strategy == "normalized":
            weighted = normalized_weight(weighted)
        elif strategy == "pmi":
            weighted = pmi_weight(weighted)
        elif strategy == "robust_user_centric":
            weighted = robust_user_centric_weight(weighted, scale_factor=params["scale_factor"])
        elif strategy == "sigmoid_propensity":
            weighted = sigmoid_propensity_weight(weighted, p=params["p"], beta=params["beta"])
        elif strategy == "power_lift":
            weighted = power_lift_weight(weighted, p=params["p"])
        elif strategy == "robust_user_centric_weight_v2":
            weighted = robust_user_centric_weight_v2(
                weighted, lower_q=params["lower_q"], upper_q=params["upper_q"]
            )
        # "no_weighting" falls through and returns the plain copy.
        return weighted

    def objective(trial):
        """Fit on the weighted train split and return validation NDCG@20."""
        params = {
            name: trial.suggest_float(name, low, high)
            for name, (low, high) in search_space.items()
        }
        weighted_train = get_weighted_matrix(train_mat, params)

        model = make_model()
        model.fit(weighted_train, show_progress=False)

        return ranking_metrics_at_k(model, train_mat, val_mat, K=20, show_progress=False)['ndcg']

    # Parameter-free strategies give the same score on every trial, so one is enough.
    current_trials = n_trials if search_space else 1
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=current_trials, n_jobs=n_jobs)

    # --- Final retraining & testing ---
    # Weight the full train+validation matrix with the best parameters found.
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    final_model = make_model()

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    final_model.fit(weighted_train_val, show_progress=False)
    train_seconds = time.perf_counter() - start_time

    metrics_at_10 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    metrics_at_20 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results_df = pd.DataFrame([{
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": metrics_at_10['ndcg'],
        "Test NDCG@20": metrics_at_20['ndcg'],
        "Test Precision@10": metrics_at_10['precision'],
        "Test Precision@20": metrics_at_20['precision'],
        "Final Train Time (s)": train_seconds,
        "Best Params": best_params
    }])

    if output_dir:
        # Create the target directory on demand so the write cannot fail on a fresh checkout.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        results_df.to_csv(output_path, index=False)
    return results_df

In [6]:
# Create the results directory (and any missing parents); a no-op when it already exists.
# exist_ok avoids the gap between checking for the directory and creating it.
os.makedirs(results_folder, exist_ok=True)

In [7]:
# ALS with 10 factors on the unweighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="no_weighting",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:00,951] A new study created in memory with name: no-name-c6b01e48-abb7-45a1-87a9-a222758a98f0


Running optimization for ALS_factors=10 with no_weighting...


  check_blas_config()
[I 2026-02-08 15:14:01,259] Trial 0 finished with value: 0.27236465394945075 and parameters: {}. Best is trial 0 with value: 0.27236465394945075.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,no_weighting,1,0.272365,0.308936,0.327755,0.332179,0.368427,0.1744,{}


In [8]:
# ALS with 10 factors on BM25-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="bm25",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:01,869] A new study created in memory with name: no-name-c96cc735-c32b-4002-97f8-e8ec6508e792


Running optimization for ALS_factors=10 with bm25...


[I 2026-02-08 15:14:02,143] Trial 0 finished with value: 0.25425934457944743 and parameters: {'bm25_k1': 374.60266483547775, 'bm25_b': 0.9507143064099162}. Best is trial 0 with value: 0.25425934457944743.
[I 2026-02-08 15:14:02,481] Trial 1 finished with value: 0.26086253099349127 and parameters: {'bm25_k1': 732.0207424172239, 'bm25_b': 0.5986584841970366}. Best is trial 1 with value: 0.26086253099349127.
[I 2026-02-08 15:14:02,791] Trial 2 finished with value: 0.25889710175837816 and parameters: {'bm25_k1': 156.10303857839227, 'bm25_b': 0.15599452033620265}. Best is trial 1 with value: 0.26086253099349127.
[I 2026-02-08 15:14:03,090] Trial 3 finished with value: 0.25737397982012844 and parameters: {'bm25_k1': 58.17780380698264, 'bm25_b': 0.8661761457749352}. Best is trial 1 with value: 0.26086253099349127.
[I 2026-02-08 15:14:03,439] Trial 4 finished with value: 0.2590693769204739 and parameters: {'bm25_k1': 601.1549002420345, 'bm25_b': 0.7080725777960455}. Best is trial 1 with value:

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,bm25,20,0.263774,0.294402,0.316046,0.318819,0.35694,0.166406,"{'bm25_k1': 150.45291532812632, 'bm25_b': 0.51..."


In [9]:

# ALS with 10 factors on TF-IDF-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="tfidf",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)


[I 2026-02-08 15:14:08,636] A new study created in memory with name: no-name-ca4fddcf-6f76-43d7-bb87-bef2b7eab356


Running optimization for ALS_factors=10 with tfidf...


[I 2026-02-08 15:14:08,953] Trial 0 finished with value: 0.26450041335006436 and parameters: {}. Best is trial 0 with value: 0.26450041335006436.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,tfidf,1,0.2645,0.300131,0.320538,0.321458,0.359782,0.18894,{}


In [10]:
# ALS with 10 factors on log-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:09,583] A new study created in memory with name: no-name-79c22a75-cc5c-4be6-9c73-d6a817b7b188


Running optimization for ALS_factors=10 with log...


[I 2026-02-08 15:14:09,878] Trial 0 finished with value: 0.2714375524158216 and parameters: {}. Best is trial 0 with value: 0.2714375524158216.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,log,1,0.271438,0.312291,0.330202,0.338281,0.373046,0.169274,{}


In [11]:
# ALS with 10 factors on confidence-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="confidence",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:10,484] A new study created in memory with name: no-name-aed2ce60-8139-46c4-873c-2eab0abd29e2


Running optimization for ALS_factors=10 with confidence...


[I 2026-02-08 15:14:10,783] Trial 0 finished with value: 0.10913104808201905 and parameters: {'conf_alpha': 56.80647770825701}. Best is trial 0 with value: 0.10913104808201905.
[I 2026-02-08 15:14:11,105] Trial 1 finished with value: 0.07963767557848277 and parameters: {'conf_alpha': 142.6564316550775}. Best is trial 0 with value: 0.10913104808201905.
[I 2026-02-08 15:14:11,426] Trial 2 finished with value: 0.08853908694588895 and parameters: {'conf_alpha': 110.06709732989935}. Best is trial 0 with value: 0.10913104808201905.
[I 2026-02-08 15:14:11,739] Trial 3 finished with value: 0.0965784325176133 and parameters: {'conf_alpha': 90.20011414535846}. Best is trial 0 with value: 0.10913104808201905.
[I 2026-02-08 15:14:12,041] Trial 4 finished with value: 0.14633629015652536 and parameters: {'conf_alpha': 24.246777425923042}. Best is trial 4 with value: 0.14633629015652536.
[I 2026-02-08 15:14:12,372] Trial 5 finished with value: 0.15059458769699474 and parameters: {'conf_alpha': 24.243

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,confidence,20,0.267608,0.30209,0.32216,0.324592,0.360256,0.166625,{'conf_alpha': 1.2755019674220032}


In [12]:
# ALS with 10 factors on power-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:17,101] A new study created in memory with name: no-name-90f566b4-5b60-462e-834b-896ff69c4450


Running optimization for ALS_factors=10 with power...


[I 2026-02-08 15:14:17,408] Trial 0 finished with value: 0.27130522576796373 and parameters: {'power_p': 0.6243561663863074}. Best is trial 0 with value: 0.27130522576796373.
[I 2026-02-08 15:14:17,764] Trial 1 finished with value: 0.26567271218443983 and parameters: {'power_p': 1.4310000289738827}. Best is trial 0 with value: 0.27130522576796373.
[I 2026-02-08 15:14:18,057] Trial 2 finished with value: 0.27098563003491694 and parameters: {'power_p': 1.1247915185359671}. Best is trial 0 with value: 0.27130522576796373.
[I 2026-02-08 15:14:18,377] Trial 3 finished with value: 0.2728117953552435 and parameters: {'power_p': 0.9381218778758512}. Best is trial 3 with value: 0.2728117953552435.
[I 2026-02-08 15:14:18,684] Trial 4 finished with value: 0.2712007452861822 and parameters: {'power_p': 0.3184260966194111}. Best is trial 3 with value: 0.2728117953552435.
[I 2026-02-08 15:14:19,002] Trial 5 finished with value: 0.2712007452861822 and parameters: {'power_p': 0.31839232847068366}. Bes

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,power,20,0.273823,0.312029,0.328649,0.335972,0.369493,0.172378,{'power_p': 0.7936779087992564}


In [13]:

# ALS with 10 factors on normalized interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="normalized",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:25,280] A new study created in memory with name: no-name-f2cd18c0-6855-4978-b182-6d5fda72d32b


Running optimization for ALS_factors=10 with normalized...


[I 2026-02-08 15:14:25,578] Trial 0 finished with value: 0.13360704270518714 and parameters: {}. Best is trial 0 with value: 0.13360704270518714.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,normalized,1,0.133607,0.141368,0.154301,0.163286,0.194102,0.146238,{}


In [14]:

# ALS with 10 factors on PMI-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="pmi",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:26,128] A new study created in memory with name: no-name-856b1f84-81cd-483b-a3ae-e49e451b7e88


Running optimization for ALS_factors=10 with pmi...


[I 2026-02-08 15:14:26,396] Trial 0 finished with value: 0.2703587976166588 and parameters: {}. Best is trial 0 with value: 0.2703587976166588.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,pmi,1,0.270359,0.31259,0.330585,0.326076,0.358006,0.168725,{}


In [15]:

# ALS with 10 factors using the robust_user_centric weighting; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:26,979] A new study created in memory with name: no-name-adfbb46f-1f8a-4ac3-8f31-4c4c21803d46


Running optimization for ALS_factors=10 with robust_user_centric...


[I 2026-02-08 15:14:27,259] Trial 0 finished with value: 0.26850871824051764 and parameters: {'scale_factor': 3.807947176588889}. Best is trial 0 with value: 0.26850871824051764.
[I 2026-02-08 15:14:27,603] Trial 1 finished with value: 0.26580247023254466 and parameters: {'scale_factor': 9.51207163345817}. Best is trial 0 with value: 0.26850871824051764.
[I 2026-02-08 15:14:27,877] Trial 2 finished with value: 0.2667898943076209 and parameters: {'scale_factor': 7.34674002393291}. Best is trial 0 with value: 0.26850871824051764.
[I 2026-02-08 15:14:28,226] Trial 3 finished with value: 0.26746793540386177 and parameters: {'scale_factor': 6.026718993550662}. Best is trial 0 with value: 0.26850871824051764.
[I 2026-02-08 15:14:28,530] Trial 4 finished with value: 0.26672588813639614 and parameters: {'scale_factor': 1.6445845403801216}. Best is trial 0 with value: 0.26850871824051764.
[I 2026-02-08 15:14:28,839] Trial 5 finished with value: 0.2667240327508851 and parameters: {'scale_factor'

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,robust_user_centric,20,0.269143,0.312328,0.328071,0.336302,0.368072,0.181727,{'scale_factor': 2.6999412614048968}


In [16]:

# ALS with 10 factors using the robust_user_centric_weight_v2 weighting; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:33,555] A new study created in memory with name: no-name-f2991dba-bb41-47c7-939c-1264401bcad5


Running optimization for ALS_factors=10 with robust_user_centric_weight_v2...


[I 2026-02-08 15:14:33,882] Trial 0 finished with value: 0.2585678491434674 and parameters: {'lower_q': 19.9816047538945, 'upper_q': 93.02857225639664}. Best is trial 0 with value: 0.2585678491434674.
  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-02-08 15:14:34,170] Trial 1 finished with value: 0.2616614745644782 and parameters: {'lower_q': 34.2797576724562, 'upper_q': 78.94633936788146}. Best is trial 1 with value: 0.2616614745644782.
[I 2026-02-08 15:14:34,448] Trial 2 finished with value: 0.2586642204799025 and parameters: {'lower_q': 11.24074561769746, 'upper_q': 61.23978081344811}. Best is trial 1 with value: 0.2616614745644782.
[I 2026-02-08 15:14:34,733] Trial 3 finished with value: 0.26019722381326693 and parameters: {'lower_q': 7.323344486727978, 'upper_q': 89.6470458309974}. Best is trial 1 with value: 0.2616614745644782.
[I 2026-02-08 15:14:35,042] Trial 4 finished with value: 0.2592889359005378 and parameters: {'lower_q': 29.04460046972835, 'upper_q': 83.32290311184181}. 

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,robust_user_centric_weight_v2,20,0.261661,0.301697,0.315205,0.324262,0.35469,0.171706,"{'lower_q': 34.2797576724562, 'upper_q': 78.94..."


In [17]:

# ALS with 10 factors using the sigmoid_propensity weighting; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:40,241] A new study created in memory with name: no-name-57abdcb5-7d51-4744-a48b-27c8ad582183


Running optimization for ALS_factors=10 with sigmoid_propensity...


[I 2026-02-08 15:14:40,552] Trial 0 finished with value: 0.2733048027626596 and parameters: {'p': 1.9352465823520764, 'beta': 0.9507143064099162}. Best is trial 0 with value: 0.2733048027626596.
[I 2026-02-08 15:14:40,853] Trial 1 finished with value: 0.26528891855032516 and parameters: {'p': 3.6867703148758855, 'beta': 0.5986584841970366}. Best is trial 0 with value: 0.2733048027626596.
[I 2026-02-08 15:14:41,142] Trial 2 finished with value: 0.2677814431653246 and parameters: {'p': 0.864491338167939, 'beta': 0.15599452033620265}. Best is trial 0 with value: 0.2733048027626596.
[I 2026-02-08 15:14:41,434] Trial 3 finished with value: 0.27448673958865427 and parameters: {'p': 0.38460969962417735, 'beta': 0.8661761457749352}. Best is trial 3 with value: 0.27448673958865427.
[I 2026-02-08 15:14:41,738] Trial 4 finished with value: 0.26883228250576796 and parameters: {'p': 3.0454635575417233, 'beta': 0.7080725777960455}. Best is trial 3 with value: 0.27448673958865427.
[I 2026-02-08 15:14

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,sigmoid_propensity,20,0.274759,0.31555,0.331303,0.342405,0.374822,0.166376,"{'p': 0.7936602089465885, 'beta': 0.8296158048..."


In [18]:

# ALS with 10 factors using the power_lift weighting; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power_lift",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:46,799] A new study created in memory with name: no-name-599179b3-18a4-4046-8c79-b1b4efbeafc5


Running optimization for ALS_factors=10 with power_lift...


[I 2026-02-08 15:14:47,104] Trial 0 finished with value: 0.2571326150230608 and parameters: {'p': 0.6243561663863074}. Best is trial 0 with value: 0.2571326150230608.
[I 2026-02-08 15:14:47,394] Trial 1 finished with value: 0.12065786277700574 and parameters: {'p': 1.4310000289738827}. Best is trial 0 with value: 0.2571326150230608.
[I 2026-02-08 15:14:47,653] Trial 2 finished with value: 0.17015022714561556 and parameters: {'p': 1.1247915185359671}. Best is trial 0 with value: 0.2571326150230608.
[I 2026-02-08 15:14:47,986] Trial 3 finished with value: 0.2043438446126156 and parameters: {'p': 0.9381218778758512}. Best is trial 0 with value: 0.2571326150230608.
[I 2026-02-08 15:14:48,303] Trial 4 finished with value: 0.2781036967582215 and parameters: {'p': 0.3184260966194111}. Best is trial 4 with value: 0.2781036967582215.
[I 2026-02-08 15:14:48,587] Trial 5 finished with value: 0.2781014416759631 and parameters: {'p': 0.31839232847068366}. Best is trial 4 with value: 0.2781036967582

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,power_lift,20,0.278104,0.325115,0.339257,0.347023,0.375178,0.167367,{'p': 0.3184260966194111}


In [19]:
# ALS with 100 factors on the unweighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="no_weighting",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:53,265] A new study created in memory with name: no-name-65dbc4f6-2186-4d9e-a5ce-3338fb88b425


Running optimization for ALS_factors=100 with no_weighting...


[I 2026-02-08 15:14:53,658] Trial 0 finished with value: 0.2472544006159351 and parameters: {}. Best is trial 0 with value: 0.2472544006159351.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,no_weighting,1,0.247254,0.29541,0.318886,0.307768,0.341189,0.252736,{}


In [20]:
# ALS with 100 factors on BM25-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="bm25",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:14:54,607] A new study created in memory with name: no-name-8189f11a-6a49-4b24-84e8-02df7d4f374a


Running optimization for ALS_factors=100 with bm25...


[I 2026-02-08 15:14:55,043] Trial 0 finished with value: 0.2189275297129027 and parameters: {'bm25_k1': 374.60266483547775, 'bm25_b': 0.9507143064099162}. Best is trial 0 with value: 0.2189275297129027.
[I 2026-02-08 15:14:55,446] Trial 1 finished with value: 0.22419453635856404 and parameters: {'bm25_k1': 732.0207424172239, 'bm25_b': 0.5986584841970366}. Best is trial 1 with value: 0.22419453635856404.
[I 2026-02-08 15:14:55,827] Trial 2 finished with value: 0.23255364341628995 and parameters: {'bm25_k1': 156.10303857839227, 'bm25_b': 0.15599452033620265}. Best is trial 2 with value: 0.23255364341628995.
[I 2026-02-08 15:14:56,243] Trial 3 finished with value: 0.22234990989339987 and parameters: {'bm25_k1': 58.17780380698264, 'bm25_b': 0.8661761457749352}. Best is trial 2 with value: 0.23255364341628995.
[I 2026-02-08 15:14:56,639] Trial 4 finished with value: 0.22257932812161388 and parameters: {'bm25_k1': 601.1549002420345, 'bm25_b': 0.7080725777960455}. Best is trial 2 with value: 

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,bm25,20,0.235701,0.275688,0.298869,0.285997,0.31774,0.216398,"{'bm25_k1': 984.8414327079532, 'bm25_b': 0.010..."


In [21]:

# ALS with 100 factors on TF-IDF-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="tfidf",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)


[I 2026-02-08 15:15:03,527] A new study created in memory with name: no-name-15da6436-e53a-4ea8-b4cd-83c5316c2eb2


Running optimization for ALS_factors=100 with tfidf...


[I 2026-02-08 15:15:03,962] Trial 0 finished with value: 0.24207471890741755 and parameters: {}. Best is trial 0 with value: 0.24207471890741755.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,tfidf,1,0.242075,0.289171,0.313828,0.301831,0.335268,0.221366,{}


In [22]:
# ALS with 100 factors on log-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:04,845] A new study created in memory with name: no-name-1ca6ff04-f839-4974-b3eb-6c28943e4fdb


Running optimization for ALS_factors=100 with log...


[I 2026-02-08 15:15:05,247] Trial 0 finished with value: 0.2694546916941212 and parameters: {}. Best is trial 0 with value: 0.2694546916941212.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,log,1,0.269455,0.327503,0.347368,0.346198,0.377428,0.216292,{}


In [23]:
# ALS with 100 factors on confidence-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="confidence",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:06,158] A new study created in memory with name: no-name-77cef3e1-617c-4cec-ba94-6b4c58980d67


Running optimization for ALS_factors=100 with confidence...


[I 2026-02-08 15:15:06,582] Trial 0 finished with value: 0.19405459138712305 and parameters: {'conf_alpha': 56.80647770825701}. Best is trial 0 with value: 0.19405459138712305.
[I 2026-02-08 15:15:06,974] Trial 1 finished with value: 0.17951569320919106 and parameters: {'conf_alpha': 142.6564316550775}. Best is trial 0 with value: 0.19405459138712305.
[I 2026-02-08 15:15:07,350] Trial 2 finished with value: 0.18285106329612022 and parameters: {'conf_alpha': 110.06709732989935}. Best is trial 0 with value: 0.19405459138712305.
[I 2026-02-08 15:15:07,760] Trial 3 finished with value: 0.18661526848656562 and parameters: {'conf_alpha': 90.20011414535846}. Best is trial 0 with value: 0.19405459138712305.
[I 2026-02-08 15:15:08,136] Trial 4 finished with value: 0.20110780387656987 and parameters: {'conf_alpha': 24.246777425923042}. Best is trial 4 with value: 0.20110780387656987.
[I 2026-02-08 15:15:08,534] Trial 5 finished with value: 0.20000246183068096 and parameters: {'conf_alpha': 24.24

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,confidence,20,0.238348,0.283385,0.310228,0.292594,0.32982,0.255573,{'conf_alpha': 1.2755019674220032}


In [24]:
# ALS with 100 factors on power-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:15,068] A new study created in memory with name: no-name-d9ed78a1-edab-42fa-bb27-df49d9135889


Running optimization for ALS_factors=100 with power...


[I 2026-02-08 15:15:15,477] Trial 0 finished with value: 0.25980085241756257 and parameters: {'power_p': 0.6243561663863074}. Best is trial 0 with value: 0.25980085241756257.
[I 2026-02-08 15:15:15,873] Trial 1 finished with value: 0.2376067120504491 and parameters: {'power_p': 1.4310000289738827}. Best is trial 0 with value: 0.25980085241756257.
[I 2026-02-08 15:15:16,283] Trial 2 finished with value: 0.24295219145433292 and parameters: {'power_p': 1.1247915185359671}. Best is trial 0 with value: 0.25980085241756257.
[I 2026-02-08 15:15:16,682] Trial 3 finished with value: 0.24984264497153122 and parameters: {'power_p': 0.9381218778758512}. Best is trial 0 with value: 0.25980085241756257.
[I 2026-02-08 15:15:17,102] Trial 4 finished with value: 0.27024283174975927 and parameters: {'power_p': 0.3184260966194111}. Best is trial 4 with value: 0.27024283174975927.
[I 2026-02-08 15:15:17,516] Trial 5 finished with value: 0.2702485100601984 and parameters: {'power_p': 0.31839232847068366}. 

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power,20,0.276468,0.340469,0.356622,0.360877,0.385599,0.241904,{'power_p': 0.10258860908987122}


In [25]:

# ALS with 100 factors on normalized interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="normalized",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:23,982] A new study created in memory with name: no-name-545a646e-80cd-4aca-a9c5-1d105fed83d6


Running optimization for ALS_factors=100 with normalized...


[I 2026-02-08 15:15:24,348] Trial 0 finished with value: 0.13360704270518714 and parameters: {}. Best is trial 0 with value: 0.13360704270518714.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,normalized,1,0.133607,0.141368,0.154301,0.163286,0.194102,0.184903,{}


In [26]:

# ALS with 100 factors on PMI-weighted interactions; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="pmi",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:25,248] A new study created in memory with name: no-name-1fa8356e-974e-4ce9-b4ea-947eba97403c


Running optimization for ALS_factors=100 with pmi...


[I 2026-02-08 15:15:25,661] Trial 0 finished with value: 0.25503041773054685 and parameters: {}. Best is trial 0 with value: 0.25503041773054685.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,pmi,1,0.25503,0.306554,0.326158,0.317335,0.341663,0.227065,{}


In [27]:

# ALS with 100 factors using the robust_user_centric weighting; the result CSV goes to results_folder.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:26,600] A new study created in memory with name: no-name-e78d400d-2cf6-461a-b335-ddf5dbb6261a


Running optimization for ALS_factors=100 with robust_user_centric...


[I 2026-02-08 15:15:27,007] Trial 0 finished with value: 0.2645799568921726 and parameters: {'scale_factor': 3.807947176588889}. Best is trial 0 with value: 0.2645799568921726.
[I 2026-02-08 15:15:27,392] Trial 1 finished with value: 0.2418449588027471 and parameters: {'scale_factor': 9.51207163345817}. Best is trial 0 with value: 0.2645799568921726.
[I 2026-02-08 15:15:27,788] Trial 2 finished with value: 0.24855915161953773 and parameters: {'scale_factor': 7.34674002393291}. Best is trial 0 with value: 0.2645799568921726.
[I 2026-02-08 15:15:28,174] Trial 3 finished with value: 0.25321508743340315 and parameters: {'scale_factor': 6.026718993550662}. Best is trial 0 with value: 0.2645799568921726.
[I 2026-02-08 15:15:28,565] Trial 4 finished with value: 0.28049617386335185 and parameters: {'scale_factor': 1.6445845403801216}. Best is trial 4 with value: 0.28049617386335185.
[I 2026-02-08 15:15:28,981] Trial 5 finished with value: 0.2805693727496052 and parameters: {'scale_factor': 1.6

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,robust_user_centric,20,0.282612,0.34531,0.359464,0.367145,0.393534,0.21623,{'scale_factor': 1.9548472736312617}


In [28]:

# "robust_user_centric_weight_v2" weighting on ALS with 100 factors; the trial
# log below shows `lower_q` and `upper_q` as the tuned hyperparameters.
# NOTE(review): the recorded output echoes the line
# `weights = 1 / (1 + np.exp(-z_scores))`, which looks like the tail of a
# runtime warning (possibly overflow in exp) - confirm in weighting_strategies.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:35,349] A new study created in memory with name: no-name-c146d7c2-393f-44c4-9575-39b2dec42415


Running optimization for ALS_factors=100 with robust_user_centric_weight_v2...


[I 2026-02-08 15:15:35,764] Trial 0 finished with value: 0.26591745159511665 and parameters: {'lower_q': 19.9816047538945, 'upper_q': 93.02857225639664}. Best is trial 0 with value: 0.26591745159511665.
  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-02-08 15:15:36,187] Trial 1 finished with value: 0.2657911789316968 and parameters: {'lower_q': 34.2797576724562, 'upper_q': 78.94633936788146}. Best is trial 0 with value: 0.26591745159511665.
[I 2026-02-08 15:15:36,543] Trial 2 finished with value: 0.26548010936241345 and parameters: {'lower_q': 11.24074561769746, 'upper_q': 61.23978081344811}. Best is trial 0 with value: 0.26591745159511665.
[I 2026-02-08 15:15:36,965] Trial 3 finished with value: 0.26513707700924577 and parameters: {'lower_q': 7.323344486727978, 'upper_q': 89.6470458309974}. Best is trial 0 with value: 0.26591745159511665.
[I 2026-02-08 15:15:37,344] Trial 4 finished with value: 0.2663246184241595 and parameters: {'lower_q': 29.04460046972835, 'upper_q': 83.32290311184

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,robust_user_centric_weight_v2,20,0.266325,0.326791,0.338397,0.345374,0.372454,0.214991,"{'lower_q': 29.04460046972835, 'upper_q': 83.3..."


In [29]:

# "sigmoid_propensity" weighting on ALS with 100 factors; the trial log below
# shows `p` and `beta` as the tuned hyperparameters.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:44,187] A new study created in memory with name: no-name-45d31a3a-9947-4549-b62c-a6c19cca6c69


Running optimization for ALS_factors=100 with sigmoid_propensity...


[I 2026-02-08 15:15:44,602] Trial 0 finished with value: 0.2680768995445226 and parameters: {'p': 1.9352465823520764, 'beta': 0.9507143064099162}. Best is trial 0 with value: 0.2680768995445226.
[I 2026-02-08 15:15:44,995] Trial 1 finished with value: 0.27535860411261975 and parameters: {'p': 3.6867703148758855, 'beta': 0.5986584841970366}. Best is trial 1 with value: 0.27535860411261975.
[I 2026-02-08 15:15:45,400] Trial 2 finished with value: 0.2798692617041042 and parameters: {'p': 0.864491338167939, 'beta': 0.15599452033620265}. Best is trial 2 with value: 0.2798692617041042.
[I 2026-02-08 15:15:45,810] Trial 3 finished with value: 0.27098979165206855 and parameters: {'p': 0.38460969962417735, 'beta': 0.8661761457749352}. Best is trial 2 with value: 0.2798692617041042.
[I 2026-02-08 15:15:46,224] Trial 4 finished with value: 0.2717239729024352 and parameters: {'p': 3.0454635575417233, 'beta': 0.7080725777960455}. Best is trial 2 with value: 0.2798692617041042.
[I 2026-02-08 15:15:4

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,sigmoid_propensity,20,0.284796,0.348182,0.362325,0.371103,0.396495,0.215761,"{'p': 2.2165305913463675, 'beta': 0.2912291401..."


In [30]:

# "power_lift" weighting on ALS with 100 factors; the trial log below shows `p`
# as the tuned hyperparameter.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="power_lift",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-08 15:15:53,159] A new study created in memory with name: no-name-191b2cdb-298c-4b5a-aaa0-a96903807254


Running optimization for ALS_factors=100 with power_lift...


[I 2026-02-08 15:15:53,566] Trial 0 finished with value: 0.22955972061894467 and parameters: {'p': 0.6243561663863074}. Best is trial 0 with value: 0.22955972061894467.
[I 2026-02-08 15:15:53,947] Trial 1 finished with value: 0.18977273215455273 and parameters: {'p': 1.4310000289738827}. Best is trial 0 with value: 0.22955972061894467.
[I 2026-02-08 15:15:54,354] Trial 2 finished with value: 0.20027416658825473 and parameters: {'p': 1.1247915185359671}. Best is trial 0 with value: 0.22955972061894467.
[I 2026-02-08 15:15:54,799] Trial 3 finished with value: 0.2117695026612302 and parameters: {'p': 0.9381218778758512}. Best is trial 0 with value: 0.22955972061894467.
[I 2026-02-08 15:15:55,194] Trial 4 finished with value: 0.2619514005702197 and parameters: {'p': 0.3184260966194111}. Best is trial 4 with value: 0.2619514005702197.
[I 2026-02-08 15:15:55,592] Trial 5 finished with value: 0.2619527923686673 and parameters: {'p': 0.31839232847068366}. Best is trial 5 with value: 0.26195279

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power_lift,20,0.278785,0.33859,0.354887,0.359723,0.385955,0.216009,{'p': 0.11118613978140188}


In [31]:
import glob

# Gather every per-run CSV from the results folder. glob returns paths in an
# arbitrary, filesystem-dependent order, so sort them to make the read order
# reproducible across machines and re-runs.
result_paths = sorted(glob.glob(f"{results_folder}/*.csv"))
all_results = [pd.read_csv(path) for path in result_paths]

if all_results:
    # Each per-run CSV holds a single row labelled 0; without resetting the
    # index the combined frame would carry the label 0 on every row, making
    # label-based lookups (e.g. .loc[0]) return all rows at once.
    experiment_results = (
        pd.concat(all_results, ignore_index=True)
        .sort_values("Test NDCG@20", ascending=False)
        .reset_index(drop=True)  # index now reflects the ranking by Test NDCG@20
    )
    # Written to the working directory (not results_folder), so the combined
    # file is not picked up by the glob above on the next run.
    experiment_results.to_csv(results_filename, index=False)
else:
    print("No results found.")
    # Bind an empty frame so the display expression below cannot raise a
    # NameError on a fresh kernel or show a stale frame from an earlier run.
    experiment_results = pd.DataFrame()

experiment_results

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,sigmoid_propensity,20,0.284796,0.348182,0.362325,0.371103,0.396495,0.215761,"{'p': 2.2165305913463675, 'beta': 0.2912291401..."
0,ALS_factors=100,robust_user_centric,20,0.282612,0.34531,0.359464,0.367145,0.393534,0.21623,{'scale_factor': 1.9548472736312617}
0,ALS_factors=100,power,20,0.276468,0.340469,0.356622,0.360877,0.385599,0.241904,{'power_p': 0.10258860908987122}
0,ALS_factors=100,power_lift,20,0.278785,0.33859,0.354887,0.359723,0.385955,0.216009,{'p': 0.11118613978140188}
0,ALS_factors=100,log,1,0.269455,0.327503,0.347368,0.346198,0.377428,0.216292,{}
0,ALS_factors=10,power_lift,20,0.278104,0.325115,0.339257,0.347023,0.375178,0.167367,{'p': 0.3184260966194111}
0,ALS_factors=100,robust_user_centric_weight_v2,20,0.266325,0.326791,0.338397,0.345374,0.372454,0.214991,"{'lower_q': 29.04460046972835, 'upper_q': 83.3..."
0,ALS_factors=10,sigmoid_propensity,20,0.274759,0.31555,0.331303,0.342405,0.374822,0.166376,"{'p': 0.7936602089465885, 'beta': 0.8296158048..."
0,ALS_factors=10,pmi,1,0.270359,0.31259,0.330585,0.326076,0.358006,0.168725,{}
0,ALS_factors=10,log,1,0.271438,0.312291,0.330202,0.338281,0.373046,0.169274,{}
