In [1]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, confidence_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, precision_at_k, ndcg_at_k


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories
sys.path.append(os.path.abspath(".."))

from utils.sparse import transform_dataframe_to_sparse

In [3]:
lastfm_df = (
    pd.read_csv(
        "/home/coder/projects/rec-sys-research/data/lastfm-dataset-360K/usersha1-artmbid-artname-plays.tsv",
        sep="\t",
        header=None,
        usecols=[0, 2, 3],
        names=['user_id', 'item_id', 'play_count'],
    )
    .loc[:, ['user_id', 'item_id', 'play_count']]
    .dropna()
    .rename(columns={'play_count': 'target'})
)
lastfm_df['user_id'].nunique(), lastfm_df['item_id'].nunique(), lastfm_df.shape[0]

(358868, 292363, 17535451)

In [4]:
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    lastfm_df, row_field='user_id', col_field='item_id', data_field='target'
)


train_val_mat, test_mat = train_test_split(user_item_matrix, train_percentage=0.9, random_state=42)
train_mat, val_mat = train_test_split(train_val_mat, train_percentage=0.9, random_state=42)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (358868, 292363), Val Shape: (358868, 292363), Test Shape: (358868, 292363)


In [None]:
results_folder = "results/lastfm_360k"
results_filename = "lastfm_360k_experiment_results.csv"

import time

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
) -> pd.DataFrame:
    results = []
    algorithms = {
        "ALS_factors=10": lambda: AlternatingLeastSquares(factors=10, random_state=42),
        "ALS_factors=100": lambda: AlternatingLeastSquares(factors=100, random_state=42),
    }
    strategies = [
        "no_weighting",
        "bm25",
        "tfidf",
        "log", 
        "confidence",
        "power",
        "normalized",
        "pmi",
        "robust_user_centric",
        "robust_user_centric_weight_v2",
        "sigmoid_propensity",
        "power_lift"
    ]
    if weighting_strategy not in strategies:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy

    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    AlgoFactory = algorithms[algorithm]

    print(f"Running optimization for {algo_name} with {strategy}...")

    def get_weighted_matrix(matrix, params):
        weighted = matrix.copy()
        if strategy == "bm25":
            weighted = bm25_weight(weighted, K1=params.get("bm25_k1"), B=params.get("bm25_b"))
        elif strategy == "confidence":
            weighted = confidence_weight(weighted, alpha=params.get("conf_alpha"))
        elif strategy == "power":
            weighted = power_weight(weighted, p=params.get("power_p"))
        elif strategy == "tfidf":
            weighted = tfidf_weight(weighted)
        elif strategy == "log":
            weighted = log_weight(weighted)
        elif strategy == "normalized":
            weighted = normalized_weight(weighted)
        elif strategy == "pmi":
            weighted = pmi_weight(weighted)
        elif strategy == "robust_user_centric":
            weighted = robust_user_centric_weight(weighted, scale_factor=params.get("scale_factor"))
        elif strategy == "sigmoid_propensity":
            weighted = sigmoid_propensity_weight(weighted, p=params.get("p"), beta=params.get("beta"))
        elif strategy == "power_lift":
            weighted = power_lift_weight(weighted, p=params.get("p"))
        elif strategy == "robust_user_centric_weight_v2":
            weighted = robust_user_centric_weight_v2(weighted, lower_q=params.get("lower_q"), upper_q=params.get("upper_q"))
        return weighted

    def objective(trial):
        params = {}
        # Suggest weighting strategy parameters
        if strategy == "bm25":
            params["bm25_k1"] = trial.suggest_float("bm25_k1", 0.1, 1000)
            params["bm25_b"] = trial.suggest_float("bm25_b", 0.0, 1.0)
        elif strategy == "confidence":
            params["conf_alpha"] = trial.suggest_float("conf_alpha", 1.0, 150.0)
        elif strategy == "power":
            params["power_p"] = trial.suggest_float("power_p", 0.1, 1.5)
        elif strategy == "robust_user_centric":
            params["scale_factor"] = trial.suggest_float("scale_factor", 0.1, 10.0)
        elif strategy == "robust_user_centric_weight_v2":
            params["lower_q"] = trial.suggest_float("lower_q", 5.0, 45.0)
            params["upper_q"] = trial.suggest_float("upper_q", 55.0, 95.0)
        elif strategy == "sigmoid_propensity":
            params["p"] = trial.suggest_float("p", 0.1, 5.0)
            params["beta"] = trial.suggest_float("beta", 0.0, 1.0)
        elif strategy == "power_lift":
            params["p"] = trial.suggest_float("p", 0.1, 1.5)
        weighted_train = get_weighted_matrix(train_mat, params)
        
        # Train Model
        model = AlgoFactory()
        model.fit(weighted_train, show_progress=False)

        # Evaluate on Validation Set
        return ndcg_at_k(model, train_mat, val_mat, K=20, show_progress=False)

    # Optimize only if strategy has parameters
    current_trials = n_trials if strategy in ["bm25", "confidence", "power", "robust_user_centric", "robust_user_centric_weight_v2", "sigmoid_propensity", "power_lift"] else 1
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=current_trials, n_jobs=-1)

    # --- Final Retraining & Testing ---
    # Use best params to weight the full train_val matrix
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    # Train Final Model
    final_model = AlgoFactory()
    
    start_time = time.time()
    final_model.fit(weighted_train_val, show_progress=False)
    end_time = time.time()
    
    # Evaluate on Test Set
    test_ndcg_10 = ndcg_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_precision_10 = precision_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    test_ndcg_20 = ndcg_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)
    test_precision_20 = precision_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results.append({
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": test_ndcg_10,
        "Test NDCG@20": test_ndcg_20,
        "Test Precision@10": test_precision_10,
        "Test Precision@20": test_precision_20,
        "Final Train Time (s)": end_time - start_time,
        "Best Params": best_params
    })

    if output_dir:
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        pd.DataFrame(results).to_csv(output_path, index=False)
    return pd.DataFrame(results)

In [None]:
if not os.path.exists(results_folder):
    os.makedirs(results_folder)

# Execution of all experiments
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="no_weighting", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="bm25", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="tfidf", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="log", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="confidence", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="normalized", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="pmi", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="robust_user_centric_weight_v2", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="sigmoid_propensity", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="ALS_factors=10", n_trials=20, output_dir=results_folder)
run_hyperparameter_optimization(train_mat, val_mat, train_val_mat, test_mat, weighting_strategy="power_lift", algorithm="ALS_factors=100", n_trials=20, output_dir=results_folder)

[I 2026-01-30 09:25:14,851] A new study created in memory with name: no-name-af5ef919-2b93-421d-a4fd-849484d241b9


Running optimization for ALS_factors=10 with no_weighting...


[I 2026-01-30 09:25:25,418] Trial 0 finished with value: 0.09808161938868308 and parameters: {}. Best is trial 0 with value: 0.09808161938868308.
[I 2026-01-30 09:25:42,413] A new study created in memory with name: no-name-499a5779-4433-46d4-87e8-647884361337


Running optimization for ALS_factors=10 with bm25...


[I 2026-01-30 09:26:21,753] Trial 1 finished with value: 0.09645463137925213 and parameters: {'bm25_k1': 225.48082597129576, 'bm25_b': 0.5287138717868045}. Best is trial 1 with value: 0.09645463137925213.
[I 2026-01-30 09:26:21,814] Trial 3 finished with value: 0.11396118964270216 and parameters: {'bm25_k1': 23.043610429295562, 'bm25_b': 0.08553331531117592}. Best is trial 3 with value: 0.11396118964270216.
[I 2026-01-30 09:26:22,187] Trial 0 finished with value: 0.09083202639244789 and parameters: {'bm25_k1': 390.3740339836404, 'bm25_b': 0.8429585315007274}. Best is trial 3 with value: 0.11396118964270216.
[I 2026-01-30 09:26:22,662] Trial 2 finished with value: 0.08696873586782118 and parameters: {'bm25_k1': 730.9996598439025, 'bm25_b': 0.3629667005323126}. Best is trial 3 with value: 0.11396118964270216.
[I 2026-01-30 09:27:00,568] Trial 4 finished with value: 0.08377728399514059 and parameters: {'bm25_k1': 999.0718547570991, 'bm25_b': 0.9304015463815763}. Best is trial 3 with value

Running optimization for ALS_factors=10 with tfidf...


[I 2026-01-30 09:29:28,352] Trial 0 finished with value: 0.1146309868035351 and parameters: {}. Best is trial 0 with value: 0.1146309868035351.
[I 2026-01-30 09:29:46,019] A new study created in memory with name: no-name-4233da7f-b9ae-433b-8f51-5ba6212e0b60


Running optimization for ALS_factors=10 with log...


[I 2026-01-30 09:29:56,637] Trial 0 finished with value: 0.10210944790580578 and parameters: {}. Best is trial 0 with value: 0.10210944790580578.
[I 2026-01-30 09:30:14,119] A new study created in memory with name: no-name-d48e733b-7631-465d-a79f-36b10077d074


Running optimization for ALS_factors=10 with confidence...


[I 2026-01-30 09:30:53,878] Trial 2 finished with value: 0.07350820068458522 and parameters: {'conf_alpha': 42.33838823372816}. Best is trial 2 with value: 0.07350820068458522.
[I 2026-01-30 09:30:53,900] Trial 1 finished with value: 0.05774193219206872 and parameters: {'conf_alpha': 70.98950379116994}. Best is trial 2 with value: 0.07350820068458522.
[I 2026-01-30 09:30:53,987] Trial 3 finished with value: 0.11447390417293594 and parameters: {'conf_alpha': 3.4723751724861236}. Best is trial 3 with value: 0.11447390417293594.
[I 2026-01-30 09:30:54,777] Trial 0 finished with value: 0.11321030775236014 and parameters: {'conf_alpha': 4.807304457674045}. Best is trial 3 with value: 0.11447390417293594.
[I 2026-01-30 09:31:33,130] Trial 6 finished with value: 0.043196260985178224 and parameters: {'conf_alpha': 114.93416101961503}. Best is trial 3 with value: 0.11447390417293594.
[I 2026-01-30 09:31:33,385] Trial 5 finished with value: 0.07322683406489643 and parameters: {'conf_alpha': 42.7

Running optimization for ALS_factors=10 with power...


[I 2026-01-30 09:34:29,148] Trial 0 finished with value: 0.10667022772820259 and parameters: {'power_p': 0.8870287414332618}. Best is trial 0 with value: 0.10667022772820259.
[I 2026-01-30 09:34:29,170] Trial 1 finished with value: 0.10830827288019904 and parameters: {'power_p': 0.8563317979394065}. Best is trial 1 with value: 0.10830827288019904.
[I 2026-01-30 09:34:29,529] Trial 3 finished with value: 0.06351875931855616 and parameters: {'power_p': 1.2855158061510616}. Best is trial 1 with value: 0.10830827288019904.
[I 2026-01-30 09:34:32,302] Trial 2 finished with value: 0.11239626472652683 and parameters: {'power_p': 0.6938791834954882}. Best is trial 2 with value: 0.11239626472652683.
[I 2026-01-30 09:35:07,539] Trial 4 finished with value: 0.11019462873652298 and parameters: {'power_p': 0.8131235114248311}. Best is trial 2 with value: 0.11239626472652683.
[I 2026-01-30 09:35:07,916] Trial 5 finished with value: 0.09227084439569026 and parameters: {'power_p': 1.0586126065280625}.

Running optimization for ALS_factors=10 with normalized...


[I 2026-01-30 09:37:34,484] Trial 0 finished with value: 0.05012925938321616 and parameters: {}. Best is trial 0 with value: 0.05012925938321616.
[I 2026-01-30 09:37:52,213] A new study created in memory with name: no-name-bde56cd8-ef1f-4b70-90a9-e0a3f37f97f9


Running optimization for ALS_factors=10 with pmi...


[I 2026-01-30 09:38:03,241] Trial 0 finished with value: 0.10324318466406207 and parameters: {}. Best is trial 0 with value: 0.10324318466406207.
[I 2026-01-30 09:38:21,224] A new study created in memory with name: no-name-933bc0b1-efcd-4a55-8ff5-8da1d1331939


Running optimization for ALS_factors=10 with robust_user_centric...


[I 2026-01-30 09:42:56,799] Trial 0 finished with value: 0.08547782638469505 and parameters: {'scale_factor': 1.4182502870876916}. Best is trial 0 with value: 0.08547782638469505.
[I 2026-01-30 09:42:58,281] Trial 3 finished with value: 0.09528779818527776 and parameters: {'scale_factor': 3.6456394258394362}. Best is trial 3 with value: 0.09528779818527776.
[I 2026-01-30 09:42:58,789] Trial 1 finished with value: 0.10264422496993518 and parameters: {'scale_factor': 9.07697856748529}. Best is trial 1 with value: 0.10264422496993518.
[I 2026-01-30 09:43:03,296] Trial 2 finished with value: 0.05471041411490816 and parameters: {'scale_factor': 0.4218659044936013}. Best is trial 1 with value: 0.10264422496993518.
[I 2026-01-30 09:47:32,397] Trial 4 finished with value: 0.09152540317933021 and parameters: {'scale_factor': 2.3969404656359985}. Best is trial 1 with value: 0.10264422496993518.
[I 2026-01-30 09:47:32,514] Trial 5 finished with value: 0.10179194093325622 and parameters: {'scale_f

Running optimization for ALS_factors=10 with robust_user_centric_weight_v2...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-01-30 10:06:52,238] Trial 1 finished with value: 0.07497702148224367 and parameters: {'lower_q': 17.054684474727154, 'upper_q': 88.75828970088341}. Best is trial 1 with value: 0.07497702148224367.
[I 2026-01-30 10:06:55,517] Trial 3 finished with value: 0.07512645065702193 and parameters: {'lower_q': 16.889947575148724, 'upper_q': 86.55849972121413}. Best is trial 3 with value: 0.07512645065702193.
[I 2026-01-30 10:06:56,069] Trial 2 finished with value: 0.07467974826296521 and parameters: {'lower_q': 37.83213114104956, 'upper_q': 94.72632974475508}. Best is trial 3 with value: 0.07512645065702193.
[I 2026-01-30 10:07:02,538] Trial 0 finished with value: 0.07543340575300593 and parameters: {'lower_q': 37.49866032450562, 'upper_q': 84.19654698523715}. Best is trial 0 with value: 0.07543340575300593.
[I 2026-01-30 10:11:17,485] Trial 4 finished with value: 0.07545638013718374 and parameters: {'lower_q': 15.316824786816197, 'upper_q': 80.426

Running optimization for ALS_factors=10 with sigmoid_propensity...


[I 2026-01-30 10:25:55,476] Trial 0 finished with value: 0.09070460993271304 and parameters: {'p': 2.6409149955034046, 'beta': 0.1608085873047289}. Best is trial 0 with value: 0.09070460993271304.
[I 2026-01-30 10:25:55,792] Trial 3 finished with value: 0.10094162269150586 and parameters: {'p': 0.8064415679769084, 'beta': 0.9635278769639519}. Best is trial 3 with value: 0.10094162269150586.
[I 2026-01-30 10:25:55,913] Trial 1 finished with value: 0.09036387168510387 and parameters: {'p': 4.381397672869023, 'beta': 0.18618750426686959}. Best is trial 3 with value: 0.10094162269150586.
[I 2026-01-30 10:25:56,503] Trial 2 finished with value: 0.09137222734886385 and parameters: {'p': 3.9988433140702058, 'beta': 0.22502888419299583}. Best is trial 3 with value: 0.10094162269150586.
[I 2026-01-30 10:26:34,909] Trial 4 finished with value: 0.09897519036076431 and parameters: {'p': 2.5894810791033165, 'beta': 0.804469539681696}. Best is trial 3 with value: 0.10094162269150586.
[I 2026-01-30 1

Running optimization for ALS_factors=10 with power_lift...


[I 2026-01-30 10:29:31,130] Trial 1 finished with value: 0.09840643667446695 and parameters: {'p': 0.1962671404952741}. Best is trial 1 with value: 0.09840643667446695.
[I 2026-01-30 10:29:31,377] Trial 3 finished with value: 0.0763305445809577 and parameters: {'p': 0.508414931172264}. Best is trial 1 with value: 0.09840643667446695.
[I 2026-01-30 10:29:31,669] Trial 2 finished with value: 0.09949741569704201 and parameters: {'p': 0.24499007526353878}. Best is trial 2 with value: 0.09949741569704201.
[I 2026-01-30 10:29:32,484] Trial 0 finished with value: 0.010682942532240104 and parameters: {'p': 0.95035287912468}. Best is trial 2 with value: 0.09949741569704201.
[I 2026-01-30 10:30:10,668] Trial 4 finished with value: 2.5881599814219454e-05 and parameters: {'p': 1.333679099885325}. Best is trial 2 with value: 0.09949741569704201.
[I 2026-01-30 10:30:11,100] Trial 5 finished with value: 0.09955811776923734 and parameters: {'p': 0.25456267182413783}. Best is trial 5 with value: 0.0995

Running optimization for ALS_factors=100 with no_weighting...


[I 2026-01-30 10:32:48,659] Trial 0 finished with value: 0.131321756222197 and parameters: {}. Best is trial 0 with value: 0.131321756222197.
[I 2026-01-30 10:33:20,621] A new study created in memory with name: no-name-63adced9-7601-4f58-a199-24eb87ec5cf4


Running optimization for ALS_factors=100 with bm25...


[I 2026-01-30 10:34:41,049] Trial 2 finished with value: 0.10433326624727622 and parameters: {'bm25_k1': 362.8004977575351, 'bm25_b': 0.34130229611178364}. Best is trial 2 with value: 0.10433326624727622.
[I 2026-01-30 10:34:41,407] Trial 1 finished with value: 0.10035880185612596 and parameters: {'bm25_k1': 370.2812088759295, 'bm25_b': 0.7468545165189714}. Best is trial 2 with value: 0.10433326624727622.
[I 2026-01-30 10:34:41,799] Trial 3 finished with value: 0.10049439233833427 and parameters: {'bm25_k1': 574.1764021562484, 'bm25_b': 0.38702549642510187}. Best is trial 2 with value: 0.10433326624727622.




[I 2026-01-30 10:35:13,987] Trial 0 finished with value: 0.17104148728927204 and parameters: {'bm25_k1': 5.807540265751959, 'bm25_b': 0.8488361531754721}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:36:01,704] Trial 6 finished with value: 0.10973767475979215 and parameters: {'bm25_k1': 136.80461240309162, 'bm25_b': 0.9570032618853551}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:36:01,818] Trial 4 finished with value: 0.10037991754453696 and parameters: {'bm25_k1': 483.4495264907254, 'bm25_b': 0.5693567947535958}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:36:01,916] Trial 5 finished with value: 0.09502526606803706 and parameters: {'bm25_k1': 781.991850434663, 'bm25_b': 0.7462910696433608}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:36:35,544] Trial 7 finished with value: 0.11519962988638557 and parameters: {'bm25_k1': 137.3641307274724, 'bm25_b': 0.6221113843408214}. Best is trial 0 with value: 0



[I 2026-01-30 10:37:20,732] Trial 8 finished with value: 0.10642616142451355 and parameters: {'bm25_k1': 321.2481140839611, 'bm25_b': 0.05949917038546537}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:37:21,554] Trial 9 finished with value: 0.14094418804139433 and parameters: {'bm25_k1': 40.81471317976915, 'bm25_b': 0.6315561788530554}. Best is trial 0 with value: 0.17104148728927204.




[I 2026-01-30 10:37:44,316] Trial 10 finished with value: 0.09408343974526687 and parameters: {'bm25_k1': 522.894368243669, 'bm25_b': 0.8929551597870157}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:38:00,360] Trial 11 finished with value: 0.11532696500573271 and parameters: {'bm25_k1': 143.3116764818584, 'bm25_b': 0.5096736740136217}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:38:41,363] Trial 12 finished with value: 0.09578951293414362 and parameters: {'bm25_k1': 863.8274290647827, 'bm25_b': 0.666493511497507}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:38:41,541] Trial 13 finished with value: 0.0892498108901033 and parameters: {'bm25_k1': 977.7216894678162, 'bm25_b': 0.925945355878379}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:39:03,812] Trial 14 finished with value: 0.1225127252439474 and parameters: {'bm25_k1': 87.59579911489763, 'bm25_b': 0.7730824544790905}. Best is trial 0 with value: 0



[I 2026-01-30 10:39:19,459] Trial 15 finished with value: 0.15976554816154415 and parameters: {'bm25_k1': 16.01111296315748, 'bm25_b': 0.8012693957423095}. Best is trial 0 with value: 0.17104148728927204.
[I 2026-01-30 10:40:01,091] Trial 17 finished with value: 0.1718996025203015 and parameters: {'bm25_k1': 3.6915452916512734, 'bm25_b': 0.8011480654017011}. Best is trial 17 with value: 0.1718996025203015.
[I 2026-01-30 10:40:02,067] Trial 16 finished with value: 0.15517520124481943 and parameters: {'bm25_k1': 20.485686024082682, 'bm25_b': 0.8274386808102782}. Best is trial 17 with value: 0.1718996025203015.
[I 2026-01-30 10:40:03,964] Trial 18 finished with value: 0.14039519581874707 and parameters: {'bm25_k1': 43.524800295519356, 'bm25_b': 0.3501732467897569}. Best is trial 17 with value: 0.1718996025203015.
[I 2026-01-30 10:40:05,706] Trial 19 finished with value: 0.10259164476509278 and parameters: {'bm25_k1': 251.47947008000654, 'bm25_b': 0.8522347992135624}. Best is trial 17 with

Running optimization for ALS_factors=100 with tfidf...


[I 2026-01-30 10:40:59,271] Trial 0 finished with value: 0.16187185624065334 and parameters: {}. Best is trial 0 with value: 0.16187185624065334.
[I 2026-01-30 10:41:31,716] A new study created in memory with name: no-name-a161e34d-acd6-4836-84bc-9a0c269d7e49


Running optimization for ALS_factors=100 with log...


[I 2026-01-30 10:41:52,635] Trial 0 finished with value: 0.14743598210264203 and parameters: {}. Best is trial 0 with value: 0.14743598210264203.
[I 2026-01-30 10:42:24,966] A new study created in memory with name: no-name-324ea7db-bcfc-47d4-9a2d-1db581e5e3b0


Running optimization for ALS_factors=100 with confidence...


[I 2026-01-30 10:43:44,919] Trial 0 finished with value: 0.04569656325981567 and parameters: {'conf_alpha': 149.60243924624308}. Best is trial 0 with value: 0.04569656325981567.
[I 2026-01-30 10:43:45,579] Trial 2 finished with value: 0.1425744298894083 and parameters: {'conf_alpha': 5.996994494514863}. Best is trial 2 with value: 0.1425744298894083.
[I 2026-01-30 10:43:45,916] Trial 3 finished with value: 0.0772757558170844 and parameters: {'conf_alpha': 36.90874032350009}. Best is trial 2 with value: 0.1425744298894083.




[I 2026-01-30 10:44:18,228] Trial 1 finished with value: 0.06846924461623177 and parameters: {'conf_alpha': 51.15587742332924}. Best is trial 2 with value: 0.1425744298894083.
[I 2026-01-30 10:45:05,889] Trial 4 finished with value: 0.052016342418598364 and parameters: {'conf_alpha': 108.95255247086911}. Best is trial 2 with value: 0.1425744298894083.
[I 2026-01-30 10:45:06,488] Trial 5 finished with value: 0.08312389999906447 and parameters: {'conf_alpha': 30.86190481567694}. Best is trial 2 with value: 0.1425744298894083.
[I 2026-01-30 10:45:06,545] Trial 6 finished with value: 0.061259315437127426 and parameters: {'conf_alpha': 69.47095862601171}. Best is trial 2 with value: 0.1425744298894083.




[I 2026-01-30 10:45:40,527] Trial 7 finished with value: 0.1699253146311345 and parameters: {'conf_alpha': 1.449770352422187}. Best is trial 7 with value: 0.1699253146311345.
[I 2026-01-30 10:46:26,051] Trial 9 finished with value: 0.061371423373381626 and parameters: {'conf_alpha': 69.12224334979567}. Best is trial 7 with value: 0.1699253146311345.
[I 2026-01-30 10:46:26,229] Trial 8 finished with value: 0.062314402860519216 and parameters: {'conf_alpha': 65.9458651363438}. Best is trial 7 with value: 0.1699253146311345.




[I 2026-01-30 10:46:32,117] Trial 10 finished with value: 0.058357482657754124 and parameters: {'conf_alpha': 79.39218709654341}. Best is trial 7 with value: 0.1699253146311345.




[I 2026-01-30 10:47:04,740] Trial 11 finished with value: 0.07412505261708756 and parameters: {'conf_alpha': 40.740732076323724}. Best is trial 7 with value: 0.1699253146311345.
[I 2026-01-30 10:47:45,375] Trial 12 finished with value: 0.04894933420580839 and parameters: {'conf_alpha': 127.25782642376019}. Best is trial 7 with value: 0.1699253146311345.
[I 2026-01-30 10:47:45,527] Trial 13 finished with value: 0.13404371417058486 and parameters: {'conf_alpha': 7.7442270538372355}. Best is trial 7 with value: 0.1699253146311345.
[I 2026-01-30 10:47:46,149] Trial 14 finished with value: 0.15147899107645152 and parameters: {'conf_alpha': 4.391428374574824}. Best is trial 7 with value: 0.1699253146311345.
[I 2026-01-30 10:48:27,888] Trial 15 finished with value: 0.15218335710159864 and parameters: {'conf_alpha': 4.276674126411225}. Best is trial 7 with value: 0.1699253146311345.
[I 2026-01-30 10:49:05,257] Trial 17 finished with value: 0.1464170949593133 and parameters: {'conf_alpha': 5.28

Running optimization for ALS_factors=100 with power...


[I 2026-01-30 10:51:05,042] Trial 2 finished with value: 0.1356286845550872 and parameters: {'power_p': 0.960289959637353}. Best is trial 2 with value: 0.1356286845550872.
[I 2026-01-30 10:51:05,509] Trial 1 finished with value: 0.15780987511642197 and parameters: {'power_p': 0.47714255920378423}. Best is trial 1 with value: 0.15780987511642197.
[I 2026-01-30 10:51:05,510] Trial 0 finished with value: 0.10972195173895206 and parameters: {'power_p': 1.1722625832953815}. Best is trial 1 with value: 0.15780987511642197.




[I 2026-01-30 10:51:23,895] Trial 3 finished with value: 0.11934578004492365 and parameters: {'power_p': 1.0985968182309662}. Best is trial 1 with value: 0.15780987511642197.




[I 2026-01-30 10:52:25,740] Trial 5 finished with value: 0.1592873584855438 and parameters: {'power_p': 0.5623046594177248}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:52:26,179] Trial 6 finished with value: 0.09145731698450607 and parameters: {'power_p': 1.3045344079469692}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:52:26,191] Trial 4 finished with value: 0.10798027031275803 and parameters: {'power_p': 1.1842898894116718}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:52:58,929] Trial 7 finished with value: 0.07578791796277239 and parameters: {'power_p': 1.4145332202460854}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:53:45,814] Trial 8 finished with value: 0.06647854608126946 and parameters: {'power_p': 1.4816193279343628}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:53:46,141] Trial 10 finished with value: 0.15439129335618698 and parameters: {'power_p': 0.7369662801732209}. Best



[I 2026-01-30 10:54:22,945] Trial 11 finished with value: 0.14501953300314907 and parameters: {'power_p': 0.284845946817299}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:55:05,392] Trial 12 finished with value: 0.06488104409092349 and parameters: {'power_p': 1.4932093616198119}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:55:05,478] Trial 13 finished with value: 0.12524940413541577 and parameters: {'power_p': 0.13261112015334553}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:55:27,375] Trial 14 finished with value: 0.15359761017564305 and parameters: {'power_p': 0.38711240362760047}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:55:46,632] Trial 15 finished with value: 0.15887325751871959 and parameters: {'power_p': 0.5214898865596391}. Best is trial 5 with value: 0.1592873584855438.
[I 2026-01-30 10:56:25,244] Trial 17 finished with value: 0.15927257942981854 and parameters: {'power_p': 0.5517207151290688

Running optimization for ALS_factors=100 with normalized...


[I 2026-01-30 10:57:23,173] Trial 0 finished with value: 0.06250734770997242 and parameters: {}. Best is trial 0 with value: 0.06250734770997242.
[I 2026-01-30 10:57:55,648] A new study created in memory with name: no-name-b0a8f3c3-ce37-436b-83cf-26c3315e1136


Running optimization for ALS_factors=100 with pmi...


[I 2026-01-30 10:58:16,929] Trial 0 finished with value: 0.16560076612843566 and parameters: {}. Best is trial 0 with value: 0.16560076612843566.
[I 2026-01-30 10:58:49,599] A new study created in memory with name: no-name-0d47374f-3523-4fc3-b2d6-9a90b9fe1fc4


Running optimization for ALS_factors=100 with robust_user_centric...


[I 2026-01-30 11:03:55,590] Trial 0 finished with value: 0.15142964547107038 and parameters: {'scale_factor': 9.065984039553442}. Best is trial 0 with value: 0.15142964547107038.
[I 2026-01-30 11:03:56,484] Trial 3 finished with value: 0.14899793676767595 and parameters: {'scale_factor': 7.861911944382816}. Best is trial 0 with value: 0.15142964547107038.
[I 2026-01-30 11:03:56,989] Trial 2 finished with value: 0.06752354322839166 and parameters: {'scale_factor': 0.47558205414023025}. Best is trial 0 with value: 0.15142964547107038.
[I 2026-01-30 11:04:06,061] Trial 1 finished with value: 0.15021172594419657 and parameters: {'scale_factor': 8.39261796407882}. Best is trial 0 with value: 0.15142964547107038.
[I 2026-01-30 11:09:05,047] Trial 6 finished with value: 0.15055512619577718 and parameters: {'scale_factor': 8.599142416921612}. Best is trial 0 with value: 0.15142964547107038.
[I 2026-01-30 11:09:05,258] Trial 4 finished with value: 0.13737954973615119 and parameters: {'scale_fac

Running optimization for ALS_factors=100 with robust_user_centric_weight_v2...


[I 2026-01-30 11:30:51,368] Trial 2 finished with value: 0.08438849921913427 and parameters: {'lower_q': 26.34900373196878, 'upper_q': 71.60687887281625}. Best is trial 2 with value: 0.08438849921913427.
[I 2026-01-30 11:30:53,952] Trial 0 finished with value: 0.08512784804145294 and parameters: {'lower_q': 14.330779970503267, 'upper_q': 58.90078507184058}. Best is trial 0 with value: 0.08512784804145294.
[I 2026-01-30 11:30:54,171] Trial 1 finished with value: 0.08471607170634295 and parameters: {'lower_q': 26.455419523061657, 'upper_q': 68.22022299128602}. Best is trial 0 with value: 0.08512784804145294.
[I 2026-01-30 11:30:59,529] Trial 3 finished with value: 0.08357802443440866 and parameters: {'lower_q': 24.798791420847685, 'upper_q': 80.28784355338509}. Best is trial 0 with value: 0.08512784804145294.
[I 2026-01-30 11:35:54,024] Trial 4 finished with value: 0.08363375102450993 and parameters: {'lower_q': 20.37644374585193, 'upper_q': 78.55880219961159}. Best is trial 0 with value

Running optimization for ALS_factors=100 with sigmoid_propensity...


[I 2026-01-30 11:53:45,526] Trial 1 finished with value: 0.12078354274577353 and parameters: {'p': 3.628770426847002, 'beta': 0.24771655849619778}. Best is trial 1 with value: 0.12078354274577353.
[I 2026-01-30 11:53:46,327] Trial 0 finished with value: 0.13177678826253036 and parameters: {'p': 4.7582468198341346, 'beta': 0.4416555016462006}. Best is trial 0 with value: 0.13177678826253036.
[I 2026-01-30 11:53:46,513] Trial 3 finished with value: 0.13831898613552995 and parameters: {'p': 0.24324239161417902, 'beta': 0.5293857482443787}. Best is trial 3 with value: 0.13831898613552995.
[I 2026-01-30 11:54:16,912] Trial 2 finished with value: 0.09506790251420066 and parameters: {'p': 3.960819043084531, 'beta': 0.04517744800326684}. Best is trial 3 with value: 0.13831898613552995.
[I 2026-01-30 11:55:05,668] Trial 5 finished with value: 0.12102564932176163 and parameters: {'p': 0.9634456916023173, 'beta': 0.2609920256379977}. Best is trial 3 with value: 0.13831898613552995.
[I 2026-01-30 

Running optimization for ALS_factors=100 with power_lift...


[I 2026-01-30 12:01:04,688] Trial 0 finished with value: 0.12213412208407058 and parameters: {'p': 0.7490343578987698}. Best is trial 0 with value: 0.12213412208407058.
[I 2026-01-30 12:01:05,255] Trial 3 finished with value: 0.07204701760401597 and parameters: {'p': 1.0744590879869769}. Best is trial 0 with value: 0.12213412208407058.
[I 2026-01-30 12:01:05,619] Trial 2 finished with value: 0.15220963690150588 and parameters: {'p': 0.4739872057682206}. Best is trial 2 with value: 0.15220963690150588.




[I 2026-01-30 12:01:37,029] Trial 1 finished with value: 0.049552013984328326 and parameters: {'p': 1.1834452998390856}. Best is trial 2 with value: 0.15220963690150588.
[I 2026-01-30 12:02:25,194] Trial 6 finished with value: 0.09249927921171584 and parameters: {'p': 0.9577574089410725}. Best is trial 2 with value: 0.15220963690150588.
[I 2026-01-30 12:02:25,500] Trial 4 finished with value: 0.06897392903647954 and parameters: {'p': 1.0897309935535406}. Best is trial 2 with value: 0.15220963690150588.
[I 2026-01-30 12:02:25,926] Trial 5 finished with value: 0.06869352898628774 and parameters: {'p': 1.0911271058071499}. Best is trial 2 with value: 0.15220963690150588.




[I 2026-01-30 12:03:00,581] Trial 7 finished with value: 0.1459453359365008 and parameters: {'p': 0.5482787262548102}. Best is trial 2 with value: 0.15220963690150588.
[I 2026-01-30 12:03:45,400] Trial 8 finished with value: 0.12986284434802156 and parameters: {'p': 0.6937382556898682}. Best is trial 2 with value: 0.15220963690150588.
[I 2026-01-30 12:03:46,234] Trial 9 finished with value: 0.07151848607469667 and parameters: {'p': 1.0772871172029963}. Best is trial 2 with value: 0.15220963690150588.




[I 2026-01-30 12:04:09,518] Trial 10 finished with value: 0.14696740462337582 and parameters: {'p': 0.5363860783899831}. Best is trial 2 with value: 0.15220963690150588.
[I 2026-01-30 12:04:24,132] Trial 11 finished with value: 0.15389283625134473 and parameters: {'p': 0.19791840640680897}. Best is trial 11 with value: 0.15389283625134473.
[I 2026-01-30 12:05:06,023] Trial 12 finished with value: 0.14168749223925628 and parameters: {'p': 0.5931048796695528}. Best is trial 11 with value: 0.15389283625134473.
[I 2026-01-30 12:05:06,550] Trial 13 finished with value: 0.1360437326135611 and parameters: {'p': 0.12303263007428455}. Best is trial 11 with value: 0.15389283625134473.
[I 2026-01-30 12:05:29,740] Trial 14 finished with value: 0.15219265334406368 and parameters: {'p': 0.19041461526917858}. Best is trial 11 with value: 0.15389283625134473.




[I 2026-01-30 12:05:41,156] Trial 15 finished with value: 0.13636529567094358 and parameters: {'p': 0.12422404133762173}. Best is trial 11 with value: 0.15389283625134473.




[I 2026-01-30 12:06:26,586] Trial 16 finished with value: 0.14144831396653526 and parameters: {'p': 0.1440733558007744}. Best is trial 11 with value: 0.15389283625134473.
[I 2026-01-30 12:06:27,308] Trial 17 finished with value: 0.16445197183219615 and parameters: {'p': 0.3200297249072065}. Best is trial 17 with value: 0.16445197183219615.
[I 2026-01-30 12:06:30,181] Trial 18 finished with value: 0.00790366358894014 and parameters: {'p': 1.4826345150802687}. Best is trial 17 with value: 0.16445197183219615.
[I 2026-01-30 12:06:31,833] Trial 19 finished with value: 0.1621389483598752 and parameters: {'p': 0.3561350679055588}. Best is trial 17 with value: 0.16445197183219615.


Unnamed: 0,Algorithm,Strategy,Best Val NDCG@20,Test NDCG@20,Test Precision@20,Final Train Time (s),Best Params
13,ALS_factors=100,bm25,0.1719,0.19316,0.25224,11.137091,"{'bm25_k1': 3.6915452916512734, 'bm25_b': 0.80..."
16,ALS_factors=100,confidence,0.169925,0.189744,0.25075,11.147264,{'conf_alpha': 1.449770352422187}
19,ALS_factors=100,pmi,0.165601,0.185581,0.240786,11.131036,{}
23,ALS_factors=100,power_lift,0.164452,0.183018,0.238844,11.150032,{'p': 0.3200297249072065}
14,ALS_factors=100,tfidf,0.161872,0.179705,0.24295,11.146544,{}
17,ALS_factors=100,power,0.159287,0.178433,0.235119,11.150032,{'power_p': 0.5623046594177248}
22,ALS_factors=100,sigmoid_propensity,0.153111,0.174393,0.227104,11.128924,"{'p': 0.2279499441848788, 'beta': 0.9773243639..."
20,ALS_factors=100,robust_user_centric,0.15284,0.173146,0.225228,10.704933,{'scale_factor': 9.971762914520497}
15,ALS_factors=100,log,0.147436,0.167481,0.217905,11.14096,{}
12,ALS_factors=100,no_weighting,0.131322,0.142881,0.204102,11.130598,{}


In [None]:
import glob

all_results = []
# Match any CSV in the result folder
for f in glob.glob(f"{results_folder}/*.csv"):
    all_results.append(pd.read_csv(f))

if all_results:
    experiment_results = pd.concat(all_results)
    experiment_results = experiment_results.sort_values("Test NDCG@20", ascending=False)
    experiment_results.to_csv(results_filename, index=False)
else:
    print("No results found.")

experiment_results