In [None]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, log_idf_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, ranking_metrics_at_k

import cornac


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories.
# The membership check keeps the cell idempotent: re-running it does not keep
# appending duplicate entries to sys.path.
PARENT_DIR = os.path.abspath("..")
if PARENT_DIR not in sys.path:
    sys.path.append(PARENT_DIR)

from utils.sparse import transform_dataframe_to_sparse

In [4]:
# NOTE(review): absolute, machine-specific path — confirm before running elsewhere.
STEAM_CSV_PATH = "/home/coder/projects/rec-sys-research/data/steam/steam_recommendations.csv"
# Only these columns are read; the list also fixes the column order of the frame.
STEAM_COLUMNS = ['user_id', 'app_id', 'hours']

# Read -> select/reorder columns -> drop exact duplicate rows -> drop rows with
# missing values -> rename to the (user_id, item_id, target) names used below.
steam_df = (
    pd.read_csv(STEAM_CSV_PATH, usecols=STEAM_COLUMNS)[STEAM_COLUMNS]
    .drop_duplicates()
    .dropna()
    .rename(columns={'app_id': 'item_id', 'hours': 'target'})
)

# Displayed as (distinct users, distinct items, number of interaction rows).
(
    steam_df['user_id'].nunique(),
    steam_df['item_id'].nunique(),
    len(steam_df),
)

(13781059, 37610, 41154773)

In [5]:
# Turn the interaction frame into a sparse matrix; the helper also returns the
# user and item mappings that go with it.
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    steam_df,
    row_field='user_id',
    col_field='item_id',
    data_field='target',
)

# Two nested 90/10 splits with the same seed: first hold out the test set,
# then split what remains into train and validation.
split_kwargs = dict(train_percentage=0.9, random_state=42)
train_val_mat, test_mat = train_test_split(user_item_matrix, **split_kwargs)
train_mat, val_mat = train_test_split(train_val_mat, **split_kwargs)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (13781059, 37610), Val Shape: (13781059, 37610), Test Shape: (13781059, 37610)


In [None]:
# Directory (relative to the current working directory) that receives the
# per-run result CSVs; it is passed as `output_dir` in the cells below.
results_folder = "results/steam_als"
# NOTE(review): not referenced in any of the visible cells — confirm whether it is still needed.
results_filename = "steam_als_results.csv"

# Used inside run_hyperparameter_optimization to time the final model fit.
import time

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
    n_jobs: int = -1,
) -> pd.DataFrame:
    """Tune one weighting strategy for one ALS configuration and score it on the test split.

    The strategy's parameters (if it has any) are tuned with Optuna: each trial
    weights ``train_mat``, fits a fresh model on it and is scored by NDCG@20 on
    ``val_mat``. The best parameters are then used to weight ``train_val_mat``,
    a fresh model is fitted on that, and NDCG / precision at K=10 and K=20 are
    computed against ``test_mat``.

    Args:
        train_mat: Interactions used to fit the trial models (after weighting).
            It is also passed, unweighted, to ``ranking_metrics_at_k`` as the
            training interactions for validation scoring.
        val_mat: Held-out interactions used to score each trial.
        train_val_mat: Interactions used to fit the final model (after
            weighting) and passed, unweighted, to the test-set evaluation.
        test_mat: Held-out interactions used for the final metrics.
        weighting_strategy: Key of ``strategy_specs`` below (e.g. ``"bm25"``).
        algorithm: Key of ``algorithms`` below (``"ALS_factors=10"`` or
            ``"ALS_factors=100"``).
        n_trials: Number of Optuna trials. Strategies with an empty search
            space always run exactly one trial, whatever this value is.
        output_dir: If truthy, the directory is created when missing and the
            one-row result is written to
            ``<output_dir>/<algorithm>_<weighting_strategy>_results.csv``.
            ``None`` (the default) skips writing.
        n_jobs: Forwarded to ``study.optimize``. The default ``-1`` keeps the
            previous behaviour (parallel trials). Parallel trials make the
            seeded sampler's trial sequence non-reproducible and run one matrix
            copy plus one model fit per worker; pass ``1`` for a reproducible,
            lower-memory run.

    Returns:
        A one-row DataFrame with the algorithm, strategy, number of trials,
        best validation NDCG@20, test NDCG/precision at 10 and 20, the wall
        time of the final fit in seconds, and the best parameters.

    Raises:
        ValueError: If ``weighting_strategy`` or ``algorithm`` is not a known key.
    """
    algorithms = {
        "ALS_factors=10": lambda: AlternatingLeastSquares(factors=10, regularization=10, random_state=42),
        "ALS_factors=100": lambda: AlternatingLeastSquares(factors=100, regularization=10, random_state=42),
    }

    # Single source of truth per strategy: (weighting callable, Optuna search space).
    # The search space maps each suggested parameter name to its (low, high) range;
    # insertion order is the order in which parameters are suggested. Validation,
    # weighting and the "is it tunable" decision are all derived from this table so
    # they cannot drift apart.
    strategy_specs = {
        "no_weighting": (
            lambda mat, params: mat,
            {},
        ),
        "bm25": (
            lambda mat, params: bm25_weight(mat, K1=params["bm25_k1"], B=params["bm25_b"]),
            {"bm25_k1": (0.1, 1000), "bm25_b": (0.0, 1.0)},
        ),
        "tfidf": (
            lambda mat, params: tfidf_weight(mat),
            {},
        ),
        "log": (
            lambda mat, params: log_weight(mat),
            {},
        ),
        "log_idf": (
            lambda mat, params: log_idf_weight(mat, alpha=params["conf_alpha"]),
            {"conf_alpha": (1.0, 150.0)},
        ),
        "power": (
            lambda mat, params: power_weight(mat, p=params["power_p"]),
            {"power_p": (0.1, 1.5)},
        ),
        "normalized": (
            lambda mat, params: normalized_weight(mat),
            {},
        ),
        "pmi": (
            lambda mat, params: pmi_weight(mat),
            {},
        ),
        "robust_user_centric": (
            lambda mat, params: robust_user_centric_weight(mat, scale_factor=params["scale_factor"]),
            {"scale_factor": (0.1, 10.0)},
        ),
        "robust_user_centric_weight_v2": (
            lambda mat, params: robust_user_centric_weight_v2(
                mat, lower_q=params["lower_q"], upper_q=params["upper_q"]
            ),
            {"lower_q": (5.0, 45.0), "upper_q": (55.0, 95.0)},
        ),
        "sigmoid_propensity": (
            lambda mat, params: sigmoid_propensity_weight(mat, p=params["p"], beta=params["beta"]),
            {"p": (0.1, 5.0), "beta": (0.0, 1.0)},
        ),
        "power_lift": (
            lambda mat, params: power_lift_weight(mat, p=params["p"]),
            {"p": (0.1, 1.5)},
        ),
    }

    if weighting_strategy not in strategy_specs:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")

    apply_weighting, search_space = strategy_specs[weighting_strategy]
    make_model = algorithms[algorithm]

    # Create the output directory up front so an unwritable location fails
    # before the optimisation runs rather than after it.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"Running optimization for {algorithm} with {weighting_strategy}...")

    def get_weighted_matrix(matrix, params):
        # Weight a copy so the caller's matrix is never modified, regardless of
        # whether the weighting function works in place.
        return apply_weighting(matrix.copy(), params)

    def objective(trial):
        # Suggest every parameter of the strategy's search space (none for
        # parameter-free strategies).
        params = {
            name: trial.suggest_float(name, low, high)
            for name, (low, high) in search_space.items()
        }
        weighted_train = get_weighted_matrix(train_mat, params)

        model = make_model()
        model.fit(weighted_train, show_progress=False)

        # Score on the validation split; the unweighted train matrix is passed
        # as the training interactions.
        return ranking_metrics_at_k(model, train_mat, val_mat, K=20, show_progress=False)['ndcg']

    # A strategy without tunable parameters needs exactly one trial.
    current_trials = n_trials if search_space else 1
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=current_trials, n_jobs=n_jobs)

    # --- Final retraining & testing ---
    # Weight the full train+val matrix with the best parameters found.
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    final_model = make_model()

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments.
    fit_start = time.perf_counter()
    final_model.fit(weighted_train_val, show_progress=False)
    fit_seconds = time.perf_counter() - fit_start

    metrics_at_10 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    metrics_at_20 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results_df = pd.DataFrame([{
        "Algorithm": algorithm,
        "Strategy": weighting_strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": metrics_at_10['ndcg'],
        "Test NDCG@20": metrics_at_20['ndcg'],
        "Test Precision@10": metrics_at_10['precision'],
        "Test Precision@20": metrics_at_20['precision'],
        "Final Train Time (s)": fit_seconds,
        "Best Params": best_params
    }])

    if output_dir:
        output_path = os.path.join(output_dir, f"{algorithm}_{weighting_strategy}_results.csv")
        results_df.to_csv(output_path, index=False)
    return results_df

In [7]:
# Create the results directory (including missing parents). exist_ok=True makes
# this a no-op when the directory is already there and avoids the race between
# a separate existence check and the creation.
os.makedirs(results_folder, exist_ok=True)

In [8]:
# no_weighting has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="no_weighting",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 15:39:29,376] A new study created in memory with name: no-name-9192945e-1a52-4b46-a873-2c9b0fadc627


Running optimization for ALS_factors=10 with no_weighting...


[I 2026-01-30 15:39:51,926] Trial 0 finished with value: 0.03824927046174752 and parameters: {}. Best is trial 0 with value: 0.03824927046174752.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,no_weighting,1,0.038249,0.030247,0.040574,0.056035,0.093654,14.714479,{}


In [9]:
# no_weighting has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="no_weighting",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 15:40:43,978] A new study created in memory with name: no-name-39af1d71-0195-4593-a483-f426cd09ca55


Running optimization for ALS_factors=100 with no_weighting...


[I 2026-01-30 15:41:43,351] Trial 0 finished with value: 0.04569430417941584 and parameters: {}. Best is trial 0 with value: 0.04569430417941584.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,no_weighting,1,0.045694,0.03699,0.046245,0.065501,0.10069,52.191789,{}


In [10]:
# bm25 (tunes bm25_k1 and bm25_b) with the 10-factor ALS model.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="bm25",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 15:43:26,592] A new study created in memory with name: no-name-6669479c-9792-42ba-bfe9-a4713081bdfa


Running optimization for ALS_factors=10 with bm25...


[I 2026-01-30 15:44:52,802] Trial 0 finished with value: 0.041667996073901106 and parameters: {'bm25_k1': 496.0887822875346, 'bm25_b': 0.8611074987417343}. Best is trial 0 with value: 0.041667996073901106.
[I 2026-01-30 15:44:52,880] Trial 2 finished with value: 0.03593618570460748 and parameters: {'bm25_k1': 776.1985693542257, 'bm25_b': 0.12324435392286648}. Best is trial 0 with value: 0.041667996073901106.
[I 2026-01-30 15:44:52,906] Trial 3 finished with value: 0.04159174268959521 and parameters: {'bm25_k1': 581.4437568935733, 'bm25_b': 0.8274505606126228}. Best is trial 0 with value: 0.041667996073901106.
[I 2026-01-30 15:44:52,922] Trial 1 finished with value: 0.042359809396355134 and parameters: {'bm25_k1': 179.37519632146103, 'bm25_b': 0.4967744784684035}. Best is trial 1 with value: 0.042359809396355134.
[I 2026-01-30 15:46:18,516] Trial 4 finished with value: 0.03948946073692226 and parameters: {'bm25_k1': 623.2600267280279, 'bm25_b': 0.3566237615915543}. Best is trial 1 with 

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,bm25,20,0.052554,0.043694,0.056092,0.07879,0.124467,14.996163,"{'bm25_k1': 4.207673145032096, 'bm25_b': 0.990..."


In [14]:
# bm25 (tunes bm25_k1 and bm25_b) with the 100-factor ALS model.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="bm25",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 21:16:37,432] A new study created in memory with name: no-name-7cbda211-732f-4ffe-bdbb-90a83adb7c92


Running optimization for ALS_factors=100 with bm25...


[I 2026-01-30 21:17:36,196] Trial 0 finished with value: 0.0327591199298915 and parameters: {'bm25_k1': 477.9571640077998, 'bm25_b': 0.18198696882652543}. Best is trial 0 with value: 0.0327591199298915.
[I 2026-01-30 21:18:35,264] Trial 1 finished with value: 0.03942975655628422 and parameters: {'bm25_k1': 567.0176501347212, 'bm25_b': 0.26623926019029454}. Best is trial 1 with value: 0.03942975655628422.
[I 2026-01-30 21:19:34,189] Trial 2 finished with value: 0.038322482293737775 and parameters: {'bm25_k1': 536.6005444147751, 'bm25_b': 0.75595350858687}. Best is trial 1 with value: 0.03942975655628422.
[I 2026-01-30 21:20:33,413] Trial 3 finished with value: 0.045099505196489885 and parameters: {'bm25_k1': 316.3234843884074, 'bm25_b': 0.42739603037885443}. Best is trial 3 with value: 0.045099505196489885.




[I 2026-01-30 21:21:32,201] Trial 4 finished with value: 0.03961619130269501 and parameters: {'bm25_k1': 612.7450836616656, 'bm25_b': 0.7615843602069128}. Best is trial 3 with value: 0.045099505196489885.
[I 2026-01-30 21:22:31,174] Trial 5 finished with value: 0.029386076019762453 and parameters: {'bm25_k1': 433.1723895419889, 'bm25_b': 0.5521661254618896}. Best is trial 3 with value: 0.045099505196489885.
[I 2026-01-30 21:23:30,099] Trial 6 finished with value: 0.030573405538264656 and parameters: {'bm25_k1': 676.0849204939974, 'bm25_b': 0.16573605863253993}. Best is trial 3 with value: 0.045099505196489885.
[I 2026-01-30 21:24:29,072] Trial 7 finished with value: 0.03363582000173788 and parameters: {'bm25_k1': 740.2608930112854, 'bm25_b': 0.26168582890531067}. Best is trial 3 with value: 0.045099505196489885.
[I 2026-01-30 21:25:27,744] Trial 8 finished with value: 0.03547211659903023 and parameters: {'bm25_k1': 310.38121213278066, 'bm25_b': 0.42784055942860133}. Best is trial 3 wit



[I 2026-01-30 21:31:20,016] Trial 14 finished with value: 0.04361176293907011 and parameters: {'bm25_k1': 988.9956532161812, 'bm25_b': 0.9531234442922418}. Best is trial 12 with value: 0.06076606499238145.
[I 2026-01-30 21:32:18,686] Trial 15 finished with value: 0.03841868132754067 and parameters: {'bm25_k1': 187.26924137455597, 'bm25_b': 0.9963828016352553}. Best is trial 12 with value: 0.06076606499238145.
[I 2026-01-30 21:33:17,596] Trial 16 finished with value: 0.04429568203576435 and parameters: {'bm25_k1': 158.38206393097866, 'bm25_b': 0.8842940923610091}. Best is trial 12 with value: 0.06076606499238145.
[I 2026-01-30 21:34:16,287] Trial 17 finished with value: 0.055206594586966126 and parameters: {'bm25_k1': 21.217090299597036, 'bm25_b': 0.8409407371182303}. Best is trial 12 with value: 0.06076606499238145.




[I 2026-01-30 21:35:15,033] Trial 18 finished with value: 0.0401675617763036 and parameters: {'bm25_k1': 220.43419241967703, 'bm25_b': 0.8011068871958604}. Best is trial 12 with value: 0.06076606499238145.
[I 2026-01-30 21:36:13,770] Trial 19 finished with value: 0.040799266430780086 and parameters: {'bm25_k1': 119.48846670939363, 'bm25_b': 0.6370385681710332}. Best is trial 12 with value: 0.06076606499238145.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,bm25,20,0.060766,0.054619,0.065655,0.093985,0.136512,49.606564,"{'bm25_k1': 3.464734272938415, 'bm25_b': 0.973..."


In [12]:
# tfidf has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="tfidf",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 15:55:22,157] A new study created in memory with name: no-name-dcbd23b8-4538-4e65-ab84-915b4f5d200c


Running optimization for ALS_factors=10 with tfidf...


[I 2026-01-30 15:55:45,821] Trial 0 finished with value: 0.04796588820864551 and parameters: {}. Best is trial 0 with value: 0.04796588820864551.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,tfidf,1,0.047966,0.039133,0.050901,0.071277,0.114584,14.837221,{}


In [13]:
# tfidf has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="tfidf",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 15:56:54,790] A new study created in memory with name: no-name-f0d47129-f80a-4df6-90f8-131ec553ce09


Running optimization for ALS_factors=100 with tfidf...


[I 2026-01-30 15:57:55,770] Trial 0 finished with value: 0.054623041268643 and parameters: {}. Best is trial 0 with value: 0.054623041268643.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,tfidf,1,0.054623,0.050008,0.06076,0.086595,0.128038,52.58611,{}


In [14]:
# log has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 15:59:39,525] A new study created in memory with name: no-name-6f916aca-d9f0-46cc-939d-ad07c81bdfdd


Running optimization for ALS_factors=10 with log...


[I 2026-01-30 16:00:03,123] Trial 0 finished with value: 0.045315447507524156 and parameters: {}. Best is trial 0 with value: 0.045315447507524156.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,log,1,0.045315,0.037112,0.048103,0.065989,0.105739,15.070171,{}


In [15]:
# log has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 16:00:56,500] A new study created in memory with name: no-name-045bbfdb-6b6e-4eab-b814-d3705f7abbf9


Running optimization for ALS_factors=100 with log...


[I 2026-01-30 16:01:58,169] Trial 0 finished with value: 0.04906792757327218 and parameters: {}. Best is trial 0 with value: 0.04906792757327218.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,log,1,0.049068,0.032056,0.04019,0.055392,0.086068,53.062292,{}


In [16]:
# log_idf (tunes conf_alpha) with the 10-factor ALS model.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log_idf",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 16:03:42,284] A new study created in memory with name: no-name-2cb791f2-62ca-4289-a6ca-53af2b44160a


Running optimization for ALS_factors=10 with log_idf...


[I 2026-01-30 16:05:07,577] Trial 0 finished with value: 0.023363435766378015 and parameters: {'conf_alpha': 95.27044339546991}. Best is trial 0 with value: 0.023363435766378015.
[I 2026-01-30 16:05:07,735] Trial 1 finished with value: 0.022855189065980996 and parameters: {'conf_alpha': 102.55136155499844}. Best is trial 0 with value: 0.023363435766378015.
[I 2026-01-30 16:05:07,757] Trial 3 finished with value: 0.022918585068671907 and parameters: {'conf_alpha': 101.70064672395974}. Best is trial 0 with value: 0.023363435766378015.
[I 2026-01-30 16:05:07,794] Trial 2 finished with value: 0.027551004638078323 and parameters: {'conf_alpha': 54.91068308786028}. Best is trial 2 with value: 0.027551004638078323.
[I 2026-01-30 16:06:32,057] Trial 4 finished with value: 0.021824907324303994 and parameters: {'conf_alpha': 120.08890011270547}. Best is trial 2 with value: 0.027551004638078323.
[I 2026-01-30 16:06:32,085] Trial 6 finished with value: 0.03353680666146343 and parameters: {'conf_al

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,log_idf,20,0.050574,0.041375,0.053683,0.075378,0.120844,14.992001,{'conf_alpha': 2.7052621584068817}


In [15]:
# log_idf (tunes conf_alpha) with the 100-factor ALS model.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log_idf",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 21:37:52,379] A new study created in memory with name: no-name-e48d3814-c211-4b88-aae7-12a5f7cd8449


Running optimization for ALS_factors=100 with log_idf...


[I 2026-01-30 21:38:50,638] Trial 0 finished with value: 0.016766720583370757 and parameters: {'conf_alpha': 99.35065085484638}. Best is trial 0 with value: 0.016766720583370757.
[I 2026-01-30 21:39:48,910] Trial 1 finished with value: 0.01900090933243443 and parameters: {'conf_alpha': 73.48284136026886}. Best is trial 1 with value: 0.01900090933243443.
[I 2026-01-30 21:40:47,159] Trial 2 finished with value: 0.018496323200194172 and parameters: {'conf_alpha': 92.96930500929183}. Best is trial 1 with value: 0.01900090933243443.




[I 2026-01-30 21:41:45,380] Trial 3 finished with value: 0.016443948597773726 and parameters: {'conf_alpha': 111.78669665395817}. Best is trial 1 with value: 0.01900090933243443.
[I 2026-01-30 21:42:43,579] Trial 4 finished with value: 0.018524874804148297 and parameters: {'conf_alpha': 101.90033796511676}. Best is trial 1 with value: 0.01900090933243443.




[I 2026-01-30 21:43:41,772] Trial 5 finished with value: 0.0157230886968202 and parameters: {'conf_alpha': 135.05147689903697}. Best is trial 1 with value: 0.01900090933243443.




[I 2026-01-30 21:44:39,961] Trial 6 finished with value: 0.020367110251144183 and parameters: {'conf_alpha': 51.63449752110568}. Best is trial 6 with value: 0.020367110251144183.
[I 2026-01-30 21:45:38,171] Trial 7 finished with value: 0.0212714451214372 and parameters: {'conf_alpha': 65.75375205918432}. Best is trial 7 with value: 0.0212714451214372.
[I 2026-01-30 21:46:36,405] Trial 8 finished with value: 0.022232125582345216 and parameters: {'conf_alpha': 19.87771347920511}. Best is trial 8 with value: 0.022232125582345216.




[I 2026-01-30 21:47:34,636] Trial 9 finished with value: 0.016850826498160266 and parameters: {'conf_alpha': 124.4644735516791}. Best is trial 8 with value: 0.022232125582345216.
[I 2026-01-30 21:48:32,838] Trial 10 finished with value: 0.05919544242066093 and parameters: {'conf_alpha': 1.4332196930253005}. Best is trial 10 with value: 0.05919544242066093.
[I 2026-01-30 21:49:31,099] Trial 11 finished with value: 0.04791436597588239 and parameters: {'conf_alpha': 3.162695707574189}. Best is trial 10 with value: 0.05919544242066093.
[I 2026-01-30 21:50:29,321] Trial 12 finished with value: 0.04197264533739763 and parameters: {'conf_alpha': 4.961306458612306}. Best is trial 10 with value: 0.05919544242066093.




[I 2026-01-30 21:51:27,624] Trial 13 finished with value: 0.02637229852981141 and parameters: {'conf_alpha': 28.131238314086744}. Best is trial 10 with value: 0.05919544242066093.




[I 2026-01-30 21:52:25,913] Trial 14 finished with value: 0.021665361025225285 and parameters: {'conf_alpha': 39.473439515081196}. Best is trial 10 with value: 0.05919544242066093.
[I 2026-01-30 21:53:24,135] Trial 15 finished with value: 0.057445479228438914 and parameters: {'conf_alpha': 1.0450116201501345}. Best is trial 10 with value: 0.05919544242066093.
[I 2026-01-30 21:54:22,385] Trial 16 finished with value: 0.03391424814313465 and parameters: {'conf_alpha': 20.932770932874934}. Best is trial 10 with value: 0.05919544242066093.
[I 2026-01-30 21:55:20,641] Trial 17 finished with value: 0.019889644822474777 and parameters: {'conf_alpha': 46.37743836932448}. Best is trial 10 with value: 0.05919544242066093.
[I 2026-01-30 21:56:18,805] Trial 18 finished with value: 0.045570813688514795 and parameters: {'conf_alpha': 2.8117340738288923}. Best is trial 10 with value: 0.05919544242066093.
[I 2026-01-30 21:57:16,997] Trial 19 finished with value: 0.01926314312603989 and parameters: {'c

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,log_idf,20,0.059195,0.050841,0.061604,0.087671,0.129272,49.558887,{'conf_alpha': 1.4332196930253005}


In [8]:
# power (tunes power_p) with the 10-factor ALS model.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 16:14:47,061] A new study created in memory with name: no-name-7e25088b-6809-406d-b9b3-44087f723a52


Running optimization for ALS_factors=10 with power...


[I 2026-01-30 16:16:10,416] Trial 1 finished with value: 0.04442033076963947 and parameters: {'power_p': 0.7318014462482192}. Best is trial 1 with value: 0.04442033076963947.
[I 2026-01-30 16:16:10,477] Trial 3 finished with value: 0.04349645105292983 and parameters: {'power_p': 0.1868452846876548}. Best is trial 1 with value: 0.04442033076963947.
[I 2026-01-30 16:16:10,515] Trial 0 finished with value: 0.037662666073793885 and parameters: {'power_p': 1.0177939413539965}. Best is trial 1 with value: 0.04442033076963947.
[I 2026-01-30 16:16:10,581] Trial 2 finished with value: 0.04518755172504818 and parameters: {'power_p': 0.6026050749276384}. Best is trial 2 with value: 0.04518755172504818.
[I 2026-01-30 16:17:33,812] Trial 5 finished with value: 0.04353197917107197 and parameters: {'power_p': 0.19083780674618975}. Best is trial 2 with value: 0.04518755172504818.
[I 2026-01-30 16:17:33,815] Trial 4 finished with value: 0.04440154359264878 and parameters: {'power_p': 0.3061809586168473

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,power,20,0.045252,0.037303,0.048092,0.065969,0.104961,14.987071,{'power_p': 0.5524939769068045}


In [16]:
# power (tunes power_p) with the 100-factor ALS model.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 21:58:54,649] A new study created in memory with name: no-name-b7e25bd0-401f-4579-b19c-ba42155bbff9


Running optimization for ALS_factors=100 with power...


[I 2026-01-30 21:59:52,587] Trial 0 finished with value: 0.04468890958144243 and parameters: {'power_p': 0.9409933106816465}. Best is trial 0 with value: 0.04468890958144243.
[I 2026-01-30 22:00:50,600] Trial 1 finished with value: 0.043489281837584735 and parameters: {'power_p': 1.0563110238961442}. Best is trial 0 with value: 0.04468890958144243.
[I 2026-01-30 22:01:48,536] Trial 2 finished with value: 0.04815851319864208 and parameters: {'power_p': 0.872547395830662}. Best is trial 2 with value: 0.04815851319864208.
[I 2026-01-30 22:02:46,411] Trial 3 finished with value: 0.049156812594798864 and parameters: {'power_p': 0.924359758731717}. Best is trial 3 with value: 0.049156812594798864.
[I 2026-01-30 22:03:44,291] Trial 4 finished with value: 0.052567242496524225 and parameters: {'power_p': 0.5725744009704944}. Best is trial 4 with value: 0.052567242496524225.
[I 2026-01-30 22:04:42,136] Trial 5 finished with value: 0.046628012494856534 and parameters: {'power_p': 0.27911911087055



[I 2026-01-30 22:06:38,036] Trial 7 finished with value: 0.029435264261666306 and parameters: {'power_p': 1.4293775013976697}. Best is trial 4 with value: 0.052567242496524225.
[I 2026-01-30 22:07:35,878] Trial 8 finished with value: 0.05236744203318813 and parameters: {'power_p': 0.6046019664217197}. Best is trial 4 with value: 0.052567242496524225.
[I 2026-01-30 22:08:33,739] Trial 9 finished with value: 0.04106964890919388 and parameters: {'power_p': 1.0453155825750875}. Best is trial 4 with value: 0.052567242496524225.
[I 2026-01-30 22:09:31,612] Trial 10 finished with value: 0.05214023497975154 and parameters: {'power_p': 0.5838924250753191}. Best is trial 4 with value: 0.052567242496524225.
[I 2026-01-30 22:10:29,545] Trial 11 finished with value: 0.05121208303065599 and parameters: {'power_p': 0.5493180976828171}. Best is trial 4 with value: 0.052567242496524225.
[I 2026-01-30 22:11:27,464] Trial 12 finished with value: 0.03910884682027885 and parameters: {'power_p': 0.102634375

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power,20,0.053205,0.046338,0.056201,0.080263,0.117913,49.611519,{'power_p': 0.7138199050943289}


In [None]:
# normalized has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="normalized",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

In [None]:
# normalized has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="normalized",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

In [11]:
# pmi has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="pmi",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 16:26:36,670] A new study created in memory with name: no-name-63dbe312-e6d8-49eb-8263-2c6dd0c63d1e


Running optimization for ALS_factors=10 with pmi...


  pmi = log((X.data * N) / denominator) # we could use np.power(X.data, p) instead of log for a softer effect
[I 2026-01-30 16:27:01,290] Trial 0 finished with value: 0.04769436467070988 and parameters: {}. Best is trial 0 with value: 0.04769436467070988.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,pmi,1,0.047694,0.039062,0.050912,0.071159,0.114757,14.978303,{}


In [18]:
# pmi has no tunable parameters, so a single trial is run regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="pmi",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 22:22:26,451] A new study created in memory with name: no-name-eb87e8f4-8608-4d4b-aef8-b8cb04323497


Running optimization for ALS_factors=100 with pmi...


  pmi = log((X.data * N) / denominator) # we could use np.power(X.data, p) instead of log for a softer effect
[I 2026-01-30 22:23:25,515] Trial 0 finished with value: 0.05844964324480874 and parameters: {}. Best is trial 0 with value: 0.05844964324480874.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,pmi,1,0.05845,0.052168,0.062659,0.089908,0.130247,49.521188,{}


In [12]:
# robust_user_centric (tunes scale_factor) with the 10-factor ALS model.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 16:27:55,757] A new study created in memory with name: no-name-bba63f14-62ca-467b-824f-07e657d4ee4b


Running optimization for ALS_factors=10 with robust_user_centric...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-01-30 16:29:28,333] Trial 1 finished with value: 0.04377872886579006 and parameters: {'scale_factor': 3.232190798581721}. Best is trial 1 with value: 0.04377872886579006.
[I 2026-01-30 16:29:28,420] Trial 0 finished with value: 0.03284957446168678 and parameters: {'scale_factor': 1.286277092846803}. Best is trial 1 with value: 0.04377872886579006.
[I 2026-01-30 16:29:28,466] Trial 2 finished with value: 0.0310802257942641 and parameters: {'scale_factor': 1.0204997309814483}. Best is trial 1 with value: 0.04377872886579006.
[I 2026-01-30 16:29:28,590] Trial 3 finished with value: 0.039298627980381036 and parameters: {'scale_factor': 1.8894673789759455}. Best is trial 1 with value: 0.04377872886579006.
[I 2026-01-30 16:31:00,894] Trial 4 finished with value: 0.03282415243492771 and parameters: {'scale_factor': 1.2213033181763802}. Best is trial 1 with value: 0.04377872886579006.
[I 2026-01-30 16:31:00,973] Trial 5 finished with value: 0.046

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,robust_user_centric,20,0.047819,0.039118,0.050901,0.070622,0.113674,14.950878,{'scale_factor': 9.879081226043372}


In [19]:
# robust_user_centric (tunes scale_factor) with the 100-factor ALS model.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 22:25:03,957] A new study created in memory with name: no-name-9285cb2d-f9ef-4460-995c-4bda86ec0186


Running optimization for ALS_factors=100 with robust_user_centric...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-01-30 22:26:07,901] Trial 0 finished with value: 0.05257084127908052 and parameters: {'scale_factor': 9.14084552337091}. Best is trial 0 with value: 0.05257084127908052.
[I 2026-01-30 22:27:11,877] Trial 1 finished with value: 0.04445954292187609 and parameters: {'scale_factor': 4.096684427636314}. Best is trial 0 with value: 0.05257084127908052.
[I 2026-01-30 22:28:15,722] Trial 2 finished with value: 0.03752616591850959 and parameters: {'scale_factor': 2.2460404400693648}. Best is trial 0 with value: 0.05257084127908052.
[I 2026-01-30 22:29:19,528] Trial 3 finished with value: 0.05296912550901698 and parameters: {'scale_factor': 9.458622911916773}. Best is trial 3 with value: 0.05296912550901698.
[I 2026-01-30 22:30:24,245] Trial 4 finished with value: 0.050601679887730976 and parameters: {'scale_factor': 7.1921305042484205}. Best is trial 3 with value: 0.05296912550901698.
[I 2026-01-30 22:31:28,429] Trial 5 finished with value: 0.0348

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,robust_user_centric,20,0.053304,0.046057,0.055862,0.079321,0.116859,49.531484,{'scale_factor': 9.976651717233675}


In [13]:
# Experiment: "robust_user_centric_weight_v2" weighting on the "ALS_factors=10"
# configuration, 20 tuning trials; output_dir points at the shared results folder.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 16:36:38,686] A new study created in memory with name: no-name-bf2e5b4b-8333-45f2-b8af-578d5c547780


Running optimization for ALS_factors=10 with robust_user_centric_weight_v2...


  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-01-30 16:38:11,087] Trial 2 finished with value: 0.030410633896958618 and parameters: {'lower_q': 35.20098531382598, 'upper_q': 92.67833744045595}. Best is trial 2 with value: 0.030410633896958618.
[I 2026-01-30 16:38:11,169] Trial 0 finished with value: 0.03090667119894906 and parameters: {'lower_q': 12.126485682155028, 'upper_q': 74.97336299936772}. Best is trial 0 with value: 0.03090667119894906.
[I 2026-01-30 16:38:11,185] Trial 3 finished with value: 0.03030168970247301 and parameters: {'lower_q': 6.6848124770025485, 'upper_q': 91.36121229778456}. Best is trial 0 with value: 0.03090667119894906.
[I 2026-01-30 16:38:11,211] Trial 1 finished with value: 0.03094962748121891 and parameters: {'lower_q': 9.812413134433262, 'upper_q': 70.94401974514017}. Best is trial 1 with value: 0.03094962748121891.
[I 2026-01-30 16:39:43,126] Trial 4 finished with value: 0.030783707408189028 and parameters: {'lower_q': 13.736257655028332, 'upper_q': 80.

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,robust_user_centric_weight_v2,20,0.031903,0.024617,0.03352,0.046155,0.07796,14.940431,"{'lower_q': 44.796934534412074, 'upper_q': 55...."


In [20]:
# Experiment: "robust_user_centric_weight_v2" weighting on the "ALS_factors=100"
# configuration, 20 tuning trials; output_dir points at the shared results folder.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-01-30 22:48:07,175] A new study created in memory with name: no-name-97eead40-3592-4788-b4f9-06e4c688162e


Running optimization for ALS_factors=100 with robust_user_centric_weight_v2...


[I 2026-01-30 22:49:10,901] Trial 0 finished with value: 0.03453882003375528 and parameters: {'lower_q': 13.750371790181077, 'upper_q': 68.32057662723072}. Best is trial 0 with value: 0.03453882003375528.
  weights = 1 / (1 + np.exp(-z_scores))
[I 2026-01-30 22:50:14,594] Trial 1 finished with value: 0.035035381720773924 and parameters: {'lower_q': 23.418432411383144, 'upper_q': 67.56940102229336}. Best is trial 1 with value: 0.035035381720773924.
[I 2026-01-30 22:51:18,295] Trial 2 finished with value: 0.03514546820418699 and parameters: {'lower_q': 20.403398988794798, 'upper_q': 84.9355830289397}. Best is trial 2 with value: 0.03514546820418699.
[I 2026-01-30 22:52:21,971] Trial 3 finished with value: 0.0345522671616429 and parameters: {'lower_q': 16.714388726586083, 'upper_q': 67.71981018384545}. Best is trial 2 with value: 0.03514546820418699.
[I 2026-01-30 22:53:25,606] Trial 4 finished with value: 0.03559565668391803 and parameters: {'lower_q': 22.37996803461394, 'upper_q': 72.77

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,robust_user_centric_weight_v2,20,0.035596,0.029174,0.037284,0.051717,0.08176,49.476779,"{'lower_q': 22.37996803461394, 'upper_q': 72.7..."


In [None]:
# Experiment: "sigmoid_propensity" weighting on the "ALS_factors=10" configuration,
# 20 tuning trials; output_dir points at the shared results folder.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

In [None]:
# Experiment: "sigmoid_propensity" weighting on the "ALS_factors=100" configuration,
# 20 tuning trials; output_dir points at the shared results folder.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

In [None]:
# Experiment: "power_lift" weighting on the "ALS_factors=10" configuration,
# 20 tuning trials; output_dir points at the shared results folder.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="power_lift",
    algorithm="ALS_factors=10",
    n_trials=20,
    output_dir=results_folder,
)

In [None]:
# Experiment: "power_lift" weighting on the "ALS_factors=100" configuration,
# 20 tuning trials; output_dir points at the shared results folder.
run_hyperparameter_optimization(
    train_mat, val_mat, train_val_mat, test_mat,
    weighting_strategy="power_lift",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

In [22]:
import glob

# Collect every per-experiment CSV found in the results folder into one
# leaderboard, ranked by test NDCG@20 (best first), and persist it to
# `results_filename`.
#
# Paths are sorted because glob returns them in arbitrary, filesystem-dependent
# order; sorting makes the load order reproducible between runs.
result_paths = sorted(glob.glob(f"{results_folder}/*.csv"))
all_results = [pd.read_csv(path) for path in result_paths]

if all_results:
    experiment_results = (
        pd.concat(all_results)
        .sort_values("Test NDCG@20", ascending=False)
        # Each source frame contributes its own row labels, so the concatenated
        # index contains duplicates; replace it with a unique 0..n-1 rank index.
        .reset_index(drop=True)
    )
    experiment_results.to_csv(results_filename, index=False)
else:
    # Bind an empty frame so the display line below neither raises NameError on
    # a fresh kernel nor silently shows a stale table left over from an earlier
    # execution of this cell.
    experiment_results = pd.DataFrame()
    print("No results found.")

experiment_results

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power_lift,20,0.066747,0.05916,0.071397,0.102919,0.1499,49.503478,{'p': 0.37512124004563385}
0,ALS_factors=100,bm25,20,0.060766,0.054619,0.065655,0.093985,0.136512,49.606564,"{'bm25_k1': 3.464734272938415, 'bm25_b': 0.973..."
0,ALS_factors=100,pmi,1,0.05845,0.052168,0.062659,0.089908,0.130247,49.521188,{}
0,ALS_factors=100,log_idf,20,0.059195,0.050841,0.061604,0.087671,0.129272,49.558887,{'conf_alpha': 1.4332196930253005}
0,ALS_factors=100,tfidf,1,0.054623,0.050008,0.06076,0.086595,0.128038,52.58611,{}
0,ALS_factors=100,power,20,0.053205,0.046338,0.056201,0.080263,0.117913,49.611519,{'power_p': 0.7138199050943289}
0,ALS_factors=10,bm25,20,0.052554,0.043694,0.056092,0.07879,0.124467,14.996163,"{'bm25_k1': 4.207673145032096, 'bm25_b': 0.990..."
0,ALS_factors=100,robust_user_centric,20,0.053304,0.046057,0.055862,0.079321,0.116859,49.531484,{'scale_factor': 9.976651717233675}
0,ALS_factors=10,log_idf,20,0.050574,0.041375,0.053683,0.075378,0.120844,14.992001,{'conf_alpha': 2.7052621584068817}
0,ALS_factors=10,pmi,1,0.047694,0.039062,0.050912,0.071159,0.114757,14.978303,{}
