In [1]:
import numpy as np
import pandas as pd
import optuna
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, log_idf_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, ranking_metrics_at_k


In [2]:
import sys
import os

# Add the parent directory to sys.path to resolve imports from sibling directories.
# The membership check keeps the cell idempotent: re-running it no longer appends
# a duplicate entry each time.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from utils.sparse import transform_dataframe_to_sparse

In [3]:
# Location of the raw interactions file. The default is the path this notebook
# was originally run with; set the TASTE_PROFILE_PATH environment variable to
# point at the file on another machine without editing this cell.
taste_profile_path = os.environ.get(
    "TASTE_PROFILE_PATH",
    "/home/coder/projects/rec-sys-research/data/The Echo Nest Taste Profile Subset.txt",
)

# Tab-separated file without a header row; only the first three columns are
# read and they are named user_id / item_id / target.
taste_profile_df = (
    pd.read_table(
        taste_profile_path,
        sep="\t",
        header=None,
        usecols=[0, 1, 2],
        names=['user_id', 'item_id', 'target'],
    )
)
# Cell output: (distinct users, distinct items, number of rows).
taste_profile_df['user_id'].nunique(), taste_profile_df['item_id'].nunique(), taste_profile_df.shape[0]

(1019318, 384546, 48373586)

In [4]:
# Convert the long-format DataFrame into a sparse matrix; the helper also
# returns the user and item mappings.
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    taste_profile_df,
    row_field='user_id',
    col_field='item_id',
    data_field='target',
)

# Two nested splits with train_percentage=0.9 and the same seed: the first
# holds out the test matrix, the second carves the validation matrix out of
# the remaining train+val matrix.
train_val_mat, test_mat = train_test_split(
    user_item_matrix, train_percentage=0.9, random_state=42
)
train_mat, val_mat = train_test_split(
    train_val_mat, train_percentage=0.9, random_state=42
)

print(
    ", ".join(
        f"{label} Shape: {matrix.shape}"
        for label, matrix in (("Train", train_mat), ("Val", val_mat), ("Test", test_mat))
    )
)

Train Shape: (1019318, 384546), Val Shape: (1019318, 384546), Test Shape: (1019318, 384546)


In [5]:
import gc

# Drop the raw DataFrame and run a garbage-collection pass.
# The value returned by gc.collect() is what this cell displays.
del taste_profile_df
gc.collect()

10

In [6]:
results_folder = "results/taste_profile_als"
results_filename = "taste_profile_als_results.csv"

import time
from typing import Optional

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: Optional[str] = None,
    n_jobs: int = -1,
) -> pd.DataFrame:
    """Tune one weighting strategy for one ALS configuration and score it on the test split.

    Workflow:
      1. For each Optuna trial, weight ``train_mat`` with the sampled parameters,
         fit a fresh model, and score NDCG@20 against ``val_mat``.
      2. Re-weight ``train_val_mat`` with the best parameters, fit a final model
         (this fit is timed), and compute ranking metrics at K=10 and K=20
         against ``test_mat``.

    Args:
        train_mat: Interactions used to fit the model in every trial.
        val_mat: Held-out interactions scored during the search.
        train_val_mat: Interactions used to fit the final model.
        test_mat: Held-out interactions scored by the final model.
        weighting_strategy: One of the strategy names accepted below
            (e.g. ``"no_weighting"``, ``"bm25"``, ``"tfidf"``, ...).
        algorithm: ``"ALS_factors=10"`` or ``"ALS_factors=100"``.
        n_trials: Number of Optuna trials. Only used for strategies that have
            tunable parameters; parameter-free strategies always run one trial.
        output_dir: If given, the single-row result is also written to
            ``<output_dir>/<algorithm>_<strategy>_results.csv``. The directory
            is created when missing.
        n_jobs: Forwarded to ``study.optimize``. The default (-1) keeps the
            previous behaviour. NOTE: with parallel trials the sampler seed
            alone does not make a run repeatable; pass ``n_jobs=1`` when exact
            reproducibility matters.

    Returns:
        A one-row DataFrame with the algorithm, strategy, trial count, best
        validation NDCG@20, test NDCG/precision at 10 and 20, the final fit
        time in seconds, and the best parameters.

    Raises:
        ValueError: If ``weighting_strategy`` or ``algorithm`` is not recognized.
    """
    # Single source of truth for tunable strategies: (param name, low, high),
    # listed in the order the parameters are sampled.
    search_spaces = {
        "bm25": (("bm25_k1", 0.1, 1000), ("bm25_b", 0.0, 1.0)),
        "log_idf": (("conf_alpha", 1.0, 150.0),),
        "power": (("power_p", 0.1, 1.5),),
        "robust_user_centric": (("scale_factor", 0.1, 10.0),),
        "robust_user_centric_weight_v2": (("lower_q", 5.0, 45.0), ("upper_q", 55.0, 95.0)),
        "sigmoid_propensity": (("p", 0.1, 5.0), ("beta", 0.0, 1.0)),
        "power_lift": (("p", 0.1, 1.5),),
    }
    parameter_free = ("no_weighting", "tfidf", "log", "normalized", "pmi")
    strategies = parameter_free + tuple(search_spaces)

    if weighting_strategy not in strategies:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy

    # Factories rather than instances, so every trial and the final fit start
    # from a fresh, identically configured model.
    algorithms = {
        "ALS_factors=10": lambda: AlternatingLeastSquares(factors=10, regularization=10, random_state=42),
        "ALS_factors=100": lambda: AlternatingLeastSquares(factors=100, regularization=10, random_state=42),
    }
    if algorithm not in algorithms:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    AlgoFactory = algorithms[algorithm]

    # Create the output directory up front so a bad path fails before the
    # expensive optimization instead of after it.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"Running optimization for {algo_name} with {strategy}...")

    # Strategy name -> callable(matrix, params). "no_weighting" is deliberately
    # absent: it returns the copied matrix untouched.
    weighters = {
        "bm25": lambda mat, prm: bm25_weight(mat, K1=prm.get("bm25_k1"), B=prm.get("bm25_b")),
        "log_idf": lambda mat, prm: log_idf_weight(mat, alpha=prm.get("conf_alpha")),
        "power": lambda mat, prm: power_weight(mat, p=prm.get("power_p")),
        "tfidf": lambda mat, prm: tfidf_weight(mat),
        "log": lambda mat, prm: log_weight(mat),
        "normalized": lambda mat, prm: normalized_weight(mat),
        "pmi": lambda mat, prm: pmi_weight(mat),
        "robust_user_centric": lambda mat, prm: robust_user_centric_weight(
            mat, scale_factor=prm.get("scale_factor")
        ),
        "sigmoid_propensity": lambda mat, prm: sigmoid_propensity_weight(
            mat, p=prm.get("p"), beta=prm.get("beta")
        ),
        "power_lift": lambda mat, prm: power_lift_weight(mat, p=prm.get("p")),
        "robust_user_centric_weight_v2": lambda mat, prm: robust_user_centric_weight_v2(
            mat, lower_q=prm.get("lower_q"), upper_q=prm.get("upper_q")
        ),
    }

    def get_weighted_matrix(matrix, params):
        """Return a weighted copy of ``matrix``; the input matrix is copied first."""
        weighted = matrix.copy()
        apply_weighting = weighters.get(strategy)
        if apply_weighting is None:
            return weighted
        return apply_weighting(weighted, params)

    def objective(trial):
        """Sample parameters, fit on the weighted train matrix, return validation NDCG@20."""
        params = {
            name: trial.suggest_float(name, low, high)
            for name, low, high in search_spaces.get(strategy, ())
        }
        weighted_train = get_weighted_matrix(train_mat, params)

        model = AlgoFactory()
        model.fit(weighted_train, show_progress=False)

        # The unweighted train matrix is passed as the training-interactions argument.
        return ranking_metrics_at_k(model, train_mat, val_mat, K=20, show_progress=False)['ndcg']

    # Optimize only if the strategy has parameters; otherwise one trial suffices.
    current_trials = n_trials if strategy in search_spaces else 1
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=current_trials, n_jobs=n_jobs)

    # --- Final retraining & testing ---
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    final_model = AlgoFactory()

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    final_model.fit(weighted_train_val, show_progress=False)
    fit_seconds = time.perf_counter() - start_time

    metrics_at_10 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    metrics_at_20 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results_df = pd.DataFrame([{
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": metrics_at_10['ndcg'],
        "Test NDCG@20": metrics_at_20['ndcg'],
        "Test Precision@10": metrics_at_10['precision'],
        "Test Precision@20": metrics_at_20['precision'],
        "Final Train Time (s)": fit_seconds,
        "Best Params": best_params
    }])

    if output_dir:
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        results_df.to_csv(output_path, index=False)
    return results_df

In [14]:
# Create the results folder if it is missing. exist_ok=True replaces the
# check-then-create pattern, which can race between the check and the create.
os.makedirs(results_folder, exist_ok=True)

In [None]:
# Baseline for the 10-factor model. no_weighting has no tunable parameters,
# so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="no_weighting",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 21:54:14,799] A new study created in memory with name: no-name-3dee4026-4e1c-4c7e-bc97-e620989b296e


Running optimization for ALS_factors=10 with no_weighting...


[I 2026-02-22 21:54:44,551] Trial 0 finished with value: 0.04405841353270863 and parameters: {}. Best is trial 0 with value: 0.04405841353270863.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,no_weighting,1,0.044058,0.04127,0.047673,0.044539,0.056604,11.273902,{}


In [7]:
# 10-factor model with BM25 weighting; bm25_k1 and bm25_b are tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="bm25",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 21:59:25,543] A new study created in memory with name: no-name-1681754a-8bff-45e7-a327-66a3935d5002


Running optimization for ALS_factors=10 with bm25...


[I 2026-02-22 22:01:20,903] Trial 2 finished with value: 0.043047298361158275 and parameters: {'bm25_k1': 274.38497691221886, 'bm25_b': 0.1561926806341577}. Best is trial 2 with value: 0.043047298361158275.
[I 2026-02-22 22:01:21,181] Trial 0 finished with value: 0.043627022754123834 and parameters: {'bm25_k1': 213.40520014859055, 'bm25_b': 0.48703405124523436}. Best is trial 0 with value: 0.043627022754123834.
[I 2026-02-22 22:01:21,384] Trial 1 finished with value: 0.04335742629597086 and parameters: {'bm25_k1': 750.8604118211713, 'bm25_b': 0.5515889417651066}. Best is trial 0 with value: 0.043627022754123834.
[I 2026-02-22 22:01:21,550] Trial 3 finished with value: 0.04376599221201923 and parameters: {'bm25_k1': 481.7983914459974, 'bm25_b': 0.7448578378217223}. Best is trial 3 with value: 0.04376599221201923.
[I 2026-02-22 22:03:15,477] Trial 6 finished with value: 0.04339670664581396 and parameters: {'bm25_k1': 207.8343821382418, 'bm25_b': 0.2690348430776016}. Best is trial 3 with 

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=10,bm25,10,0.043901,0.039983,0.04713,0.043252,0.056571,10.336504,"{'bm25_k1': 744.2431968068297, 'bm25_b': 0.822..."


In [None]:

# 10-factor model with TF-IDF weighting. tfidf has no tunable parameters,
# so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="tfidf",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)


In [None]:
# 10-factor model with log weighting. log has no tunable parameters,
# so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [None]:
# 10-factor model with log-IDF weighting; conf_alpha is tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log_idf",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [None]:
# 10-factor model with power weighting; power_p is tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [None]:

# 10-factor model with normalized weighting. normalized has no tunable
# parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="normalized",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [None]:

# 10-factor model with PMI weighting. pmi has no tunable parameters,
# so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="pmi",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [None]:

# 10-factor model with robust user-centric weighting; scale_factor is tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [None]:

# 10-factor model with robust user-centric v2 weighting; lower_q and upper_q are tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [None]:

# 10-factor model with sigmoid-propensity weighting; p and beta are tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [None]:

# 10-factor model with power-lift weighting; p is tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power_lift",
    algorithm="ALS_factors=10",
    n_trials=10,
    output_dir=results_folder,
)

In [18]:
# Baseline for the 100-factor model. no_weighting has no tunable parameters,
# so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="no_weighting",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 23:26:00,922] A new study created in memory with name: no-name-a692f179-c126-49d7-a466-e97861de59d3


Running optimization for ALS_factors=100 with no_weighting...


[I 2026-02-22 23:27:01,206] Trial 0 finished with value: 0.07916333083349544 and parameters: {}. Best is trial 0 with value: 0.07916333083349544.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,no_weighting,1,0.079163,0.077948,0.088249,0.086483,0.107405,29.40326,{}


In [9]:
# 100-factor model with BM25 weighting; bm25_k1 and bm25_b are tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="bm25",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 22:17:00,586] A new study created in memory with name: no-name-f9ebb57e-0b5b-41c9-a9e2-ad651578b13e


Running optimization for ALS_factors=100 with bm25...


[I 2026-02-22 22:21:00,559] Trial 0 finished with value: 0.08865445342654185 and parameters: {'bm25_k1': 889.5738194879151, 'bm25_b': 0.030534956051233353}. Best is trial 0 with value: 0.08865445342654185.
[I 2026-02-22 22:21:01,354] Trial 2 finished with value: 0.08841556365974577 and parameters: {'bm25_k1': 986.5069434565154, 'bm25_b': 0.4343522334057117}. Best is trial 0 with value: 0.08865445342654185.
[I 2026-02-22 22:21:02,141] Trial 1 finished with value: 0.08844835994650561 and parameters: {'bm25_k1': 936.7778397863993, 'bm25_b': 0.5238094258481095}. Best is trial 0 with value: 0.08865445342654185.
[I 2026-02-22 22:21:04,150] Trial 3 finished with value: 0.08930050414685285 and parameters: {'bm25_k1': 147.00977890121922, 'bm25_b': 0.09065573016491513}. Best is trial 3 with value: 0.08930050414685285.
[I 2026-02-22 22:24:58,994] Trial 5 finished with value: 0.08935035764815939 and parameters: {'bm25_k1': 398.48066323512023, 'bm25_b': 0.9003420630197941}. Best is trial 5 with val

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,bm25,10,0.089699,0.084338,0.097639,0.093081,0.118595,29.470862,"{'bm25_k1': 315.9103178875338, 'bm25_b': 0.983..."


In [13]:

# 100-factor model with TF-IDF weighting. tfidf has no tunable parameters,
# so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="tfidf",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)


[I 2026-02-22 22:54:52,972] A new study created in memory with name: no-name-3a0e7bba-0446-46d9-b9fb-fd729761c6e7


Running optimization for ALS_factors=100 with tfidf...


[I 2026-02-22 22:55:54,785] Trial 0 finished with value: 0.09625468081891206 and parameters: {}. Best is trial 0 with value: 0.09625468081891206.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,tfidf,1,0.096255,0.092376,0.105904,0.102961,0.130058,29.615792,{}


In [14]:
# 100-factor model with log weighting. log has no tunable parameters,
# so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 22:57:34,175] A new study created in memory with name: no-name-2efca9bc-e330-4fd9-84c6-e0d0f2f0bb41


Running optimization for ALS_factors=100 with log...


[I 2026-02-22 22:58:35,539] Trial 0 finished with value: 0.060883399378239444 and parameters: {}. Best is trial 0 with value: 0.060883399378239444.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,log,1,0.060883,0.063452,0.071144,0.074246,0.091436,29.457849,{}


In [15]:
# 100-factor model with log-IDF weighting; conf_alpha is tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="log_idf",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 23:00:14,333] A new study created in memory with name: no-name-9d560bcc-1f37-432e-9bcf-b719dcfe3437


Running optimization for ALS_factors=100 with log_idf...


[I 2026-02-22 23:04:09,532] Trial 1 finished with value: 0.08962804166825979 and parameters: {'conf_alpha': 16.728162451462246}. Best is trial 1 with value: 0.08962804166825979.
[I 2026-02-22 23:04:10,227] Trial 3 finished with value: 0.07105008796062286 and parameters: {'conf_alpha': 87.70352046976754}. Best is trial 1 with value: 0.08962804166825979.
[I 2026-02-22 23:04:12,321] Trial 0 finished with value: 0.0791166874964234 and parameters: {'conf_alpha': 44.37226545133652}. Best is trial 1 with value: 0.08962804166825979.
[I 2026-02-22 23:05:32,879] Trial 2 finished with value: 0.09244324176423778 and parameters: {'conf_alpha': 11.668076661444786}. Best is trial 2 with value: 0.09244324176423778.
[I 2026-02-22 23:08:05,945] Trial 4 finished with value: 0.08538803088255947 and parameters: {'conf_alpha': 25.721015278045208}. Best is trial 2 with value: 0.09244324176423778.
[I 2026-02-22 23:08:08,116] Trial 6 finished with value: 0.07113608202689786 and parameters: {'conf_alpha': 87.09

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,log_idf,10,0.097297,0.091035,0.10613,0.099694,0.128535,29.454495,{'conf_alpha': 3.665509628784722}


In [16]:
# 100-factor model with power weighting; power_p is tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 23:11:46,860] A new study created in memory with name: no-name-22903ea9-c610-48da-a4df-42b6470a3d70


Running optimization for ALS_factors=100 with power...


[I 2026-02-22 23:15:42,479] Trial 1 finished with value: 0.07656794860841311 and parameters: {'power_p': 1.4132691977969003}. Best is trial 1 with value: 0.07656794860841311.
[I 2026-02-22 23:15:43,718] Trial 2 finished with value: 0.07755912393681152 and parameters: {'power_p': 0.8310705788492939}. Best is trial 2 with value: 0.07755912393681152.
[I 2026-02-22 23:15:43,935] Trial 0 finished with value: 0.07058555622758642 and parameters: {'power_p': 0.6011416294921398}. Best is trial 2 with value: 0.07755912393681152.
[I 2026-02-22 23:17:11,667] Trial 3 finished with value: 0.07383910513119363 and parameters: {'power_p': 0.707080118674362}. Best is trial 2 with value: 0.07755912393681152.
[I 2026-02-22 23:19:37,223] Trial 4 finished with value: 0.07544936194280656 and parameters: {'power_p': 0.753657583579313}. Best is trial 2 with value: 0.07755912393681152.
[I 2026-02-22 23:19:38,042] Trial 5 finished with value: 0.07585820852795618 and parameters: {'power_p': 1.4914814679715507}. B

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power,10,0.078909,0.076903,0.087408,0.084819,0.105841,29.395649,{'power_p': 1.1242434273794637}


In [17]:

# 100-factor model with normalized weighting. normalized has no tunable
# parameters, so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="normalized",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 23:23:20,089] A new study created in memory with name: no-name-b08762d3-db95-4988-8c2f-4a10f4e0fa10


Running optimization for ALS_factors=100 with normalized...


[I 2026-02-22 23:24:21,843] Trial 0 finished with value: 0.04948952539332179 and parameters: {}. Best is trial 0 with value: 0.04948952539332179.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,normalized,1,0.04949,0.047761,0.054083,0.053928,0.067115,29.498233,{}


In [10]:

# 100-factor model with PMI weighting. pmi has no tunable parameters,
# so a single trial runs regardless of n_trials.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="pmi",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 22:28:40,729] A new study created in memory with name: no-name-e1a2c60d-2e15-486d-834a-ae830e2bd1f1


Running optimization for ALS_factors=100 with pmi...


[I 2026-02-22 22:29:44,168] Trial 0 finished with value: 0.09700434996886853 and parameters: {}. Best is trial 0 with value: 0.09700434996886853.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,pmi,1,0.097004,0.094931,0.107497,0.106685,0.132673,29.45463,{}


In [11]:

# 100-factor model with robust user-centric weighting; scale_factor is tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 22:31:32,231] A new study created in memory with name: no-name-ef46d484-df91-4987-877a-74f4aa087dd9


Running optimization for ALS_factors=100 with robust_user_centric...


[I 2026-02-22 22:35:33,796] Trial 1 finished with value: 0.090470528501922 and parameters: {'scale_factor': 9.388236263596115}. Best is trial 1 with value: 0.090470528501922.
[I 2026-02-22 22:35:34,092] Trial 3 finished with value: 0.08163582820877706 and parameters: {'scale_factor': 4.595236332909377}. Best is trial 1 with value: 0.090470528501922.
[I 2026-02-22 22:35:34,976] Trial 2 finished with value: 0.08267337143969711 and parameters: {'scale_factor': 4.858134795664266}. Best is trial 1 with value: 0.090470528501922.
[I 2026-02-22 22:35:37,980] Trial 0 finished with value: 0.07118127076282985 and parameters: {'scale_factor': 2.9529205794177678}. Best is trial 1 with value: 0.090470528501922.
[I 2026-02-22 22:39:35,099] Trial 6 finished with value: 0.0673734867162165 and parameters: {'scale_factor': 2.5219162679764064}. Best is trial 1 with value: 0.090470528501922.
[I 2026-02-22 22:39:35,883] Trial 5 finished with value: 0.09080677879278148 and parameters: {'scale_factor': 9.8540

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,robust_user_centric,10,0.090807,0.089436,0.101351,0.100418,0.125152,29.465653,{'scale_factor': 9.854023669092191}


In [None]:

# 100-factor model with robust user-centric v2 weighting; lower_q and upper_q are tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

In [12]:

# 100-factor model with sigmoid-propensity weighting; p and beta are tuned.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="ALS_factors=100",
    n_trials=10,
    output_dir=results_folder,
)

[I 2026-02-22 22:43:20,744] A new study created in memory with name: no-name-52444b39-c9ae-4fa0-bb35-a3c74c418bdf


Running optimization for ALS_factors=100 with sigmoid_propensity...


[I 2026-02-22 22:47:16,606] Trial 2 finished with value: 0.08106679385086486 and parameters: {'p': 3.039311426192426, 'beta': 0.51759715104344}. Best is trial 2 with value: 0.08106679385086486.
[I 2026-02-22 22:47:16,693] Trial 3 finished with value: 0.0831529524587202 and parameters: {'p': 0.9819392717974776, 'beta': 0.5157610418881893}. Best is trial 3 with value: 0.0831529524587202.
[I 2026-02-22 22:47:17,376] Trial 0 finished with value: 0.08627258007112329 and parameters: {'p': 2.786711179119254, 'beta': 0.7116598923423251}. Best is trial 0 with value: 0.08627258007112329.
[I 2026-02-22 22:48:41,007] Trial 1 finished with value: 0.0726123998316605 and parameters: {'p': 2.745859179027445, 'beta': 0.31766731628176526}. Best is trial 0 with value: 0.08627258007112329.
[I 2026-02-22 22:51:08,627] Trial 5 finished with value: 0.09191785346080845 and parameters: {'p': 1.2708492740699449, 'beta': 0.9510209971452145}. Best is trial 5 with value: 0.09191785346080845.
[I 2026-02-22 22:51:09

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,sigmoid_propensity,10,0.091918,0.091353,0.102874,0.102904,0.127081,29.487653,"{'p': 1.2708492740699449, 'beta': 0.9510209971..."


In [19]:

# 100-factor model with power-lift weighting; p is tuned.
# This run uses 20 trials, unlike the other cells which use 10.
run_hyperparameter_optimization(
    train_mat=train_mat,
    val_mat=val_mat,
    train_val_mat=train_val_mat,
    test_mat=test_mat,
    weighting_strategy="power_lift",
    algorithm="ALS_factors=100",
    n_trials=20,
    output_dir=results_folder,
)

[I 2026-02-22 23:28:39,003] A new study created in memory with name: no-name-44ef3fe2-77eb-4d31-aef7-df844f791abb


Running optimization for ALS_factors=100 with power_lift...


[I 2026-02-22 23:32:34,692] Trial 1 finished with value: 0.05079826367523642 and parameters: {'p': 0.9697751878518299}. Best is trial 1 with value: 0.05079826367523642.
[I 2026-02-22 23:32:34,944] Trial 2 finished with value: 0.09693722235813836 and parameters: {'p': 0.281002380481669}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:32:35,725] Trial 3 finished with value: 0.07779418740279137 and parameters: {'p': 0.6866277904514063}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:34:00,733] Trial 0 finished with value: 0.05736247032721814 and parameters: {'p': 0.9086595936555}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:36:27,228] Trial 5 finished with value: 0.0744296636554598 and parameters: {'p': 0.7395215887750407}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:36:27,778] Trial 4 finished with value: 0.07805248705904613 and parameters: {'p': 0.11578697840011404}. Best is trial 2 with value: 0.096937222



[I 2026-02-22 23:40:20,643] Trial 8 finished with value: 0.08347339011879948 and parameters: {'p': 0.13860519256079096}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:40:23,101] Trial 10 finished with value: 0.04150513661054266 and parameters: {'p': 1.062110100353003}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:40:23,266] Trial 9 finished with value: 0.08292447375700729 and parameters: {'p': 0.49295789936823275}. Best is trial 2 with value: 0.09693722235813836.




[I 2026-02-22 23:42:02,651] Trial 11 finished with value: 0.028998641087741715 and parameters: {'p': 1.2183394685340272}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:44:16,025] Trial 12 finished with value: 0.012451620238964057 and parameters: {'p': 1.4973291748654276}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:44:17,577] Trial 13 finished with value: 0.012902552852716607 and parameters: {'p': 1.4862397188126613}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:44:18,388] Trial 14 finished with value: 0.012844947515370778 and parameters: {'p': 1.4894682488862354}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:45:55,121] Trial 15 finished with value: 0.08950785937428028 and parameters: {'p': 0.42231658899928903}. Best is trial 2 with value: 0.09693722235813836.
[I 2026-02-22 23:48:09,653] Trial 16 finished with value: 0.08623234485154195 and parameters: {'p': 0.45436513763083064}. Best is trial 2 with va

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,power_lift,20,0.096937,0.09422,0.106835,0.105954,0.132037,29.401729,{'p': 0.281002380481669}


In [20]:
import glob

# Read every per-run CSV in the results folder. sorted() makes the read order
# independent of the filesystem's listing order.
all_results = [
    pd.read_csv(csv_path)
    for csv_path in sorted(glob.glob(f"{results_folder}/*.csv"))
]

if all_results:
    # ignore_index + reset_index give each row a unique positional index;
    # without it every row keeps index 0 from its single-row source file.
    experiment_results = (
        pd.concat(all_results, ignore_index=True)
        .sort_values("Test NDCG@20", ascending=False)
        .reset_index(drop=True)
    )
    experiment_results.to_csv(results_filename, index=False)
else:
    # Define the name on this path too, so the final display line does not
    # raise NameError when no result files exist yet.
    experiment_results = pd.DataFrame()
    print("No results found.")

experiment_results

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,ALS_factors=100,pmi,1,0.097004,0.094931,0.107497,0.106685,0.132673,29.45463,{}
0,ALS_factors=100,power_lift,20,0.096937,0.09422,0.106835,0.105954,0.132037,29.401729,{'p': 0.281002380481669}
0,ALS_factors=100,log_idf,10,0.097297,0.091035,0.10613,0.099694,0.128535,29.454495,{'conf_alpha': 3.665509628784722}
0,ALS_factors=100,tfidf,1,0.096255,0.092376,0.105904,0.102961,0.130058,29.615792,{}
0,ALS_factors=100,sigmoid_propensity,10,0.091918,0.091353,0.102874,0.102904,0.127081,29.487653,"{'p': 1.2708492740699449, 'beta': 0.9510209971..."
0,ALS_factors=100,robust_user_centric,10,0.090807,0.089436,0.101351,0.100418,0.125152,29.465653,{'scale_factor': 9.854023669092191}
0,ALS_factors=100,bm25,10,0.089699,0.084338,0.097639,0.093081,0.118595,29.470862,"{'bm25_k1': 315.9103178875338, 'bm25_b': 0.983..."
0,ALS_factors=100,no_weighting,1,0.079163,0.077948,0.088249,0.086483,0.107405,29.40326,{}
0,ALS_factors=100,power,10,0.078909,0.076903,0.087408,0.084819,0.105841,29.395649,{'power_p': 1.1242434273794637}
0,ALS_factors=100,log,1,0.060883,0.063452,0.071144,0.074246,0.091436,29.457849,{}
