In [None]:
import numpy as np
import pandas as pd
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from scipy.sparse import csr_matrix
from weighting_strategies import (
    bm25_weight, tfidf_weight, normalized_weight,
    log_weight, log_idf_weight, power_weight,
    pmi_weight, robust_user_centric_weight, sigmoid_propensity_weight, power_lift_weight, robust_user_centric_weight_v2
)
# We keep implicit evaluation for symmetry and metric consistency with the previous notebook
from implicit.evaluation import train_test_split, ranking_metrics_at_k
import cornac

from autorec import AutoRec

In [2]:
import sys
import os

# Make the parent directory importable so sibling packages (e.g. `utils`) resolve.
# Guarded so that re-running this cell does not keep appending duplicate entries.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from utils.sparse import transform_dataframe_to_sparse

In [3]:
# Load the MovieLens 20M feedback triples into a three-column frame and drop
# any row that has a missing value.
FEEDBACK_COLUMNS = ['user_id', 'item_id', 'target']

movielens_df = pd.DataFrame(
    cornac.datasets.movielens.load_feedback(variant="20M"),
    columns=FEEDBACK_COLUMNS,
)[FEEDBACK_COLUMNS].dropna()

# Quick size check: (distinct users, distinct items, number of interactions).
(
    movielens_df['user_id'].nunique(),
    movielens_df['item_id'].nunique(),
    len(movielens_df),
)

(138493, 26744, 20000263)

In [4]:
# Turn the long-format feedback frame into a matrix with users on the rows and
# items on the columns, plus the id mappings returned by the helper.
user_item_matrix, user_mapping, item_mapping = transform_dataframe_to_sparse(
    movielens_df,
    row_field='user_id',
    col_field='item_id',
    data_field='target',
)

# Two nested splits with identical settings: first carve out the test set, then
# split the remainder again into train and validation.
split_options = dict(train_percentage=0.9, random_state=42)
train_val_mat, test_mat = train_test_split(user_item_matrix, **split_options)
train_mat, val_mat = train_test_split(train_val_mat, **split_options)

print(f"Train Shape: {train_mat.shape}, Val Shape: {val_mat.shape}, Test Shape: {test_mat.shape}")

Train Shape: (138493, 26744), Val Shape: (138493, 26744), Test Shape: (138493, 26744)


In [None]:
import time

# Folder that receives one CSV per (algorithm, strategy) experiment.
results_folder = "results/movielens_20m_autorec"
# File name of the combined, sorted summary written by the aggregation cell.
results_filename = "movielens_20m_autorec_results.csv"

def run_hyperparameter_optimization(
    train_mat: csr_matrix,
    val_mat: csr_matrix,
    train_val_mat: csr_matrix,
    test_mat: csr_matrix,
    weighting_strategy: str,
    algorithm: str,
    n_trials: int = 20,
    output_dir: str = None,
) -> pd.DataFrame:
    """Tune one weighting strategy for one algorithm, then score it on the test split.

    The strategy's parameters (if it has any) are searched with Optuna, maximising
    NDCG@20 on ``val_mat`` for a model fitted on the weighted ``train_mat``. The
    best parameters are then used to weight ``train_val_mat``, a fresh model is
    fitted on it, and ranking metrics at K=10 and K=20 are computed on ``test_mat``.

    Args:
        train_mat: Interaction matrix used to fit the model inside each trial.
        val_mat: Held-out interactions used to score each trial.
        train_val_mat: Matrix used to fit the final model.
        test_mat: Held-out interactions used for the reported test metrics.
        weighting_strategy: Key of the weighting scheme to apply (see
            ``search_spaces`` below for the accepted names).
        algorithm: Key of the model to train; currently only ``"I-AutoRec"``.
        n_trials: Number of Optuna trials for strategies that have tunable
            parameters. Strategies without parameters always run one trial.
        output_dir: If given, the result row is also written to
            ``<output_dir>/<algorithm>_<strategy>_results.csv``. The directory
            is created when it does not exist.

    Returns:
        A single-row DataFrame with the algorithm, strategy, number of trials,
        best validation NDCG@20, test NDCG/precision at 10 and 20, the wall-clock
        time of the final fit, and the best parameters.

    Raises:
        ValueError: If ``weighting_strategy`` or ``algorithm`` is not recognised.
    """
    model_factories = {
        "I-AutoRec": lambda: AutoRec(unobserved_weight=0.1, hidden_dim=10, epochs=20),
    }

    # Single source of truth: strategy name -> {optuna parameter name: (low, high)}.
    # An empty dict means the strategy has nothing to tune.
    search_spaces = {
        "no_weighting": {},
        "bm25": {"bm25_k1": (0.1, 1000), "bm25_b": (0.0, 1.0)},
        "tfidf": {},
        "log": {},
        "log_idf": {"conf_alpha": (1.0, 150.0)},
        "power": {"power_p": (0.1, 1.5)},
        "normalized": {},
        "pmi": {},
        "robust_user_centric": {"scale_factor": (0.1, 10.0)},
        "robust_user_centric_weight_v2": {"lower_q": (5.0, 45.0), "upper_q": (55.0, 95.0)},
        "sigmoid_propensity": {"p": (0.1, 5.0), "beta": (0.0, 1.0)},
        "power_lift": {"p": (0.1, 1.5)},
    }

    # strategy name -> callable(matrix, params) returning the re-weighted matrix.
    # "no_weighting" is deliberately absent: the matrix is used as-is.
    weighting_functions = {
        "bm25": lambda m, p: bm25_weight(m, K1=p["bm25_k1"], B=p["bm25_b"]),
        "log_idf": lambda m, p: log_idf_weight(m, alpha=p["conf_alpha"]),
        "power": lambda m, p: power_weight(m, p=p["power_p"]),
        "tfidf": lambda m, p: tfidf_weight(m),
        "log": lambda m, p: log_weight(m),
        "normalized": lambda m, p: normalized_weight(m),
        "pmi": lambda m, p: pmi_weight(m),
        "robust_user_centric": lambda m, p: robust_user_centric_weight(m, scale_factor=p["scale_factor"]),
        "sigmoid_propensity": lambda m, p: sigmoid_propensity_weight(m, p=p["p"], beta=p["beta"]),
        "power_lift": lambda m, p: power_lift_weight(m, p=p["p"]),
        "robust_user_centric_weight_v2": lambda m, p: robust_user_centric_weight_v2(
            m, lower_q=p["lower_q"], upper_q=p["upper_q"]
        ),
    }

    if weighting_strategy not in search_spaces:
        raise ValueError(f"Weighting strategy '{weighting_strategy}' is not recognized.")
    strategy = weighting_strategy
    search_space = search_spaces[strategy]

    if algorithm not in model_factories:
        raise ValueError(f"Algorithm '{algorithm}' is not recognized.")
    algo_name = algorithm
    build_model = model_factories[algorithm]

    print(f"Running optimization for {algo_name} with {strategy}...")

    def get_weighted_matrix(matrix, params):
        # Always work on a copy so the caller's matrix is never modified.
        weighted = matrix.copy()
        apply_weighting = weighting_functions.get(strategy)
        if apply_weighting is None:
            return weighted
        return apply_weighting(weighted, params)

    def objective(trial):
        # Sample every tunable parameter of the active strategy (none for
        # parameter-free strategies, which yields an empty dict).
        params = {
            name: trial.suggest_float(name, low, high)
            for name, (low, high) in search_space.items()
        }

        model = build_model()
        model.fit(get_weighted_matrix(train_mat, params), show_progress=False)

        # The unweighted train matrix is passed alongside the validation matrix,
        # so the metric does not depend on the weighting applied for fitting.
        return ranking_metrics_at_k(model, train_mat, val_mat, K=20, show_progress=False)['ndcg']

    # Parameter-free strategies need exactly one evaluation.
    current_trials = n_trials if search_space else 1
    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
    # Run trials sequentially: with parallel jobs the seeded sampler is no longer
    # reproducible, and several models would be fitted at the same time.
    study.optimize(objective, n_trials=current_trials, n_jobs=1)

    # --- Final retraining & testing ---
    best_params = study.best_params
    weighted_train_val = get_weighted_matrix(train_val_mat, best_params)

    final_model = build_model()

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    fit_started = time.perf_counter()
    final_model.fit(weighted_train_val, show_progress=False)
    fit_seconds = time.perf_counter() - fit_started

    metrics_at_10 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=10, show_progress=False)
    metrics_at_20 = ranking_metrics_at_k(final_model, train_val_mat, test_mat, K=20, show_progress=False)

    results_df = pd.DataFrame([{
        "Algorithm": algo_name,
        "Strategy": strategy,
        "Number of Optimization Trials": current_trials,
        "Best Val NDCG@20": study.best_value,
        "Test NDCG@10": metrics_at_10['ndcg'],
        "Test NDCG@20": metrics_at_20['ndcg'],
        "Test Precision@10": metrics_at_10['precision'],
        "Test Precision@20": metrics_at_20['precision'],
        "Final Train Time (s)": fit_seconds,
        "Best Params": best_params
    }])

    if output_dir:
        # Create the target folder on demand so a fresh checkout does not fail here.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f"{algo_name}_{strategy}_results.csv")
        results_df.to_csv(output_path, index=False)
    return results_df

In [6]:
# Create the results folder if needed; exist_ok avoids the check-then-create race.
os.makedirs(results_folder, exist_ok=True)

# Number of Optuna trials per tunable strategy. Kept at 15 (the optimisation
# function's own default is 20) to save time with deep-learning models.
TRIALS = 15

In [7]:
# Baseline: the interaction values are used as-is (no re-weighting).
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="no_weighting",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)

[I 2026-02-08 00:41:05,532] A new study created in memory with name: no-name-de62dfa2-3d73-4ae5-8e84-195779f85478


Running optimization for I-AutoRec with no_weighting...


[W 2026-02-08 00:50:15,774] Trial 0 failed with parameters: {} because of the following error: OutOfMemoryError('CUDA out of memory. Tried to allocate 13.80 GiB. GPU 0 has a total capacity of 14.56 GiB of which 13.79 GiB is free. Process 27020 has 188.00 MiB memory in use. Including non-PyTorch memory, this process has 602.00 MiB memory in use. Of the allocated memory 251.82 MiB is allocated by PyTorch, and 214.18 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)').
Traceback (most recent call last):
  File "/home/coder/.pyenv/versions/3.12.0/lib/python3.12/site-packages/optuna/study/_optimize.py", line 205, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_27962/1981260216.py", line 97, in objectiv

OutOfMemoryError: CUDA out of memory. Tried to allocate 13.80 GiB. GPU 0 has a total capacity of 14.56 GiB of which 13.79 GiB is free. Process 27020 has 188.00 MiB memory in use. Including non-PyTorch memory, this process has 602.00 MiB memory in use. Of the allocated memory 251.82 MiB is allocated by PyTorch, and 214.18 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Baseline run with no re-weighting; this strategy has no tunable parameters,
# so a single trial is executed regardless of TRIALS.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="no_weighting",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)

[I 2026-02-08 00:25:21,130] A new study created in memory with name: no-name-604e26af-0b9b-4d54-a21a-6a92e634acfc


Running optimization for I-AutoRec with no_weighting...


[I 2026-02-08 00:25:21,704] Trial 0 finished with value: 0.08846093269222169 and parameters: {}. Best is trial 0 with value: 0.08846093269222169.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,no_weighting,1,0.088461,0.081683,0.09642,0.096652,0.126007,0.452133,{}


In [45]:
# BM25 weighting: tunes bm25_k1 and bm25_b.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="bm25",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)


[I 2026-02-08 00:25:23,726] A new study created in memory with name: no-name-9ccc12b1-fdf3-4b86-8cec-3408f9ac8225


Running optimization for I-AutoRec with bm25...


[I 2026-02-08 00:25:24,310] Trial 0 finished with value: 0.08029173480260333 and parameters: {'bm25_k1': 586.1110049158626, 'bm25_b': 0.4343629881486941}. Best is trial 0 with value: 0.08029173480260333.
[I 2026-02-08 00:25:24,893] Trial 1 finished with value: 0.08036209418786992 and parameters: {'bm25_k1': 36.923190666968026, 'bm25_b': 0.8766758177353506}. Best is trial 1 with value: 0.08036209418786992.
[I 2026-02-08 00:25:25,466] Trial 2 finished with value: 0.0897352075289786 and parameters: {'bm25_k1': 309.46876999113476, 'bm25_b': 0.1756231303208251}. Best is trial 2 with value: 0.0897352075289786.
[I 2026-02-08 00:25:26,029] Trial 3 finished with value: 0.086178226045516 and parameters: {'bm25_k1': 65.84899476905596, 'bm25_b': 0.8650975412090878}. Best is trial 2 with value: 0.0897352075289786.
[I 2026-02-08 00:25:26,598] Trial 4 finished with value: 0.094336136911049 and parameters: {'bm25_k1': 485.26240295140127, 'bm25_b': 0.7473983335343836}. Best is trial 4 with value: 0.094

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,bm25,15,0.094336,0.072824,0.085765,0.08791,0.117598,0.442636,"{'bm25_k1': 485.26240295140127, 'bm25_b': 0.74..."


In [46]:
# TF-IDF weighting: no tunable parameters, so only one trial is run.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="tfidf",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)

[I 2026-02-08 00:25:33,330] A new study created in memory with name: no-name-db91a759-995f-4d0d-9937-ac89bbd5e052


Running optimization for I-AutoRec with tfidf...


[I 2026-02-08 00:25:33,905] Trial 0 finished with value: 0.09303465194814381 and parameters: {}. Best is trial 0 with value: 0.09303465194814381.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,tfidf,1,0.093035,0.067634,0.079583,0.079828,0.100782,0.452446,{}


In [47]:
# Log weighting: no tunable parameters, so only one trial is run.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="log",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)

[I 2026-02-08 00:25:34,887] A new study created in memory with name: no-name-83f48b7c-73dd-43e6-8d78-bd83fb75fd7b


Running optimization for I-AutoRec with log...


[I 2026-02-08 00:25:35,469] Trial 0 finished with value: 0.07702259852893688 and parameters: {}. Best is trial 0 with value: 0.07702259852893688.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,log,1,0.077023,0.097909,0.111324,0.120073,0.152534,0.439404,{}


In [None]:
# Log-IDF weighting: tunes conf_alpha.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="log_idf",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)


[I 2026-02-08 00:25:36,426] A new study created in memory with name: no-name-8b9a02ae-6f2c-4312-8671-54e23fc3c96d


Running optimization for I-AutoRec with confidence...


[I 2026-02-08 00:25:36,995] Trial 0 finished with value: 0.0853247205377085 and parameters: {'conf_alpha': 114.16658093785324}. Best is trial 0 with value: 0.0853247205377085.
[I 2026-02-08 00:25:37,571] Trial 1 finished with value: 0.081454524279266 and parameters: {'conf_alpha': 81.17257238519126}. Best is trial 0 with value: 0.0853247205377085.
[I 2026-02-08 00:25:38,143] Trial 2 finished with value: 0.06586132474128605 and parameters: {'conf_alpha': 32.77056615191154}. Best is trial 0 with value: 0.0853247205377085.
[I 2026-02-08 00:25:38,719] Trial 3 finished with value: 0.08621939246351479 and parameters: {'conf_alpha': 102.39325145322381}. Best is trial 3 with value: 0.08621939246351479.
[I 2026-02-08 00:25:39,293] Trial 4 finished with value: 0.07581273088614644 and parameters: {'conf_alpha': 86.98328148148384}. Best is trial 3 with value: 0.08621939246351479.
[I 2026-02-08 00:25:39,871] Trial 5 finished with value: 0.08401847323409631 and parameters: {'conf_alpha': 134.4709375

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,confidence,15,0.090046,0.091282,0.101113,0.100445,0.123638,0.439393,{'conf_alpha': 82.8836536398104}


In [49]:
# Power weighting: tunes the exponent power_p.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="power",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)


[I 2026-02-08 00:25:46,068] A new study created in memory with name: no-name-60675239-ddcd-41bd-9c96-4fa091ee42fb


Running optimization for I-AutoRec with power...


[I 2026-02-08 00:25:46,639] Trial 0 finished with value: 0.09064619177858398 and parameters: {'power_p': 1.2413915386808767}. Best is trial 0 with value: 0.09064619177858398.
[I 2026-02-08 00:25:47,210] Trial 1 finished with value: 0.08940173809373857 and parameters: {'power_p': 0.89043370388516}. Best is trial 0 with value: 0.09064619177858398.
[I 2026-02-08 00:25:47,772] Trial 2 finished with value: 0.08255263051880277 and parameters: {'power_p': 0.5851534580275215}. Best is trial 0 with value: 0.09064619177858398.
[I 2026-02-08 00:25:48,344] Trial 3 finished with value: 0.0730975135079735 and parameters: {'power_p': 0.5279264923367316}. Best is trial 0 with value: 0.09064619177858398.
[I 2026-02-08 00:25:48,913] Trial 4 finished with value: 0.07874686173607118 and parameters: {'power_p': 0.25617967323924484}. Best is trial 0 with value: 0.09064619177858398.
[I 2026-02-08 00:25:49,472] Trial 5 finished with value: 0.09384331201487413 and parameters: {'power_p': 1.0021632243656629}. B

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,power,15,0.094503,0.083288,0.097717,0.094343,0.124586,0.449054,{'power_p': 1.4764361205665877}


In [50]:
# Normalized weighting: no tunable parameters, so only one trial is run.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="normalized",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)


[I 2026-02-08 00:25:55,448] A new study created in memory with name: no-name-22c33aad-5265-40dd-9a5b-2b738da05596


Running optimization for I-AutoRec with normalized...


[I 2026-02-08 00:25:56,004] Trial 0 finished with value: 0.07923700222336508 and parameters: {}. Best is trial 0 with value: 0.07923700222336508.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,normalized,1,0.079237,0.092506,0.103587,0.109847,0.141757,0.434748,{}


In [51]:
# PMI weighting: no tunable parameters, so only one trial is run.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="pmi",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)


[I 2026-02-08 00:25:56,977] A new study created in memory with name: no-name-50db93ac-c9bb-47d3-8242-7132686a69b1


Running optimization for I-AutoRec with pmi...


[I 2026-02-08 00:25:57,543] Trial 0 finished with value: 0.08718408072296388 and parameters: {}. Best is trial 0 with value: 0.08718408072296388.


Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,pmi,1,0.087184,0.070043,0.085659,0.073726,0.099479,0.439845,{}


In [52]:
# Robust user-centric weighting: tunes scale_factor.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="robust_user_centric",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)


[I 2026-02-08 00:25:58,495] A new study created in memory with name: no-name-93d1bc87-98e3-4fae-9845-d58d11d45d7c


Running optimization for I-AutoRec with robust_user_centric...


[I 2026-02-08 00:25:59,063] Trial 0 finished with value: 0.08400320496143286 and parameters: {'scale_factor': 2.672217051346436}. Best is trial 0 with value: 0.08400320496143286.
[I 2026-02-08 00:25:59,627] Trial 1 finished with value: 0.08364146185980337 and parameters: {'scale_factor': 7.337216886995203}. Best is trial 0 with value: 0.08400320496143286.
[I 2026-02-08 00:26:00,184] Trial 2 finished with value: 0.07330879966949254 and parameters: {'scale_factor': 0.3146865056683955}. Best is trial 0 with value: 0.08400320496143286.
[I 2026-02-08 00:26:00,845] Trial 3 finished with value: 0.0839772065931242 and parameters: {'scale_factor': 7.42715207894299}. Best is trial 0 with value: 0.08400320496143286.
[I 2026-02-08 00:26:01,551] Trial 4 finished with value: 0.07643527187957697 and parameters: {'scale_factor': 4.831293076750521}. Best is trial 0 with value: 0.08400320496143286.
[I 2026-02-08 00:26:02,147] Trial 5 finished with value: 0.08507887669144518 and parameters: {'scale_facto

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,robust_user_centric,15,0.093402,0.08592,0.095679,0.103249,0.128494,0.447601,{'scale_factor': 7.716780227648577}


In [53]:
# Robust user-centric weighting (v2): tunes lower_q and upper_q.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="robust_user_centric_weight_v2",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)


[I 2026-02-08 00:26:08,143] A new study created in memory with name: no-name-980af31e-e743-4b34-8765-e7290209b8a2


Running optimization for I-AutoRec with robust_user_centric_weight_v2...


[I 2026-02-08 00:26:08,712] Trial 0 finished with value: 0.06921445535480071 and parameters: {'lower_q': 39.5763932215865, 'upper_q': 57.37454889564381}. Best is trial 0 with value: 0.06921445535480071.
[I 2026-02-08 00:26:09,287] Trial 1 finished with value: 0.07701437754515567 and parameters: {'lower_q': 29.173872397986564, 'upper_q': 73.5415788573026}. Best is trial 1 with value: 0.07701437754515567.
[I 2026-02-08 00:26:09,840] Trial 2 finished with value: 0.08857623358720446 and parameters: {'lower_q': 15.303179382979488, 'upper_q': 64.70470017748892}. Best is trial 2 with value: 0.08857623358720446.
[I 2026-02-08 00:26:10,414] Trial 3 finished with value: 0.07988466414211791 and parameters: {'lower_q': 7.192009456408153, 'upper_q': 86.7666979859734}. Best is trial 2 with value: 0.08857623358720446.
[I 2026-02-08 00:26:10,961] Trial 4 finished with value: 0.07509625289349038 and parameters: {'lower_q': 11.224408752704885, 'upper_q': 68.61014013537614}. Best is trial 2 with value: 0

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,robust_user_centric_weight_v2,15,0.092195,0.092401,0.100669,0.109022,0.131573,0.439092,"{'lower_q': 5.401583884003134, 'upper_q': 75.7..."


In [54]:
# Sigmoid propensity weighting: tunes p and beta.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="sigmoid_propensity",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)


[I 2026-02-08 00:26:17,557] A new study created in memory with name: no-name-7aa1b016-5080-4dd0-bce8-1f013fca3fcc


Running optimization for I-AutoRec with sigmoid_propensity...


[I 2026-02-08 00:26:18,130] Trial 0 finished with value: 0.0723367564708605 and parameters: {'p': 1.6548990532169117, 'beta': 0.6752174940793115}. Best is trial 0 with value: 0.0723367564708605.
[I 2026-02-08 00:26:18,697] Trial 1 finished with value: 0.09032916249334144 and parameters: {'p': 0.3186300966746646, 'beta': 0.1927249214228478}. Best is trial 1 with value: 0.09032916249334144.
[I 2026-02-08 00:26:19,257] Trial 2 finished with value: 0.08716006088958443 and parameters: {'p': 0.9182533399407692, 'beta': 0.8312179796311866}. Best is trial 1 with value: 0.09032916249334144.
[I 2026-02-08 00:26:19,819] Trial 3 finished with value: 0.07330956641247428 and parameters: {'p': 4.0702438125372264, 'beta': 0.7712321853383914}. Best is trial 1 with value: 0.09032916249334144.
[I 2026-02-08 00:26:20,375] Trial 4 finished with value: 0.07711975056864598 and parameters: {'p': 4.9465332428701965, 'beta': 0.10020859299163043}. Best is trial 1 with value: 0.09032916249334144.
[I 2026-02-08 00

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,sigmoid_propensity,15,0.090927,0.097562,0.106396,0.117763,0.144244,0.433664,"{'p': 1.5681013991925234, 'beta': 0.9878724036..."


In [55]:
# Power-lift weighting: tunes the exponent p.
run_hyperparameter_optimization(
    train_mat,
    val_mat,
    train_val_mat,
    test_mat,
    weighting_strategy="power_lift",
    algorithm="I-AutoRec",
    n_trials=TRIALS,
    output_dir=results_folder,
)

[I 2026-02-08 00:26:26,850] A new study created in memory with name: no-name-8f39886e-054b-4e4b-895e-009537355ba4


Running optimization for I-AutoRec with power_lift...


[I 2026-02-08 00:26:27,408] Trial 0 finished with value: 0.07408679800807418 and parameters: {'p': 1.0035638240563256}. Best is trial 0 with value: 0.07408679800807418.
[I 2026-02-08 00:26:27,949] Trial 1 finished with value: 0.08171485501539048 and parameters: {'p': 0.7950215492191984}. Best is trial 1 with value: 0.08171485501539048.
[I 2026-02-08 00:26:28,510] Trial 2 finished with value: 0.08448556159108428 and parameters: {'p': 0.38181531389079426}. Best is trial 2 with value: 0.08448556159108428.
[I 2026-02-08 00:26:29,221] Trial 3 finished with value: 0.08239751075242582 and parameters: {'p': 0.5172924429650805}. Best is trial 2 with value: 0.08448556159108428.
[I 2026-02-08 00:26:29,760] Trial 4 finished with value: 0.08440264250120148 and parameters: {'p': 0.6058546367436108}. Best is trial 2 with value: 0.08448556159108428.
[I 2026-02-08 00:26:30,302] Trial 5 finished with value: 0.075908714172485 and parameters: {'p': 0.7527440997625388}. Best is trial 2 with value: 0.084485

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,power_lift,15,0.105658,0.09026,0.101539,0.108692,0.134415,0.44961,{'p': 0.15919200962186064}


In [56]:
import glob

# Read every per-experiment CSV in the results folder. Note that this picks up
# *all* CSV files present there, including ones left over from earlier runs.
# Sorting the paths makes the read order (and tie order after sorting) stable.
result_frames = [
    pd.read_csv(csv_path)
    for csv_path in sorted(glob.glob(f"{results_folder}/*.csv"))
]

if result_frames:
    # ignore_index gives the combined table a proper 0..n-1 index instead of
    # repeating each single-row file's index 0.
    experiment_results = (
        pd.concat(result_frames, ignore_index=True)
        .sort_values("Test NDCG@20", ascending=False)
        .reset_index(drop=True)
    )
    experiment_results.to_csv(results_filename, index=False)
else:
    # Bind the name anyway so the display line below neither raises NameError
    # on a fresh kernel nor shows a stale frame from a previous execution.
    experiment_results = pd.DataFrame()
    print("No results found.")

experiment_results

Unnamed: 0,Algorithm,Strategy,Number of Optimization Trials,Best Val NDCG@20,Test NDCG@10,Test NDCG@20,Test Precision@10,Test Precision@20,Final Train Time (s),Best Params
0,I-AutoRec,log,1,0.077023,0.097909,0.111324,0.120073,0.152534,0.439404,{}
0,I-AutoRec,sigmoid_propensity,15,0.090927,0.097562,0.106396,0.117763,0.144244,0.433664,"{'p': 1.5681013991925234, 'beta': 0.9878724036..."
0,I-AutoRec,normalized,1,0.079237,0.092506,0.103587,0.109847,0.141757,0.434748,{}
0,I-AutoRec,power_lift,15,0.105658,0.09026,0.101539,0.108692,0.134415,0.44961,{'p': 0.15919200962186064}
0,I-AutoRec,confidence,15,0.090046,0.091282,0.101113,0.100445,0.123638,0.439393,{'conf_alpha': 82.8836536398104}
0,I-AutoRec,robust_user_centric_weight_v2,15,0.092195,0.092401,0.100669,0.109022,0.131573,0.439092,"{'lower_q': 5.401583884003134, 'upper_q': 75.7..."
0,I-AutoRec,power,15,0.094503,0.083288,0.097717,0.094343,0.124586,0.449054,{'power_p': 1.4764361205665877}
0,I-AutoRec,no_weighting,1,0.088461,0.081683,0.09642,0.096652,0.126007,0.452133,{}
0,I-AutoRec,robust_user_centric,15,0.093402,0.08592,0.095679,0.103249,0.128494,0.447601,{'scale_factor': 7.716780227648577}
0,I-AutoRec,bm25,15,0.094336,0.072824,0.085765,0.08791,0.117598,0.442636,"{'bm25_k1': 485.26240295140127, 'bm25_b': 0.74..."
