In [1]:
import cornac
from cornac.eval_methods import RatioSplit
from cornac.models import MF, PMF, BPR, SANSA, BiVAECF, LightGCN, RecVAE, EASE, NGCF, VAECF, IBPR, NeuMF, HPF
from cornac.metrics import Precision, Recall, NDCG, MAP

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


### 1. Import datasets

In [2]:
def stratified_ranking_split(
    df: pd.DataFrame,
    entity_field: str,
    test_size: float = 0.1,
    random_state: int | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Splits a ranking-based dataset into training and test sets while preserving the entity distribution.

    This function is useful for ranking models (e.g., next-best-offer, personalized recommendations) where each
    entity (e.g., user) has multiple interactions with different items, and stratification ensures that
    different user interaction levels are maintained in both splits.

    Parameters:
        df: The ranking dataset, where each row represents an interaction between an entity (e.g., user)
            and an item (e.g., game, offer).
        entity_field: The column representing the entity to be stratified.
        test_size: Fraction of unique entities to allocate to the test set.
        random_state: Random seed for reproducibility.

    Returns:
        DataFrames containing training and test data.

    Example:
        >>> train_validation_df, test_df = stratified_ranking_split(df, entity_field='user_id', test_size=0.1)
        >>> train_df, validation_df = stratified_ranking_split(
        ...     train_validation_df, entity_field='user_id', test_size=0.1
        ... )
        >>> print(train_df.shape, validation_df.shape, test_df.shape)
    """
    entity_interaction_counts = df[entity_field].value_counts()

    interaction_frequencies = entity_interaction_counts.value_counts()
    stratifiable_interaction_counts = interaction_frequencies[interaction_frequencies >= 2].index

    stratifiable_entities = entity_interaction_counts[
        entity_interaction_counts.isin(stratifiable_interaction_counts)
    ].index
    non_stratifiable_entities = entity_interaction_counts[
        ~entity_interaction_counts.isin(stratifiable_interaction_counts)
    ].index

    train_strat, test_strat = (
        train_test_split(
            stratifiable_entities,
            test_size=test_size,
            stratify=entity_interaction_counts[stratifiable_entities],
            random_state=random_state,
        )
        if len(stratifiable_entities) > 1
        else (stratifiable_entities, [])
    )

    if len(non_stratifiable_entities) > 1:
        train_non_strat, test_non_strat = train_test_split(
            non_stratifiable_entities,
            test_size=test_size,
            random_state=random_state,
        )
    else:
        train_non_strat = non_stratifiable_entities
        test_non_strat = []

    train_users = np.concatenate([train_strat, train_non_strat])
    test_users = np.concatenate([test_strat, test_non_strat])

    return df[df[entity_field].isin(train_users)], df[df[entity_field].isin(test_users)]

In [3]:
# Seed shared by the data splits and the models in this notebook.
SEED = 123

# Ranking metrics reported for every experiment: top-10 cut-offs plus MAP.
_TOP_K = 10
metrics = [metric_cls(k=_TOP_K) for metric_cls in (Precision, Recall, NDCG)] + [MAP()]

In [4]:
# Columns are selected explicitly, in (user, item, value) order, after reading.
STEAM_COLUMNS = ['user_id', 'app_id', 'hours']

steam_raw = pd.read_csv(
    "/Users/a-shyraliev/phd/rec-sys-research/collab_filtering_battlefield/steam_recommendations.csv",
    usecols=STEAM_COLUMNS,
)
# Reorder the columns and drop exact duplicate (user, app, hours) rows.
steam_dataset = steam_raw.loc[:, STEAM_COLUMNS].drop_duplicates()

# movielens_100k_dataset = cornac.datasets.movielens.load_feedback()
# movielens_20M_dataset = cornac.datasets.movielens.load_feedback(variant="20m")



# lastfm_dataset = (
#     pd.read_csv(
#         "/Users/a-shyraliev/phd/rec-sys-research/collab_filtering_battlefield/lastfm-dataset-360K/usersha1-artmbid-artname-plays.tsv",
#         sep="\t",
#         header=None,
#         usecols=[0, 2, 3],
#         names=['user_id', 'item_id', 'play_count'],
#         nrows=1000
#     )
#     .loc[:, ['user_id', 'item_id', 'play_count']]
#     .drop_duplicates()
#     .values
# )

In [5]:
# Number of distinct app ids in the Steam data (missing values are not counted).
steam_dataset.loc[:, 'app_id'].nunique(dropna=True)

37610

In [6]:
# steam_dataset_sample = sample_data(steam_dataset, n_users=100000, user_col='user_id')
# Reuse the stratified splitter as a sampler: the "test" side holds roughly 1% of
# the users (with all of their interactions); the other side is discarded.
sample_split = stratified_ranking_split(
    steam_dataset, entity_field='user_id', test_size=0.01, random_state=SEED
)
_, steam_dataset_sample_str = sample_split

In [7]:
# Hold out 20% of the sampled interactions for testing.
ratio_split_options = dict(test_size=0.2, rating_threshold=0.0, seed=SEED)
rs = RatioSplit(data=steam_dataset_sample_str.values, **ratio_split_options)

# Display the train/test interaction matrices.
(rs.train_set.csr_matrix, rs.test_set.csr_matrix)

(<121521x14807 sparse matrix of type '<class 'numpy.float64'>'
 	with 325828 stored elements in Compressed Sparse Row format>,
 <121521x14807 sparse matrix of type '<class 'numpy.float64'>'
 	with 62707 stored elements in Compressed Sparse Row format>)

### 1. MF hyperparams optimization

In [25]:
# Biased MF grid: 3 regularisation strengths x 3 latent dimensions.
# The display name is derived from the hyperparameters of each model.
mf_models = [
    MF(
        name=f'MF k={k}, lambda_reg={lambda_reg}',
        k=k,
        max_iter=200,
        lambda_reg=lambda_reg,
        use_bias=True,
        seed=SEED,
    )
    for lambda_reg in (0.02, 0.1, 1)
    for k in (10, 50, 100)
]

In [None]:
# Evaluate the MF grid on the shared split; results are not saved to disk.
mf_experiment = cornac.Experiment(
    eval_method=rs, models=mf_models, metrics=metrics, user_based=True, save_dir=None
)
mf_experiment.run()


TEST:
...
                          | MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
------------------------- + --- + ------- + ------------ + --------- + --------- + --------
MF k=10, lambda_reg=0.02  | nan |  0.0001 |       0.0000 |    0.0003 |    0.6121 |  21.6584
MF k=50, lambda_reg=0.02  | nan |  0.0001 |       0.0000 |    0.0003 |    1.4565 |  31.7773
MF k=100, lambda_reg=0.02 | nan |  0.0001 |       0.0000 |    0.0003 |    4.6947 |  30.5076
MF k=10, lambda_reg=0.1   | nan |  0.0001 |       0.0000 |    0.0003 |    0.5838 |  21.4168
MF k=50, lambda_reg=0.1   | nan |  0.0001 |       0.0000 |    0.0003 |    1.4895 |  32.6081
MF k=100, lambda_reg=0.1  | nan |  0.0001 |       0.0000 |    0.0003 |    5.0005 |  32.5445
MF k=10, lambda_reg=1     | nan |  0.0001 |       0.0000 |    0.0003 |    0.6227 |  21.9549
MF k=50, lambda_reg=1     | nan |  0.0001 |       0.0000 |    0.0003 |    1.6991 |  32.9640
MF k=100, lambda_reg=1    | nan |  0.0001 |       0.0000 |    0.0003 

### 2. PMF hyperparams optimization

In [27]:
# PMF grid: 3 regularisation strengths x 3 latent dimensions.
pmf_models = [
    PMF(
        name=f'PMF k={k}, lambda_reg={lambda_reg}',
        k=k,
        max_iter=200,
        lambda_reg=lambda_reg,
        seed=SEED,
    )
    for lambda_reg in (0.001, 0.1, 1)
    for k in (5, 10, 15)
]

In [28]:
# Evaluate the PMF grid on the shared split; results are not saved to disk.
pmf_experiment = cornac.Experiment(
    eval_method=rs, models=pmf_models, metrics=metrics, user_based=True, save_dir=None
)
pmf_experiment.run()


TEST:
...
                           |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
-------------------------- + ------ + ------- + ------------ + --------- + --------- + --------
PMF k=5, lambda_reg=0.001  | 0.0019 |  0.0019 |       0.0005 |    0.0039 |    5.5292 |  44.1669
PMF k=10, lambda_reg=0.001 | 0.0017 |  0.0017 |       0.0005 |    0.0036 |    9.4754 |  47.0882
PMF k=15, lambda_reg=0.001 | 0.0008 |  0.0005 |       0.0001 |    0.0011 |   14.2352 |  45.9387
PMF k=5, lambda_reg=0.1    | 0.0007 |  0.0000 |       0.0000 |    0.0000 |    5.2613 |  45.3645
PMF k=10, lambda_reg=0.1   | 0.0007 |  0.0000 |       0.0000 |    0.0000 |   10.0980 |  47.8467
PMF k=15, lambda_reg=0.1   | 0.0007 |  0.0000 |       0.0000 |    0.0000 |   14.9663 |  47.6700
PMF k=5, lambda_reg=1      | 0.0006 |  0.0002 |       0.0001 |    0.0005 |    5.5415 |  45.5808
PMF k=10, lambda_reg=1     | 0.0008 |  0.0004 |       0.0001 |    0.0006 |   10.5913 |  47.4731
PMF k=15, lambda_reg=1     | 

### 3. BPR hyperparams optimization

In [29]:
# BPR grid: 3 regularisation strengths x 3 latent dimensions.
bpr_models = [
    BPR(
        name=f'BPR k={k}, lambda_reg={lambda_reg}',
        k=k,
        max_iter=200,
        lambda_reg=lambda_reg,
        seed=SEED,
    )
    for lambda_reg in (0.01, 0.1, 1)
    for k in (10, 50, 100)
]

In [30]:
# Evaluate the BPR grid on the shared split; results are not saved to disk.
bpr_experiment = cornac.Experiment(
    eval_method=rs, models=bpr_models, metrics=metrics, user_based=True, save_dir=None
)
bpr_experiment.run()


TEST:
...
                           |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
-------------------------- + ------ + ------- + ------------ + --------- + --------- + --------
BPR k=10, lambda_reg=0.01  | 0.0308 |  0.0312 |       0.0092 |    0.0561 |    3.9516 |  45.1212
BPR k=50, lambda_reg=0.01  | 0.0309 |  0.0316 |       0.0094 |    0.0575 |    8.2983 |  57.4187
BPR k=100, lambda_reg=0.01 | 0.0307 |  0.0313 |       0.0093 |    0.0566 |   11.7384 |  57.0230
BPR k=10, lambda_reg=0.1   | 0.0307 |  0.0310 |       0.0090 |    0.0554 |    4.2672 |  47.3254
BPR k=50, lambda_reg=0.1   | 0.0303 |  0.0308 |       0.0091 |    0.0565 |    8.4085 |  57.3794
BPR k=100, lambda_reg=0.1  | 0.0281 |  0.0283 |       0.0087 |    0.0544 |   12.1549 |  55.3509
BPR k=10, lambda_reg=1     | 0.0300 |  0.0307 |       0.0092 |    0.0560 |    3.9472 |  44.8775
BPR k=50, lambda_reg=1     | 0.0299 |  0.0289 |       0.0081 |    0.0509 |    7.5961 |  52.7927
BPR k=100, lambda_reg=1    | 

### 4. BiVAECF hyperparams optimization

In [None]:
# BiVAECF grid: 2 encoder widths x 3 latent dimensions.
# Names are generated from the actual hyperparameters. The hand-written names
# previously labelled the k=50 and k=100 models as 'k=20' and 'k=30', which
# would mislabel rows in the results table.
bivaecf_models = [
    BiVAECF(
        name=f'BiVAECF k={k}, encoder_structure=[{width}]',
        k=k,
        encoder_structure=[width],  # fresh list per model
        use_gpu=False,
        seed=SEED,
    )
    for width in (20, 40)
    for k in (10, 50, 100)
]

: 

In [None]:
# Evaluate the BiVAECF grid on the shared split; results are not saved to disk.
bivaecf_experiment = cornac.Experiment(
    eval_method=rs, models=bivaecf_models, metrics=metrics, user_based=True, save_dir=None
)
bivaecf_experiment.run()


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 7

### 5. RecVAE hyperparams optimization

In [None]:
# RecVAE grid: 3 hidden sizes x 3 latent sizes, trained on CPU.
recvae_models = [
    RecVAE(
        name=f'RecVAE hidden_dim={hidden_dim}, latent_dim={latent_dim}',
        hidden_dim=hidden_dim,
        latent_dim=latent_dim,
        use_gpu=False,
        seed=SEED,
    )
    for hidden_dim in (600, 800, 1000)
    for latent_dim in (100, 200, 300)
]

In [None]:
# Evaluate the RecVAE grid on the shared split; results are not saved to disk.
recvae_experiment = cornac.Experiment(
    eval_method=rs, models=recvae_models, metrics=metrics, user_based=True, save_dir=None
)
recvae_experiment.run()


TEST:
...
                           |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
-------------------------- + ------ + ------- + ------------ + --------- + --------- + --------
BPR k=10, lambda_reg=0.01  | 0.0306 |  0.0305 |       0.0091 |    0.0553 |    0.3570 |   1.8780
BPR k=50, lambda_reg=0.01  | 0.0308 |  0.0306 |       0.0092 |    0.0550 |    0.4974 |   2.1731
BPR k=100, lambda_reg=0.01 | 0.0307 |  0.0307 |       0.0091 |    0.0557 |    0.6450 |   2.1270
BPR k=10, lambda_reg=0.1   | 0.0293 |  0.0294 |       0.0091 |    0.0548 |    0.3522 |   1.8634
BPR k=50, lambda_reg=0.1   | 0.0308 |  0.0299 |       0.0089 |    0.0520 |    0.5152 |   2.1946
BPR k=100, lambda_reg=0.1  | 0.0306 |  0.0300 |       0.0089 |    0.0535 |    0.6750 |   2.2065
BPR k=10, lambda_reg=1     | 0.0304 |  0.0296 |       0.0088 |    0.0528 |    0.3588 |   1.7927
BPR k=50, lambda_reg=1     | 0.0303 |  0.0301 |       0.0091 |    0.0543 |    0.4873 |   2.2372
BPR k=100, lambda_reg=1    | 

In [None]:
# (rs.train_set.csr_matrix.toarray() != 0).sum(1)

In [None]:
# Baseline comparison (biased MF, PMF, BPR) with default-style hyperparameters.
# NOTE(review): `steam_recommendations` is not defined in any cell visible in this
# notebook, so this cell fails on a fresh kernel. Presumably it is the full Steam
# interaction data (e.g. `steam_dataset.values`) - confirm and define it explicitly.
rs = RatioSplit(data=steam_recommendations, test_size=0.2, rating_threshold=0.0, seed=SEED)

# Use the notebook-wide SEED instead of repeating the literal 123.
mf = MF(k=10, max_iter=200, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=SEED)
pmf = PMF(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.001, seed=SEED)
bpr = BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=SEED)

models = [mf, pmf, bpr]

metrics = [Precision(k=10), Recall(k=10), NDCG(k=10), MAP()]

cornac.Experiment(
    eval_method=rs,
    models=models,
    metrics=metrics,
    user_based=True,
    save_dir=None,
).run()




TEST:
...
    |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
--- + ------ + ------- + ------------ + --------- + --------- + --------
MF  |    nan |  0.0025 |       0.0008 |    0.0057 |   42.5482 | 175.5171
PMF | 0.0029 |  0.0004 |       0.0001 |    0.0010 |  294.0087 | 197.7616
BPR | 0.0654 |  0.0730 |       0.0191 |    0.1435 |  254.8315 | 199.4555



In [None]:
# Baseline comparison (biased MF, PMF, BPR) on the Amazon Clothing feedback data.
amzn_clothing = cornac.datasets.amazon_clothing.load_feedback()
# Split the dataset loaded above. The cell previously passed `ml_20M` here,
# leaving `amzn_clothing` unused.
rs = RatioSplit(data=amzn_clothing, test_size=0.2, rating_threshold=0.0, seed=SEED)

# Use the notebook-wide SEED instead of repeating the literal 123.
mf = MF(k=10, max_iter=200, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=SEED)
pmf = PMF(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.001, seed=SEED)
bpr = BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=SEED)

models = [mf, pmf, bpr]

metrics = [Precision(k=10), Recall(k=10), NDCG(k=10), MAP()]

cornac.Experiment(
    eval_method=rs,
    models=models,
    metrics=metrics,
    user_based=True,
    save_dir=None,
).run()

In [None]:
# 80/20 ratio split of the `ml_20M` feedback data.
ml_20m_split_options = {"test_size": 0.2, "rating_threshold": 0.0, "seed": 123}
rs = RatioSplit(data=ml_20M, **ml_20m_split_options)

### 2. Initialize models: SANSA (CHOLMOD and ICF), biased MF, PMF, BPR, BiVAECF, RecVAE, EASE, VAECF, IBPR, NeuMF, and HPF

In [None]:
# Matrix-factorization baselines, defined here so this cell does not depend on
# variables left over from other cells.
mf = MF(k=10, max_iter=200, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=SEED)
pmf = PMF(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.001, seed=SEED)
bpr = BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=SEED)

# SANSA with the CHOLMOD factorizer.
sansa_cholmod = SANSA(
    name="SANSA (CHOLMOD)",
    l2=500.0,
    weight_matrix_density=1e-2,
    compute_gramian=True,
    factorizer_class="CHOLMOD",
    factorizer_shift_step=1e-3,
    factorizer_shift_multiplier=2.0,
    inverter_scans=5,
    inverter_finetune_steps=20,
    use_absolute_value_scores=False,
    verbose=False,
)

# SANSA with the ICF factorizer (note the smaller l2 than the CHOLMOD variant).
sansa_icf = SANSA(
    name="SANSA (ICF)",
    l2=10.0,
    weight_matrix_density=1e-2,
    compute_gramian=True,
    factorizer_class="ICF",
    factorizer_shift_step=1e-3,
    factorizer_shift_multiplier=2.0,
    inverter_scans=5,
    inverter_finetune_steps=20,
    use_absolute_value_scores=False,
    verbose=False,
)

bivaecf = BiVAECF(
    k=10,
    use_gpu=False,
)

lightgcn = LightGCN()

# `recvae` and `vaecf` are listed in `models` below, so they must be defined.
# They were previously commented out, which raises NameError on a fresh kernel.
recvae = RecVAE(
    use_gpu=False,
)

ease = EASE()

ngcf = NGCF()

vaecf = VAECF(
    use_gpu=False,
)

ibpr = IBPR()

neumf = NeuMF()

hpf = HPF()

# `lightgcn` and `ngcf` are instantiated above but deliberately left out of the run.
# NOTE(review): presumably because the DGL import fails in this environment - confirm.
models = [sansa_cholmod, sansa_icf, mf, pmf, bpr, bivaecf, recvae, ease, vaecf, ibpr, neumf, hpf]


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 7

### 3. Evaluation metrics

In [5]:
# Ranking metrics for the model comparison: top-10 cut-offs plus MAP.
top_k = 10
metrics = [
    Precision(k=top_k),
    Recall(k=top_k),
    NDCG(k=top_k),
    MAP(),
]

In [8]:
# Run every model in `models` on the current split; results are not saved to disk.
comparison_experiment = cornac.Experiment(
    eval_method=rs, models=models, metrics=metrics, user_based=True, save_dir=None
)
comparison_experiment.run()

INFO:sansa.core.factorizers:Computing incomplete Cholesky decomposition of X^TX + 500.0*I...
                For low desired desired (1.000000%), computing exact factorization (CHOLMOD) 
                followed by sparsification may be inefficient.
                You may want to try ICFGramianFactorizer instead of CHOLMODGramianFactorizer 
                (requires less memory and may be faster).
                
INFO:sansa.core.factorizers:Finding a fill-in reducing ordering (method = colamd)...
INFO:sansa.core.factorizers:Computing approximate Cholesky decomposition (method = CHOLMOD)...
INFO:sansa.core.factorizers:Dropping small entries in L (42.831549% dense, target = 1.000000%)...
INFO:sansa.core.factorizers:Scaling columns and creating diagonal matrix D (LL^T -> L'DL'^T)...
INFO:sansa.core.inverters:Calculating initial guess using 1 step of Schultz method...
INFO:sansa.core.inverters:Calculating approximate inverse using Uniform Minimal Residual algorithm...


Computing LDL^T decomposition of permuted item-item matrix...
Computing approximate inverse of L...


INFO:sansa.core._ops._inverse_ops:Current maximum residual: 10.131071105163123, relative Frobenius norm squared: 0.6055248249349813
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 1...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 1.2457866332326708, relative Frobenius norm squared: 0.027941335053085103
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 2...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 1.1095483615359882, relative Frobenius norm squared: 0.018554855430191287
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 3...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 1.0999471088459514, relative Frobenius norm squared: 0.015755439699060094
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 4...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.9496707834875402, relative Frobenius norm squared: 0.013191835991712673
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 5...
INFO:sansa.core._ops._inverse_ops:Cur

Computing LDL^T decomposition of permuted item-item matrix...


INFO:sansa.core.factorizers:Computing approximate Cholesky decomposition (method = ICF)...
INFO:sansa.core.factorizers:Scaling columns and creating diagonal matrix D (LL^T -> L'DL'^T)...
INFO:sansa.core.inverters:Calculating initial guess using 1 step of Schultz method...
INFO:sansa.core.inverters:Calculating approximate inverse using Uniform Minimal Residual algorithm...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.0031312673818320036, relative Frobenius norm squared: 7.613023740304925e-07
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 1...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.0031312673818320036, relative Frobenius norm squared: 7.613023740304925e-07
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 2...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.0031312673818320036, relative Frobenius norm squared: 7.613023740304925e-07
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 3...


Computing approximate inverse of L...


INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.0031312673818320036, relative Frobenius norm squared: 7.613023740304925e-07
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 4...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.0015427451580762863, relative Frobenius norm squared: 3.3410395872124354e-07
INFO:sansa.core._ops._inverse_ops:Performing UMR scan 5...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.0015363885322585702, relative Frobenius norm squared: 3.223801172680396e-07
INFO:sansa.core._ops._inverse_ops:Performing UMR finetune step 1...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.0015363885322585702, relative Frobenius norm squared: 3.223800035812019e-07
INFO:sansa.core._ops._inverse_ops:Performing UMR finetune step 2...
INFO:sansa.core._ops._inverse_ops:Current maximum residual: 0.0015363885322585702, relative Frobenius norm squared: 3.223800035812019e-07
INFO:sansa.core._ops._inverse_ops:Performing UMR finetu

Learning...
Learning completed!

TEST:
...
                |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
--------------- + ------ + ------- + ------------ + --------- + --------- + --------
SANSA (CHOLMOD) | 0.2278 |  0.3272 |       0.2257 |    0.2660 |   26.8414 |   0.4076
SANSA (ICF)     | 0.2192 |  0.2930 |       0.2053 |    0.2343 |   13.1919 |   0.4110
MF              | 0.0290 |  0.0214 |       0.0218 |    0.0165 |    0.1259 |   0.3081
PMF             | 0.0538 |  0.0744 |       0.0618 |    0.0493 |    2.1268 |   0.2803
BPR             | 0.1022 |  0.1435 |       0.1077 |    0.1214 |    1.0798 |   0.2892
BiVAECF         | 0.1409 |  0.1857 |       0.1413 |    0.1555 |    9.6529 |   0.2986
RecVae          | 0.1267 |  0.1621 |       0.1259 |    0.1476 |   41.5360 |   0.6625
EASEᴿ           | 0.1743 |  0.2327 |       0.1630 |    0.2078 |    0.1129 |   0.3610
VAECF           | 0.1523 |  0.2047 |       0.1549 |    0.1706 |    3.8085 |   0.3938
IBPR            | 0.09

In [3]:
# Load MovieLens feedback with the loader's default settings.
movielens = cornac.datasets.movielens
ml_100k = movielens.load_feedback()


In [None]:
# Same comparison as before, without the CHOLMOD variant of SANSA.
# NOTE(review): `rs_mod` is not defined in any cell visible in this notebook;
# define the evaluation split explicitly before running this cell.
models = [
    sansa_icf,
    mf,
    pmf,
    bpr,
    bivaecf,
    recvae,
    ease,
    vaecf,
    ibpr,
    neumf,
    hpf,
]

rs_mod_experiment = cornac.Experiment(
    eval_method=rs_mod, models=models, metrics=metrics, user_based=True, save_dir=None
)
rs_mod_experiment.run()

INFO:sansa.core.factorizers:Computing incomplete Cholesky decomposition of X^TX + 10.0*I...
INFO:sansa.core.factorizers:Finding a fill-in reducing ordering (method = colamd)...
INFO:sansa.core.factorizers:Computing X^TX...
INFO:sansa.core.factorizers:
                X^TX info:
                    shape = (1656, 1656) 
                    nnz = 1722862 
                    density = 62.824614% 
                    size = 13.8 MB
                
                Attempting incomplete factorization of a relatively dense matrix (62.824614% dense). 
                This is unstable:
                 - the factorization might fail and automatically restart with additional regularization
                 - the resulting approximate factor might be of lesser quality
                You may want to try CHOLMODGramianFactorizer instead of ICFGramianFactorizer 
                (requires more memory but is likely faster and more accurate).
                
INFO:sansa.core.factorizers:Sorting indi

In [2]:
# Environment diagnostic: display the installed PyTorch version.
import torch
torch.__version__

'2.2.0'

In [1]:
# Environment diagnostic: check that DGL imports and display its version.
# NOTE(review): the saved output shows this import failing (a NumPy 1.x/2.x ABI
# warning followed by ModuleNotFoundError for `torch.utils._import_utils`).
import dgl
dgl.__version__


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/a-shyraliev/.pyenv/versions/3.12.2/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 7

ModuleNotFoundError: No module named 'torch.utils._import_utils'

In [87]:
# Baseline comparison (biased MF, PMF, BPR) on the MovieLens "20m" variant.
ml_20M = cornac.datasets.movielens.load_feedback(variant="20m")
rs = RatioSplit(data=ml_20M, test_size=0.2, rating_threshold=0.0, seed=123)

# Settings shared by all three baselines.
baseline_options = dict(k=10, max_iter=200, seed=123)
mf = MF(learning_rate=0.01, lambda_reg=0.02, use_bias=True, **baseline_options)
pmf = PMF(learning_rate=0.001, lambda_reg=0.001, **baseline_options)
bpr = BPR(learning_rate=0.001, lambda_reg=0.01, **baseline_options)
models = [mf, pmf, bpr]

metrics = [
    Precision(k=10),
    Recall(k=10),
    NDCG(k=10),
    MAP(),
]

ml_20m_experiment = cornac.Experiment(
    eval_method=rs, models=models, metrics=metrics, user_based=True, save_dir=None
)
ml_20m_experiment.run()


TEST:
...
    |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
--- + ------ + ------- + ------------ + --------- + --------- + --------
MF  | 0.0031 |  0.0003 |       0.0004 |    0.0002 |   31.1900 | 327.8577
PMF | 0.0108 |  0.0191 |       0.0197 |    0.0099 |  515.0184 | 330.4225
BPR | 0.1050 |  0.1940 |       0.1690 |    0.0777 |  577.5718 | 315.8166

