In [1]:
import os
os.environ["OPENBLAS_NUM_THREADS"] = "1"
import numpy as np, pandas as pd
from scipy.sparse import coo_matrix
from lenskit.algorithms.als import ImplicitMF

# Read the three CSV inputs from the working directory; going by the file
# names these are the full data plus its train/test splits.
df_all, df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ("lfm_all.csv", "lfm_train.csv", "lfm_test.csv")
)

def df_to_csr(df, ncols=None):
    """Convert a long-format interactions frame into a SciPy CSR matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``user_id`` (row index), ``item_id``
        (column index) and ``counts`` (cell value).
    ncols : int, optional
        When given, the matrix shape is fixed to
        ``(df.user_id.max() + 1, ncols)``. When omitted, no shape is passed
        and ``coo_matrix`` infers it from the indices.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix with ``float64`` values. The COO row/column indices are cast
        to ``int32`` before the conversion to CSR.
    """
    dims = (df["user_id"].max() + 1, ncols) if ncols is not None else None
    triplets = coo_matrix(
        (df["counts"], (df["user_id"], df["item_id"])),
        shape=dims,
    )
    # Normalise the index and value dtypes before converting to CSR.
    triplets.row = triplets.row.astype(np.int32)
    triplets.col = triplets.col.astype(np.int32)
    triplets.data = triplets.data.astype(np.float64)
    return triplets.tocsr()

# Sparse matrices for evaluation. The test matrix is given the same number of
# columns as the train matrix so both can be indexed with the same item ids.
X_train = df_to_csr(df_train)
X_test = df_to_csr(df_test, ncols=X_train.shape[1])

# Rename the columns to user / item / rating for the model-fitting cells.
# This has to happen after the matrices are built, because df_to_csr reads
# the original column names.
remap = {"user_id":"user", "item_id":"item", "counts":"rating"}
df_all, df_train, df_test = (
    frame.rename(columns=remap)
    for frame in (df_all, df_train, df_test)
)

In [2]:
%%time
### despite what it says below, it refused to work with TBB as threading layer,
### whether installed through conda or through pip, so it was left with omp
model_cg = ImplicitMF(
    features=50, iterations=15, reg=5, weight=1,
    use_ratings=True, method="cg", rng_spec=123,
    progress=None, save_user_features=True
)
model_cg.fit(df_all)

Numba is using threading layer omp - consider TBB
BLAS using multiple threads - can cause oversubscription
found 2 potential runtime problems - see https://boi.st/lkpy-perf


CPU times: user 15min 3s, sys: 3.46 s, total: 15min 6s
Wall time: 1min 8s


<lenskit.algorithms.als.ImplicitMF at 0x7fa868718100>

In [3]:
%%time
model_chol = ImplicitMF(
    features=50, iterations=15, reg=5, weight=1,
    use_ratings=True, method="lu", rng_spec=123,
    progress=None, save_user_features=True
)
model_chol.fit(df_all)

CPU times: user 19min 49s, sys: 4.32 s, total: 19min 53s
Wall time: 1min 24s


<lenskit.algorithms.als.ImplicitMF at 0x7fa7a64774c0>

In [4]:
from recometrics import calc_reco_metrics


def _fit_and_evaluate(method):
    """Fit ImplicitMF on ``df_train`` with one solver and score it on the test split.

    The same hyperparameters are used for every solver, so the resulting
    metrics differ only through the ``method`` argument.

    Parameters
    ----------
    method : str
        Solver name forwarded to ``ImplicitMF(method=...)``; this notebook
        uses ``"cg"`` and ``"lu"``.

    Returns
    -------
    tuple
        ``(model, metrics)``: the fitted ``ImplicitMF`` instance and the
        object returned by ``calc_reco_metrics`` for ``k=10`` with
        ``all_metrics=True``.
    """
    model = ImplicitMF(
        features=50, iterations=15, reg=5, weight=1,
        use_ratings=True, method=method, rng_spec=123,
        progress=None, save_user_features=True
    )
    model.fit(df_train)

    # Only the first X_test.shape[0] user ids are evaluated.
    n_test_users = X_test.shape[0]
    # Columns of both matrices are selected with the model's item index so
    # they line up with the rows of item_features_.
    # NOTE(review): the boolean mask keeps user factors in the order of
    # model.user_index_. This matches the row order of the sliced matrices
    # only if user_index_ is sorted and contains every id below n_test_users;
    # confirm against the LensKit documentation.
    metrics = calc_reco_metrics(
        X_train[:n_test_users, model.item_index_],
        X_test[:, model.item_index_],
        model.user_features_[model.user_index_ < n_test_users, :],
        model.item_features_,
        k=10, all_metrics=True
    )
    return model, metrics


# Evaluate both solvers in the same order as before: "cg" first, then "lu".
# The second pair keeps the *_chol names used by the later cells even though
# the solver string is "lu".
model_cg, metrics_cg = _fit_and_evaluate("cg")
model_chol, metrics_chol = _fit_and_evaluate("lu")

In [5]:
# Column-wise mean of the "cg" metrics, displayed as a single-row table.
metrics_cg.mean().to_frame().transpose()

Unnamed: 0,P@10,TP@10,R@10,AP@10,TAP@10,NDCG@10,Hit@10,RR@10,ROC_AUC,PR_AUC
0,0.17069,0.171744,0.118615,0.061851,0.089617,0.168607,0.7845,0.413202,0.981176,0.121846


In [6]:
# Column-wise mean of the "lu" metrics, displayed as a single-row table.
metrics_chol.mean().to_frame().transpose()

Unnamed: 0,P@10,TP@10,R@10,AP@10,TAP@10,NDCG@10,Hit@10,RR@10,ROC_AUC,PR_AUC
0,0.16941,0.170519,0.117761,0.06156,0.089272,0.167357,0.7817,0.41194,0.98148,0.122121
