In [1]:
import os
os.environ["OPENBLAS_NUM_THREADS"] = "1"
import numpy as np, pandas as pd
from scipy.sparse import coo_matrix
from implicit.als import AlternatingLeastSquares

# Read the three CSV files (all / train / test) into DataFrames. Each frame is
# later expected to provide `user_id`, `item_id` and `counts` columns.
df_all, df_train, df_test = map(
    pd.read_csv,
    ("lfm_all.csv", "lfm_train.csv", "lfm_test.csv"),
)

def df_to_csr(df, shape=None):
    """Build a CSR matrix with users on rows and items on columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``user_id`` (row position), ``item_id`` (column position)
        and ``counts`` (value stored at that position).
    shape : tuple of (int, int), optional
        Explicit ``(n_rows, n_cols)`` for the result. When omitted, SciPy
        infers the shape from the largest ids present in ``df``, so frames
        that cover different id ranges produce matrices of different shapes.
        Passing ``shape`` also allows converting a frame with zero rows.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix with float64 values. Ids are cast to int32 before assembly.
        Repeated ``(user_id, item_id)`` pairs are summed by SciPy during the
        COO -> CSR conversion.

    Raises
    ------
    ValueError
        Raised by SciPy when ``shape`` is omitted and ``df`` is empty, or when
        an id falls outside the requested ``shape``.
    """
    # Cast up front instead of overwriting attributes of an already-built
    # COO matrix.
    rows = np.asarray(df.user_id, dtype=np.int32)
    cols = np.asarray(df.item_id, dtype=np.int32)
    vals = np.asarray(df.counts, dtype=np.float64)
    return coo_matrix((vals, (rows, cols)), shape=shape).tocsr()

# Convert every frame to a sparse matrix. Shapes are inferred per frame, so
# the three matrices are not guaranteed to share dimensions.
X_all, X_train, X_test = [
    df_to_csr(frame) for frame in (df_all, df_train, df_test)
]

# Echo the training matrix to show its shape, dtype and number of stored entries.
X_train

<358858x160112 sparse matrix of type '<class 'numpy.float64'>'
	with 17164027 stored elements in Compressed Sparse Row format>

In [2]:
%%time
# Timing run 1 of 2: ALS with use_cg=True, fitted on the full matrix X_all.
model_cg = AlternatingLeastSquares(
    # Model size and optimisation schedule.
    factors=50,
    regularization=5,
    iterations=15,
    random_state=123,
    # Solver switch: the only argument that differs from the use_cg=False run.
    use_cg=True,
    # Execution settings.
    dtype=np.float64,
    use_native=True,
    use_gpu=False,
    num_threads=0,  # presumably "use all cores" per implicit's docs; confirm
    calculate_training_loss=False,
)
model_cg.fit(X_all)

  0%|          | 0/15 [00:00<?, ?it/s]

CPU times: user 7min 1s, sys: 945 ms, total: 7min 2s
Wall time: 29 s


In [3]:
%%time
# Timing run 2 of 2: ALS with use_cg=False, fitted on the full matrix X_all.
# NOTE(review): the `_chol` suffix suggests the non-CG path is a
# Cholesky-based solve; confirm against the installed implicit version.
model_chol = AlternatingLeastSquares(
    # Model size and optimisation schedule.
    factors=50,
    regularization=5,
    iterations=15,
    random_state=123,
    # Solver switch: the only argument that differs from the use_cg=True run.
    use_cg=False,
    # Execution settings.
    dtype=np.float64,
    use_native=True,
    use_gpu=False,
    num_threads=0,  # presumably "use all cores" per implicit's docs; confirm
    calculate_training_loss=False,
)
model_chol.fit(X_all)

  0%|          | 0/15 [00:00<?, ?it/s]

CPU times: user 25min 20s, sys: 736 ms, total: 25min 21s
Wall time: 1min 38s


In [4]:
from recometrics import calc_reco_metrics

def _fit_and_score(use_cg):
    """Fit ALS on ``X_train`` and score it against ``X_test``.

    Both solver variants share every hyperparameter except ``use_cg``, so the
    fit-and-evaluate sequence lives here once instead of being pasted twice.

    Parameters
    ----------
    use_cg : bool
        Forwarded unchanged to ``AlternatingLeastSquares(use_cg=...)``.

    Returns
    -------
    tuple
        ``(model, metrics)``: the fitted model and the object returned by
        ``calc_reco_metrics`` (called with ``k=10, all_metrics=True``).
    """
    model = AlternatingLeastSquares(
        factors=50, regularization=5, dtype=np.float64,
        use_native=True, use_cg=use_cg, iterations=15,
        calculate_training_loss=False, use_gpu=False,
        num_threads=0, random_state=123
    )
    # NOTE(review): X_train has users on rows; this assumes an implicit
    # release whose fit() expects that orientation. Confirm.
    model.fit(X_train)

    # X_test's shape is inferred from its own ids and may be smaller than
    # X_train's, so the training matrix and both factor matrices are cropped
    # to X_test's dimensions before scoring. Presumably the test users and
    # items occupy the leading ids; verify against how the split was made.
    n_users, n_items = X_test.shape
    metrics = calc_reco_metrics(
        X_train[:n_users, :n_items],
        X_test,
        model.user_factors[:n_users, :],
        model.item_factors[:n_items, :],
        k=10, all_metrics=True
    )
    return model, metrics


# Same order as before: CG first, then the non-CG solver. From here on
# model_cg / model_chol refer to models fitted on X_train, not X_all.
model_cg, metrics_cg = _fit_and_score(use_cg=True)
model_chol, metrics_chol = _fit_and_score(use_cg=False)

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

In [5]:
# One-row summary for the use_cg=True model: the mean of every metric column.
metrics_cg.mean().to_frame().transpose()

Unnamed: 0,P@10,TP@10,R@10,AP@10,TAP@10,NDCG@10,Hit@10,RR@10,ROC_AUC,PR_AUC
0,0.17007,0.170982,0.117821,0.061387,0.089215,0.167968,0.7851,0.411801,0.980403,0.120986


In [6]:
# One-row summary for the use_cg=False model: the mean of every metric column.
metrics_chol.mean().to_frame().transpose()

Unnamed: 0,P@10,TP@10,R@10,AP@10,TAP@10,NDCG@10,Hit@10,RR@10,ROC_AUC,PR_AUC
0,0.17031,0.171256,0.117941,0.061371,0.089321,0.168132,0.7872,0.413636,0.980073,0.121167
