In [1]:
import numpy as np
import pandas as pd
from cmfrec import CMF_implicit
from recometrics import calc_reco_metrics
from scipy.sparse import coo_matrix

# Load the full interaction table plus its train/test partitions, in that order.
df_all, df_train, df_test = map(
    pd.read_csv,
    ("lfm_all.csv", "lfm_train.csv", "lfm_test.csv"),
)

def df_to_coo(df, shape=None):
    """Convert a (user, item, count) table into a sparse COO matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``user_id`` (used as row index),
        ``item_id`` (used as column index) and ``counts`` (stored values).
    shape : tuple of (int, int), optional
        Explicit ``(n_users, n_items)`` shape for the result. When omitted,
        the shape is inferred from the largest row/column index present in
        ``df``, so frames covering different users or items yield matrices
        of different shapes. Pass a shape to force matrices built from
        different frames to be aligned, or to convert an empty frame.

    Returns
    -------
    scipy.sparse.coo_matrix
        Matrix with ``int32`` row/column indices and ``float64`` data.
    """
    # Cast up front so the matrix is built with its final dtypes instead of
    # being mutated attribute-by-attribute after construction.
    rows = df["user_id"].to_numpy(dtype=np.int32)
    cols = df["item_id"].to_numpy(dtype=np.int32)
    vals = df["counts"].to_numpy(dtype=np.float64)
    return coo_matrix((vals, (rows, cols)), shape=shape)

# Build one sparse interaction matrix per data frame (full, train, test).
X_all, X_train, X_test = (
    df_to_coo(frame) for frame in (df_all, df_train, df_test)
)

# Show the training matrix summary (shape, dtype, number of stored entries).
X_train

<358858x160112 sparse matrix of type '<class 'numpy.float64'>'
	with 17164027 stored elements in COOrdinate format>

In [2]:
%%time
# Timed fit on the full interaction matrix using the conjugate-gradient
# solver (use_cg=True) with finalize_chol=False.
model_cg = CMF_implicit(
    k=50,
    lambda_=5,
    niter=15,
    use_float=False,
    use_cg=True,
    finalize_chol=False,
    precompute_for_predictions=False,
    verbose=False,
)
model_cg.fit(X_all)

CPU times: user 7min 44s, sys: 923 ms, total: 7min 44s
Wall time: 31.9 s


Collective matrix factorization model
(implicit-feedback variant)


In [3]:
%%time
# Timed fit on the full interaction matrix with use_cg=False, keeping every
# other hyperparameter the same as the conjugate-gradient run for comparison.
model_chol = CMF_implicit(
    k=50,
    lambda_=5,
    niter=15,
    use_float=False,
    use_cg=False,
    precompute_for_predictions=False,
    verbose=False,
)
model_chol.fit(X_all)

CPU times: user 13min 16s, sys: 484 ms, total: 13min 16s
Wall time: 53.1 s


Collective matrix factorization model
(implicit-feedback variant)


In [4]:
def fit_and_score(X_fit, X_holdout, use_cg, k=50, lambda_=5, niter=15, top_k=10):
    """Fit an implicit-feedback model on ``X_fit`` and score it on ``X_holdout``.

    Parameters
    ----------
    X_fit : scipy sparse matrix
        Training interactions (rows are users, columns are items).
    X_holdout : scipy sparse matrix
        Held-out interactions used for evaluation. Only the leading
        ``X_holdout.shape[0]`` users and ``X_holdout.shape[1]`` items of the
        training data and of the fitted factors are passed to the evaluator.
    use_cg : bool
        ``True`` fits with ``use_cg=True, finalize_chol=False``; ``False``
        fits with ``use_cg=False`` and leaves ``finalize_chol`` at the
        library default.
    k, lambda_, niter : int
        Hyperparameters forwarded to ``CMF_implicit``.
    top_k : int
        Value passed as ``k`` to ``calc_reco_metrics``.

    Returns
    -------
    (model, metrics)
        The fitted ``CMF_implicit`` object and whatever
        ``calc_reco_metrics`` returns for it.
    """
    if use_cg:
        solver_args = {"use_cg": True, "finalize_chol": False}
    else:
        solver_args = {"use_cg": False}

    model = CMF_implicit(
        k=k, lambda_=lambda_, niter=niter, use_float=False,
        precompute_for_predictions=False,
        verbose=False,
        **solver_args
    ).fit(X_fit)

    # Restrict everything to the user/item range covered by the holdout
    # matrix so that all four inputs to the evaluator line up.
    n_users, n_items = X_holdout.shape
    metrics = calc_reco_metrics(
        X_fit.tocsr()[:n_users, :n_items],
        X_holdout.tocsr(),
        model.A_[:n_users, :],
        model.B_[:n_items, :],
        k=top_k, all_metrics=True
    )
    return model, metrics


# Same data and hyperparameters for both runs; only the solver differs.
# NOTE: these rebind model_cg / model_chol, which earlier cells fitted on X_all.
model_cg, metrics_cg = fit_and_score(X_train, X_test, use_cg=True)
model_chol, metrics_chol = fit_and_score(X_train, X_test, use_cg=False)

In [5]:
# Column-wise mean of the conjugate-gradient metrics, laid out as a single row.
metrics_cg.mean().to_frame().transpose()

Unnamed: 0,P@10,TP@10,R@10,AP@10,TAP@10,NDCG@10,Hit@10,RR@10,ROC_AUC,PR_AUC
0,0.16969,0.170697,0.117693,0.061095,0.088718,0.168071,0.781,0.409968,0.98019,0.12135


In [6]:
# Column-wise mean of the use_cg=False metrics, laid out as a single row.
metrics_chol.mean().to_frame().transpose()

Unnamed: 0,P@10,TP@10,R@10,AP@10,TAP@10,NDCG@10,Hit@10,RR@10,ROC_AUC,PR_AUC
0,0.1701,0.171118,0.117924,0.06131,0.089032,0.167783,0.7844,0.413211,0.980527,0.121761
