In [1]:
import numpy as np, pandas as pd
from scipy.sparse import coo_matrix

# Load the three interaction tables (full data, train split, test split),
# reading the files in the order all -> train -> test.
df_all, df_train, df_test = (
    pd.read_csv(csv_name)
    for csv_name in ("lfm_all.csv", "lfm_train.csv", "lfm_test.csv")
)

def df_to_csr(df, shape=None):
    """Convert a long-format interactions table into a CSR matrix.

    Parameters
    ----------
    df : pandas.DataFrame (or any object exposing the same attributes)
        Must provide ``user_id``, ``item_id`` and ``counts``. ``user_id``
        is used as the row index, ``item_id`` as the column index and
        ``counts`` as the stored value.
    shape : tuple of (int, int), optional
        Explicit ``(n_rows, n_cols)`` for the result. When omitted (the
        default, and the original behaviour) the shape is inferred by
        ``coo_matrix`` from the largest row/column index present in ``df``,
        so frames covering different id ranges yield matrices of different
        shapes. Pass a common shape to get aligned matrices across splits.

    Returns
    -------
    scipy.sparse CSR matrix
        Values are stored as float64.
    """
    X = coo_matrix((df.counts, (df.user_id, df.item_id)), shape=shape)
    # Normalise dtypes on the COO triplet before converting: 32-bit indices
    # and double-precision values.
    X.row = X.row.astype(np.int32)
    X.col = X.col.astype(np.int32)
    X.data = X.data.astype(np.float64)
    return X.tocsr()

# Build one CSR matrix per interaction table (all, train, test — in that order).
X_all, X_train, X_test = map(df_to_csr, (df_all, df_train, df_test))

In [2]:
import os
# Set before the cornac imports below — presumably so TensorFlow's native
# log filter is already in place when cornac loads it
# (NOTE(review): confirm cornac's WMF pulls in TensorFlow at import time).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import logging
# Globally suppress log records of severity WARNING and below.
logging.disable(logging.WARNING)
from cornac.data.dataset import Dataset
from cornac.models.wmf.recom_wmf import WMF

def coo_to_cornac(X):
    """Wrap a COO sparse matrix in a cornac ``Dataset``.

    ``X`` must expose ``shape``, ``row``, ``col`` and ``data`` (i.e. be in
    COO format). Rows are treated as users and columns as items; both id
    maps are the identity (index ``i`` maps to ``i``), and the
    (row, col, data) triplet is handed over unchanged. The dataset is
    created with ``seed=1``.
    """
    n_users, n_items = X.shape
    # Identity lookups: ids in the matrix are already the indices.
    user_id_map = dict(zip(range(n_users), range(n_users)))
    item_id_map = dict(zip(range(n_items), range(n_items)))
    uir_triplet = (X.row, X.col, X.data)
    return Dataset(
        n_users,
        n_items,
        user_id_map,
        item_id_map,
        uir_triplet,
        seed=1,
    )

# Cornac datasets for the full data and for the training split; each CSR
# matrix is converted to COO first because coo_to_cornac reads row/col/data.
dt_full, dt_train = (
    coo_to_cornac(csr.tocoo()) for csr in (X_all, X_train)
)

In [3]:
%%time
# Fit WMF on the full dataset; this cell exists to measure fitting time
# (see the %%time magic at the top of the cell).
model_sgd = WMF(k=50, max_iter=15, lambda_u=5, lambda_v=5, seed=123)
# Kept as the last expression so the fitted model's repr is the cell output.
model_sgd.fit(dt_full)

  0%|          | 0/15 [00:00<?, ?it/s]

Learning completed!
CPU times: user 20h 30min 37s, sys: 20h 29min 50s, total: 1d 17h 28s
Wall time: 3h 42min 18s


<cornac.models.wmf.recom_wmf.WMF at 0x7fd4a41fde20>

In [4]:
from recometrics import calc_reco_metrics

# Refit a fresh WMF with the same hyperparameters, this time on the training
# split only. NOTE: this rebinds `model_sgd`, replacing the model that was
# fitted on the full data in the timing cell.
model_sgd = WMF(k=50, max_iter=15, lambda_u=5, lambda_v=5, seed=123)
model_sgd.fit(dt_train)
# Score the model against the held-out test matrix. The training matrix and
# both factor matrices are cut down to X_test's shape so all inputs line up.
# NOTE(review): this assumes the test users/items occupy the leading indices
# and that X_train is at least as large as X_test in both dimensions — confirm
# against how the splits were generated.
# X_train is already CSR (df_to_csr returns X.tocsr()), so the .tocsr() call
# below should not change it.
# model_sgd.U / model_sgd.V are presumably the user and item factor matrices
# (one row per user / item) — verify against the cornac WMF documentation.
metrics_sgd = calc_reco_metrics(
    X_train.tocsr()[:X_test.shape[0],:X_test.shape[1]],
    X_test,
    model_sgd.U[:X_test.shape[0],:],
    model_sgd.V[:X_test.shape[1],:],
    k=10, all_metrics=True
)
# Column-wise mean of the metrics frame, displayed as a single-row table.
metrics_sgd.mean().to_frame().T

  0%|          | 0/15 [00:00<?, ?it/s]

Learning completed!


Unnamed: 0,P@10,TP@10,R@10,AP@10,TAP@10,NDCG@10,Hit@10,RR@10,ROC_AUC,PR_AUC
0,0.00889,0.008937,0.006234,0.00213,0.003009,0.006946,0.0812,0.026816,0.922328,0.006288
