In [1]:
import numpy as np, pandas as pd
from scipy.sparse import coo_matrix

# Read the three interaction tables (full data, train split, test split)
# in a single unpacking assignment; the file order matches the names.
df_all, df_train, df_test = map(
    pd.read_csv,
    ("lfm_all.csv", "lfm_train.csv", "lfm_test.csv"),
)

def df_to_csr(df, ncols=None):
    """Convert an interactions frame into a scipy CSR matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame exposing ``user_id`` (row index), ``item_id`` (column index)
        and ``counts`` (stored value) columns.
    ncols : int, optional
        Number of columns to force on the result. When given, the shape is
        ``(df.user_id.max() + 1, ncols)``; when omitted, scipy infers the
        shape from the largest indices present.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix with float64 values. Repeated (user_id, item_id) pairs are
        summed by scipy during the COO -> CSR conversion.
    """
    dims = (df.user_id.max() + 1, ncols) if ncols is not None else None
    triplets = coo_matrix((df.counts, (df.user_id, df.item_id)), shape=dims)
    # Normalise the coordinate and value dtypes before converting to CSR.
    triplets.row = triplets.row.astype(np.int32)
    triplets.col = triplets.col.astype(np.int32)
    triplets.data = triplets.data.astype(np.float64)
    return triplets.tocsr()

# Train matrix: shape is inferred from the largest user/item ids present.
X_train = df_to_csr(df_train)
# Test matrix: forced to the same number of columns as the train matrix.
# Its row count still comes from df_test.user_id.max() + 1, so it may
# differ from X_train's.
X_test = df_to_csr(df_test, ncols=X_train.shape[1])

X_train

<358858x160112 sparse matrix of type '<class 'numpy.float64'>'
	with 17164027 stored elements in Compressed Sparse Row format>

In [2]:
# Dump the full and train tables as headerless, index-free,
# space-separated text files (the inputs consumed by the `wals` cells).
for frame, out_path in (
    (df_all, "lfm_all_qmf.txt"),
    (df_train, "lfm_train_qmf.txt"),
):
    frame.to_csv(out_path, header=None, index=False, sep=" ")

In [3]:
%%time
%%bash
# Fit WALS on the full-data export; %%time records the wall-clock cost.
# No factor output paths are passed in this run.
./wals \
    --train_dataset=lfm_all_qmf.txt \
    --regularization_lambda=5 \
    --confidence_weight=1 \
    --nepochs=15 \
    --nfactors=50 \
    --nthreads=$(nproc)

I0130 16:20:56.736975 51412 wals.cpp:85] loading training data
I0130 16:21:05.400418 51412 wals.cpp:95] training
I0130 16:21:11.585600 51412 WALSEngine.cpp:80] epoch 1: train loss = 0.0311849
I0130 16:21:17.760138 51412 WALSEngine.cpp:80] epoch 2: train loss = 0.00979692
I0130 16:21:24.229785 51412 WALSEngine.cpp:80] epoch 3: train loss = 0.00852864
I0130 16:21:30.383226 51412 WALSEngine.cpp:80] epoch 4: train loss = 0.00815274
I0130 16:21:36.551026 51412 WALSEngine.cpp:80] epoch 5: train loss = 0.00797889
I0130 16:21:42.725528 51412 WALSEngine.cpp:80] epoch 6: train loss = 0.00787992
I0130 16:21:48.944829 51412 WALSEngine.cpp:80] epoch 7: train loss = 0.0078166
I0130 16:21:55.209519 51412 WALSEngine.cpp:80] epoch 8: train loss = 0.00777291
I0130 16:22:01.493095 51412 WALSEngine.cpp:80] epoch 9: train loss = 0.00774109
I0130 16:22:07.788581 51412 WALSEngine.cpp:80] epoch 10: train loss = 0.00771694
I0130 16:22:14.612273 51412 WALSEngine.cpp:80] epoch 11: train loss = 0.00769798
I0130 1

CPU times: user 25.9 ms, sys: 4.86 ms, total: 30.8 ms
Wall time: 1min 42s


In [4]:
%%bash
# Fit WALS on the train split with the same hyperparameters as the timing
# run, writing user factors to qmf_out_A.txt and item factors to
# qmf_out_B.txt for the evaluation cell.
./wals \
    --train_dataset=lfm_train_qmf.txt \
    --user_factors=qmf_out_A.txt \
    --item_factors=qmf_out_B.txt \
    --regularization_lambda=5 \
    --confidence_weight=1 \
    --nepochs=15 \
    --nfactors=50 \
    --nthreads=$(nproc)

I0130 16:22:39.548285 51653 wals.cpp:85] loading training data
I0130 16:22:47.779603 51653 wals.cpp:95] training
I0130 16:22:55.056471 51653 WALSEngine.cpp:80] epoch 1: train loss = 0.0311324
I0130 16:23:02.110939 51653 WALSEngine.cpp:80] epoch 2: train loss = 0.00975674
I0130 16:23:09.361900 51653 WALSEngine.cpp:80] epoch 3: train loss = 0.0084877
I0130 16:23:15.750965 51653 WALSEngine.cpp:80] epoch 4: train loss = 0.00811952
I0130 16:23:21.997252 51653 WALSEngine.cpp:80] epoch 5: train loss = 0.00795056
I0130 16:23:28.306654 51653 WALSEngine.cpp:80] epoch 6: train loss = 0.00785446
I0130 16:23:34.683807 51653 WALSEngine.cpp:80] epoch 7: train loss = 0.00779274
I0130 16:23:41.628362 51653 WALSEngine.cpp:80] epoch 8: train loss = 0.00774977
I0130 16:23:48.921173 51653 WALSEngine.cpp:80] epoch 9: train loss = 0.00771808
I0130 16:23:56.186064 51653 WALSEngine.cpp:80] epoch 10: train loss = 0.00769369
I0130 16:24:02.648850 51653 WALSEngine.cpp:80] epoch 11: train loss = 0.00767434
I0130 1

In [5]:
from recometrics import calc_reco_metrics

# Load the factor files that the train-split `wals` run was asked to write
# (--user_factors / --item_factors). Column 0 of each file is used below as
# the user/item id; the remaining columns are the factor values.
# The separator is a raw string: "\s" in a plain literal is an invalid
# escape sequence and triggers a Deprecation/SyntaxWarning.
A = pd.read_csv("qmf_out_A.txt", sep=r"\s", engine="python", header=None)
B = pd.read_csv("qmf_out_B.txt", sep=r"\s", engine="python", header=None)

idx_A = A[0]
# Only users whose id is a valid row index of X_test can be evaluated.
# The mask is computed once and reused for both the ids and the factors
# so the two selections cannot drift apart.
is_test_user = idx_A < X_test.shape[0]
idx_A_test = idx_A[is_test_user]
A = np.ascontiguousarray(A[A.columns[1:]].to_numpy())
idx_B = B[0]
B = np.ascontiguousarray(B[B.columns[1:]].to_numpy())

# Reorder the rows/columns of both matrices to follow the order of the
# factor files, so row i of the sliced matrices matches row i of the
# user factors and column j matches row j of the item factors.
metrics_chol = calc_reco_metrics(
    X_train[idx_A_test, :][:, idx_B],
    X_test[idx_A_test, :][:, idx_B],
    A[is_test_user, :],
    B,
    k=10, all_metrics=True
)

In [6]:
# Average every metric column and lay the result out as one wide row.
metrics_chol.mean().to_frame().transpose()

Unnamed: 0,P@10,TP@10,R@10,AP@10,TAP@10,NDCG@10,Hit@10,RR@10,ROC_AUC,PR_AUC
0,0.17019,0.171254,0.118182,0.06189,0.089924,0.168932,0.7873,0.419423,0.981775,0.122017
