In [1]:
from implicit.datasets.lastfm import get_lastfm

artists, users, artist_user_plays = get_lastfm()

In [2]:
from implicit.evaluation import train_test_split

user_plays = artist_user_plays.T.tocsr()

train_split, test_split = train_test_split(user_plays)

In [3]:
from implicit.als import AlternatingLeastSquares

model = AlternatingLeastSquares(factors=64, regularization=0.05, alpha=2.0)
model.fit(train_split)

  0%|          | 0/15 [00:00<?, ?it/s]

In [4]:
#Calc Standard Discounted Cumulative Squared Error

import numpy as np
import pandas as pd


def sdcse(relevance_vector, predicted_vector):
    
    """ Calculate STANDARD DISCOUNTED CUMULATIVE SQUARED ERROR
    as described in the paper Evaluating Top-N Recommendations
    Using Ranked Error Approach: An Empirical Analysis


    Parameters
    ----------
    relevance_vector : relevance vector
        relevance vector for each user
    predicted_vector : predicted scores for N items
    
    Each relvec and each predvec are within [1, 5] and [0, 1]
    intervals for explicit and implicit feedback.

    Returns
    -------
    float
        the calculated SDCSE
    """
        
        
    # Sort the relevance and predicted vectors in descending order
    relvec = np.sort(relevance_vector)[::-1]
    predvec = np.sort(predicted_vector)[::-1]
    
    # Initialize variables for discounted cumulative squared error
    # at N, descending squared error, and worst DCSE at N
    DCSE_at_N = 0
    n = len(relvec)
    SE_des = []
    WDCSE_at_N = 0
    
    # Calculate the Discounted Cumulative Squared Error (DCSE) at N
    for i in range (1,n+1):
        
        squared_difference = (relvec[i - 1] - predvec[i - 1])**2
        DCSE = squared_difference / np.log2(i + 1)
        DCSE_at_N += DCSE
    
    # Calculate the Squared Error vector
    for j in range(1,n+1):
        element = (relvec[j - 1] - predvec[j - 1])**2
        SE_des.append(element)

    # Sort the Squared Error vector in descending order
    SE_des.sort(reverse=True)

    # Calculate the Worst Discounted Cumulative Squared Error (WDCSE) at N
    for k in range(1,n+1):
        element2 = SE_des[k - 1] / np.log2(k + 1)
        WDCSE_at_N += element2
    
    # Check if nominator and denominator are 0 (result in "nan")
    if DCSE_at_N == 0 and WDCSE_at_N == 0:
        return 0
    else:
            SDCSE_at_N = 1 - (DCSE_at_N / WDCSE_at_N)
            return SDCSE_at_N
        
        


In [5]:
# ranking_metrics_at_k converted from cython to python

import numpy as np

users = test_split.shape[0]
items = test_split.shape[1]
test_indptr = np.ndarray(test_split.indptr.shape, dtype=np.int32, buffer=test_split.indptr)
test_indices = test_split.indices
likes = set()
batch_size = 1000
start_idx = 0
sum_sdcse = 0

to_generate = np.arange(users, dtype="int32")
to_generate = to_generate[np.ediff1d(test_split.indptr) > 0]
batch = to_generate[start_idx: start_idx + batch_size]
K = 10
ids, _ = model.recommend(batch, train_split[batch], N=K)

actual_items = np.array([], dtype=np.int32)
predicted_items = np.array([], dtype=np.int32)


for batch_idx in range(len(batch)):
    u = batch[batch_idx]
    likes.clear()
    for x in range(test_indptr[u], test_indptr[u+1]):
        likes.add(test_indices[x])

    actual_items = np.concatenate((actual_items, np.array(list(likes), dtype=np.int32)))

    predicted_items = np.concatenate((predicted_items, ids[batch_idx]))

    relevant = actual_items[test_indptr[u]:test_indptr[u+1]]
    predicted = predicted_items[batch_idx*K:(batch_idx+1)*K]
    np.array(relevant)
    np.array(predicted)
    sum_sdcse += sdcse(relevant[:K], predicted[:K])
    
print(sum_sdcse / batch_size)

0.39066188102633453
