In [218]:
import sys
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
from sklearn.model_selection import train_test_split
import random
from sklearn.preprocessing import MinMaxScaler

import implicit

# Load the data like we did before
raw_data = pd.read_csv('data_60.csv')
raw_data.columns = ['user', 'artist', 'plays']

# Drop NaN columns
data = raw_data.dropna()
data = data.copy()

# Create a numeric user_id and artist_id column
data['user'] = data['user'].astype("category")
data['artist'] = data['artist'].astype("category")
data['user_id'] = data['user'].cat.codes
data['artist_id'] = data['artist'].cat.codes

# Split the data into training and test sets
train_data, test_data = train_test_split(data, test_size=0.2)

# The implicit library expects data as an item-user matrix, so we
# create two matrices: one for fitting the model (item-user) 
# and one for recommendations (user-item)
sparse_item_user_train = sparse.csr_matrix((train_data['plays'].astype(float), (train_data['artist_id'], train_data['user_id'])))
sparse_item_user_test = sparse.csr_matrix((test_data['plays'].astype(float), (test_data['artist_id'], test_data['user_id'])))

sparse_user_item_train = sparse.csr_matrix((train_data['plays'].astype(float), (train_data['user_id'], train_data['artist_id'])))
sparse_user_item_test = sparse.csr_matrix((test_data['plays'].astype(float), (test_data['user_id'], test_data['artist_id'])))


# Initialize the als model and fit it using the sparse item-user matrix
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=100)

# Calculate the confidence by multiplying it by our alpha value.
alpha_val = 200
data_conf = (sparse_item_user_train * alpha_val).astype('double')

#Fit the model
model.fit(data_conf)

  0%|          | 0/100 [00:00<?, ?it/s]

In [228]:
# Set the value of k for precision@k and recall@k
k = 5

# Call evaluate_model to evaluate your model
evaluation_results = evaluate_model(model, sparse_user_item_test, sparse_user_item_train, k)

# Print the evaluation results
print("Evaluation results:")
for metric, value in evaluation_results.items():
    print("{}: {}".format(metric, value))

0
  (0, 42)	81.5
  (0, 62)	92.18
  (0, 129)	-37.02
  (0, 261)	-63.88
  (0, 290)	80.99
  (0, 483)	81.29
  (0, 537)	80.78
  (0, 648)	-72.1
  (0, 719)	83.12
  (0, 792)	-55.69
  (0, 809)	-66.18
  (0, 857)	75.8
  (0, 865)	-6.55
  (0, 916)	91.24
  (0, 947)	10.18
  (0, 955)	83.93
  (0, 972)	-26.8
  (0, 1008)	89.17
  (0, 1013)	88.74
  (0, 1018)	75.8
  (0, 1076)	76.05
  (0, 1203)	88.1
  (0, 1219)	78.11
  (0, 1237)	75.8
  (0, 1330)	86.97
  :	:
  (0, 11452)	-86.45
  (0, 11463)	-68.65
  (0, 11465)	58.76
  (0, 11557)	86.85
  (0, 11569)	88.26
  (0, 11741)	76.17
  (0, 11765)	36.54
  (0, 11860)	9.25
  (0, 11889)	91.93
  (0, 12013)	-62.14
  (0, 12030)	31.1
  (0, 12048)	83.77
  (0, 12096)	63.97
  (0, 12178)	74.35
  (0, 12429)	67.98
  (0, 12461)	46.15
  (0, 12499)	92.82
  (0, 12511)	68.63
  (0, 12598)	84.86
  (0, 12656)	-68.77
  (0, 12673)	77.82
  (0, 12680)	92.09
  (0, 12791)	53.39
  (0, 12861)	84.59
  (0, 12872)	86.79


IndexError: index 129 is out of bounds for axis 1 with size 105

In [227]:
def precision_at_k(recommended_items, relevant_items, k):
    # Get the top-k recommended items
    top_k_items = recommended_items[:k]
    # Calculate the number of recommended items that are relevant
    num_relevant_items = len(set(top_k_items) & set(relevant_items))
    # Calculate precision@k
    precision = num_relevant_items / k
    return precision

def recall_at_k(recommended_items, relevant_items, k):
    # Get the top-k recommended items
    top_k_items = recommended_items[:k]
    # Calculate the number of relevant items that are recommended
    num_relevant_items = len(set(top_k_items) & set(relevant_items))
    # Calculate recall@k
    recall = num_relevant_items / len(relevant_items)
    return recall

def average_precision(recommended_items, relevant_items):
    precision_sum = 0.0
    num_relevant_items = 0
    for i, item in enumerate(recommended_items):
        if item in relevant_items:
            num_relevant_items += 1
            precision = precision_at_k(recommended_items, relevant_items, i+1)
            precision_sum += precision
    if num_relevant_items == 0:
        return 0.0
    average_precision = precision_sum / num_relevant_items
    return average_precision

def evaluate_model(model, test_data, train_data, k):
    # Create a dictionary to store evaluation results
    evaluation_results = {}

    # Iterate over each user in the test data
    for user in range(train_data.shape[0]):
        print(user)
        # Get the items in the test set for the current user
        relevant_items = test_data[user].indices

        # Get the items that were already interacted with in the training set
        interacted_items = train_data[user].indices

        # Remove the already interacted items from relevant items
        relevant_items = list(set(relevant_items) - set(interacted_items))

        # Make recommendations for the current user
        recommended_items = model.recommend(user, train_data[0], N=k)

        # Get the recommended item IDs
        recommended_items = [item[0] for item in recommended_items]

        # Calculate precision@k
        precision = precision_at_k(recommended_items, relevant_items, k)

        # Calculate recall@k
        recall = recall_at_k(recommended_items, relevant_items, k)

        # Calculate average precision
        avg_precision = average_precision(recommended_items, relevant_items)

        # Store the evaluation results for the current user
        evaluation_results[user] = {
            'precision': precision,
            'recall': recall,
            'avg_precision': avg_precision
        }

    # Calculate the average evaluation metrics across all users
    precision_values = [result['precision'] for result in evaluation_results.values()]
    recall_values = [result['recall'] for result in evaluation_results.values()]
    avg_precision_values = [result['avg_precision'] for result in evaluation_results.values()]

    avg_precision = np.mean(avg_precision_values)
    mean_precision = np.mean(precision_values)
    mean_recall = np.mean(recall_values)

    return {
        'precision@{}'.format(k): mean_precision,
        'recall@{}'.format(k): mean_recall,
        'MAP@{}'.format(k): avg_precision
    }

In [217]:
user_id = 1
recommended = model.recommend(userid=user_id, user_items=sparse_item_user_test[user_id], N=10)

actual_ratings = []
predicted_ratings = []

for artist_id in recommended[0]:
    # Find the corresponding score in the data DataFrame
    data_score = data.loc[data['artist_id'] == artist_id, 'plays'].iloc[0]

    # Find the corresponding score in the recommended list
    recommended_index = np.where(recommended[0] == artist_id)[0]
    recommended_score = recommended[1][recommended_index][0] * 200 - 100

    actual_ratings.append(data_score)
    predicted_ratings.append(recommended_score)

    
#Compute evaluation metrics

mae = mean_absolute_error(actual_ratings, predicted_ratings)
rmse = mean_squared_error(actual_ratings, predicted_ratings, squared=False)

print("MAE:", mae)
print("RMSE:", rmse)

MAE: 121.62299999931363
RMSE: 136.5501838513273


In [171]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
actual_ratings = []
predicted_ratings = []

for artist_id in recommended[0]:
    # Find the corresponding score in the data DataFrame
    data_score = data.loc[data['artist_id'] == artist_id, 'plays'].iloc[0]

    # Find the corresponding score in the recommended list
    recommended_index = np.where(recommended[0] == artist_id)[0]
    recommended_score = recommended[1][recommended_index][0] * 200 - 100

    actual_ratings.append(data_score)
    predicted_ratings.append(recommended_score)

    
#Compute evaluation metrics

mae = mean_absolute_error(actual_ratings, predicted_ratings)
rmse = mean_squared_error(actual_ratings, predicted_ratings, squared=False)

print("MAE:", mae)
print("RMSE:", rmse)

MAE: 135.95598010760546
RMSE: 141.13467700356972


In [None]:
reversed_item_dict = { v:k for k,v in item_dict.items()}
for item_index, predicted_score in zip(*recommended_items):
    print(f"https://tournesol.app/entities/yt:{reversed_item_dict[item_index]}", f"Predicted score: {predicted_score}")