In [65]:
import pandas as pd
import numpy as np
import surprise
from surprise import Reader, Dataset, SVD, evaluate, SlopeOne, KNNBasic, KNNWithMeans
from surprise import accuracy
from sklearn.model_selection import train_test_split

# from surprise.model_selection import train_test_split
from collections import defaultdict

In [66]:
# Read the ratings table, restricted to the four columns the notebook uses.
# NOTE: the path is absolute and machine-specific; point it at your own copy
# of ratings.csv before running.
ratings = pd.read_csv(
    '/Users/yashnisar/Downloads/ratings.csv',
    sep=',',
    usecols=['userId', 'movieId', 'rating', 'timestamp'],
)

## Splitting the data so that, for each user, a random 80% of their ratings become training ratings and the remaining 20% become testing ratings

In [67]:
# Per-user 80/20 split. The pieces are collected in lists and concatenated
# once at the end: DataFrame.append was removed in pandas 2.0, and growing a
# frame inside the loop copies it on every iteration.
split_rng = np.random.RandomState(42)  # fixed seed so a fresh run reproduces the same split
train_parts = []
test_parts = []
# sort=False visits users in order of first appearance, like ratings['userId'].unique().
for _, user_ratings in ratings.groupby('userId', sort=False):
    if len(user_ratings) < 2:
        # train_test_split raises when one side would be empty, so a lone
        # rating can only go to the training side.
        train_parts.append(user_ratings)
        continue
    train, test = train_test_split(user_ratings, test_size=0.2, random_state=split_rng)
    train_parts.append(train)
    test_parts.append(test)

# pd.concat refuses an empty list, so fall back to an empty frame with the same columns.
empty_split = ratings.iloc[:0]
train_set = pd.concat(train_parts, ignore_index=True) if train_parts else empty_split.copy()
test_set = pd.concat(test_parts, ignore_index=True) if test_parts else empty_split.copy()

In [68]:
# Wrap the training ratings in a Surprise dataset.
# Reader() defaults to rating_scale=(1, 5) and Surprise clips every estimate
# to that range, so ratings outside it (e.g. half-star 0.5 values) could never
# be predicted. Take the bounds from the loaded ratings instead.
rating_scale = (float(ratings['rating'].min()), float(ratings['rating'].max()))
reader = Reader(rating_scale=rating_scale)
data = Dataset.load_from_df(train_set[['userId', 'movieId', 'rating']], reader)
trainset = data.build_full_trainset()

In [69]:
def recommend_items(predictions):
    """Group predicted ratings by user, highest estimate first.

    Args:
        predictions: iterable of 5-tuples ``(uid, iid, true_r, est, details)``.

    Returns:
        dict mapping each uid to a list of ``(iid, est)`` pairs ordered by
        ``est`` from highest to lowest.
    """
    per_user = {}
    for uid, iid, _true_r, est, _details in predictions:
        per_user.setdefault(uid, []).append((iid, est))

    # sorted() is stable even with reverse=True, so tied estimates keep their
    # input order.
    return {
        uid: sorted(pairs, key=lambda pair: pair[1], reverse=True)
        for uid, pairs in per_user.items()
    }

In [70]:
def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Return per-user precision@k and recall@k.

    An item counts as relevant when its true rating is at least ``threshold``,
    and as recommended when it is among the user's ``k`` highest estimates and
    its estimate is at least ``threshold``.

    Args:
        predictions: iterable of ``(uid, iid, true_r, est, details)`` tuples.
        k: how many of each user's highest-estimated items to inspect.
        threshold: minimum rating that counts as relevant / recommended.

    Returns:
        ``(precisions, recalls)``: two dicts keyed by uid. A ratio whose
        denominator is zero is reported as 1.
    """
    by_user = {}
    for uid, _iid, true_r, est, _details in predictions:
        by_user.setdefault(uid, []).append((est, true_r))

    precisions, recalls = {}, {}
    for uid, pairs in by_user.items():
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        top_k = ranked[:k]

        # Relevant items are counted over ALL of the user's predictions.
        relevant_total = sum(1 for _, true_r in ranked if true_r >= threshold)
        # Recommended items are the top-k entries whose estimate clears the threshold.
        recommended = [(est, true_r) for est, true_r in top_k if est >= threshold]
        hits = sum(1 for _, true_r in recommended if true_r >= threshold)

        if recommended:
            precisions[uid] = hits / len(recommended)
        else:
            precisions[uid] = 1

        if relevant_total != 0:
            recalls[uid] = hits / relevant_total
        else:
            recalls[uid] = 1

    return precisions, recalls

In [71]:
def norm_dcg_score(predictions, k=10, gains="exponential"):
    """Compute NDCG@k for every user.

    For each user the items are ranked by estimated rating and the DCG of the
    top ``k`` true ratings is divided by the ideal DCG, i.e. the DCG of the
    ``k`` highest true ratings among ALL of that user's predictions (not just
    the ones that happened to be ranked in the top ``k``).

    Args:
        predictions: iterable of ``(uid, iid, true_r, est, details)`` tuples.
        k: cut-off rank.
        gains: ``"exponential"`` for gain ``2 ** rating - 1`` or ``"linear"``
            for gain ``rating``.

    Returns:
        dict mapping each uid to its NDCG@k as a float. A user whose ideal DCG
        is 0 gets 0.0.

    Raises:
        ValueError: if ``gains`` is neither ``"exponential"`` nor ``"linear"``
            (raised up front, even when ``predictions`` is empty).
    """
    if gains not in ("exponential", "linear"):
        raise ValueError("Invalid gains option.")

    def _dcg_at_k(ordered_true):
        # Discounted cumulative gain of the first k ratings in the given order.
        rel = np.asarray(ordered_true[:k], dtype=float)
        gain_val = 2 ** rel - 1 if gains == "exponential" else rel
        discounts = np.log2(np.arange(len(rel)) + 2)
        return float(np.sum(gain_val / discounts))

    by_user = {}
    for uid, _iid, true_r, est, _details in predictions:
        by_user.setdefault(uid, []).append((est, true_r))

    scores = {}
    for uid, pairs in by_user.items():
        # True ratings in the order the model would present the items.
        by_estimate = [
            true_r
            for _, true_r in sorted(pairs, key=lambda pair: pair[0], reverse=True)
        ]
        # Best possible order: every true rating of this user, highest first.
        ideal = sorted((true_r for _, true_r in pairs), reverse=True)

        ideal_dcg = _dcg_at_k(ideal)
        # Guard against 0/0 (e.g. all gains are zero, or k == 0).
        scores[uid] = _dcg_at_k(by_estimate) / ideal_dcg if ideal_dcg > 0 else 0.0

    return scores

In [72]:
# Display labels for the algorithms, looked up by position in the evaluation loop.
algo_names = [
    'SVD',
    'SlopeOne',
    'KNN Basic',
    'KNN with Means',
]

In [1]:
# Fit each algorithm on the training set and score it on the held-out ratings.
# Only the three columns predict() needs are extracted, once, up front.
# itertuples keeps the integer ids, whereas iterrows upcasts every row to
# float (userId 1 would become 1.0 in the printed results).
test_rows = list(
    test_set[['userId', 'movieId', 'rating']].itertuples(index=False, name=None)
)

for algo_name, algo in zip(algo_names, [SVD(), SlopeOne(), KNNBasic(), KNNWithMeans()]):
    print("\n Algorithm:", algo_name)
    algo.fit(trainset)
    predictions = [algo.predict(uid, iid, rating) for uid, iid, rating in test_rows]
    print("RMSE", accuracy.rmse(predictions, verbose=False))
    print("MAE", accuracy.mae(predictions, verbose=False))
    print(norm_dcg_score(predictions, k=10, gains="exponential"))
    top_recommendations = recommend_items(predictions)
    precisions, recalls = precision_recall_at_k(predictions, k=10, threshold=4)
    avg_prec = sum(precisions.values()) / len(precisions)
    avg_recall = sum(recalls.values()) / len(recalls)
    # The F-score is undefined when precision and recall are both 0; report 0
    # instead of raising ZeroDivisionError.
    prec_plus_recall = avg_prec + avg_recall
    fscore = 2 * (avg_prec * avg_recall) / prec_plus_recall if prec_plus_recall else 0.0
    print("Fscore:", fscore)
    print("Average Precision:", avg_prec)
    print("Average Recall:", avg_recall)
    print("Top recommendations: ", top_recommendations)

NameError: name 'SVD' is not defined