# **SVD Top N Recommendation**

## Install Surprise

In [None]:
! pip install surprise

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 KB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp39-cp39-linux_x86_64.whl size=3193630 sha256=17131516cfbfd012fc1a14d1c1f795d5d1ac2970c6b2ce7974ddf09b22531b81
  Stored in directory: /root/.cache/pip/wheels/c6/3a/46/9b17b3512bdf283c6cb84f59929cdd5199d4e754d596d22784
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1


## Import Libraries

In [None]:
from collections import defaultdict

from surprise import Dataset, SVD
from surprise import dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import LeaveOneOut
from surprise.model_selection import train_test_split

import pandas as pd
import numpy as np

## Load Dataset

In [None]:
# Download the ratings CSV from Google Drive by file ID
# (the captured output shows it is saved as /content/topN.csv).
!gdown 1Hfyy570RjBMqXM_0KrtoP6jEYd4J6uZQ

Downloading...
From: https://drive.google.com/uc?id=1Hfyy570RjBMqXM_0KrtoP6jEYd4J6uZQ
To: /content/topN.csv
  0% 0.00/521k [00:00<?, ?B/s]100% 521k/521k [00:00<00:00, 34.2MB/s]


In [None]:
# Read the downloaded ratings file into a DataFrame.
topN_df = pd.read_csv(filepath_or_buffer='/content/topN.csv')

# Preview the first five rows as a quick sanity check.
topN_df.head(n=5)

Unnamed: 0,userID,itemID,rating
0,79450,27890,5
1,19656,41072,5
2,11408,37185,5
3,163662,72377,5
4,16961,2555,5


In [None]:
# (rows, columns) of the ratings table.
topN_df.shape

(37000, 3)

In [None]:
# Percentage of missing values in each column.
topN_df.isna().sum().div(len(topN_df)).mul(100)

userID    0.0
itemID    0.0
rating    0.0
dtype: float64

In [None]:
# Number of distinct values in each column.
topN_df.nunique()

userID    31032
itemID    16926
rating        5
dtype: int64

## Getting Started with Top-N Recommendation

In [None]:
# Lowest and highest rating values present in the data.
min_rating = topN_df['rating'].min()
max_rating = topN_df['rating'].max()

In [None]:
# Reader whose rating scale is the (min, max) rating observed in the data.
reader = Reader(line_format='user item rating', rating_scale=(min_rating, max_rating))

class MyDataset(dataset.DatasetAutoFolds):
    """Surprise dataset built directly from an in-memory ratings DataFrame.

    Only ``raw_ratings`` and ``reader`` are populated here; the parent
    ``__init__`` is not called.
    """

    def __init__(self, df, reader):
        """Store the ratings of ``df`` as a list of raw-rating tuples.

        Args:
            df: DataFrame providing ``userID``, ``itemID`` and ``rating`` columns.
            reader: Reader object kept on the instance as ``self.reader``.
        """
        # Read from the ``df`` argument instead of the notebook-global
        # ``topN_df`` so the class works for whichever frame is passed in.
        # The fourth slot of each tuple is left as None
        # (presumably Surprise's optional timestamp field — confirm).
        self.raw_ratings = [(uid, iid, r, None) for (uid, iid, r) in
                            zip(df['userID'], df['itemID'], df['rating'])]
        self.reader = reader

# Wrap the full ratings table; `data` is what the splitters below consume.
data = MyDataset(topN_df, reader)

In [None]:
# Hold out 20% of the ratings for evaluation; random_state is fixed at 0.
trainSet, testSet = train_test_split(data, test_size=.20, random_state=0)

In [None]:
def GetTopN(predictions, n=10):
    """Group predictions by user and keep each user's ``n`` best-scored items.

    Args:
        predictions: Iterable of 5-tuples ``(uid, iid, true_r, est, details)``.
        n: Maximum number of items kept per user.

    Returns:
        A ``defaultdict(list)`` mapping each user id to a list of
        ``(iid, est)`` pairs ordered by ``est``, highest first.
    """
    ranked = defaultdict(list)
    # Bucket every (item, estimate) pair under its user.
    for uid, iid, _true_r, est, _details in predictions:
        ranked[uid].append((iid, est))
    # Order each bucket by estimate, best first, and truncate it to n entries.
    for uid in ranked:
        ordered = sorted(ranked[uid], key=lambda pair: pair[1], reverse=True)
        ranked[uid] = ordered[:n]
    return ranked

In [None]:
def HitRate(topNPredicted, leftOutPredictions):
    """Fraction of left-out ratings whose item appears in that user's top-N list.

    Args:
        topNPredicted: Mapping of user id -> list of ``(item id, estimate)``
            pairs, as returned by ``GetTopN``. It is never modified.
        leftOutPredictions: Iterable of prediction tuples whose first two
            fields are the user id and the left-out item id.

    Returns:
        ``hits / total`` as a float, or ``0.0`` when ``leftOutPredictions``
        is empty.
    """
    hits = 0
    total = 0

    # For each left-out rating
    for leftOut in leftOutPredictions:
        userID = leftOut[0]
        leftOutFoodID = leftOut[1]
        # Use .get() so a user with no recommendations counts as a miss.
        # Plain indexing would insert an empty entry into a defaultdict
        # (mutating the caller's mapping) and raise KeyError on a plain dict.
        recommended = topNPredicted.get(int(userID), ())
        # Is the left-out item among this user's recommendations?
        if any(int(leftOutFoodID) == int(foodID)
               for foodID, predictedRating in recommended):
            hits += 1

        total += 1

    # Nothing was left out: report 0.0 rather than dividing by zero.
    if total == 0:
        return 0.0

    # Overall hit rate (hits per left-out rating)
    return hits / total

In [None]:
# Fit SVD on the training split and predict the held-out test split.
# random_state seeds the model's random initialisation so the metrics printed
# below are reproducible across reruns (the splitters in this notebook are seeded too).
algo = SVD(n_factors=20, n_epochs=20, random_state=0)
algo.fit(trainSet)
predictions = algo.test(testSet)

def MAE(predictions):
    """Return ``accuracy.mae`` for ``predictions`` without printing it."""
    mae_value = accuracy.mae(predictions, verbose=False)
    return mae_value
def RMSE(predictions):
    """Return ``accuracy.rmse`` for ``predictions`` without printing it."""
    rmse_value = accuracy.rmse(predictions, verbose=False)
    return rmse_value
def MSE(predictions):
    """Return ``accuracy.mse`` for ``predictions`` without printing it."""
    mse_value = accuracy.mse(predictions, verbose=False)
    return mse_value
    
# Report each error metric on the hold-out predictions.
for metric_label, metric_fn in (("MAE: ", MAE), ("RMSE: ", RMSE), ("MSE: ", MSE)):
    print(metric_label, metric_fn(predictions))

MAE:  1.0162499402334493
RMSE:  1.2841270774689308
MSE:  1.6489823510888975


In [None]:
# Leave-one-out evaluation of top-N hit rate.
# NOTE(review): the loop rebinds `trainSet`/`testSet` from the 80/20 split above
# and refits `algo`, so cells run after this one see the leave-one-out versions.
# NOTE(review): LeaveOneOut is used with its default `min_n_ratings`; presumably
# users with a single rating then have no training data and can never score a
# hit — confirm against the Surprise docs and consider `min_n_ratings=1`.
LOOCV = LeaveOneOut(n_splits=1, random_state=1)

for trainSet, testSet in LOOCV.split(data):
    # Train model without left-out ratings
    algo.fit(trainSet)
    # Predict ratings for the left-out ratings only
    leftOutPredictions = algo.test(testSet)
    # Predict every (user, item) pair that is not in the training set
    bigTestSet = trainSet.build_anti_testset()
    allPredictions = algo.test(bigTestSet)
    # Build the per-user top-N lists for each cut-off first ...
    topNPredictedSVD10 = GetTopN(allPredictions, n=10)
    topNPredictedSVD15 = GetTopN(allPredictions, n=15)
    topNPredictedSVD20 = GetTopN(allPredictions, n=20)
    topNPredictedSVD25 = GetTopN(allPredictions, n=25)
    # ... then report the hit rate of each list, smallest cut-off first.
    for hitRateLabel, topNPredicted in (
        ("\nHit Rate N=10: ", topNPredictedSVD10),
        ("\nHit Rate N=15: ", topNPredictedSVD15),
        ("\nHit Rate N=20: ", topNPredictedSVD20),
        ("\nHit Rate N=25: ", topNPredictedSVD25),
    ):
        print(hitRateLabel, HitRate(topNPredicted, leftOutPredictions))


Hit Rate N=10:  0.001160092807424594

Hit Rate N=15:  0.001482340809486981

Hit Rate N=20:  0.001997937612786801

Hit Rate N=25:  0.0022879608146429494


In [None]:
# Hit Rate N=25 from the run above, rounded to 4 decimals.
# NOTE(review): the value is pasted by hand from that output, so it will go stale on a rerun.
round(0.0022879608146429494, 4)

0.0023