# **Final Project Muhammad Bayu Samudra Siddik**

## Iteration 2 : Top N Recommendation

### Import Library

In [None]:
! pip install surprise

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from collections import Counter
from collections import defaultdict

import numpy as np
import pandas as pd
from scipy import sparse

from surprise import BaselineOnly
from surprise import Dataset
from surprise import NMF
from surprise import Reader
from surprise import SVD
from surprise import SVDpp
from surprise import accuracy
from surprise import dataset
from surprise.model_selection import KFold
from surprise.model_selection import LeaveOneOut
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split

### Load Dataset

In [None]:
# Download the ratings CSV from Google Drive by file ID
# (saved as /content/topN.csv in the recorded run).
!gdown 1Hfyy570RjBMqXM_0KrtoP6jEYd4J6uZQ

Downloading...
From: https://drive.google.com/uc?id=1Hfyy570RjBMqXM_0KrtoP6jEYd4J6uZQ
To: /content/topN.csv
  0% 0.00/521k [00:00<?, ?B/s]100% 521k/521k [00:00<00:00, 121MB/s]


In [None]:
# Read the downloaded ratings table (columns: userID, itemID, rating).
# NOTE(review): the absolute /content path presumably ties this cell to Colab -- confirm.
topN_df = pd.read_csv('/content/topN.csv')

# Preview the first rows.
topN_df.head()

Unnamed: 0,userID,itemID,rating
0,79450,27890,5
1,19656,41072,5
2,11408,37185,5
3,163662,72377,5
4,16961,2555,5


**Create sample data**

In [None]:
# (number of rating rows, number of columns)
topN_df.shape

(37000, 3)

In [None]:
# Write the ratings table to topN.csv in the current working directory, without the index.
# NOTE(review): if the working directory is /content this overwrites the file that was
# downloaded and read above -- confirm that is intended.
topN_df.to_csv('topN.csv', encoding='utf-8', index=False)

**Check Missing Value**

In [None]:
# Percentage of missing values in each column.
topN_df.isna().sum() / len(topN_df) * 100

userID    0.0
itemID    0.0
rating    0.0
dtype: float64

**Check Unique Value**

In [None]:
# Number of distinct values in each column (users, items, rating levels).
topN_df.nunique()

userID    31032
itemID    16926
rating        5
dtype: int64

### Getting Started with Top-N Recommendation

**Convert Data to a Surprise Dataset**

In [None]:
# The rating scale handed to Surprise is taken from the observed data
# rather than hard-coded.
min_rating = topN_df.rating.min()
max_rating = topN_df.rating.max()

reader = Reader(rating_scale=(min_rating, max_rating))
# Dataset is imported in the top import cell. The columns are passed in
# user, item, rating order, which is the order load_from_df expects.
data = Dataset.load_from_df(topN_df[['userID', 'itemID', 'rating']], reader)

**Split Data: Hold Out 20% for Testing**

In [None]:
# Hold out 20% of the ratings for testing; random_state is fixed so the split is repeatable.
# NOTE: trainSet/testSet are rebound by the leave-one-out loop later in the notebook,
# so re-run this cell before re-running any cell that expects the 80/20 split.
trainSet, testSet = train_test_split(data, test_size=.20, random_state=0)

#### Def Top-N

In [None]:
def GetTopN(predictions, n=10, minimumRating=4.0):
    """Build each user's top-``n`` recommendation list from rating predictions.

    Args:
        predictions: iterable of 5-tuples
            ``(userID, itemID, actualRating, estimatedRating, details)``.
        n: maximum number of recommendations kept per user.
        minimumRating: predictions whose estimated rating is below this
            threshold are ignored.

    Returns:
        A ``defaultdict(list)`` mapping ``int(userID)`` to a list of
        ``(int(itemID), estimatedRating)`` pairs, highest estimate first.
        Users with no qualifying prediction have no entry; looking one up
        yields an empty list.
    """
    perUser = defaultdict(list)

    # Collect every prediction that clears the rating threshold.
    for rawUser, rawItem, _actual, estimate, _details in predictions:
        if estimate >= minimumRating:
            perUser[int(rawUser)].append((int(rawItem), estimate))

    # Rank each user's candidates by estimated rating and keep the best n.
    # Only existing keys are reassigned, so iterating the dict here is safe.
    for user in perUser:
        ranked = sorted(perUser[user], key=lambda pair: pair[1], reverse=True)
        perUser[user] = ranked[:n]

    return perUser

#### Def Hit Rate

In [None]:
def HitRate(topNPredicted, leftOutPredictions):
    """Fraction of left-out ratings whose item appears in the user's top-N list.

    Args:
        topNPredicted: mapping of ``int(userID)`` to a list of
            ``(itemID, estimatedRating)`` pairs, e.g. the result of ``GetTopN``.
        leftOutPredictions: iterable of records whose first two fields are the
            user ID and the ID of the item that was held out for that user.

    Returns:
        ``hits / total`` as a float, or ``0.0`` when ``leftOutPredictions`` is
        empty (instead of raising ``ZeroDivisionError``).
    """
    hits = 0
    total = 0

    # For each left-out rating
    for leftOut in leftOutPredictions:
        userID = leftOut[0]
        leftOutFoodID = leftOut[1]
        # .get() keeps users without any recommendation from raising KeyError
        # on a plain dict, and from silently adding empty entries to a
        # defaultdict as a side effect of the lookup.
        recommended = topNPredicted.get(int(userID), ())
        # Is the left-out item in the predicted top-N for this user?
        if any(int(leftOutFoodID) == int(foodID) for foodID, _ in recommended):
            hits += 1

        total += 1

    # Nothing was left out, so there is nothing to hit.
    if total == 0:
        return 0.0

    # Compute the overall hit rate
    return hits / total

#### BaselineOnly

In [None]:
# Fit the baseline (bias-only) model on the training split and predict the
# ratings of the held-out test split.
algo = BaselineOnly()
algo.fit(trainSet)
predictions = algo.test(testSet)


def MAE(predictions):
    """Mean absolute error of ``predictions``, computed without printing."""
    return accuracy.mae(predictions, verbose=False)


def RMSE(predictions):
    """Root mean squared error of ``predictions``, computed without printing."""
    return accuracy.rmse(predictions, verbose=False)


def MSE(predictions):
    """Mean squared error of ``predictions``, computed without printing."""
    return accuracy.mse(predictions, verbose=False)


# Report the three error metrics in the same order and format as before.
for label, metric in (("MAE: ", MAE), ("RMSE: ", RMSE), ("MSE: ", MSE)):
    print(label, metric(predictions))

Estimating biases using als...
MAE:  1.022801125686244
RMSE:  1.2889753298162012
MSE:  1.6614574008747844


In [None]:
# Superseded: earlier single-cutoff (N=10) version of the leave-one-out
# evaluation that follows in the next cell. Kept commented out for reference only.
# LOOCV = LeaveOneOut(n_splits=1, random_state=1)

# for trainSet, testSet in LOOCV.split(data):
#     # Train model without left-out ratings
#     algo.fit(trainSet)
#     # Predicts ratings for left-out ratings only
#     leftOutPredictions = algo.test(testSet)
#     # Build predictions for all ratings not in the training set
#     bigTestSet = trainSet.build_anti_testset()
#     allPredictions = algo.test(bigTestSet)
#     # Compute top 10 recs for each user
#     topNPredictedBase = GetTopN(allPredictions, n=10)
#     print("\nHit Rate: ", HitRate(topNPredictedBase, leftOutPredictions))

In [None]:
# Leave-one-out evaluation of the top-N lists: hold out one rating per user,
# train on the rest, and check whether the held-out item shows up in that
# user's top-N recommendations.
# NOTE(review): in the recorded run the data has 37,000 ratings for 31,032
# users, so most users have a single rating; once it is held out they
# presumably have no training data and can never score a hit. Consider the
# min_n_ratings option of LeaveOneOut -- confirm against the Surprise docs.
LOOCV = LeaveOneOut(n_splits=1, random_state=1)

# Cutoffs (N) at which the hit rate is reported.
topNCutoffs = (10, 15, 20, 25)

# NOTE: this loop rebinds trainSet/testSet (previously the 80/20 split) and
# refits the shared `algo` object.
for trainSet, testSet in LOOCV.split(data):
    # Train model without left-out ratings
    algo.fit(trainSet)
    # Predicts ratings for left-out ratings only
    leftOutPredictions = algo.test(testSet)
    # Build predictions for all ratings not in the training set
    bigTestSet = trainSet.build_anti_testset()
    allPredictions = algo.test(bigTestSet)
    # Compute the top-N recs for each user at every cutoff and report the hit rate
    topNPredictedBase = {}
    for cutoff in topNCutoffs:
        topNPredictedBase[cutoff] = GetTopN(allPredictions, n=cutoff)
        print("\nHit Rate N=%d: " % cutoff,
              HitRate(topNPredictedBase[cutoff], leftOutPredictions))
    # Keep the original per-cutoff names available for any later cells.
    topNPredictedBase10 = topNPredictedBase[10]
    topNPredictedBase15 = topNPredictedBase[15]
    topNPredictedBase20 = topNPredictedBase[20]
    topNPredictedBase25 = topNPredictedBase[25]

Estimating biases using als...

Hit Rate N=10:  0.0009989688063934004

Hit Rate N=15:  0.001611240010311936

Hit Rate N=20:  0.001965712812580562

Hit Rate N=25:  0.002320185614849188


https://github.com/jvntra/Movie_Recommendation_System_Framework

https://medium.com/@rishabhbhatia315/recommendation-system-evaluation-metrics-3f6739288870