# 1. Install the packages

In [None]:
! pip install numpy
! pip install scikit-surprise

# 2. Import the libraries

In [2]:
import os
import sys
from surprise import Dataset
from surprise import Reader
from surprise import KNNBasic
import heapq
from collections import defaultdict
from operator import itemgetter
from surprise.model_selection import LeaveOneOut

# 3. Load the dataset

In [3]:
ratingsPath = '../data/ratings.csv'

# Locate the ratings file without changing the process working directory.
# The relative path is tried against the current directory first (for a
# notebook this is normally the notebook's folder). Only if that fails do we
# fall back to the directory of sys.argv[0], which is the right anchor when
# this code runs as a plain script. Under a Jupyter kernel sys.argv[0]
# typically points at the kernel launcher, and for `python script.py` its
# dirname is '', so an unconditional os.chdir() on it is unsafe.
if not os.path.isfile(ratingsPath):
    launchDir = os.path.dirname(os.path.abspath(sys.argv[0]))
    fallbackPath = os.path.join(launchDir, ratingsPath)
    if not os.path.isfile(fallbackPath):
        raise FileNotFoundError(
            "Could not find ratings file at %r or %r"
            % (os.path.abspath(ratingsPath), fallbackPath))
    ratingsPath = fallbackPath

# The CSV has a header row (skipped) followed by user,item,rating,timestamp.
reader = Reader(line_format='user item rating timestamp',
                sep=',', skip_lines=1)
data = Dataset.load_from_file(ratingsPath, reader=reader)
print("Dataset Loaded")


Loading the movies...


# 4. Build a "leave one out" train/test split for evaluating the top-N recommender

In [4]:
# One leave-one-out split with a fixed seed, so the same rating is held out
# per user on every run. Only one split is requested, so the first (and only)
# (trainset, testset) pair produced by split() is taken directly.
LOOCV = LeaveOneOut(n_splits=1, random_state=1)
LOOCVTrain, LOOCVTest = next(iter(LOOCV.split(data)))

# 5. Train on Leave-One-Out train set

In [5]:
trainSet = LOOCVTrain

# User-based collaborative filtering with cosine similarity between users.
sim_options = {'name': 'cosine',
               'user_based': True
               }

model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)

# fit() has already computed the user-user similarity matrix and stored it on
# the model, so reuse it instead of calling compute_similarities() again,
# which would repeat the whole computation.
simsMatrix = model.sim

# Held-out ratings (one per user) used later for the hit-rate evaluation.
leftOutTestSet = LOOCVTest

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


# 6. Build a dictionary mapping each user ID to a list of (movieID, predictedRating) pairs

In [6]:
# topN maps raw (integer) user ID -> list of (raw integer movie ID, 0.0).
# The second tuple element is a placeholder; only the movie ID is filled in.
topN = defaultdict(list)
k = 10                    # number of most-similar users consulted per user
maxRecommendations = 40   # maximum number of recommendations kept per user

for uiid in range(trainSet.n_users):
    # Pair every *other* user with their similarity to this user, then keep
    # the k most similar ones.
    similarityRow = simsMatrix[uiid]
    similarUsers = [(innerID, score)
                    for innerID, score in enumerate(similarityRow)
                    if innerID != uiid]
    kNeighbors = heapq.nlargest(k, similarUsers, key=itemgetter(1))

    # Score each item the neighbours rated: sum of (rating scaled by 5.0)
    # weighted by the neighbour's similarity to this user.
    candidates = defaultdict(float)
    for neighborID, userSimilarityScore in kNeighbors:
        for itemID, rating in trainSet.ur[neighborID]:
            candidates[itemID] += (rating / 5.0) * userSimilarityScore

    # Items this user has already rated must not be recommended back.
    watched = {itemID for itemID, _ in trainSet.ur[uiid]}

    # Keep the highest-scoring unseen items. nlargest() yields the same order
    # as a full descending sort truncated to maxRecommendations, and caps the
    # list at exactly maxRecommendations entries (the previous
    # `pos += 1; if pos > 40` check let one extra item through).
    unseen = ((itemID, ratingSum)
              for itemID, ratingSum in candidates.items()
              if itemID not in watched)
    best = heapq.nlargest(maxRecommendations, unseen, key=itemgetter(1))

    # Only create an entry for users that actually have recommendations.
    if best:
        rawUserID = int(trainSet.to_raw_uid(uiid))
        topN[rawUserID].extend(
            (int(trainSet.to_raw_iid(itemID)), 0.0) for itemID, _ in best)


# 7. Evaluate the model using the "hit rate" metric




In [8]:
hits = 0
total = 0

# For each left-out rating, check whether the held-out movie appears anywhere
# in that user's recommendation list.
for leftOut in leftOutTestSet:
    userID = leftOut[0]
    leftOutMovieID = int(leftOut[1])

    # .get() is used so that users without recommendations do not get an
    # empty list inserted into topN as a side effect of the lookup.
    recommendations = topN.get(int(userID), ())
    if any(int(movieID) == leftOutMovieID for movieID, _ in recommendations):
        hits += 1

    total += 1

# Hit rate = fraction of left-out ratings that were recommended.
# Guard against an empty test set instead of raising ZeroDivisionError.
HR = hits / total if total else 0.0

print("Hit Rate is : ", HR)

Hit Rate is :  0.05514157973174367
