In [0]:
!pip install surprise



In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
from surprise import Dataset, Reader, KNNBasic
from collections import defaultdict
from operator import itemgetter
import os, csv, sys

In [0]:
class Films:
    film_Label = {}
    label_Film = {}
    link1 = '/content/drive/My Drive/ml-latest-small/ratings_after_wrangling.csv'
    link2 = '/content/drive/My Drive/ml-latest-small/movies.csv'

    def download(self):

        # Look for files relative to the directory we are running from
        os.chdir(os.path.dirname(sys.argv[0]))

        ratingsDataset = 0
        self.film_Label = {}
        self.label_Film = {}

        reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)

        ratingsDataset = Dataset.load_from_file(self.link1, reader=reader)

        with open(self.link2, newline='', encoding='ISO-8859-1') as csvfile:
            movieReader = csv.reader(csvfile)
            next(movieReader)  # Skip header line
            for row in movieReader:
                movieID = int(row[0])
                movieName = row[1]
                self.film_Label[movieID] = movieName
                self.label_Film[movieName] = movieID

        return ratingsDataset

    def filmLabel(self, movieID):
        if movieID in self.film_Label:
            return self.film_Label[movieID]
        else:
            return ""





In [0]:

USER = '70'
k = 10

ml = Films()
data = ml.download()
trainSet = data.build_full_trainset()

sim_options = {'name': 'cosine',
               'user_based': False
               }


In [0]:
model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)
simsMatrix = model.compute_similarities()

testUserInnerID = trainSet.to_inner_uid(USER)


Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


Get the top K items we rated

In [0]:
testUserRatings = trainSet.ur[testUserInnerID]
kNeighbors =[]
for rating in testUserRatings:
    if rating[1] > 4.0:
        kNeighbors.append(rating)

Get similar items to stuff we liked (weighted by rating)

In [0]:
candidates = defaultdict(float)
for itemID, rating in kNeighbors:
    similarityRow = simsMatrix[itemID]
    for innerID, score in enumerate(similarityRow):
        candidates[innerID] += score * (rating / 5.0)


Build a dictionary of stuff the user has already seen

In [0]:
watched = {}
for itemID, rating in trainSet.ur[testUserInnerID]:
    watched[itemID] = 1


Get top-rated items from similar users:

In [0]:
pos = 0
for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
    if not itemID in watched:
        movieID = trainSet.to_raw_iid(itemID)
        print(ml.filmLabel(int(movieID)), ratingSum)
        pos += 1
        if (pos > 5):
            break

Only Angels Have Wings (1939) 33.94475384724753
Yankee Doodle Dandy (1942) 33.944753847247526
Little Women (1949) 33.944753847247526
Mr. Deeds Goes to Town (1936) 33.944753847247526
Asphalt Jungle, The (1950) 33.93620814279008
Destry Rides Again (1939) 33.923282171036185
