# Movie Recommender: Top-3 Recommendations per User

This notebook follows the tutorial "Recommender systems with collaborative filters": https://medium.com/playkids-tech-blog/recommender-systems-with-collaborative-filters-531e8315ba58

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from collections import Counter

# Show every column when displaying wide DataFrames.
# The fully qualified key is required: in recent pandas releases the bare
# 'max_columns' pattern matches more than one option (for example
# 'styler.render.max_columns' as well as 'display.max_columns'), which makes
# set_option raise OptionError.
pd.set_option('display.max_columns', None)

In [3]:
from surprise import Dataset
from surprise import KNNBasic
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split

# Load the built-in "ml-100k" (MovieLens 100k) ratings through Surprise.
data = Dataset.load_builtin("ml-100k")
# Build the trainset from the full dataset; no hold-out split is made here.
trainingSet = data.build_full_trainset()

In [5]:
# Basic k-NN recommender with cosine similarity; user_based=False asks for
# similarities between items rather than between users.
knn = KNNBasic(sim_options=dict(name='cosine', user_based=False))
# Keep fit() as the last expression so the cell still displays the fitted model.
knn.fit(trainingSet)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7fdd3da06490>

In [14]:
# Show the raw ratings held by the Surprise dataset as a DataFrame,
# one row per (user_id, item_id, rating, timestamp) record.
pd.DataFrame(
    data.raw_ratings,
    columns=['user_id', 'item_id', 'rating', 'timestamp'],
)

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596
...,...,...,...,...
99995,880,476,3.0,880175444
99996,716,204,5.0,879795543
99997,276,1090,1.0,874795795
99998,13,225,2.0,882399156


In [15]:
# Build the "anti-testset" from the trainset: the (user, item) pairs to score.
# The rating attached to each pair is a filler, not an observed rating — the
# frame displayed in the next cell shows the same value (3.52986) on every row
# (presumably the global mean rating; confirm against the Surprise docs).
testSet = trainingSet.build_anti_testset()

In [20]:
# Inspect the anti-testset as a DataFrame. The 'rating' column holds the
# constant filler value attached to each pair, not a rating given by the user.
pd.DataFrame(testSet, columns=['user_id','item_id','rating'])

Unnamed: 0,user_id,item_id,rating
0,196,302,3.52986
1,196,377,3.52986
2,196,51,3.52986
3,196,346,3.52986
4,196,474,3.52986
...,...,...,...
1486121,941,1674,3.52986
1486122,941,1640,3.52986
1486123,941,1637,3.52986
1486124,941,1630,3.52986


In [23]:
# NOTE(review): this cell repeats the previous cell verbatim and produces the
# same output; it can be deleted.
pd.DataFrame(testSet, columns=['user_id','item_id','rating'])

Unnamed: 0,user_id,item_id,rating
0,196,302,3.52986
1,196,377,3.52986
2,196,51,3.52986
3,196,346,3.52986
4,196,474,3.52986
...,...,...,...
1486121,941,1674,3.52986
1486122,941,1640,3.52986
1486123,941,1637,3.52986
1486124,941,1630,3.52986


In [24]:
# Estimate a rating for every (user, item) pair in the anti-testset.
# NOTE(review): the anti-testset displayed above has about 1.49M rows, so this
# call is likely the slowest cell in the notebook.
predictions = knn.test(testSet)

In [25]:
from collections import defaultdict


def get_top3_recommendations(predictions, topN = 3):
    """Return the ``topN`` highest-estimated items for each user.

    Parameters
    ----------
    predictions : iterable
        Iterable of 5-element records unpacked as
        ``(user_id, item_id, true_rating, estimated_rating, details)``.
    topN : int, default 3
        Number of items to keep per user.

    Returns
    -------
    collections.defaultdict
        Maps each user id to a list of ``(item_id, estimated_rating)`` tuples,
        ordered from highest to lowest estimate and cut to ``topN`` entries.
        Items with equal estimates keep the order they had in ``predictions``.
    """
    per_user = defaultdict(list)

    # Group the (item, estimate) pairs by user.
    for record in predictions:
        user_id, item_id, _true_rating, estimate, _details = record
        per_user[user_id].append((item_id, estimate))

    # Rank each user's candidates by estimate and keep only the best ones.
    for user_id in list(per_user):
        ranked = sorted(per_user[user_id], key=lambda pair: pair[1], reverse=True)
        per_user[user_id] = ranked[:topN]

    return per_user

In [26]:
# NOTE(review): scratch cell — it creates and displays an empty defaultdict
# that is not assigned to anything; candidate for deletion.
defaultdict(list)

defaultdict(list, {})

In [27]:
import io  # not used by this cell any more; kept because other cells may rely on it
import os


def read_item_names(file_name=None):
    """Read the MovieLens 100k ``u.item`` file and map raw item ids to movie names.

    Parameters
    ----------
    file_name : str, optional
        Path to a ``u.item`` file (``|``-separated, ISO-8859-1 encoded).
        When omitted, the file is looked up under the Surprise data folder:
        the directory named by the ``SURPRISE_DATA_FOLDER`` environment
        variable if it is set, otherwise ``~/.surprise_data``.

    Returns
    -------
    dict
        ``{raw_item_id: movie_name}``, with both keys and values as strings.

    Raises
    ------
    OSError
        If the file does not exist or cannot be read.
    """
    if file_name is None:
        # Honour the same environment override Surprise uses for its data
        # folder, so the lookup still works when the dataset is stored
        # somewhere other than the home directory.
        data_dir = os.environ.get(
            'SURPRISE_DATA_FOLDER',
            os.path.join(os.path.expanduser('~'), '.surprise_data'),
        )
        file_name = os.path.join(data_dir, 'ml-100k', 'ml-100k', 'u.item')

    rid_to_name = {}
    with open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            fields = line.rstrip('\n').split('|')
            # Skip blank or malformed lines instead of failing with IndexError.
            if len(fields) < 2:
                continue
            rid_to_name[fields[0]] = fields[1]

    return rid_to_name

In [29]:
# Top-3 predicted items per user, shown as
# {user_id: [(item_id, estimated_rating), ...]}.
# NOTE(review): this displays the mapping for every user at once; consider
# showing only a few users to keep the output readable.
get_top3_recommendations(predictions, topN = 3)

defaultdict(list,
            {'196': [('1309', 4.5), ('1310', 4.5), ('1676', 4.25)],
             '186': [('1674', 4.0),
              ('328', 3.973309863694452),
              ('1', 3.9706070820795514)],
             '22': [('1653', 5),
              ('1618', 4.473684210526316),
              ('12', 4.424971858861809)],
             '244': [('1236', 4.4), ('1235', 4.4), ('127', 4.250789530255668)],
             '166': [('1674', 5),
              ('1306', 4.496629145544974),
              ('1307', 4.406637188447526)],
             '298': [('480', 4.500680623081089),
              ('404', 4.4992335315836405),
              ('191', 4.475757029866706)],
             '115': [('135', 4.575476688073763),
              ('179', 4.574590814293505),
              ('61', 4.473354139275399)],
             '253': [('1653', 5),
              ('866', 4.5223884425635905),
              ('172', 4.500711018576675)],
             '305': [('124', 4.173099630818667),
              ('498', 4.14951989801383