In [2]:
pip install surprise

Collecting surprise
  Downloading https://files.pythonhosted.org/packages/61/de/e5cba8682201fcf9c3719a6fdda95693468ed061945493dea2dd37c5618b/surprise-0.1-py2.py3-none-any.whl
Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 312kB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1617552 sha256=83cfc92bf7406fa61cfba902ac22983ac71b45dbfaf8727bef0ac299e9ad5de5
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


In [3]:
from datetime import datetime
import os
import pandas as pd
import numpy as np
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate
from surprise import NormalPredictor
from surprise import KNNBasic
from surprise import SVD
from surprise import NMF
from surprise import SlopeOne
from surprise import CoClustering
from surprise.accuracy import rmse
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV
from collections import defaultdict

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
ratings = pd.read_csv('/content/drive/MyDrive/data/ratings.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [7]:
ratings_dict = {'itemID': list(ratings.movieId),
                'userID': list(ratings.userId),
                'rating': list(ratings.rating)}

df = pd.DataFrame(ratings_dict)
df.shape

reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

In [8]:
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

benchmark = []

for algorithm in [SVD(), NMF(), NormalPredictor(), KNNBasic()]:
    
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]],index=['Algorithm']))
    benchmark.append(tmp)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


In [9]:
surprise_results = pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')
surprise_results

Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SVD,0.880964,4.17818,0.344505
NMF,0.938353,5.522219,0.325261
KNNBasic,0.958021,0.117507,2.419565
NormalPredictor,1.425172,0.134469,0.301548


In [10]:
trainset, testset = train_test_split(data, test_size=0.25)
algo = SVD()
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions)

RMSE: 0.8732


0.873210080996095

In [11]:
trainset = data.build_full_trainset() 
algo = SVD()
algo.fit(trainset)
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

In [12]:
def get_all_predictions(predictions):
    # First map the predictions to each user.
    top_n = defaultdict(list)    
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
        
    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)

    return top_n

In [13]:
all_pred = get_all_predictions(predictions)

In [14]:
#To get top 5 reommendation
n = 5

for uid, user_ratings in all_pred.items():
    user_ratings.sort(key=lambda x: x[1], reverse=True)
    all_pred[uid] = user_ratings[:n]

In [15]:
tmp = pd.DataFrame.from_dict(all_pred)
tmp_transpose = tmp.transpose()

In [16]:
def get_predictions(user_id):
    results = tmp_transpose.loc[user_id]
    return results

In [17]:
#specifying the user
user_id= 47
results = get_predictions(user_id)
results


0     (1204, 3.876090415655406)
1     (922, 3.8585863254916637)
2     (2542, 3.843350161705625)
3    (1248, 3.8188818885223443)
4    (1217, 3.8114199076479958)
Name: 47, dtype: object

In [18]:
recommended_movie_ids=[]
for x in range(0, n):
    recommended_movie_ids.append(results[x][0])

recommended_movie_ids

[1204, 922, 2542, 1248, 1217]

In [20]:
#getting the title of the recommended movies
movies = pd.read_csv('/content/drive/MyDrive/data/movies.csv')
movies.head()
recommended_movies = movies[movies['movieId'].isin(recommended_movie_ids)]
recommended_movies

Unnamed: 0,movieId,title,genres
704,922,Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),Drama|Film-Noir|Romance
906,1204,Lawrence of Arabia (1962),Adventure|Drama|War
918,1217,Ran (1985),Drama|War
947,1248,Touch of Evil (1958),Crime|Film-Noir|Thriller
1917,2542,"Lock, Stock & Two Smoking Barrels (1998)",Comedy|Crime|Thriller


In [22]:
ratings = pd.read_csv('/content/drive/MyDrive/data/ratings.csv')


ratings_dict = {'itemID': list(ratings.movieId),
                'userID': list(ratings.userId),
                'rating': list(ratings.rating)}

df = pd.DataFrame(ratings_dict)
df.head()

Unnamed: 0,itemID,userID,rating
0,1,1,4.0
1,3,1,4.0
2,6,1,4.0
3,47,1,5.0
4,50,1,5.0


In [23]:
temp = df[df['userID'] == 47].sort_values("rating", ascending = False)
temp.head()

Unnamed: 0,itemID,userID,rating
7041,112804,47,5.0
6997,47423,47,5.0
6958,3147,47,5.0
7039,112552,47,5.0
7002,51931,47,5.0


In [24]:
history_movie_ids = temp['itemID']
user_history = movies[movies['movieId'].isin(history_movie_ids)]

In [25]:
user_history[:n]

Unnamed: 0,movieId,title,genres
30,31,Dangerous Minds (1995),Drama
43,47,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
55,62,Mr. Holland's Opus (1995),Drama
116,141,"Birdcage, The (1996)",Comedy
120,147,"Basketball Diaries, The (1995)",Drama


In [26]:
recommended_movies

Unnamed: 0,movieId,title,genres
704,922,Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),Drama|Film-Noir|Romance
906,1204,Lawrence of Arabia (1962),Adventure|Drama|War
918,1217,Ran (1985),Drama|War
947,1248,Touch of Evil (1958),Crime|Film-Noir|Thriller
1917,2542,"Lock, Stock & Two Smoking Barrels (1998)",Comedy|Crime|Thriller
