## Install Libraries

In [1]:
%pip install scikit-surprise

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


## Import Libraries

In [2]:
from surprise import Dataset, Reader
from surprise import KNNBaseline
from surprise.model_selection import train_test_split
import pandas as pd


## Load Datasets

In [3]:
movies_df = pd.read_csv("ml-latest-small/movies.csv")
movies_df


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [4]:
reader = Reader(line_format="user item rating timestamp", sep=",", skip_lines=1)


In [5]:
data = Dataset.load_from_file("ml-latest-small/ratings.csv", reader=reader)


In [6]:
trainset, testset = train_test_split(data, test_size=0.2)


## Movie Recommendation

In [7]:
algo = KNNBaseline()


In [8]:
predictions = algo.fit(trainset).test(testset)


Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.


In [9]:
def get_top_n(user_id, predictions, movies_df, n=10):
    pred_df = pd.DataFrame(
        predictions, columns=["userId", "movieId", "r_ui", "est", "details"]
    )
    pred_df[["userId", "movieId"]] = pred_df[["userId", "movieId"]].astype(int)
    pred_df = pred_df[pred_df["userId"] == user_id]
    pred_df = pd.merge(pred_df, movies_df, on="movieId")
    top_n_df = pred_df.nlargest(n, "est").reset_index()

    return top_n_df[["movieId", "title", "genres"]]


In [10]:
# Predict ratings for all pairs (u, i) that are NOT in the training set.
anti_testset = trainset.build_anti_testset(fill=0)
predictions = algo.test(anti_testset)


In [11]:
user_id = 1


In [12]:
top_n_df = get_top_n(user_id, predictions, movies_df, n=10)
top_n_df


Unnamed: 0,movieId,title,genres
0,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller
1,4142,Left Behind: The Movie (2000),Action|Adventure|Drama|Thriller
2,134796,Bitter Lake (2015),Documentary
3,947,My Man Godfrey (1936),Comedy|Romance
4,4454,More (1998),Animation|Drama|Sci-Fi|IMAX
5,318,"Shawshank Redemption, The (1994)",Crime|Drama
6,1201,"Good, the Bad and the Ugly, The (Buono, il bru...",Action|Adventure|Western
7,3275,"Boondock Saints, The (2000)",Action|Crime|Drama|Thriller
8,2959,Fight Club (1999),Action|Crime|Drama|Thriller
9,94810,Eva (2011),Drama|Fantasy|Sci-Fi
