## Install Libraries

In [1]:
# Install the Surprise recommender library (imported below as `surprise`);
# `%pip` installs into the environment of the running kernel.
%pip install scikit-surprise

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


## Import Libraries

In [2]:
from surprise import Dataset, Reader
from surprise import KNNBaseline
import pandas as pd


## Load Datasets

In [3]:
# Movie metadata (movieId, title, genres); get_top_n merges on `movieId`
# to attach titles and genres to the predicted items.
movies_df = pd.read_csv("ml-latest-small/movies.csv")
movies_df


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [4]:
# Each line of the ratings file is "user,item,rating,timestamp"; the first
# line is skipped. The rating scale is stated explicitly because MovieLens
# ratings run from 0.5 to 5.0, while Surprise's default scale of (1, 5)
# would clip estimated ratings at 1 instead of 0.5.
reader = Reader(
    line_format="user item rating timestamp",
    sep=",",
    rating_scale=(0.5, 5.0),
    skip_lines=1,
)


In [5]:
# Parse the ratings file into a Surprise Dataset, using the line layout
# described by `reader`.
data = Dataset.load_from_file("ml-latest-small/ratings.csv", reader=reader)


In [6]:
# Train on every rating in the file (no hold-out split): the goal here is
# to produce recommendations, not to measure accuracy.
trainset = data.build_full_trainset()
# Candidate pairs to score: the (user, item) combinations that are NOT in
# the training set.
testset = trainset.build_anti_testset()


## Movie Recommendation

In [7]:
def get_top_n(user_id, predictions, movies_df, n=10):
    """Return the ``n`` movies with the highest estimated rating for one user.

    Parameters
    ----------
    user_id : int
        User to recommend for. Each prediction's user id is cast to ``int``
        before being compared with this value.
    predictions : iterable of 5-item sequences
        ``(user id, movie id, r_ui, est, details)`` records, such as the list
        produced by ``algo.test(...)``. Ids must be convertible with ``int``.
    movies_df : pandas.DataFrame
        Movie metadata with ``movieId``, ``title`` and ``genres`` columns.
    n : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``movieId``, ``title`` and ``genres``, ordered by descending
        estimate with a fresh 0-based index. Movies missing from
        ``movies_df`` are dropped by the inner merge. The frame is empty when
        there are no predictions for ``user_id``.
    """
    result_columns = ["movieId", "title", "genres"]

    # Select this user's rows *before* building a DataFrame: the prediction
    # list covers every user, so materialising all of it just to discard
    # most rows wastes time and memory.
    user_rows = [
        (int(uid), int(iid), r_ui, est, details)
        for uid, iid, r_ui, est, details in predictions
        if int(uid) == user_id
    ]

    # Nothing to rank. Returning early also avoids calling nlargest on an
    # object-dtype "est" column, which raises TypeError.
    if not user_rows:
        return pd.DataFrame(columns=result_columns)

    pred_df = pd.DataFrame(
        user_rows, columns=["userId", "movieId", "r_ui", "est", "details"]
    )
    pred_df = pd.merge(pred_df, movies_df, on="movieId")
    top_n_df = pred_df.nlargest(n, "est").reset_index(drop=True)

    return top_n_df[result_columns]


In [8]:
# k-NN collaborative filter that takes a baseline rating into account, left
# on the library defaults (the fit log below reports ALS baselines and MSD
# similarity).
algo = KNNBaseline()


In [9]:
# Step 1: train the model on the full trainset.
algo.fit(trainset)
# Step 2: estimate a rating for every (u, i) pair that is NOT in the
# training set.
predictions = algo.test(testset)


Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.


In [10]:
# User to generate recommendations for. Kept as an int because get_top_n
# casts the prediction user ids to int before comparing.
user_id = 1


In [11]:
# Rank the chosen user's predictions and show the ten best-scoring movies.
top_n_df = get_top_n(
    user_id=user_id,
    predictions=predictions,
    movies_df=movies_df,
    n=10,
)
top_n_df


Unnamed: 0,movieId,title,genres
0,318,"Shawshank Redemption, The (1994)",Crime|Drama
1,131724,The Jinx: The Life and Deaths of Robert Durst ...,Documentary
2,720,Wallace & Gromit: The Best of Aardman Animatio...,Adventure|Animation|Comedy
3,1272,Patton (1970),Drama|War
4,5746,Galaxy of Terror (Quest) (1981),Action|Horror|Mystery|Sci-Fi
5,5764,Looker (1981),Drama|Horror|Sci-Fi|Thriller
6,6835,Alien Contamination (1980),Action|Horror|Sci-Fi
7,7899,Master of the Flying Guillotine (Du bi quan wa...,Action
8,898,"Philadelphia Story, The (1940)",Comedy|Drama|Romance
9,1719,"Sweet Hereafter, The (1997)",Drama
