# Movie Recommendation by Surprise

In [1]:
import warnings
warnings.filterwarnings("ignore")

import random
import pandas as pd

from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import KNNBasic
from surprise import accuracy

In [2]:
# Load movie ratings CSV file
df = pd.read_csv('/kaggle/input/movie-recommendation-system/ratings.csv', 
                          skiprows=lambda i: i>0 and random.random() > 0.002)
print(len(df))

49964


# Load the data using Surprise library

In [3]:
# Define the rating scale
reader = Reader(rating_scale=(0.5, 5))

In [4]:
# Load the data into the Surprise Dataset
data = Dataset.load_from_df(df[['userId','movieId','rating']], reader)

In [5]:
# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2)

# KNNBasic
## Use item-based collaborative filtering with cosine similarity

In [6]:
# Build the KNNBasic collaborative filtering model
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(sim_options=sim_options)

In [7]:
# Train the model on the training set
model.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7aea17f7f010>

In [8]:
# Get movie recommendations for user 1
user_id = 1
user_movies = df[df['userId'] == user_id]['movieId'].tolist()
movies_to_predict = [movie for movie in df['movieId'].unique() if movie not in user_movies]

In [9]:
# Predict ratings for movies not rated by the user
userpredictions = [model.predict(user_id, movie) for movie in movies_to_predict]

In [10]:
# Sort predictions by estimated rating
sorted_predictions = sorted(userpredictions, key=lambda x: x.est, reverse=True)

In [11]:
# Display the top 5 recommendations
top_recommendations = sorted_predictions[:5]
for recommendation in top_recommendations:
    print(f"MovieId: {recommendation.iid}, Estimated Rating: {recommendation.est}")

MovieId: 1345, Estimated Rating: 3.530672237372095
MovieId: 2302, Estimated Rating: 3.530672237372095
MovieId: 4022, Estimated Rating: 3.530672237372095
MovieId: 40870, Estimated Rating: 3.530672237372095
MovieId: 5816, Estimated Rating: 3.530672237372095


In [12]:
# Make predictions on the test set
predictions = model.test(testset)

In [13]:
# Evaluate the model using RMSE (Root Mean Squared Error)
accuracy.rmse(predictions)

RMSE: 1.0623


1.062271994606982