In [2]:
pip install scikit-surprise


Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357294 sha256=2b9ba423c0334d0c0e0d11f19366644ccd8d0776b8548fd7cd099bc4192c094e
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [4]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from collections import defaultdict

# Load data
movies_df = pd.read_csv('movies.csv')
ratings_df = pd.read_csv('ratings.csv')

# Prepare data for Surprise
reader = Reader(line_format='user item rating timestamp', sep=',')
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

# Build and train the model
trainset, testset = train_test_split(data, test_size=0.25)
model = SVD()
model.fit(trainset)

# Make predictions
predictions = model.test(testset)
accuracy.rmse(predictions)

# Function to get movie ID from movie title
def get_movie_id(title):
    return movies_df[movies_df['title'] == title]['movieId'].values[0]

# Function to recommend movies
def get_similar_movies(movie_title, top_n=5):
    movie_id = get_movie_id(movie_title)

    # Get a list of all movie IDs
    all_movie_ids = movies_df['movieId'].tolist()

    # Create a list of predictions for the movie
    predictions = [model.predict(movie_id, mid) for mid in all_movie_ids]

    # Sort predictions by estimated rating
    predictions.sort(key=lambda x: x.est, reverse=True)

    # Get top N recommendations
    top_predictions = predictions[:top_n]

    # Retrieve movie titles for top N recommendations
    recommended_movie_ids = [pred.iid for pred in top_predictions]
    recommended_movies = movies_df[movies_df['movieId'].isin(recommended_movie_ids)]

    return recommended_movies[['title', 'genres']]

# Example usage
# use this movies because your movie may or may not be in the data.
# 1) Toy Story (1995)
# 2) Jumanji (1995)
# 3)RoboCop 3 (1993)
# for more see the movies.csv file

movie_title = "Toy Story (1995)"  # Replace with your movie title
recommendations = get_similar_movies(movie_title)
print(recommendations)



RMSE: 0.8800
                                                 title  \
46                          Usual Suspects, The (1995)   
224          Star Wars: Episode IV - A New Hope (1977)   
277                   Shawshank Redemption, The (1994)   
520                                       Fargo (1996)   
602  Dr. Strangelove or: How I Learned to Stop Worr...   

                          genres  
46        Crime|Mystery|Thriller  
224      Action|Adventure|Sci-Fi  
277                  Crime|Drama  
520  Comedy|Crime|Drama|Thriller  
602                   Comedy|War  
