In [1]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [2]:
# Read the user/movie ratings table from the local dataset directory.
ratings = pd.read_csv(filepath_or_buffer=r'dataset/ratings.csv')

In [3]:
# Preview the first five rows to check that the columns loaded as expected.
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,tstamp
0,206,4803,4.0,2003-04-07 13:52:01
1,5073,72731,4.0,2020-02-19 16:07:53
2,4739,91653,4.0,2020-12-28 15:35:58
3,535,3005,3.0,2008-12-26 05:38:11
4,465,4776,3.0,2008-08-13 20:22:36


In [4]:
# Summarise the row count, column dtypes and memory footprint of the ratings frame.
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3908657 entries, 0 to 3908656
Data columns (total 4 columns):
 #   Column   Dtype  
---  ------   -----  
 0   userId   int64  
 1   movieId  int64  
 2   rating   float64
 3   tstamp   object 
dtypes: float64(1), int64(2), object(1)
memory usage: 119.3+ MB


In [5]:
# Count how many ratings fall on each distinct value, most frequent first.
ratings['rating'].value_counts(sort=True, ascending=False)

4.0    900243
3.5    747522
3.0    693019
4.5    405031
5.0    341782
2.5    310870
2.0    241348
1.5     95228
1.0     95044
0.5     78570
Name: rating, dtype: int64

In [6]:
# Bounds of the rating scale; they match the smallest and largest values
# observed in the 'rating' column.
min_rating = 0.5
max_rating = 5.0

# Fixed seed so the train/test partition (and therefore the reported RMSE)
# is the same on every Restart & Run All.
SPLIT_SEED = 42

# The Reader tells Surprise which rating scale the data uses.
reader = Reader(rating_scale=(min_rating, max_rating))

# Wrap the user, item and rating columns (in that order) in a Surprise Dataset.
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the ratings for evaluation.
trainset, testset = train_test_split(data, test_size=.2, random_state=SPLIT_SEED)


In [7]:
# SVD matrix-factorisation model with default hyper-parameters. The seed fixes
# the random initialisation so repeated runs produce the same fitted model.
algo = SVD(random_state=42)

In [8]:
# Fit the model on the training portion of the ratings.
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x217808f9ac0>

In [9]:
# Score the held-out ratings, then print and return the root-mean-square error.
predictions = algo.test(testset)
accuracy.rmse(predictions, verbose=True)

RMSE: 0.7435


0.7435430437774956

In [10]:
def predict_rating(user_id, movie_id):
    """Return the model's estimated rating for one user/movie pair.

    Delegates to the module-level ``algo`` and extracts the ``est`` field
    from the prediction object it returns.
    """
    return algo.predict(user_id, movie_id).est

In [11]:
def recommend_movies(user_id, top_n=10):
    """Return the ids of the highest-scoring movies the user has not rated.

    Every movie id present in the module-level ``ratings`` frame that the
    user has no rating for is scored with the module-level ``algo``; the
    ids with the largest estimated ratings are returned.

    Args:
        user_id: Id of the user, as it appears in ``ratings['userId']``.
        top_n: Maximum number of movie ids to return (default 10). Values
            of zero or less yield an empty list.

    Returns:
        A list of at most ``top_n`` movie ids, best estimate first. Movies
        with equal estimates are ordered by ascending movie id.
    """
    if top_n <= 0:
        return []

    # Movies this user has already rated are excluded from the candidates.
    seen = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])

    # Sorting the candidates makes the result independent of set iteration
    # order, so ties between equal estimates resolve the same way every run.
    candidates = sorted(set(ratings['movieId'].unique()) - seen)

    scored = [algo.predict(user_id, movie_id) for movie_id in candidates]

    # sorted() is stable (also with reverse=True), so equally-scored movies
    # keep their ascending-id order.
    ranked = sorted(scored, key=lambda pred: pred.est, reverse=True)
    return [pred.iid for pred in ranked[:top_n]]

In [12]:
# Try the recommender on one known user (id 206 appears in the ratings preview).
movie_ids = recommend_movies(user_id=206)

In [None]:
# Ask which user to recommend for; int() raises ValueError on non-numeric input.
user_id = int(input("Enter your user ID: "))

# Flag ids that never appear in the ratings table, since there is no rating
# history to base the recommendations on.
if not ratings['userId'].eq(user_id).any():
    print(f"Warning: user {user_id} has no ratings in the dataset.")

# Call the recommend_movies function with the user's ID
movie_ids = recommend_movies(user_id)

# Load the movie metadata under its own name. Rebinding `ratings` here would
# break recommend_movies(), which reads the ratings table from that global,
# and would make this cell fail when run a second time.
movies = pd.read_csv(r'dataset/movies.csv')

# Keep only the recommended movies and restore recommendation order
# (isin() alone returns the rows in file order, not ranked order).
rank_by_id = {movie_id: rank for rank, movie_id in enumerate(movie_ids)}
filtered_df = (
    movies[movies['movieId'].isin(movie_ids)]
    .assign(recommendation_rank=lambda frame: frame['movieId'].map(rank_by_id))
    .sort_values('recommendation_rank')
)

# Print the id, title and genres of each recommended movie, best first.
for row in filtered_df.itertuples(index=False):
    print(f"Movie ID: {row.movieId}")
    print(f"Title: {row.title}")
    print(f"Genres: {row.genres}")
    print('-' * 30)