In [5]:
import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split
from surprise import NMF

In [6]:
# Load data from files
ratings_df = pd.read_csv('ratings.csv')
movies_df = pd.read_csv('movies.csv')

In [9]:
# Define a Reader object for the Surprise library
reader = Reader(rating_scale=(0.5, 5.0))

# Load data into Surprise format
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)
data

<surprise.dataset.DatasetAutoFolds at 0x128565fd0>

In [10]:
# Split the data into training and test sets
trainset, testset = train_test_split(data, test_size=0.25)

In [11]:
# Build and train the model using Singular Value Decomposition (SVD)
model = SVD()
model.fit(trainset)

# Predict ratings for the test set
predictions = model.test(testset)

# Compute and print RMSE
accuracy.rmse(predictions)

# Build the full trainset and train the model on it
trainset = data.build_full_trainset()
model.fit(trainset)

RMSE: 0.8772


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x12e726650>

In [12]:
# Function to get movie recommendations for a user
def get_top_n_recommendations(predictions, n=10):
    top_n = {}
    for uid, iid, true_r, est, _ in predictions:
        if not top_n.get(uid):
            top_n[uid] = []
        top_n[uid].append((iid, est))
    
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    
    return top_n

In [13]:
# Get top 10 recommendations for each user
top_n_recommendations = get_top_n_recommendations(predictions, n=10)

In [14]:
# Print recommendations
for user_id, recommendations in top_n_recommendations.items():
    print(f"User {user_id} recommendations:")
    for movie_id, rating in recommendations:
        movie_title = movies_df[movies_df['movieId'] == movie_id]['title'].values[0]
        print(f"  {movie_title} (Estimated rating: {rating:.2f})")



User 391 recommendations:
  Election (1999) (Estimated rating: 4.58)
  Blood Simple (1984) (Estimated rating: 4.48)
  Monty Python and the Holy Grail (1975) (Estimated rating: 4.33)
  Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964) (Estimated rating: 4.26)
  Shawshank Redemption, The (1994) (Estimated rating: 4.18)
  Twelve Monkeys (a.k.a. 12 Monkeys) (1995) (Estimated rating: 4.16)
  There's Something About Mary (1998) (Estimated rating: 4.16)
  Godfather: Part II, The (1974) (Estimated rating: 4.13)
  Vertigo (1958) (Estimated rating: 4.12)
  Fantasia (1940) (Estimated rating: 4.11)
User 182 recommendations:
  Monty Python and the Holy Grail (1975) (Estimated rating: 4.49)
  Graduate, The (1967) (Estimated rating: 4.45)
  To Kill a Mockingbird (1962) (Estimated rating: 4.35)
  Kelly's Heroes (1970) (Estimated rating: 4.33)
  Goodfellas (1990) (Estimated rating: 4.32)
  Twelve Monkeys (a.k.a. 12 Monkeys) (1995) (Estimated rating: 4.31)
  Citizen Kane (1941)

In [15]:
print("Evaluation on test set:")
print(f"RMSE: {accuracy.rmse(predictions)}")
print(f"MAE: {accuracy.mae(predictions)}")


Evaluation on test set:
RMSE: 0.8772
RMSE: 0.8772065034436716
MAE:  0.6747
MAE: 0.6747145229576447
