In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import TruncatedSVD

# Load only the two metadata columns this notebook uses. The recorded run emitted a
# warning pointing at this read_csv call (presumably a mixed-dtype warning caused by
# columns that are never used here); restricting the parse to the needed columns
# avoids touching them and keeps memory use down. `id` is read as a string so it
# can be joined against the string-typed `movieId` below.
movies = pd.read_csv(
    "movies_metadata.csv",
    usecols=['id', 'original_title'],
    dtype={'id': 'str'},
)

# Load the ratings dataset (movieId as string to match movies['id'])
ratings = pd.read_csv("ratings_small.csv", dtype={'movieId': 'str'})

# Attach a title to every rating. A left join keeps ratings whose movieId has no
# metadata row; those get a missing title and drop out of the pivot below.
# NOTE(review): this assumes ratings.movieId and movies.id are the same identifier
# space — confirm; if the dataset ships a separate id-mapping file, join through it.
df = pd.merge(ratings, movies, left_on='movieId', right_on='id', how='left')

# Build the user x title matrix. pivot_table averages the ratings when a user has
# more than one row for the same title (its default aggregation is the mean).
user_item_matrix = df.pivot_table(index='userId', columns='original_title', values='rating')

# Unrated cells become 0, so from here on 0 means "no rating"
user_item_matrix = user_item_matrix.fillna(0)

# Split by rows, i.e. by user: 80% of users for fitting, 20% held out
train_user_item_matrix, test_user_item_matrix = train_test_split(user_item_matrix, test_size=0.2, random_state=42)

# Keep only the movies present in both splits so the test columns line up with
# the columns the SVD is fitted on
common_movies = train_user_item_matrix.columns.intersection(test_user_item_matrix.columns)
test_user_item_matrix_filtered = test_user_item_matrix[common_movies]

# Fit a rank-50 truncated SVD on the training users, then project the test users
svd = TruncatedSVD(n_components=50, random_state=42)
train_svd_matrix = svd.fit_transform(train_user_item_matrix)
test_svd_matrix = svd.transform(test_user_item_matrix_filtered)

# Map the low-rank representation back to the full movie space
predicted_ratings = svd.inverse_transform(test_svd_matrix)

# MSE over every cell of the matrix. Because unrated cells were filled with 0 this
# figure is dominated by the (many) zero cells; it is kept unchanged because later
# cells read `mse`.
mse = mean_squared_error(test_user_item_matrix_filtered, predicted_ratings)
print(f'Mean Squared Error: {mse}')

# MSE restricted to cells that hold a real rating — the number that is comparable
# to the rating scale. Skipped if the test split contains no ratings at all.
observed = test_user_item_matrix_filtered.to_numpy() > 0
if observed.any():
    observed_mse = mean_squared_error(
        test_user_item_matrix_filtered.to_numpy()[observed],
        predicted_ratings[observed],
    )
    print(f'Mean Squared Error (observed ratings only): {observed_mse}')


  movies = pd.read_csv("movies_metadata.csv", dtype={'id': 'str'})


Mean Squared Error: 0.20054266480944113


In [None]:
import numpy as np

# Pick one test-set user to recommend for. A seeded generator keeps the choice
# (and therefore the printed list) identical on every Restart & Run All.
rng = np.random.default_rng(42)
available_user_indices = test_user_item_matrix_filtered.index
user_id_for_recommendation = rng.choice(available_user_indices.to_numpy())

# predicted_ratings is a plain array, so look the user up by row position
user_position = available_user_indices.get_loc(user_id_for_recommendation)
user_predicted_ratings = predicted_ratings[user_position]

# Only movies the user has NOT rated are candidates; unrated cells were filled
# with 0 when the user-item matrix was built. Without this filter the top of the
# list is mostly movies the user has already rated.
user_known_ratings = test_user_item_matrix_filtered.iloc[user_position].to_numpy()
unrated_positions = np.flatnonzero(user_known_ratings == 0)

# Positions of the N highest predicted ratings among the candidates, best first.
# If fewer than N candidates exist, all of them are returned.
N = 10  # You can choose any number of recommendations
best_first = np.argsort(user_predicted_ratings[unrated_positions])[::-1][:N]
top_movie_indices = unrated_positions[best_first]

# Resolve titles against the columns the predictions were computed for, so the
# positions are guaranteed to line up with predicted_ratings
top_movie_titles = test_user_item_matrix_filtered.columns[top_movie_indices]

# Display the top recommended movies
print(f"Top {N} Recommended Movies for User {user_id_for_recommendation}:\n")
for i, movie_title in enumerate(top_movie_titles, 1):
    print(f"{i}. {movie_title}")


Top 10 Recommended Movies for User 481:

1. Terminator 3: Rise of the Machines
2. Sous le Sable
3. Scarface
4. Dawn of the Dead
5. License to Wed
6. Sleepless in Seattle
7. 5 Card Stud
8. Shriek If You Know What I Did Last Friday the Thirteenth
9. The Prisoner of Zenda
10. The Talented Mr. Ripley


In [None]:
# RMSE is the square root of the MSE computed in the evaluation cell, which puts
# the error back on the same scale as the ratings themselves.
rmse = np.sqrt(mse)
rmse_report = f'Root Mean Squared Error: {rmse}'
print(rmse_report)

Root Mean Squared Error: 0.44781990220337586
