In [1]:
import pandas as pd

# Read the MovieLens 100k files: movie id/title pairs and the raw user ratings.
movies = pd.read_csv(
    '/kaggle/input/movielens-100k-dataset/ml-100k/u.item',
    sep='|',
    names=['movie_id', 'title'],
    usecols=[0, 1],
    encoding='latin-1',
)
ratings = pd.read_csv(
    '/kaggle/input/movielens-100k-dataset/ml-100k/u.data',
    sep='\t',
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
).drop(columns='timestamp')  # the timestamp column is not used by the recommender

# Attach each rating's movie title by joining on the shared movie_id column.
data = ratings.merge(movies, on='movie_id')


In [2]:
# Preview the first two rows of the merged ratings/titles frame.
data.head(2)

Unnamed: 0,user_id,movie_id,rating,title
0,196,242,3,Kolya (1996)
1,186,302,3,L.A. Confidential (1997)


In [3]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy

# Fixed seed so the train/test split, the SVD initialisation and therefore the
# reported RMSE are the same on every Restart & Run All.
SEED = 42

# Load data into Surprise format.
# The ratings frame is rebuilt from `ratings` and `movies` instead of being read
# from `data`: this cell rebinds `data` to a Surprise Dataset, so indexing
# `data` here would fail the second time the cell is run.
reader = Reader(rating_scale=(1, 5))
rating_rows = ratings.merge(movies, on='movie_id')[['user_id', 'movie_id', 'rating']]
data = Dataset.load_from_df(rating_rows, reader)

# Split the data into training (75%) and test (25%) sets
trainset, testset = train_test_split(data, test_size=0.25, random_state=SEED)

# Use SVD algorithm
algo = SVD(random_state=SEED)

# Train the algorithm on the trainset
algo.fit(trainset)

# Test the algorithm on the testset
predictions = algo.test(testset)

# Evaluate the performance of the algorithm (prints the RMSE and returns it)
accuracy.rmse(predictions)


RMSE: 0.9341


0.934068044122332

In [4]:
def get_movie_recommendations(user_id, n=10):
    """Return the titles of the ``n`` unrated movies with the highest predicted rating.

    Relies on the notebook-level ``movies`` and ``ratings`` DataFrames and on the
    fitted ``algo`` model, so the cells defining them must have been run first.

    Args:
        user_id: Raw user id, matched against ``ratings['user_id']`` and passed
            to ``algo.predict``.
        n: Maximum number of titles to return. ``None`` returns every unrated
            movie; zero or a negative value returns an empty list.

    Returns:
        list: Movie titles ordered from highest to lowest estimated rating.
        Movies with equal estimates keep the order they have in ``movies``.
    """
    # A negative n would otherwise be applied as a slice bound ([:n]) and
    # silently return "all but the last |n|" movies.
    if n is not None and n <= 0:
        return []

    # A set gives constant-time membership tests; testing against the array
    # returned by .unique() rescans it for every candidate movie.
    rated_movie_ids = set(ratings.loc[ratings['user_id'] == user_id, 'movie_id'])
    unrated_movie_ids = [
        movie_id for movie_id in movies['movie_id'].unique()
        if movie_id not in rated_movie_ids
    ]

    # Predict a rating for every movie the user has not rated yet
    predictions = [algo.predict(user_id, movie_id) for movie_id in unrated_movie_ids]

    # Highest estimate first; sorted() is stable, so ties keep candidate order
    top_n = sorted(predictions, key=lambda pred: pred.est, reverse=True)[:n]

    # Build the id -> title lookup once (first title wins if an id repeats)
    # instead of filtering the whole frame for every recommendation.
    title_by_id = movies.drop_duplicates(subset='movie_id').set_index('movie_id')['title']
    return [title_by_id.loc[pred.iid] for pred in top_n]

# Show the ten highest-predicted unrated movies for the user whose user_id is 1
recommended_titles = get_movie_recommendations(user_id=1, n=10)
print(recommended_titles)


['Leaving Las Vegas (1995)', 'North by Northwest (1959)', 'As Good As It Gets (1997)', 'Raise the Red Lantern (1991)', 'Philadelphia (1993)', 'Boot, Das (1981)', 'Close Shave, A (1995)', 'People vs. Larry Flynt, The (1996)', 'Lawrence of Arabia (1962)', 'Manchurian Candidate, The (1962)']
