In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
# Install scikit-surprise
!pip install scikit-surprise

# Import necessary libraries
import pandas as pd
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

# Load the dataset (assuming you have a CSV file with columns: userId, movieId, rating)
file_path = "/kaggle/input/movie-recommendation-system/ratings.csv"
df = pd.read_csv(file_path)

# Define the Reader object and load the dataset using Surprise
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)

# Split the dataset into training and testing sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build and train the collaborative filtering model (SVD)
model = SVD()
model.fit(trainset)

# Make predictions on the test set
predictions = model.test(testset)

# Evaluate the model's performance
accuracy = rmse(predictions)
print(f'RMSE: {accuracy}')

# Assuming you have a separate movies.csv file with columns: movieId, title
movies_file_path = "/kaggle/input/movie-recommendation-system/movies.csv"
movies_df = pd.read_csv(movies_file_path)

# Function to get movie recommendations for a user
def get_top_n_recommendations(user_id, n=10):
    user_movies = df[df['userId'] == user_id]['movieId'].unique()
    movies_to_predict = df[~df['movieId'].isin(user_movies)]['movieId'].unique()

    predictions = [model.predict(user_id, movie_id) for movie_id in movies_to_predict]
    recommendations = sorted(predictions, key=lambda x: x.est, reverse=True)[:n]

    recommended_movie_ids = [int(recommendation.iid) for recommendation in recommendations]
    recommended_movies = movies_df[movies_df['movieId'].isin(recommended_movie_ids)]['title'].unique()

    return recommended_movies

# Example: Get recommendations for user with userId 1
user_id_to_recommend = 1
recommendations = get_top_n_recommendations(user_id_to_recommend)
print(f'Recommendations for User {user_id_to_recommend}:')
for i, movie in enumerate(recommendations, 1):
    print(f'{i}. {movie}')


RMSE: 0.7775
RMSE: 0.7775469643633
Recommendations for User 1:
1. Clerks (1994)
2. Trainspotting (1996)
3. One Flew Over the Cuckoo's Nest (1975)
4. Breaking the Waves (1996)
5. American Beauty (1999)
6. Waking Life (2001)
7. Spirited Away (Sen to Chihiro no kamikakushi) (2001)
8. Great Beauty, The (Grande Bellezza, La) (2013)
9. The Heart of the World (2000)
10. Black Mirror
