<a href="https://colab.research.google.com/github/bharatyadav6622-arch/Customer-Churn-Prediction/blob/main/Movie_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 Movie Recommendation System â€“ Collaborative filtering using Surprise library.


In [1]:
!pip install scikit-surprise

from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate, train_test_split
from surprise import accuracy

# 1. Load the built-in MovieLens 100k dataset
data = Dataset.load_builtin('ml-100k')

# 2. Split into train and test sets (80/20)
trainset, testset = train_test_split(data, test_size=0.2)

# Initialize the SVD algorithm
model = SVD(n_factors=100, n_epochs=20, lr_all=0.005, reg_all=0.02)

# Train the model
model.fit(trainset)

# Make predictions on the test set
predictions = model.test(testset)

# Compute RMSE (Aim for < 0.94 on MovieLens 100k)
rmse_score = accuracy.rmse(predictions)
print(f"Model RMSE: {rmse_score:.4f}")

user_id = str(196)  # Example User ID
movie_id = str(302) # Example Movie ID

# Predict the rating
pred = model.predict(user_id, movie_id)
print(f"Predicted rating for User {user_id} on Movie {movie_id}: {pred.est:.2f}")

Dataset ml-100k could not be found. Do you want to download it? [Y/n] y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k
RMSE: 0.9417
Model RMSE: 0.9417
Predicted rating for User 196 on Movie 302: 4.17


In [4]:
import io
from surprise import get_dataset_dir

def get_movie_names():
    """ Read the u.item file from MovieLens 100-k and return a dictionary mapping ID to Title. """
    file_name = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    rid_to_name = {}

    # We use 'ISO-8859-1' because the file contains special characters in movie titles
    with io.open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            line = line.split('|')
            rid_to_name[line[0]] = line[1]

    return rid_to_name

# 1. Get the mapping
movie_titles = get_movie_names()

# 2. Test it with your previous prediction
user_id = str(196)
movie_id = str(302)
prediction = model.predict(user_id, movie_id)

print(f"User {user_id} predicted rating for '{movie_titles[movie_id]}': {prediction.est:.2f}")

def get_top_n_recommendations(user_id, n=10):
    # Get a list of all movie IDs
    all_movie_ids = [str(i) for i in range(1, 1683)]

    # Predict ratings for all movies the user hasn't seen
    preds = [model.predict(user_id, mid) for mid in all_movie_ids]

    # Sort by estimated rating and pick top N
    preds.sort(key=lambda x: x.est, reverse=True)
    top_n = preds[:n]

    print(f"\n--- Top {n} Recommendations for User {user_id} ---")
    for i, p in enumerate(top_n):
        print(f"{i+1}. {movie_titles[p.iid]} (Predicted: {p.est:.2f})")

# Run it!
get_top_n_recommendations(user_id='12')

User 196 predicted rating for 'L.A. Confidential (1997)': 4.17

--- Top 10 Recommendations for User 12 ---
1. Star Wars (1977) (Predicted: 5.00)
2. Wrong Trousers, The (1993) (Predicted: 5.00)
3. 12 Angry Men (1957) (Predicted: 5.00)
4. Schindler's List (1993) (Predicted: 5.00)
5. Close Shave, A (1995) (Predicted: 5.00)
6. Good Will Hunting (1997) (Predicted: 4.97)
7. Raiders of the Lost Ark (1981) (Predicted: 4.97)
8. Return of the Jedi (1983) (Predicted: 4.94)
9. It's a Wonderful Life (1946) (Predicted: 4.93)
10. Shall We Dance? (1996) (Predicted: 4.92)
