In [None]:
# Recommendation System using Collaborative Filtering (SVD)

# Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# Load and Explore the Dataset
# MovieLens 100K ratings, read as tab-separated text with explicit column names
url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.data"
columns = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv(
    url,
    sep='\t',
    names=columns,
)

# Quick overview: header line, first rows, then the (rows, columns) shape
print(
    "Dataset Overview:",
    df.head(),
    f"Dataset shape: {df.shape}",
    sep="\n",
)

In [None]:
# Prepare Data for Surprise Library
# Declare the rating scale (1 to 5) that the Reader should assume
reader = Reader(rating_scale=(1, 5))

# Hand Surprise only the user, item and rating columns (timestamp is left out)
data = Dataset.load_from_df(
    df.loc[:, ['user_id', 'item_id', 'rating']],
    reader,
)

In [None]:
# Train-Test Split
# Hold out 20% of the ratings for testing; the fixed seed keeps the split
# reproducible. Note: this name resolves to Surprise's train_test_split,
# because its import comes after the scikit-learn import of the same name.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Apply Matrix Factorization (SVD)
# Initialize the SVD model. random_state seeds the model's random number
# generator so that a fresh Restart & Run All reproduces the same metrics and
# recommendations; the value matches the seed used for the train/test split.
model = SVD(random_state=42)

# Train the model on the training split
model.fit(trainset)

# Predict a rating for every entry held out in the test split
predictions = model.test(testset)

In [None]:
# Evaluate the Model
# accuracy.rmse / accuracy.mae print their result by default; verbose=False
# silences that so each metric is reported exactly once, by the summary below.
# Calculate RMSE
rmse = accuracy.rmse(predictions, verbose=False)
# Calculate MAE
mae = accuracy.mae(predictions, verbose=False)

# Display Evaluation Metrics
print(f"\nEvaluation Metrics:\nRMSE: {rmse:.2f}\nMAE: {mae:.2f}")

In [None]:
# Make Recommendations
# Function to recommend items for a given user
def get_recommendations(user_id, model, data, top_n=5):
    """Return the ``top_n`` highest-predicted items the user has not rated yet.

    Args:
        user_id: User id as it appears in the ``user_id`` column of ``data.df``.
        model: Object exposing ``predict(user_id, item_id)``; each result must
            carry an ``est`` attribute holding the estimated rating.
        data: Object exposing the ratings as a DataFrame in ``data.df`` with
            ``user_id`` and ``item_id`` columns.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of at most ``top_n`` prediction objects, ordered by ``est``
        from highest to lowest.
    """
    ratings = data.df

    # Every distinct item that appears in the ratings table
    all_items = ratings['item_id'].unique()

    # Collect the user's rated items into a set. Membership must be tested
    # against the *values*: `x in some_series` checks the Series index labels,
    # which would let already-rated items slip through the filter.
    rated_items = set(ratings.loc[ratings['user_id'] == user_id, 'item_id'])

    # Keep only the items this user has not rated (original order preserved)
    unrated_items = [item for item in all_items if item not in rated_items]

    # Predict a rating for each remaining candidate
    predictions = [model.predict(user_id, item) for item in unrated_items]

    # Highest estimated rating first; sorted() is stable, so ties keep their order
    recommendations = sorted(predictions, key=lambda pred: pred.est, reverse=True)

    return recommendations[:top_n]

# Example usage
user_id = 1  # Specify a user ID
recommendations = get_recommendations(
    user_id=user_id,
    model=model,
    data=data,
)

# Print each recommended item with its estimated rating (two decimals)
print("\nTop Recommendations:")
for rec in recommendations:
    print(f"Item ID: {rec.iid}, Estimated Rating: {rec.est:.2f}")

In [None]:
# Visualization
# Pull the true rating (r_ui) and the model's estimate (est) out of each
# test-set prediction
actual = [p.r_ui for p in predictions]
predicted = [p.est for p in predictions]

plt.figure(figsize=(8, 6))

# Scatter of estimates against true ratings
plt.scatter(actual, predicted, alpha=0.5)

# Dashed diagonal from (1, 1) to (5, 5): where a perfect prediction would fall
plt.plot(
    [1, 5],
    [1, 5],
    '--',
    color='red',
    label='Perfect Prediction',
)

plt.title('Actual vs. Predicted Ratings')
plt.xlabel('Actual Ratings')
plt.ylabel('Predicted Ratings')
plt.legend()
plt.show()