# 🎬 Movie Recommendation System using SVD and scikit-learn
This notebook demonstrates how to build a **movie recommendation system** using **Singular Value Decomposition (SVD)** with **scikit-learn's TruncatedSVD**, pipelines, and visualizations.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error
from math import sqrt


In [None]:
# Load the movie, rating and tag tables from CSV files in the working directory.
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')
tags = pd.read_csv('tags.csv')

# Preview the first rows of each table under a short heading.
# `display` is expected to be supplied by the notebook environment.
for heading, table in (("Movies:", movies), ("Ratings:", ratings), ("Tags:", tags)):
    print(heading)
    display(table.head())


In [None]:
class RatingsMatrixBuilder(BaseEstimator, TransformerMixin):
    """Pivot a long-format ratings table into a wide row-by-column matrix.

    The defaults reproduce a user x movie matrix built from the
    ``userId`` / ``movieId`` / ``rating`` columns, with missing cells set to 0.

    Parameters
    ----------
    index : str, default='userId'
        Column whose distinct values become the rows of the matrix.
    columns : str, default='movieId'
        Column whose distinct values become the columns of the matrix.
    values : str, default='rating'
        Column providing the cell values.
    fill_value : scalar, default=0
        Value written into cells that have no entry in the input. Must not
        be ``None`` (``DataFrame.fillna`` rejects it).
    """

    def __init__(self, index='userId', columns='movieId', values='rating',
                 fill_value=0):
        # Parameters are stored verbatim under their own names so that
        # BaseEstimator.get_params / set_params and cloning can see them.
        self.index = index
        self.columns = columns
        self.values = values
        self.fill_value = fill_value

    def fit(self, X, y=None):
        """Do nothing and return ``self``; this transformer is stateless."""
        return self

    def transform(self, X):
        """Return the pivoted DataFrame with missing cells replaced.

        ``pivot_table`` uses its default aggregation (mean), so repeated
        (index, columns) pairs in ``X`` are averaged into one cell.
        """
        wide = X.pivot_table(index=self.index, columns=self.columns,
                             values=self.values)
        return wide.fillna(self.fill_value)

class MeanCentering(BaseEstimator, TransformerMixin):
    """Subtract from every row the row mean learned during ``fit``.

    Attributes
    ----------
    means_ : numpy.ndarray of shape (n_rows,)
        Mean of each row of the matrix passed to ``fit``. The mean is taken
        over every column of the row, so any placeholder values already
        present in the matrix (for example filled-in missing ratings)
        contribute to it.
    """

    def fit(self, X, y=None):
        """Compute and store the per-row means of ``X``; return ``self``."""
        # Convert to a plain ndarray first: np.mean on a DataFrame returns a
        # pandas Series, and indexing a Series with [:, np.newaxis] in
        # transform() raises on pandas >= 2.0.
        self.means_ = np.asarray(X, dtype=float).mean(axis=1)
        return self

    def transform(self, X):
        """Return ``X`` with the stored row means subtracted.

        Works for both ndarrays and DataFrames; a DataFrame input yields a
        DataFrame with the same labels.

        Raises
        ------
        ValueError
            If ``X`` does not have the same number of rows as the matrix
            seen in ``fit`` (the means are per row, so they cannot be
            applied to a different set of rows).
        """
        n_rows = np.shape(X)[0]
        if n_rows != self.means_.shape[0]:
            raise ValueError(
                f"X has {n_rows} rows but MeanCentering was fitted on "
                f"{self.means_.shape[0]} rows"
            )
        # Column vector so the subtraction broadcasts across each row.
        return X - self.means_[:, np.newaxis]


In [None]:
# Assemble the three stages: pivot the ratings into a matrix, subtract the
# per-row means, then project onto 50 SVD components (seeded for repeatability).
svd_step = TruncatedSVD(n_components=50, random_state=42)
pipeline = Pipeline(steps=[
    ('matrix', RatingsMatrixBuilder()),
    ('center', MeanCentering()),
    ('svd', svd_step),
])

# Feed the pipeline only the id and rating columns of the ratings table.
rating_columns = ['userId', 'movieId', 'rating']
X = ratings[rating_columns]
X_transformed = pipeline.fit_transform(X)
print("SVD Transformation Shape:", X_transformed.shape)


In [None]:
# Plot how much variance is captured as SVD components are added.
explained_variance = pipeline.named_steps['svd'].explained_variance_ratio_
cumulative_variance = np.cumsum(explained_variance)

# The x-axis is labelled as a *number* of components, so it must start at 1:
# the first cumulative value is the variance captured by one component.
component_counts = np.arange(1, len(cumulative_variance) + 1)

plt.figure(figsize=(10,5))
plt.plot(component_counts, cumulative_variance, marker='o')
plt.title('Cumulative Explained Variance by SVD Components')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid()
plt.show()


In [None]:
# Rebuild the (uncentred) user x movie matrix to compare against.
matrix_builder = pipeline.named_steps['matrix']
user_item_matrix = matrix_builder.transform(X).values
svd = pipeline.named_steps['svd']

# The SVD step was fitted on the mean-centred matrix, so inverse_transform
# returns values on the centred scale. Add each row's mean back so the
# reconstruction is comparable with the uncentred matrix above.
# np.asarray guards against means_ being stored as a pandas Series.
user_means = np.asarray(pipeline.named_steps['center'].means_, dtype=float)
reconstructed = svd.inverse_transform(X_transformed) + user_means[:, np.newaxis]

# NOTE: the error is averaged over every cell of the matrix, including the
# cells the matrix builder filled in for missing ratings.
rmse = sqrt(mean_squared_error(user_item_matrix, reconstructed))
print(f"RMSE between Original and Reconstructed Ratings: {rmse:.4f}")


In [None]:
# Scatter every matrix cell: original value on x, reconstructed value on y.
# A low alpha keeps dense regions readable.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(user_item_matrix.ravel(), reconstructed.ravel(), alpha=0.1)
ax.set_title('Original vs Reconstructed Ratings')
ax.set_xlabel('Original Ratings')
ax.set_ylabel('Reconstructed Ratings')
ax.grid()
plt.show()


In [None]:
movie_id_to_title = dict(zip(movies['movieId'], movies['title']))

# Pivot once and keep the labels, rather than re-pivoting for every
# recommended item inside the function.
_ratings_frame = matrix_builder.transform(X)
_matrix_movie_ids = _ratings_frame.columns.to_numpy()
# Row position of each user id in the matrix. User ids are looked up here
# instead of assuming they are contiguous and start at 1.
_user_row_lookup = {uid: row for row, uid in enumerate(_ratings_frame.index)}


def recommend_movies(user_id, top_n=10):
    """Return the highest-scoring movies the user has no rating for.

    Parameters
    ----------
    user_id : int
        A user id present in the ratings matrix index.
    top_n : int, default=10
        Maximum number of recommendations to return.

    Returns
    -------
    list of (str, float)
        ``(title, score)`` pairs sorted by descending score, where the score
        is the user's entry in ``reconstructed`` for that movie.

    Raises
    ------
    ValueError
        If ``user_id`` does not appear in the ratings matrix.
    KeyError
        If a recommended movie id has no entry in ``movie_id_to_title``.
    """
    try:
        user_index = _user_row_lookup[user_id]
    except KeyError:
        raise ValueError(
            f"user_id {user_id!r} does not appear in the ratings matrix"
        ) from None

    user_ratings = reconstructed[user_index]
    original_ratings = user_item_matrix[user_index]

    # A 0 entry is the matrix builder's fill value, i.e. no rating recorded.
    unseen = np.flatnonzero(original_ratings == 0)

    # Stable sort on the negated scores: descending by score, with ties kept
    # in column order.
    ranked = unseen[np.argsort(-user_ratings[unseen], kind='stable')][:top_n]

    return [(movie_id_to_title[_matrix_movie_ids[idx]], user_ratings[idx])
            for idx in ranked]

print("Top Recommendations for User 1:")
display(recommend_movies(1))


In [None]:
# Fetch user 1's top ten recommendations and split them into parallel lists.
recs = recommend_movies(1, top_n=10)
titles = [title for title, _ in recs]
scores = [score for _, score in recs]

# Horizontal bar chart: one bar per recommended title, length = its score.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=scores, y=titles, palette='viridis', ax=ax)
ax.set_title('Top 10 Movie Recommendations for User 1')
ax.set_xlabel('Predicted Rating')
ax.set_ylabel('Movie')
plt.show()
