# Movie/TV Show Recommendation System
This notebook implements a hybrid recommendation system that combines content-based filtering (TF-IDF similarity over movie overviews) with collaborative filtering (SVD matrix factorization on user ratings).
Dataset: Kaggle's The Movies Dataset (subset).

In [None]:
# Install required packages
!pip install scikit-surprise nltk

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

In [None]:
# Load the raw CSV tables (assumes you uploaded them via the Colab Files tab).
# The metadata file is the only one read with low_memory=False; the remaining
# tables use pandas' default options and are read in the order listed.
metadata = pd.read_csv('movies_metadata.csv', low_memory=False)
ratings, keywords, credits, links_small = [
    pd.read_csv(csv_name)
    for csv_name in ('ratings_small.csv', 'keywords.csv', 'credits.csv', 'links_small.csv')
]

In [None]:
# Content-based filtering using TF-IDF on movie descriptions
#
# Rows without an overview cannot be vectorised, so they are dropped. The index
# is reset afterwards so that row labels equal row positions: `indices` stores
# labels, while `cosine_sim[...]` and `.iloc[...]` further down are positional.
# Without the reset, every lookup after the first dropped row is shifted.
metadata = metadata.dropna(subset=['overview']).reset_index(drop=True)

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(metadata['overview'])

# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity.
# NOTE(review): this is a dense (n_movies x n_movies) float matrix; on the full
# metadata file it can need many GB of RAM -- consider restricting `metadata`
# to the movies you actually need before this step.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse lookup: title -> row of `metadata` / `cosine_sim`.
# Several movies share a title; keep only the first occurrence so that
# `indices.get(title)` always yields a single scalar instead of a Series.
indices = pd.Series(metadata.index, index=metadata['title'])
indices = indices[~indices.index.duplicated(keep='first')]

def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the titles of the 10 movies whose overview is most similar to `title`.

    Parameters
    ----------
    title : str
        Movie title, looked up in the module-level `indices` series.
    cosine_sim : array-like, optional
        Square similarity matrix whose row/column order matches the row order
        of `metadata`. Defaults to the matrix computed from the TF-IDF vectors.

    Returns
    -------
    pandas.Series or list
        Titles of up to 10 similar movies, most similar first. An empty list is
        returned when `title` is unknown.
    """
    idx = indices.get(title)
    if idx is None:
        return []
    if isinstance(idx, pd.Series):
        # Several movies share this title; fall back to the first one.
        idx = idx.iloc[0]

    # `indices` stores index *labels* of `metadata`, whereas `cosine_sim` and
    # `.iloc` are positional. Translate explicitly so the lookup stays correct
    # even when the index is not a clean 0..n-1 range (e.g. after dropna).
    pos = metadata.index.get_loc(idx)

    scores = np.asarray(cosine_sim[pos]).ravel()
    # Stable descending order: ties keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the query movie itself explicitly instead of assuming it is ranked
    # first (it is not when another movie has an identical overview, or when
    # the overview vector is all zeros).
    ranked = ranked[ranked != pos][:10]
    return metadata['title'].iloc[ranked]

In [None]:
# Collaborative filtering using SVD
#
# Both the train/test split and the SVD factor initialisation are random.
# A fixed seed makes the reported RMSE (and the model used by `hybrid`)
# reproducible across "Restart & Run All".
RANDOM_STATE = 42

# Ratings in ratings_small.csv are on a 0.5 - 5 star scale.
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the ratings for evaluation.
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)

model = SVD(random_state=RANDOM_STATE)
model.fit(trainset)

# Evaluate on the held-out ratings; rmse() prints the score and returns it.
predictions = model.test(testset)
rmse(predictions)

In [None]:
# Hybrid: Combine both content and collaborative filtering
def hybrid(userId, title):
    """Recommend movies similar to `title`, re-ranked by the user's predicted rating.

    The 25 movies with the most similar overview are selected first
    (content-based step); the collaborative-filtering `model` then estimates
    the rating `userId` would give each of them, and the 10 best are returned.

    Parameters
    ----------
    userId : int
        Raw user id as it appears in the ratings table.
    title : str
        Movie title, looked up in the module-level `indices` series.

    Returns
    -------
    pandas.DataFrame or list
        Up to 10 rows with columns ``title``, ``id`` (TMDB id as stored in
        `metadata`) and ``est`` (predicted rating), best first. An empty list
        is returned when `title` is unknown.
    """
    idx = indices.get(title)
    if idx is None:
        return []
    if isinstance(idx, pd.Series):
        # Several movies share this title; fall back to the first one.
        idx = idx.iloc[0]

    # `indices` holds index labels; `cosine_sim` and `.iloc` are positional.
    pos = metadata.index.get_loc(idx)

    scores = np.asarray(cosine_sim[pos]).ravel()
    ranked = np.argsort(-scores, kind='stable')
    # Exclude the query movie explicitly rather than assuming it ranks first.
    ranked = ranked[ranked != pos][:25]

    # .copy() so adding the 'est' column does not write into a view of metadata.
    movies = metadata.iloc[ranked][['title', 'id']].copy()

    # `metadata['id']` is a TMDB id (read as text), but `model` was trained on
    # MovieLens `movieId`s. Translate through links_small before predicting;
    # otherwise every movie is unknown to the model and 'est' carries no
    # per-movie information.
    links = links_small.dropna(subset=['tmdbId'])
    tmdb_to_movielens = dict(zip(links['tmdbId'].astype('int64'), links['movieId']))
    tmdb_ids = pd.to_numeric(movies['id'], errors='coerce')

    def _estimate(tmdb_id):
        # Missing or malformed id: same fallback score as before.
        if pd.isnull(tmdb_id):
            return 0
        movie_id = tmdb_to_movielens.get(int(tmdb_id))
        if movie_id is None:
            # No MovieLens counterpart. Use a raw id that cannot collide with a
            # real (integer) movieId, so the model treats the item as unknown
            # and returns its baseline estimate for this user.
            return model.predict(userId, 'tmdb:%d' % int(tmdb_id)).est
        return model.predict(userId, int(movie_id)).est

    movies['est'] = tmdb_ids.apply(_estimate)
    # Stable sort: movies with equal estimates stay ordered by similarity.
    return movies.sort_values('est', ascending=False, kind='mergesort').head(10)

You can now run `get_recommendations('The Dark Knight')` or `hybrid(1, 'The Dark Knight')`.