# 🎬 Hybrid Movie Recommendation System (Colab Edition)
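This notebook combines fuzzy clustering (MovieLens) and NLP-based content similarity (TMDB) to recommend movies. The TMDB dataset is downloaded automatically from Kaggle, which requires uploading your `kaggle.json` API key; the MovieLens `ratings.csv` and `movies.csv` files must be uploaded manually.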

In [1]:
# ⚙️ STEP 1: Install required libraries (takes 1-2 mins)
!pip install -q scikit-fuzzy fuzzywuzzy kaggle

# 🔐 STEP 2: Upload kaggle.json (Kaggle API key)
from google.colab import files
files.upload()  # Upload kaggle.json here

# 🔐 STEP 3: Move kaggle.json to ~/.kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/920.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/920.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━[0m [32m839.7/920.8 kB[0m [31m12.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m911.4/920.8 kB[0m [31m11.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m920.8/920.8 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25h

Saving kaggle.json to kaggle.json


In [3]:
# 📁 STEP 4: Upload MovieLens dataset (ratings.csv and movies.csv)
# Download it from https://grouplens.org/datasets/movielens/latest/
# Upload the files manually here:

import os
import re

from google.colab import files

uploaded_files = files.upload()  # Upload ratings.csv and movies.csv

# Colab does not overwrite an existing file: re-uploading movies.csv saves it
# as "movies (1).csv" and leaves the old movies.csv untouched. Write every
# upload back under its plain name so the newest copy is the one on disk.
for uploaded_name, file_bytes in uploaded_files.items():
    # Drop a trailing " (N)" inserted before the extension, if there is one.
    plain_name = re.sub(r" \(\d+\)(\.[^.]+)$", r"\1", uploaded_name)
    with open(plain_name, "wb") as out_file:
        out_file.write(file_bytes)

# Warn early instead of failing later in the pipeline.
# NOTE(review): assumes the loader reads exactly these two names — confirm.
missing_files = [
    name for name in ("ratings.csv", "movies.csv") if not os.path.exists(name)
]
if missing_files:
    print("⚠️ Missing MovieLens file(s):", ", ".join(missing_files))


Saving movies.csv to movies (1).csv
Saving ratings.csv to ratings.csv


In [10]:

def download_tmdb_dataset():
    """Make sure the TMDB movies CSV is present in the working directory.

    If a file matching ``TMDB_movie_dataset_v*.csv`` already exists, nothing is
    downloaded. Otherwise the Kaggle CLI fetches the dataset archive and the
    archive is extracted into the current directory.

    The CSV is located by pattern rather than by one fixed name: the archive
    was observed to contain ``TMDB_movie_dataset_v11.csv``, so a hard-coded
    ``..._v1.csv`` check never matched and the download/unzip ran every time.

    Returns:
        str | None: Path of the TMDB CSV (the most recently modified match),
        or None if no matching CSV exists after extraction.

    Raises:
        FileNotFoundError: If the ``kaggle`` executable or the zip is missing.
        subprocess.CalledProcessError: If the ``kaggle`` command fails.
        zipfile.BadZipFile: If the downloaded archive is not a valid zip.
    """
    import glob
    import os
    import subprocess
    import zipfile

    zip_filename = "tmdb-movies-dataset-2023-930k-movies.zip"
    kaggle_dataset = "asaniczka/tmdb-movies-dataset-2023-930k-movies"
    csv_pattern = "TMDB_movie_dataset_v*.csv"

    def _find_csv():
        # Prefer the newest file in case several versions are lying around.
        matches = glob.glob(csv_pattern)
        return max(matches, key=os.path.getmtime) if matches else None

    csv_path = _find_csv()
    if csv_path is not None:
        print("✅ Dataset already downloaded.")
        return csv_path

    print("📦 Downloading TMDB dataset...")
    # The Kaggle CLI itself skips the transfer when an up-to-date zip exists.
    subprocess.run(['kaggle', 'datasets', 'download', '-d', kaggle_dataset], check=True)

    print("📂 Unzipping", zip_filename)
    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        zip_ref.extractall()
    print("✅ Done unzipping.")
    return _find_csv()


In [11]:
# 🚀 Run the system
# Driver cell: fetch the data, build both recommenders, then print the
# content-based and the cluster-based suggestions for one user.

# --- Data and models -------------------------------------------------------
download_tmdb_dataset()

(
    ml_df,
    user_movie_matrix,
    user_movie_matrix_scaled,
    scaler,
) = load_movielens_data()
cntr, u = fuzzy_cluster_users(user_movie_matrix_scaled)

tmdb_df = load_tmdb_data()
tfidf_matrix = build_tfidf_matrix(tmdb_df)

# --- User input ------------------------------------------------------------
# Sample ratings for the cluster-based recommender, keyed by movie title.
user_ratings = dict([
    ("Toy Story (1995)", 5),
    ("Jumanji (1995)", 3),
    ("Grumpier Old Men (1995)", 4),
    ("Heat (1995)", 2),
])
user_movie = input("🎥 Enter a movie you like (e.g., Inception): ")

# --- Content-based recommendations -----------------------------------------
print("\n📚 Top NLP-based Recommendations:")
matched_title, content_recs = recommend_by_overview(user_movie, tmdb_df, tfidf_matrix)
print(f"Matched to: {matched_title.title()}\n")
content_table = content_recs.to_string(index=False)
print(content_table)

# --- Cluster-based recommendations -----------------------------------------
print("\n🤖 Top Fuzzy Logic-based Recommendations:")
cluster_recs = recommend_from_cluster(user_ratings, user_movie_matrix, scaler, cntr, u)
cluster_table = cluster_recs.to_string()
print(cluster_table)


📦 Downloading TMDB dataset...
Dataset URL: https://www.kaggle.com/datasets/asaniczka/tmdb-movies-dataset-2023-930k-movies
License(s): ODC Attribution License (ODC-By)
tmdb-movies-dataset-2023-930k-movies.zip: Skipping, found more recently modified local copy (use --force to force download)
📂 Unzipping tmdb-movies-dataset-2023-930k-movies.zip...
Archive:  tmdb-movies-dataset-2023-930k-movies.zip
  inflating: TMDB_movie_dataset_v11.csv  
✅ Done unzipping.


NameError: name 'csv_filename' is not defined