# **Netflix Movie Recommendation System Using ChromaDB**

In [4]:
# Install dependencies (Uncomment and run if not installed)
# !pip install pandas chromadb datasets sentence-transformers

import pandas as pd
import chromadb
from chromadb.utils import embedding_functions
from sentence_transformers import SentenceTransformer
import os
import numpy as np
import pandas as pd
import chromadb
import openai
import os

## **1. Initialize ChromaDB and Create a Collection**

In [None]:
from sentence_transformers import SentenceTransformer
import chromadb
import pandas as pd

# Load the SentenceTransformer model. The same model is used for indexing here
# and for queries later, so stored and query vectors live in the same space.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Initialize ChromaDB Persistent Client (its storage path is the "data" folder)
client = chromadb.PersistentClient(path="data")

# Use get_or_create so re-running this cell reuses the existing collection
collection = client.get_or_create_collection(name="netflix_titles")


# Load CSV
csv_path = "data/netflix_titles_1000.csv"  # Adjust path if needed
df = pd.read_csv(csv_path)

# Ensure the required column exists
if "description" not in df.columns:
    raise ValueError("The CSV file does not contain a 'description' column.")

# Index only rows that actually have a description. Previously a missing value
# was embedded as the literal text "nan" while the raw NaN (a float) was passed
# as the document, so the stored text and its vector disagreed.
indexable = df.dropna(subset=["description"])
doc_ids = [str(idx) for idx in indexable.index]  # ids are the DataFrame index labels
doc_texts = indexable["description"].astype(str).tolist()

# Encode and write in batches: one encode() call per batch instead of one per
# row, and upsert() instead of add() so that re-running the cell against the
# persistent collection overwrites existing ids rather than colliding with them.
BATCH_SIZE = 256
for start in range(0, len(doc_ids), BATCH_SIZE):
    stop = start + BATCH_SIZE
    batch_texts = doc_texts[start:stop]
    batch_embeddings = embedding_model.encode(batch_texts).tolist()
    collection.upsert(
        ids=doc_ids[start:stop],
        documents=batch_texts,
        embeddings=batch_embeddings,
    )

print("✅ Embeddings stored using Sentence Transformers.")

# Helper: turn a piece of text into an embedding vector
def get_embedding(text):
    """Encode ``text`` with the notebook's ``embedding_model``.

    Parameters:
        text: Input passed straight through to ``embedding_model.encode``.

    Returns:
        The encoder output converted with ``.tolist()``.
    """
    encoded = embedding_model.encode(text)
    as_list = encoded.tolist()
    return as_list

# Smoke test: embed a sample query and look up its nearest stored descriptions
query_text = "A thrilling mystery series"
query_embedding = get_embedding(query_text)

# Ask ChromaDB for the five closest entries and dump the raw response
results = collection.query(
    n_results=5,
    query_embeddings=[query_embedding],
)
print(results)


✅ Embeddings stored using Sentence Transformers.
{'ids': [['538', '128', '481', '398', '532']], 'embeddings': None, 'documents': [['Romance, rivalry and radical mystery collide as a group of teens attend a remote island sleepaway camp in this suspenseful, supernatural drama.', "When a detective investigates the death of his ex-lover's grandfather, he uncovers secrets about the tycoon's manipulative family.", 'In a world where humans and superheroes coexist, a lone wolf cop teams up with a bright detective to dismantle a dark plot to extract superpowers.', 'Tragedy, betrayal and a mysterious discovery fuel a woman\'s vengeance for the loss of her tribe and family in this special episode of "Kingdom."', 'In this new TV series sequel to the classic horror franchise, a cyberbullying incident in Lakewood leads to a murder with chilling echoes of the past.']], 'uris': None, 'data': None, 'metadatas': [[None, None, None, None, None]], 'distances': [[0.9494847655296326, 1.008953332901001, 1.03

In [26]:
def search_netflix(query_text, search_by="description", top_n=5):
    """
    Search for similar Netflix titles based on title or description.

    When ``search_by`` is "title", the first row whose title contains
    ``query_text`` (case-insensitive, literal substring) is looked up and its
    description is used as the query. Any other ``search_by`` value embeds
    ``query_text`` directly.

    Parameters:
        query_text (str): User's input query.
        search_by (str): "title" or "description".
        top_n (int): Number of results to return.

    Returns:
        DataFrame: Top matching rows (title, cast, rating, description),
        ordered from most to least similar; or None when a title search
        finds no matching title.
    """
    # If searching by title, find the description first
    if search_by == "title":
        # regex=False: the query is user input, so characters such as "+", "("
        # or "?" must be matched literally instead of being parsed as a regex.
        title_hits = df[
            df["title"].str.contains(query_text, case=False, na=False, regex=False)
        ]
        if title_hits.empty:
            print("No matching title found.")
            return None
        query_text = title_hits.iloc[0]["description"]  # Use description for embedding search

    # Query ChromaDB
    results = collection.query(
        query_embeddings=[get_embedding(query_text)],
        n_results=top_n,
    )

    # Ids come back ranked by distance (closest first); they were stored as the
    # stringified DataFrame index, so convert them back to integers.
    ranked_ids = [int(doc_id) for doc_id in results["ids"][0]]

    # Skip ids that are no longer present in the loaded CSV (the collection is
    # persistent and may be older than the current DataFrame).
    ranked_ids = [row_id for row_id in ranked_ids if row_id in df.index]

    # Select by label list so the rows keep the similarity ranking; a boolean
    # isin() mask would return them in CSV order instead.
    display_columns = ["title", "cast", "rating", "description"]
    return df.loc[ranked_ids, display_columns]

# Prompt for the search mode first, then for the query itself
search_type = input("Search by (title/description): ")
search_type = search_type.strip().lower()
query_text = input("Enter search query: ")
query_text = query_text.strip()

# Run the lookup with the answers collected above
results_df = search_netflix(search_by=search_type, query_text=query_text)

# search_netflix returns None when nothing matched; only print a real table
if results_df is not None:
    print(results_df.to_string(index=False))  # plain-text table without the index column

                       title                                                                                                                                                                                                                                                       cast rating                                                                                                                                             description
            The Walking Dead Andrew Lincoln, Steven Yeun, Norman Reedus, Chandler Riggs, Melissa McBride, Lauren Cohan, Danai Gurira, Alanna Masterson, Sonequa Martin-Green, Josh McDermitt, Christian Serratos, Seth Gilliam, Ross Marquand, Lennie James, Emily Kinney, Jon Bernthal  TV-MA               In the wake of a zombie apocalypse, survivors hold on to the hope of humanity by banding together to wage a fight for their own survival.
      A Classic Horror Story                                                                                              