In [1]:
from typing import Dict, List
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Define the questions
QUESTIONS = [
    "Would you like to see movies from a specific genre?",
    "What kind of mood? (e.g. lighthearted, intense, adventurous)",
    "Which decade do you prefer? (e.g. 1980s, 1990s, 2000s)",
    "Do you lean toward popular hits or hidden gems?",
    "Any runtime preference? (e.g. < 90 min, 90–120 min, > 120 min)"
]

QUESTION_KEYS = ["genre", "mood", "decade", "popularity", "runtime"]


In [139]:
class MoviePreferenceAgent:
    def __init__(self):
        self.preferences = {}
        self.current_question_index = 0
        self.llm = ChatOpenAI(temperature=0)
        self.output_parser = StrOutputParser()
        
        # Create the summary prompt
        self.summary_prompt = ChatPromptTemplate.from_messages([
            ("system", "You are a helpful movie recommendation assistant. Create a friendly summary of the user's movie preferences."),
            ("human", "Here are the user's preferences:\n{preferences}\n\nPlease create a friendly summary of these preferences.")
        ])
        
        # Create the chain for summarization
        self.summary_chain = self.summary_prompt | self.llm | self.output_parser

    def get_next_question(self) -> str:
        """Get the next question to ask."""
        if self.current_question_index < len(QUESTIONS):
            return QUESTIONS[self.current_question_index]
        return None

    def process_answer(self, answer: str) -> str:
        """Process the user's answer and store it."""
        if self.current_question_index < len(QUESTIONS):
            self.preferences[QUESTION_KEYS[self.current_question_index]] = answer
            self.current_question_index += 1
            return self.get_next_question()
        return None

    def get_summary(self) -> str:
        """Generate a summary of the collected preferences."""
        if not self.preferences:
            return "No preferences collected yet."
            
        # Format preferences for the prompt
        preferences_text = "\n".join([
            f"{QUESTIONS[i]}\n→ {self.preferences[key]}"
            for i, key in enumerate(QUESTION_KEYS)
            if key in self.preferences
        ])
        
        # Generate summary using the LLM
        summary = self.summary_chain.invoke({"preferences": preferences_text})
        return summary


In [140]:
def main():
    # Create the agent
    agent = MoviePreferenceAgent()
    
    # Start the conversation
    print("Welcome to the Movie Preference Assistant!")
    print("I'll ask you a few questions to understand your movie preferences.\n")
    
    # Ask questions and collect answers
    while True:
        question = agent.get_next_question()
        if not question:
            break
            
        print(f"\nAgent: {question}")
        answer = input("You: ").strip()
        
        if answer.lower() in ['quit', 'exit', 'stop']:
            print("\nAgent: Conversation ended. Here's a summary of what we discussed:")
            break
            
        agent.process_answer(answer)

    # Generate and display summary
    summary = agent.get_summary()
    print("\nAgent: " + summary)

In [4]:
if __name__ == "__main__":
    main()

Welcome to the Movie Preference Assistant!
I'll ask you a few questions to understand your movie preferences.


Agent: Would you like to see movies from a specific genre?

Agent: What kind of mood? (e.g. lighthearted, intense, adventurous)

Agent: Which decade do you prefer? (e.g. 1980s, 1990s, 2000s)

Agent: Do you lean toward popular hits or hidden gems?

Agent: Any runtime preference? (e.g. < 90 min, 90–120 min, > 120 min)

Agent: It sounds like you enjoy intense dramas from the 2010s that are hidden gems and have a runtime of around 120 minutes. I'll keep these preferences in mind while recommending movies for you. Let's find some captivating films for your next movie night!


In [141]:
agent = MoviePreferenceAgent()
    
# Start the conversation
print("Welcome to the Movie Preference Assistant!")
print("I'll ask you a few questions to understand your movie preferences.\n")

# Ask questions and collect answers
while True:
    question = agent.get_next_question()
    if not question:
        break
        
    print(f"\nAgent: {question}")
    answer = input("You: ").strip()
    
    if answer.lower() in ['quit', 'exit', 'stop']:
        print("\nAgent: Conversation ended. Here's a summary of what we discussed:")
        break
        
    agent.process_answer(answer)

# Generate and display summary
summary = agent.get_summary()
print("\nAgent: " + summary)

Welcome to the Movie Preference Assistant!
I'll ask you a few questions to understand your movie preferences.


Agent: Would you like to see movies from a specific genre?

Agent: What kind of mood? (e.g. lighthearted, intense, adventurous)

Agent: Which decade do you prefer? (e.g. 1980s, 1990s, 2000s)

Agent: Do you lean toward popular hits or hidden gems?

Agent: Any runtime preference? (e.g. < 90 min, 90–120 min, > 120 min)

Agent: It sounds like you're in the mood for intense action movies from the 2010s that are popular hits and have a runtime around 150 minutes. I'll make sure to recommend some thrilling and action-packed films that fit your preferences!


In [14]:
agent.preferences['genre']

'drama'

In [None]:
def find_movies(genre):
    


def get_movies():
    if agent.preferences['genre']:






In [114]:
from tmdbv3api import TMDb, Genre, Discover
import pandas as pd

discover = Discover()
def recommend_by_preferences(
    genre: str = 'action',
    mood: str = 'intense',
    decade: str = '2000',
    popularity: str = 'popular',
    runtime: str = '120',
    top_n: str = "25"
) -> str:
    # 1) Parse top_n
    try:
        n = max(1, int(top_n))
    except ValueError:
        n = 25

    # # 2) Genre → TMDb ID
    # gid = GENRE_MAP.get(genre.strip().lower())
    # if not gid:
    #     return "I couldn’t find that genre—please try again."
    gid = 28

    # 3) Decade → date filters
    dec = decade.strip().lower()
    year_start, year_end = None, None
    if dec.endswith("s") and dec[:-1].isdigit():
        ys = int(dec[:-1])
        year_start, year_end = ys, ys + 9

    # 4) Runtime → runtime filters
    rt = runtime.replace(" ", "")
    rt_gte = rt_lte = None
    if "<" in rt:
        rt_lte = int(rt.split("<")[-1].replace("min",""))
    elif ">" in rt:
        rt_gte = int(rt.split(">")[-1].replace("min",""))
    elif "-" in rt:
        low, high = rt.split("-")
        rt_gte, rt_lte = int(low.replace("min","")), int(high.replace("min",""))

    # 5) Popularity → sort_by
    sort_by = "popularity.desc"
    if "hidden" in popularity.lower():
        sort_by = "vote_average.desc"

    # 6) Build discover params
    params = {
        "with_genres": gid,
        "sort_by": sort_by,
        "page": 1,
        "raw_response": True
    }
    if year_start and year_end:
        params["primary_release_date.gte"] = f"{year_start}-01-01"
        params["primary_release_date.lte"] = f"{year_end}-12-31"
    if rt_gte is not None:
        params["with_runtime.gte"] = rt_gte
    if rt_lte is not None:
        params["with_runtime.lte"] = rt_lte

    # 7) Fetch & trim
    raw = list(discover.discover_movies(params))[:n]
    raw = pd.DataFrame([dict(row) for row in raw])

    return raw
    # return raw

In [150]:
from dotenv import load_dotenv

load_dotenv()

tmdb = TMDb()
tmdb.api_key ='2fa30f6a1d22eb80c6dc9cac9cc67bdc'

mvs = recommend_by_preferences()

In [151]:
mvs

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,/cJvUJEEQ86LSjl4gFLkYpdCJC96.jpg,"[10752, 28]",1241436,en,Warfare,A platoon of Navy SEALs embarks on a dangerous...,524.0776,/j8tqBXwH2PxBPzbtO19BTF9Ukbf.jpg,2025-04-09,Warfare,False,7.221,199
1,False,/bVm6udIB6iKsRqgMdQh6HywuEBj.jpg,"[53, 28]",1233069,de,Exterritorial,"When her son vanishes inside a US consulate, e...",391.7688,/jM2uqCZNKbiyStyzXOERpMqAbdx.jpg,2025-04-29,Exterritorial,False,6.648,266
2,False,/rthMuZfFv4fqEU4JVbgSW9wQ8rs.jpg,"[28, 878, 18]",986056,en,Thunderbolts*,After finding themselves ensnared in a death t...,383.6091,/m9EtP1Yrzv6v7dMaC9mRaGhd1um.jpg,2025-04-30,Thunderbolts*,False,7.5,718
3,False,/4A5HH9HkCPqAwyYL6CnA0mxbYjn.jpg,"[28, 80, 53]",1144430,fr,Balle perdue 3,Car genius Lino returns to conclude his vendet...,381.5997,/gO33EBU7JHd6xBjT62XMt61KoqX.jpg,2025-05-06,Last Bullet,False,6.864,59
4,False,/fTrQsdMS2MUw00RnzH0r3JWHhts.jpg,"[28, 80, 53]",1197306,en,A Working Man,Levon Cade left behind a decorated military ca...,329.0393,/6FRFIogh3zFnVWn7Z6zcYnIbRcX.jpg,2025-03-26,A Working Man,False,6.507,609
5,False,/jRvhP4AfFnJ03lCQhp1fie7XPSd.jpg,"[28, 53]",977294,en,Tin Soldier,An ex-special forces operative seeks revenge a...,302.9943,/lFFDrFLXywFhy6khHes1LCFVMsL.jpg,2025-05-22,Tin Soldier,False,0.0,0
6,False,/9yMmIou6xMtw2A61xiMsEcAhTrL.jpg,"[28, 18, 35]",897160,ko,용감한 시민,"An expelled boxing champion, who now is a high...",219.187,/ixQoOExnnvIxYvnqGgfhaWqXeXc.jpg,2023-10-25,Brave Citizen,False,7.1,35
7,False,/8eifdha9GQeZAkexgtD45546XKx.jpg,"[28, 53, 878]",822119,en,Captain America: Brave New World,After meeting with newly elected U.S. Presiden...,209.6215,/pzIddUEMWhWzfvLI3TwxUG2wGoi.jpg,2025-02-12,Captain America: Brave New World,False,6.137,1795
8,False,/65MVgDa6YjSdqzh7YOA04mYkioo.jpg,"[28, 80, 53]",668489,en,Havoc,When a drug heist swerves lethally out of cont...,188.7515,/r46leE6PSzLR3pnVzaxx5Q30yUF.jpg,2025-04-24,Havoc,False,6.537,574
9,False,/op3qmNhvwEvyT7UFyPbIfQmKriB.jpg,"[28, 14, 12]",324544,en,In the Lost Lands,A queen sends the powerful and feared sorceres...,178.8997,/dDlfjR7gllmr8HTeN6rfrYhTdwX.jpg,2025-02-27,In the Lost Lands,False,6.318,349


In [149]:
# Extract relevant columns and create list of documents
movie_data = mvs[['overview', 'id', 'original_title']].to_dict('records')
texts = [m['overview'] for m in movie_data]
metadatas = [{'id': str(m['id']), 'title': m['original_title']} for m in movie_data]

# Initialize Chroma vectorstore
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma.from_texts(
    texts=texts,
    embedding=OpenAIEmbeddings(),
    metadatas=metadatas,
    collection_name="movies"
)

SyntaxError: unmatched ')' (3948606891.py, line 15)

In [152]:
# Extract relevant columns and create list of documents
# Create persistent directory in current repo folder
import os
current_dir = os.path.dirname(os.path.abspath("__file__"))
persist_directory = os.path.join(current_dir, "chroma_db")
os.makedirs(persist_directory, exist_ok=True)

# Extract movie data
movie_data = mvs[['overview', 'id', 'original_title']].to_dict('records')
texts = [m['overview'] for m in movie_data]
metadatas = [{'id': str(m['id']), 'title': m['original_title']} for m in movie_data]

# Initialize or load existing Chroma vectorstore
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# Try to load existing vectorstore
try:
    vectorstore = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
        collection_name="movies"
    )
    
    # Add new documents, skipping duplicates based on movie ID
    existing_ids = set(m['id'] for m in vectorstore.get()['metadatas'])
    new_texts = []
    new_metadatas = []
    
    for text, metadata in zip(texts, metadatas):
        if metadata['id'] not in existing_ids:
            new_texts.append(text)
            new_metadatas.append(metadata)
            
    if new_texts:
        vectorstore.add_texts(texts=new_texts, metadatas=new_metadatas)
        
except:
    # If no existing vectorstore, create new one
    vectorstore = Chroma.from_texts(
        texts=texts,
        embedding=embeddings,
        metadatas=metadatas,
        collection_name="movies",
        persist_directory=persist_directory
    )

# Ready for querying


InternalError: Database error: error returned from database: (code: 1) no such table: tenants

In [156]:
query = "enjoy hardcore action movies from 2010 that are popular hits"
results = vectorstore.similarity_search(query, k=6)  # Returns top 5 matches

In [157]:
results

[Document(metadata={'id': '668489', 'title': 'Havoc'}, page_content="When a drug heist swerves lethally out of control, a jaded cop fights his way through a corrupt city's criminal underworld to save a politician's son."),
 Document(metadata={'title': 'Havoc', 'id': '668489'}, page_content="When a drug heist swerves lethally out of control, a jaded cop fights his way through a corrupt city's criminal underworld to save a politician's son."),
 Document(metadata={'title': 'Thunderbolts*', 'id': '986056'}, page_content='After finding themselves ensnared in a death trap, seven disillusioned castoffs must embark on a dangerous mission that will force them to confront the darkest corners of their pasts.'),
 Document(metadata={'title': 'Thunderbolts*', 'id': '986056'}, page_content='After finding themselves ensnared in a death trap, seven disillusioned castoffs must embark on a dangerous mission that will force them to confront the darkest corners of their pasts.'),
 Document(metadata={'title