In [1]:
# load the environment variables and import the necessary libraries
import os
from dotenv import load_dotenv
import openai
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import logging
import requests
from add_images_database import encode_image

# Module-level logger; query_movie_database() reports empty search results through it.
logger = logging.getLogger(__name__)

# Load environment variables via python-dotenv (presumably from a local .env file —
# confirm where the project keeps it). Must run before the os.getenv() call below.
load_dotenv()

# Set the OpenAI API key from the OPENAI_API_KEY environment variable
# (os.getenv returns None when the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")


In [55]:
def _build_messages(question, image_hits, text_hits, data_path):
    """Assemble the chat messages sent to the model.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        image_hits: ``(file_name, description)`` pairs for retrieved ``.png`` hits.
        text_hits: ``(source, text)`` pairs for retrieved ``.txt`` hits.
        data_path: Directory the image file names are relative to.

    Returns:
        A list of message dicts in the Chat Completions ``messages`` format.
    """
    # Create context for the AI
    context = """
    You are an expert film analyst with deep knowledge of cinema.
    You can provide detailed descriptions of movies, analyze visual elements, identify characters, scenes, and cinematography techniques.
    Use both textual information and visual cues from images to provide comprehensive answers.
    When describing movie scenes or visuals, be specific about what is shown in the images, including characters, settings,
    and visual storytelling elements.
    """

    messages = [{"role": "system", "content": context}]

    # The API does not accept a message whose ``content`` is an empty list, so
    # the image and text sections are only added when they have something in them.
    if image_hits:
        messages.append({
            "role": "user",
            "content": "Here are the images with the the rank of the movie in the list and a description of the movie (if any):"
        })
        image_parts = [
            {
                "type": "image_url",
                "image_url": {
                    # The files are selected by their ".png" suffix, so declare them as PNG.
                    "url": f"data:image/png;base64,{encode_image(os.path.join(data_path, file))}"
                }
            }
            for file, _description in image_hits
        ]
        description_parts = [
            {"type": "text", "text": f"Image description: {description}"}
            for _file, description in image_hits
        ]
        messages.append({"role": "user", "content": image_parts + description_parts})

    if text_hits:
        messages.append({
            "role": "user",
            "content": "Here are the text documents from the pdf with the descriptions (if any):"
        })
        messages.append({
            "role": "user",
            "content": [
                {"type": "text", "text": f"{source}: {document}"}
                for source, document in text_hits
            ]
        })

    messages.append({
        "role": "user",
        "content": f"Here is the question: {question}"
    })
    messages.append({
        "role": "user",
        "content": "append all the sources (page numbers) of the pdf pages used to answer the question in the end of the answer, do not include the images sources"
    })
    return messages


def _request_completion(messages, timeout):
    """POST ``messages`` to the Chat Completions endpoint and return the answer text.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset, or if the response does not
            contain any ``choices`` (for example an API error payload).
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        # Fail early instead of sending "Bearer None" and getting an opaque error back.
        raise RuntimeError("OPENAI_API_KEY is not set; cannot call the OpenAI API.")

    # Prepare API request
    payload = {
        "model": "gpt-4o",
        "messages": messages,
        "max_tokens": 15000,
        "temperature": 0.0,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    # Make API call; the timeout keeps a stalled connection from hanging the cell forever.
    url = "https://api.openai.com/v1/chat/completions"
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)

    try:
        response_json = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid JSON.
        response_json = None

    choices = response_json.get("choices") if isinstance(response_json, dict) else None
    if not choices:
        error = response_json.get("error") if isinstance(response_json, dict) else None
        detail = error.get("message") if isinstance(error, dict) else error
        message = f"OpenAI API request failed (HTTP {response.status_code}): {detail or response.text}"
        logger.error(message)
        raise RuntimeError(message)

    # Extract and return the response
    return choices[0]["message"]["content"]


def query_movie_database(question, k=10, min_relevance=0.6, timeout=300):
    """Answer ``question`` using documents retrieved from the local Chroma store.

    The persisted ``chroma`` database is searched for the ``k`` most similar
    entries, hits scoring below ``min_relevance`` are dropped, and the remaining
    image (``.png``) and text (``.txt``) hits are sent to ``gpt-4o`` together
    with the question.

    Args:
        question: Natural-language question to answer.
        k: Maximum number of hits requested from the vector store.
        min_relevance: Minimum relevance score a hit needs to be kept.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The model's answer text, or ``"No matching results found."`` when no
        hit passes the relevance filter (no API call is made in that case).

    Raises:
        RuntimeError: If the API key is missing or the API call does not yield
            an answer. (An error response previously surfaced as a ``KeyError``.)
    """
    # Set up embeddings and database
    embeddings = OpenAIEmbeddings()
    db = Chroma(persist_directory="chroma", embedding_function=embeddings)
    DATA_PATH = "extracted_images/"

    # Query the database and keep only sufficiently relevant hits
    scored_hits = db.similarity_search_with_relevance_scores(question, k=k)
    results = [(doc, score) for doc, score in scored_hits if score >= min_relevance]

    # Check if results are empty
    if not results:
        logger.info("No matching results found")
        return "No matching results found."

    # Split the hits by the type of file they were extracted from
    image_hits = []
    text_hits = []
    for doc, _score in results:
        file = doc.metadata["file"]
        if file.endswith(".png"):
            image_hits.append((file, doc.page_content))
        elif file.endswith(".txt"):
            text_hits.append((doc.metadata["source"], doc.page_content))

    messages = _build_messages(question, image_hits, text_hits, DATA_PATH)
    return _request_completion(messages, timeout)

### Use case 1


In [61]:
# Use case 1: ask for AI/robotics-themed movies and print the returned answer text.
print(query_movie_database("List the movies that is about ai and robotics with a short description of each movie"))

Here are some movies about AI and robotics:

1. **Ex Machina (2014)**
   - A suspenseful sci-fi film where a programmer is invited to his CEO's secluded home to administer the Turing test to a humanoid robot. The film explores themes of artificial intelligence, consciousness, and the ethical implications of creating sentient machines.

2. **Metropolis (1927)**
   - A classic silent film set in a dystopian future where a city is divided between the working class and city planners. It features the Maschinenmensch, a robot that plays a central role in the story, highlighting themes of class struggle and the dehumanizing effects of technology.

3. **Blade Runner (1982)**
   - Set in a future Los Angeles, the film follows a former police officer tasked with hunting down rogue replicants, bioengineered beings virtually identical to humans. It delves into themes of identity, humanity, and the moral complexities of artificial life.

**Sources:**
- Page 5
- Page 8
- Page 29


### Use case 2

In [62]:
# Use case 2: ask about what the movie posters show and print the returned answer text.
print(query_movie_database("List the movies that have a poster with a space uniform"))

The movies with a poster featuring a space uniform are:

1. **2001: A Space Odyssey (1968)**
2. **Alien (1979)**
3. **Aliens (1986)**
4. **Galaxy Quest (1999)**

**Sources:**
- Page 27
- Page 14


### Use case 3

In [59]:
# Use case 3: ask which movies are mentioned only in the comments and print the returned answer text.
print(query_movie_database("What other movies mentioned in the comments and not in the list?"))

The comments mention several movies that are not included in the main list of the "25 Best Sci-Fi Movies of All Time" according to IGN. These movies are:

1. **Minority Report** - Mentioned as a favorite and considered underrated by some commenters.
2. **Total Recall** - Suggested by commenters as deserving a spot on the list.
3. **Predator** - Also mentioned as a potential candidate for the list.
4. **Arrival** - Praised as a masterpiece by Denis Villeneuve and suggested for inclusion.
5. **Interstellar** - Mentioned by multiple commenters as deserving a place on the list.
6. **Children of Men** - Described as a film that should be included and is often overlooked.
7. **Sunshine** - Suggested as a replacement for "Eternal Sunshine of the Spotless Mind" and described as underrated.
8. **The Terminator** - Mentioned as a classic that should be on the list, despite the preference for its sequel, "Terminator 2: Judgment Day."

These films were highlighted by users in the comments section 

### Use case 4

In [58]:
# Use case 4: ask for the animated movies in the list and print the returned answer text.
print(query_movie_database("What are the animation movies in the list?"))

The animated movie mentioned in the list of "The 25 Best Sci-Fi Movies of All Time" is "WALL-E." This film is highlighted for its imaginative storytelling and satirical social commentary on excess and pollution, presented through the eyes of a trash-bot on a future Earth overwhelmed by garbage. It is noted for its mostly silent, slapstick-prone protagonist and its emotionally charged narrative.

Sources: Page 4


### Use case 5

In [57]:
# Use case 5: look up a movie by its rank in the list and print the returned answer text.
print(query_movie_database("What is the movie with rank 7?"))

The movie with rank 7 is "Metropolis" (1927).

**Sources: Page 3, Page 14, Page 21, Page 25, Page 26**
