In [76]:
# Packages Imported
import getpass
import os
import re

import chromadb
import openai
import pandas as pd
import pdfplumber
import streamlit as st
from chromadb.utils import embedding_functions
from sentence_transformers import SentenceTransformer

In [77]:
# Read the PDF and concatenate the text of every page that yields any text,
# terminating each page's text with a newline.
with pdfplumber.open("Something_Something.pdf") as pdf:
    extracted_pages = (page.extract_text() for page in pdf.pages)
    # Pages for which extract_text() returns nothing (None / "") are skipped.
    full_text = "".join(text + "\n" for text in extracted_pages if text)

In [78]:
# Report how many whitespace-separated words were extracted from the PDF
words = full_text.split()
total_words = len(words)
print(f"Total words in movie text: {total_words}")

Total words in movie text: 576


In [79]:
# Split the text into one chunk per scene.
#
# The split happens *in front of* each numbered heading ("Scene4", "Scene 4", ...)
# using a zero-width look-ahead, so:
#   * the heading stays attached to its chunk exactly as it appears in the text
#     (no need to glue the keyword back on afterwards),
#   * the word "Scene" inside ordinary sentences does not start a new chunk,
#   * any text before the first heading is kept as-is instead of being
#     mislabelled with a "Scene" prefix.
SCENE_HEADING = re.compile(r"(?=\bScene\s*\d+)")

# Drop whitespace-only pieces (e.g. the empty string before a leading heading).
chunks = [
    piece.strip()
    for piece in SCENE_HEADING.split(full_text)
    if piece.strip()
]


In [80]:
# Build the embedding function that will be handed to the ChromaDB collection
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=EMBEDDING_MODEL_NAME)

In [81]:
# Create the ChromaDB client, then fetch the script collection
# (creating it on first use) with the embedding function attached
COLLECTION_NAME = "movie_script"

chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME, embedding_function=embedding_function)

In [82]:
# Store all chunks in ChromaDB in a single call.
#
# upsert() is used instead of add() so this cell can be re-run safely: the
# collection is obtained with get_or_create_collection, so on a re-run the
# ids "scene_0", "scene_1", ... already exist and add() would collide with them.
# The guard skips the call entirely when no text was extracted from the PDF.
if chunks:
    collection.upsert(
        documents=chunks,
        ids=[f"scene_{i}" for i in range(len(chunks))],
    )

In [83]:
# Prompt construction
def create_prompt(context, question):
    """Build the text prompt sent to the chat model.

    Args:
        context: Retrieved passage the answer should be based on.
        question: The user's question.

    Returns:
        A string with a "Context:" section, a blank line, a "Question:" line
        and a trailing "Answer:" cue.
    """
    sections = (f"Context: {context}", "", f"Question: {question}", "Answer:")
    return "\n".join(sections)


In [84]:
# Configure the OpenAI client.
# The API key is taken from the OPENAI_API_KEY environment variable, falling
# back to a non-echoing interactive prompt, so that no secret is ever stored
# in the notebook source or its saved outputs.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

client = openai.OpenAI(api_key=openai.api_key)  # new client-style

def get_gpt_answer(prompt):
    """Send ``prompt`` to the chat model and return the reply text.

    Args:
        prompt: Text sent as the single user message, after a fixed system
            message.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.5,
        max_tokens=200,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [85]:
# Interactive question/answer loop: retrieve the best-matching scene for each
# question and ask the chat model to answer from that scene only.
print("Ask anything about the movie! (type 'exit' to stop)\n")

while True:
    # Strip surrounding whitespace so inputs like " exit " still end the chat.
    question = input("You: ").strip()

    if question.lower() == "exit":
        print("Chat ended.")
        break

    # Nothing to look up for an empty line; ask again instead of spending a
    # retrieval query and an API call on it.
    if not question:
        continue

    results = collection.query(query_texts=[question], n_results=1)

    # The loop indexes results["documents"] per query text; the inner list is
    # empty when nothing was retrieved (e.g. no chunks were stored), so guard
    # before taking the top hit.
    documents = results["documents"]
    matches = documents[0] if documents else []
    if not matches:
        print("\nNo matching scene found.")
        print("-" * 50)
        continue

    # Named distinctly from the `chunk` loop variable used when indexing.
    retrieved_chunk = matches[0]

    print("\nRetrieved Chunk:\n", retrieved_chunk)

    prompt = create_prompt(retrieved_chunk, question)
    answer = get_gpt_answer(prompt)

    print("\nAnswer:", answer)
    print("-" * 50)


Ask anything about the movie! (type 'exit' to stop)



You:  where did kavitha meet santhosh in the tamil movie something something ?



Retrieved Chunk:
 Scene4: Enter Santhosh The Hero
At the wedding, Kavitha meets Santhosh (Jayam Ravi), a rich, westernized NRI from London who is
also Lalithas cousin. He is fun-loving, talkative, and charming. He immediately becomes interested
in Kavitha.

Answer: Kavitha meets Santhosh at the wedding in the Tamil movie "Something Something."
--------------------------------------------------


You:  who is muthupandi to kavitha in the movie something something



Retrieved Chunk:
 Scene2: Introduction to Muthupandi and Kavitha
The flashback begins in a small Tamil Nadu village. Muthupandi is a caring, hard-working brother
who raised his younger sister Kavitha (Trisha) after their mother died. They live modestly but with
strong family values and immense affection for each other.

Answer: Muthupandi is Kavitha's caring and hard-working brother in the movie "Something Something." He raised her after their mother passed away, showing immense affection for her and instilling strong family values in their lives.
--------------------------------------------------


You:  exit


Chat ended.
