In [None]:
from PyPDF2 import PdfReader
import os
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from uuid import uuid4
from langchain.chat_models import init_chat_model
from langchain import hub
import streamlit as st

In [None]:
#loading pdf
import os
raw = []
path = r"C:\university\mov_script_proj\scripts"
for file in os.listdir(path):
    file_path = os.path.join(path, file)
    pdfreader = PdfReader(file_path)
    text = ""
    for i, page in enumerate(pdfreader.pages):
        content = page.extract_text()
        text+=content
    raw.append(text)

In [3]:
#embedding function
def get_embeddings():
    embeddings = OpenAIEmbeddings(
        model = "text-embedding-ada-002")
    return embeddings

In [None]:
# creating chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)

chunks = []
for text in raw:
    for chunk in text_splitter.create_documents([text]):
        chunks.append(chunk)


In [4]:
#creating vector store
vector_store = Chroma(
    collection_name="movie-scripts",
    embedding_function=get_embeddings(),
    persist_directory="movie-scripts-vdb"
)



  vector_store = Chroma(


In [None]:
#adding documents to vector store
uuids = [str(uuid4()) for _ in range(len(chunks))]
vector_store.add_documents(ids=uuids, documents=chunks)


['1865a8ea-7a01-40a0-ad68-e46712b35975',
 '6c4f30df-0976-4f42-95e5-dcb7e0dd4e62',
 '1d8f1603-1b5a-443d-ac1c-a44d7ff59828',
 'efe9be68-41fd-40db-ba45-4f0ef576574c',
 'b6bcdc0a-b944-4740-9396-8f814c2e95d4',
 'b3a9bae7-6498-4228-a24c-968a9e05ea89',
 'e03e0392-0348-4b4d-9097-884268f4553e',
 'd57043b2-fc13-4200-9d92-b883a8ff447f',
 '1f65895d-0442-4946-9e3d-b16d34bdf82e',
 '1f381d1a-be48-43e6-8578-07ecc00e6342',
 '7f7fb76c-5fd0-4101-bd7e-17155c0477ac',
 '04be4728-e547-4519-9a09-e09af6085763',
 'f66e8ea2-0b1c-443d-b805-1ef7d0449dfa',
 '841791c4-e6a3-41fa-93c8-9e8e7d80f3f1',
 '5187ab48-b145-467e-a528-cd16c38446a7',
 '967e15bd-5a52-4b81-879e-51f871855fd1',
 '59564101-1446-413c-90a1-39ebf54d7301',
 'dd230892-b888-46cf-961f-4fdf611d7710',
 '03df975c-3f3c-4901-97a6-9284640f27a1',
 'ff9f0118-dda0-4ae5-8660-2408b6a80420',
 'b8dd5877-142b-4aff-8b6d-d8a04283fa57',
 '37223263-dbd8-4980-99d3-625df3cc7035',
 '71753d4f-1603-4b66-be4b-b48b2552d820',
 '4644df43-7e0e-4390-8301-da47e2acbcf1',
 '7745b63a-449f-

In [5]:
#loading chat model
llm = init_chat_model(
    "llama3-8b-8192", 
    model_provider="groq")

In [6]:
#retrieving and generating answer

prompt = hub.pull("rlm/rag-prompt")
question = "in the movie parasite, who kills who?"

retrieved_docs = vector_store.similarity_search(question)
docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
prompt = prompt.invoke({"question": question, "context": docs_content})
answer = llm.invoke(prompt)
print(answer.content)



I don't know who kills who in the movie Parasite, as this context only appears to be a portion of the movie's script, not a summary of the entire plot.


In [20]:
print(docs_content)

COBB 
Guilt. I feel guilt. And however 
confused I might get. 133. 
(MORE) However lost I might seem... it's 
always there. Telling me something. 
Reminding me of the truth. 
MAL 
What truth? 
COBB 
That you were wrong to doubt our 
world. That the idea that drove you 
to question your reality was a 
lie... 
MAL 
How could you know  it was a lie? 
COBB 
Because it was my lie. 
MAL 
(realizing) 
Because you planted the idea in my 
mind. 
COBB 
Because I performed inception on my 
own wife, then reaped the bitter 
rewards... 
ARIADNE 
Why? 
COBB 
We'd become lost in here. Living in 
a world of infinite possibilities. 
A world where we were gods. I 
realized we needed to escape, but 
she'd locked away her knowledge of 
the unreality of this world... 
INSERT CUT: Mal opens the doll's house. Takes the s pinning 
top, lies it down in the safe. LOCKS IT AWAY. 
COBB 
I couldn't make Mal understand that 
we needed to break free. To die. So 
I started to search our world...

the duct... he raise

langgraph

In [1]:
from langchain_core.documents import Document
from typing_extensions import List, TypedDict

#definging graph state
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

In [None]:
#nodes
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}

def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    prompt = hub.pull("rlm/rag-prompt")
    prompt = prompt.invoke({"question": state["question"], "context": docs_content})
    answer = llm.invoke(prompt) 
    return {"answer": answer.content}

In [None]:
from langgraph.graph import START, StateGraph

#building graph

graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [36]:
answer = graph.invoke({"question": "Who orders nagini to kill snape?"})
print(answer['answer'])



Voldemort orders Nagini to kill Snape, saying "Kill" and pointing the wand at Nagini.


In [37]:
#stream
for step in graph.stream(
    {"question": "what is the tool the people use in dreams in the movie inception called?"}, stream_mode="updates"
):
    print(f"{step}\n\n----------------\n")

{'retrieve': {'context': [Document(metadata={}, page_content='INCEPTION \nBy \nChristopher Nolan \nSHOOTING SCRIPT FADE IN: \nDAWN. CRASHING SURF. \nThe waves TOSS a BEARDED MAN onto wet sand. He lies  there. \nA CHILD’S SHOUT makes him LIFT his head to see: a L ITTLE \nBLONDE BOY crouching, back towards us, watching the  tide eat \na SANDCASTLE. A LITTLE BLONDE GIRL joins the boy. T he Bearded \nMan tries to call them, but they RUN OFF, FACES UNS EEN. He \nCOLLAPSES. \nThe barrel of a rifle ROLLS the Bearded Man onto hi s back. A \nJAPANESE SECURITY GUARD looks down at him, then cal ls up the \nbeach to a colleague leaning against a JEEP. Behind  them is a \ncliff, and on top of that, a JAPANESE CASTLE. \nINT. ELEGANT DINING ROOM, JAPANESE CASTLE y LATER \nThe Security Guard waits as an ATTENDANT speaks to an ELDERLY \nJAPANESE MAN sitting at the dining table, back to u s. \nATTENDANT \n(in Japanese) \nHe was delirious. But he asked for \nyou by name. And... \n(to the Security Guard) 



{'generate': {'answer': 'The tool used in dreams in the movie Inception is called a "totem."'}}

----------------

