In [1]:
import os, getpass
from dotenv import load_dotenv, find_dotenv
from rapidfuzz import process
from langchain_deepseek import ChatDeepSeek
from langchain_core.prompts import ChatPromptTemplate
import json
from sentence_transformers import SentenceTransformer
import lancedb

### Initialize DeepSeek, LanceDB, and the embedding model

In [2]:
# LLM
# Load variables from the .env file located by find_dotenv() into the environment.
_ = load_dotenv(find_dotenv())


def _set_env(var: str):
    """Ensure ``var`` has a non-empty value in ``os.environ``.

    If the variable is missing or empty, the value is requested with a
    ``getpass`` prompt labelled with the variable name and stored in
    ``os.environ``. An existing non-empty value is left untouched.

    :param var: Name of the environment variable.
    """
    if os.environ.get(var):
        return
    os.environ[var] = getpass.getpass(f"{var}: ")


_set_env("DEEPSEEK_API_KEY")
llm = ChatDeepSeek(model="deepseek-chat")

In [3]:
# Lance DB
# The API key is a secret: it is taken from the environment (populated from
# .env by load_dotenv above, or prompted for by _set_env) instead of being
# stored in the notebook.
# NOTE: the key that used to be hardcoded in this cell must be considered
# leaked and should be rotated.
_set_env("LANCEDB_API_KEY")

uri = "db://screenplay-agent-msgtqr"
api_key = os.environ["LANCEDB_API_KEY"]
region = "us-east-1"
table_name = "documents"

db = lancedb.connect(
    uri=uri,
    api_key=api_key,
    region=region,
)

# Table queried by retrieve_documents().
table = db.open_table(table_name)

In [4]:
# Embedding Model
# Encoder used by retrieve_documents() to turn the text query into a vector.
model = SentenceTransformer(model_name_or_path='all-mpnet-base-v2')

In [5]:
# Movie List
# Read the JSON explicitly as UTF-8 so that titles containing non-ASCII
# characters decode identically on every platform (open() otherwise falls
# back to the locale's default encoding).
with open('../../data/movie_list.json', 'r', encoding='utf-8') as f:
    movie_list = json.load(f)
# The keys of the mapping are the titles used for fuzzy matching.
movie_titles = list(movie_list)

In [6]:
def fuzzy_match_title(user_input, titles, threshold=80):
    """
    Return the entry of ``titles`` that best matches ``user_input``.

    :param user_input: Text to match against the titles.
    :param titles: Candidate titles passed to ``process.extractOne``.
    :param threshold: Passed as ``score_cutoff``; defaults to 80.
    :return: The matched title, or ``None`` when ``extractOne`` yields no match.
    """
    best = process.extractOne(user_input, titles, score_cutoff=threshold)
    if not best:
        return None
    # The first element of the result is the matched choice itself.
    return best[0]


In [7]:
def check_movie_exists(movie_title):
    """
    Pass the movie title to the function to check if its script exists in the database.

    The title is fuzzy-matched against the known movie titles, so it does not
    need to be spelled exactly.

    :param movie_title: The movie title to search for in the database.
    :type movie_title: str
    :return: A message naming the fuzzy-matched title when its script is
        available, otherwise ``False``.
    :rtype: str | bool
    """
    fuzzy_movie = fuzzy_match_title(movie_title, movie_titles)
    # Available only when a title matched AND its entry in movie_list is truthy.
    if fuzzy_movie and movie_list[fuzzy_movie]:
        return f"Script for {fuzzy_movie} is available"
    return False


check_movie_exists("The Dark Knight")

False

In [8]:
def retrieve_documents(title, text_query):
    """
    Retrieves a list of document texts based on the given movie title and text query.

    Runs a hybrid search on ``table`` restricted to rows whose ``title``
    equals the given title, combining the embedding of ``text_query`` with
    the raw query text.

    :param str title: The title of the movie
    :param str text_query: The textual content or query to match with the database.
    :return: The ``text`` values of at most three matched documents.
    :rtype: list[str]
    """
    # Double any single quote so titles such as "Schindler's List" stay inside
    # the SQL string literal instead of terminating it (which would break the
    # filter or let the input alter it).
    safe_title = title.replace("'", "''")
    vector_query = model.encode(text_query)
    matches = (
        table.search(query_type="hybrid")
        .where(f"title='{safe_title}'", prefilter=True)
        .limit(3)
        .vector(vector_query)
        .text(text_query)
        .to_pandas()
    )
    return matches['text'].to_list()

# retrieve_documents("12 Monkeys", "Parking Lot Airport")

In [10]:
# user_input = "12 Monkeys airport parking lot scene"
# check_movie_exists(user_input)

In [9]:
from langgraph.graph import MessagesState
from langchain_core.messages import SystemMessage, HumanMessage

# System message
sys_msg = SystemMessage(content="You are an agent that can answer questions about movie scripts. When the user asks something, check if the movie exists first.")
tools = [check_movie_exists, retrieve_documents]
llm_with_tools = llm.bind_tools(tools, parallel_tool_calls=False)


# Node
def assistant(state: MessagesState):
    """
    Graph node that asks the tool-enabled LLM for the next message.

    :param state: Graph state; its ``"messages"`` list is the conversation so far.
    :return: A state update whose ``"messages"`` list holds the single LLM response.
    """
    # The system message is prepended to the complete history on every call;
    # no trimming of older messages is performed here.
    return {"messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]}

In [10]:
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from IPython.display import Image, display
from langgraph.checkpoint.memory import MemorySaver

# Graph
# Builder for the agent graph; it is compiled later, once without and once with a checkpointer.
builder = StateGraph(MessagesState)

# Define nodes: these do the work
# "assistant" calls the tool-enabled LLM; "tools" wraps the tool list in a ToolNode.
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Define edges: these determine how the control flow moves
# Every run enters the graph at the assistant node.
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message (result) from assistant is a tool call -> tools_condition routes to tools
    # If the latest message (result) from assistant is not a tool call -> tools_condition routes to END
    tools_condition,
)
# After the tools node runs, control goes back to the assistant node.
builder.add_edge("tools", "assistant")
# react_graph_memory = builder.compile(checkpointer=memory)
# react_graph = builder.compile()

# Show
# display(Image(react_graph.get_graph(xray=True).draw_mermaid_png()))
# display(Image(react_graph_memory.get_graph(xray=True).draw_mermaid_png()))

<langgraph.graph.state.StateGraph at 0x20342b809b0>

### Without Memory

In [11]:
# Compiled without a checkpointer.
react_graph = builder.compile()
react_graph.invoke(
    {"messages": [HumanMessage(content="What did Neo purchase at 7-11 according to Cypher's report in The Matrix?")]}
)

{'messages': [HumanMessage(content='What does the Young Man say he is illustrating when discussing robbing banks compared to their current activities in Pulp Fiction?', additional_kwargs={}, response_metadata={}, id='f89889e1-25a6-499b-b9e7-ec87c435309d'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_0_2db115a8-5d60-4112-9040-5f0699c760a4', 'function': {'arguments': '{"movie_title":"Pulp Fiction"}', 'name': 'check_movie_exists'}, 'type': 'function', 'index': 0}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 343, 'total_tokens': 366, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 256}, 'prompt_cache_hit_tokens': 256, 'prompt_cache_miss_tokens': 87}, 'model_name': 'deepseek-chat', 'system_fingerprint': 'fp_8802369eaa_prod0623_fp8_kvcache', 'id': 'a73655b5-19dc-465c-9ba7-74601b627d5a', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='r

In [12]:
# Same question sent again to the graph compiled without a checkpointer.
react_graph.invoke(
    {"messages": [HumanMessage(content="What did Neo purchase at 7-11 according to Cypher's report in The Matrix?")]}
)


{'messages': [HumanMessage(content="What did Neo purchase at 7-11 according to Cypher's report in The Matrix?", additional_kwargs={}, response_metadata={}, id='a16a45fc-6978-4b50-bd34-de2d322f8727'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_0_aefa4b5b-3600-4a2e-bd45-36001f8cd339', 'function': {'arguments': '{"movie_title":"The Matrix"}', 'name': 'check_movie_exists'}, 'type': 'function', 'index': 0}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 338, 'total_tokens': 361, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 256}, 'prompt_cache_hit_tokens': 256, 'prompt_cache_miss_tokens': 82}, 'model_name': 'deepseek-chat', 'system_fingerprint': 'fp_8802369eaa_prod0623_fp8_kvcache', 'id': '0a7fa6df-53fe-4282-a414-a4c5d8ae36cb', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--f4911f89-c054-455f-81f4-5f20a751ec23-0', tool_calls=[{'

### With Memory


In [13]:
# Memory
# Checkpointer handed to the compiled graph.
memory = MemorySaver()
react_graph_memory = builder.compile(checkpointer=memory)
# Specify a thread
# The same config (thread_id "1") is passed to every invoke below.
config = dict(configurable=dict(thread_id="1"))

In [14]:
# First turn on the configured thread.
react_graph_memory.invoke(
    {"messages": [HumanMessage(content="Hi, what can you do?")]},
    config,
)

{'messages': [HumanMessage(content='Hi, what can you do?', additional_kwargs={}, response_metadata={}, id='e9932dd9-97ee-4a25-96e7-6c50de8807d8'),
  AIMessage(content="I can help you answer questions about movie scripts! For example, I can:\n\n1. **Check if a movie exists** in the database.\n2. **Retrieve specific parts of a movie script** based on your query (e.g., a character's dialogue, a scene description, or a particular line).\n3. **Provide summaries or details** about scenes, characters, or plot points if the information is available in the script.\n\nJust let me know the movie title and what you're looking for, and I'll do my best to assist you!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 112, 'prompt_tokens': 326, 'total_tokens': 438, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 64}, 'prompt_cache_hit_tokens': 64, 'prompt_cache_miss_tokens': 262}, 'model_name': 'deepseek-c

In [15]:
# Availability question sent on the same thread.
react_graph_memory.invoke(
    {"messages": [HumanMessage(content="Do you have Batman Begins Script?")]},
    config,
)

{'messages': [HumanMessage(content='Hi, what can you do?', additional_kwargs={}, response_metadata={}, id='e9932dd9-97ee-4a25-96e7-6c50de8807d8'),
  AIMessage(content="I can help you answer questions about movie scripts! For example, I can:\n\n1. **Check if a movie exists** in the database.\n2. **Retrieve specific parts of a movie script** based on your query (e.g., a character's dialogue, a scene description, or a particular line).\n3. **Provide summaries or details** about scenes, characters, or plot points if the information is available in the script.\n\nJust let me know the movie title and what you're looking for, and I'll do my best to assist you!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 112, 'prompt_tokens': 326, 'total_tokens': 438, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 64}, 'prompt_cache_hit_tokens': 64, 'prompt_cache_miss_tokens': 262}, 'model_name': 'deepseek-c

In [16]:
# Scene query sent on the same thread.
react_graph_memory.invoke(
    {"messages": [HumanMessage(content="12 monkeys airport parking lot scene")]},
    config,
)

{'messages': [HumanMessage(content='Hi, what can you do?', additional_kwargs={}, response_metadata={}, id='e9932dd9-97ee-4a25-96e7-6c50de8807d8'),
  AIMessage(content="I can help you answer questions about movie scripts! For example, I can:\n\n1. **Check if a movie exists** in the database.\n2. **Retrieve specific parts of a movie script** based on your query (e.g., a character's dialogue, a scene description, or a particular line).\n3. **Provide summaries or details** about scenes, characters, or plot points if the information is available in the script.\n\nJust let me know the movie title and what you're looking for, and I'll do my best to assist you!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 112, 'prompt_tokens': 326, 'total_tokens': 438, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 64}, 'prompt_cache_hit_tokens': 64, 'prompt_cache_miss_tokens': 262}, 'model_name': 'deepseek-c

In [17]:
# Availability question for another title, sent on the same thread.
react_graph_memory.invoke(
    {"messages": [HumanMessage(content="Do you have F1 movie script?")]},
    config,
)

{'messages': [HumanMessage(content='Hi, what can you do?', additional_kwargs={}, response_metadata={}, id='e9932dd9-97ee-4a25-96e7-6c50de8807d8'),
  AIMessage(content="I can help you answer questions about movie scripts! For example, I can:\n\n1. **Check if a movie exists** in the database.\n2. **Retrieve specific parts of a movie script** based on your query (e.g., a character's dialogue, a scene description, or a particular line).\n3. **Provide summaries or details** about scenes, characters, or plot points if the information is available in the script.\n\nJust let me know the movie title and what you're looking for, and I'll do my best to assist you!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 112, 'prompt_tokens': 326, 'total_tokens': 438, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 64}, 'prompt_cache_hit_tokens': 64, 'prompt_cache_miss_tokens': 262}, 'model_name': 'deepseek-c