In [2]:
import os
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage, ToolMessage
from operator import add as add_messages
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.tools import tool
from pydantic_settings import BaseSettings
# from file_path import file_path
from pathlib import Path

class Settings(BaseSettings):
    GOOGLE_API_KEY:str

setting = Settings()

os.environ["GOOGLE_API_KEY"] = setting.GOOGLE_API_KEY

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001")
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [3]:
# path = file_path(input("Enter or paste path of pdf file you want to load to llm's memory"))]
desktop = os.path.join(os.path.expanduser("~"), "Desktop")      # → /Users/you/Desktop
file_name = "africa_econ.pdf"                                      # change this
file_path = os.path.join(desktop, file_name)

try:
    if not os.path.isfile(file_path):
        raise FileNotFoundError("The path does not exist")
    
    file_pages = PyPDFLoader(file_path).load()
except Exception as e:
    print(str(e))
    raise

In [4]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=120)
docs = splitter.split_documents(file_pages)

In [5]:
chroma_dir = "./Africonomy"

if not os.path.isdir(chroma_dir):
    os.mkdir(chroma_dir)

try:
        vector_store = Chroma.from_documents(
        documents=docs,
        persist_directory=chroma_dir,
        embedding=embeddings,
        collection_name="Africonomy"
    )
except Exception as e:
      print(str(e))
      raise

In [6]:
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold", 
    search_kwargs={"k":80, "score_threshold":0.5}
)

In [41]:
@tool
def retriever_tool(query:str)-> str:
    """Searches the vector database to retrive documents based on recieved query."""

    docs = retriever.invoke(query)

    if not docs:
        return "No document matches the query in the vector db"
    
    response: list[str] =[]
    
    for i, doc in enumerate(docs):
        response.append(f"document {i+1}: {doc}")

    return "\n ".join(response)


In [42]:
tools = [retriever_tool]
llm = llm.bind_tools(tools)

class AgenState(TypedDict):
    messages:Annotated[Sequence[BaseMessage], add_messages]

tools_dict = {tool.name:tool for tool in tools}

sys_prompt = """You are an intelligent research assistant. Answer quetions based on the 
documents loaded into your memory and assist the user with researches.

- Always site contents from the document
- Each sentence should begin on new line
- If user needs to be more specific, inform as such.
"""

In [44]:
def call_llm(state:AgenState)-> AgenState:
    """Calls the llm to invoke tool based on query recieved from user."""

    messages = list(state['messages']) + [SystemMessage(content=sys_prompt)]

    response = llm.invoke(messages)

    print("\n AI: ", response.content)

    return {"messages":[response]}

In [45]:
def should_continue(state:AgenState)->bool:
    """Checks the last message for tool calls to determine if to call another tool or 
    end the conversation.
    """

    last_msg = state['messages'][-1]

    return hasattr(last_msg, "tool_calls") and len(last_msg.tool_calls) > 0

In [46]:
def invoke_tool(state:AgenState)-> AgenState:
    """Invokes the needed tool and checks that the tool name exists before call."""

    tool_calls = state['messages'][-1].tool_calls #type: ignore

    results:Sequence[BaseMessage] = []

    for t in tool_calls: #type:ignore
        if not t['name'] in tools_dict: #type:ignore
            print("Invalid tool call.")
            result = f"{t['name']} is not a defined tool from the tools list."
        else:
            print(f"calling tool: {t['name']} ")
            result = tools_dict[t['name']].invoke(t['args'].get('query', '')) #type: ignore

        results.append(ToolMessage(tool_call_id=t['id'], tool_name=t['name'], content=str(result)))

    return {"messages": results}
        


In [48]:
graph = StateGraph(AgenState)

graph.add_node('llm', call_llm)
graph.add_node('retriever', invoke_tool)
graph.add_edge('retriever', 'llm')
graph.set_entry_point('llm')

graph.add_conditional_edges(
    "llm",
    should_continue,
    {
        True:'retriever',
        False:END
    }
)

app = graph.compile()

In [51]:
query = input("\n Enter query about the document African Economic outlook 2025")

while query != 'exit':
    print("\n User: ", query)

    result = app.invoke({"messages": [HumanMessage(content=query)]})
    
    query = input("\n Enter query about the document African Economic outlook 2025")

# Have to mention the name of file to trigger tool call else it'll keep asking for query.


 User:  What is the research about

 AI:  Please provide me with the query you want me to use to find the research topic.

 User:  The research on African economy

 AI:  
calling tool: retriever_tool 

 AI:  The African Economic Outlook 2025 focuses on making Africa’s capital work better for Africa’s development. It includes country notes for various African nations and regions, and discusses topics such as industrial policy, inflationary pressures, investment growth, and strategies for sustainable economic growth. The report also touches on the African Continental Free Trade Area, and the role of international and regional development banks in supporting African economies.
