In [119]:

from pymongo import MongoClient
from langchain.agents import tool
from typing import Sequence, Annotated, TypedDict
from typing import Annotated
from langgraph.graph.message import add_messages
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.messages import ToolMessage, BaseMessage, HumanMessage, SystemMessage
from langchain_community.document_loaders import PyPDFLoader
from langgraph.graph import END, StateGraph
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_community.document_transformers.openai_functions import create_metadata_tagger
from langchain_core.documents.base import Document
from pymongo import MongoClient
from pymongo.operations import SearchIndexModel
from setting import settings
import os
from langchain_core.utils.utils import convert_to_secret_str


In [120]:
# --- MongoDB: database and collection that store the embedded document chunks ---
dbname = "africonomy"
collection_name = "africa_outlook"
client = MongoClient(settings.MONGO_URI)
db = client[dbname]
collection = db[collection_name]

# --- Gemini models: chat model for the agent, embedding model for the vector store ---
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-001",
    api_key=settings.GOOGLE_API_KEY,
)
embeddings = GoogleGenerativeAIEmbeddings(
    model="gemini-embedding-001",
    google_api_key=convert_to_secret_str(settings.GOOGLE_API_KEY),
)

In [121]:
class AgentState(TypedDict):
    """Graph state shared by every node: the conversation's message list."""

    # Annotated with `add_messages`, which LangGraph uses to merge the
    # messages a node returns into this list.
    messages: Annotated[Sequence[BaseMessage], add_messages]

# Splitter used for the PDF pages: chunk size 1000 with an overlap of 100
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1000,
)

In [124]:
# Location of the source PDF: ~/Desktop/africa_econ.pdf
file_dir = os.path.join(os.path.expanduser("~"), "Desktop")
file_name = "africa_econ.pdf"
file_path = os.path.join(file_dir, file_name)

# Fail early with a clear error when the PDF is missing.
if not os.path.isfile(file_path):
    raise FileNotFoundError("The file is not found in provided directory")

try:
    # One Document per PDF page.
    pages = PyPDFLoader(file_path).load()
except Exception as e:
    print(str(e))
    # Re-raise: swallowing the error would leave `pages` undefined and the
    # next cell would fail with an unrelated NameError instead.
    raise


In [125]:
# Break the loaded pages into overlapping chunks.
docs = text_splitter.split_documents(pages)

# Rebuild each chunk as a Document whose metadata is derived from its own text:
#   title       -> first 20 space-separated words, joined back into a string
#   description -> the following 100 words, joined back into a string
#   keywords    -> first 20 words, kept as a list
meta_docs: list[Document] = []
for chunk in docs:
    words = chunk.page_content.split(" ")
    meta_docs.append(
        Document(
            page_content=chunk.page_content,
            metadata={
                "title": " ".join(words[:20]),
                "description": " ".join(words[20:120]),
                "keywords": words[:20],
            },
        )
    )

# Embed the chunks and write them into the MongoDB collection.
vector_store = MongoDBAtlasVectorSearch.from_documents(
    meta_docs,
    embeddings,
    collection=collection,
)

In [144]:
# Definition of the Atlas vector search index used by the retriever:
# a vector field on `embedding` plus a filterable `description` field.
vector_index_model = SearchIndexModel(
    name="afronomy_index",
    type="vectorSearch",
    definition={
        "fields":[
            {
                "type":"vector",
                "path":"embedding",
                # Must match the output size of the embedding model in use.
                "numDimensions":3072,
                "similarity":"cosine",
                "quantization":"scalar"
            },
            {
                "type": "filter",
                "path": "description",
            }
        ]
    }
)

# Only create the index when no index with this name exists yet, so that
# re-running the cell (or the whole notebook) does not request a duplicate.
existing_indexes = list(collection.list_search_indexes("afronomy_index"))
if existing_indexes:
    result = existing_indexes[0]["name"]
else:
    result = collection.create_search_index(model=vector_index_model)
print(result)

afronomy_index


In [145]:
# Re-open the vector store over the same "<database>.<collection>" namespace,
# this time bound to the named search index.
vector_store = MongoDBAtlasVectorSearch.from_connection_string(
    settings.MONGO_URI,
    ".".join((dbname, collection_name)),
    embeddings,
    index_name="afronomy_index",
)

In [173]:
@tool()
def retrieval_tool(query:str):
    """Uses the query to perform vector search on the specified mongodb database"""
    # The docstring above doubles as the tool description given to the model,
    # so it is intentionally left unchanged.
    #
    # Returns the list of matching Documents, or a plain message string when
    # the search comes back empty.
    #
    # No metadata pre-filter is passed. The store's keyword is `pre_filter`;
    # a hyphenated "pre-filter" key is not a recognised argument and has no
    # effect on the search. Atlas vector-search filters also do not appear to
    # support `$regex` -- confirm against the Atlas docs before adding a
    # filter here.
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={
            "k":100,
            "score_threshold":0.01,
        }
    )

    results = retriever.invoke(query)
    print("Total documents found: ",len(results))

    if not results:
        return "Vector search returned nothing"

    return results

# Tools the model is allowed to call.
tools = [retrieval_tool]

# Name -> tool lookup used when executing the model's tool calls.
tool_dict = dict((registered.name, registered) for registered in tools)

# From here on `llm` refers to the tool-aware model.
llm = llm.bind_tools(tools)

In [None]:

# System prompt prepended to every model call in `agent_call`.
sys_prompt = """
    You are a research assistant who can analyze given document and give insightful details about provided texts.
    There is a tool that calls the vector database to ensure context based documents are provided for accurate responses.

    - If a user needs to provide more details inform as such.
    - If a user asks unrelated questions, tell the user it is beyond the scope of the research.
"""

def agent_call(state:AgentState) -> AgentState:
    """Run the model on the system prompt followed by the state's messages.

    Prints the model's reply content and returns it wrapped in a
    ``{"messages": [...]}`` update for the graph state.
    """
    conversation = [SystemMessage(content=sys_prompt), *state['messages']]
    reply = llm.invoke(conversation)

    print("\n AI: ", reply.content)

    return {"messages": [reply]}

In [175]:
def should_continue(state:AgentState)->bool:
    """Decide whether the graph should route to the tools node.

    Returns True when the most recent message carries a non-empty
    ``tool_calls`` list, and False when it has no such attribute or the
    list is empty.
    """
    latest = state['messages'][-1]

    # Messages without a `tool_calls` attribute never trigger a tool run.
    if not hasattr(latest, "tool_calls"):
        return False

    return len(latest.tool_calls) > 0

In [176]:
def tool_call(state:AgentState)->AgentState:
    """Execute every tool call requested by the latest message.

    Each requested name is looked up in ``tool_dict``. Known tools are
    invoked with the call's ``query`` argument (empty string when absent);
    unknown names produce an error string instead of being executed. Every
    outcome is returned as a ToolMessage tied to the originating call id.
    """
    requested_calls = state['messages'][-1].tool_calls

    tool_messages = []

    for call in requested_calls:
        name = call['name']

        if name in tool_dict:
            print("Calling this tool: ", name)
            output = tool_dict[name].invoke(call['args'].get('query', ''))
        else:
            # The model asked for a tool that was never registered.
            output = "Tool doesn't exist in tools list"
            print("Invalid tool name.")

        tool_messages.append(
            ToolMessage(tool_call_id=call['id'], name=name, content=str(output))
        )

    return {"messages": tool_messages}

In [177]:
# Two-node loop: `agent` calls the model; while the reply requests tools,
# `tools` runs them and hands control back to `agent`.
graph = StateGraph(AgentState)

# Nodes
graph.add_node('agent', agent_call)
graph.add_node('tools', tool_call)

# Edges
graph.set_entry_point('agent')
graph.add_conditional_edges(
    'agent',
    should_continue,
    {True: 'tools', False: END},
)
graph.add_edge('tools', 'agent')

agent = graph.compile()

In [178]:
# Interactive question loop. Typing one of the exit words ends the session.
exit_words = {'exit', 'end', 'quit'}
prompt_text = "\n Enter your questions about the African Economic Outlook Paper: "

while True:
    # Strip surrounding whitespace so that e.g. "exit " still ends the loop.
    user_input = input(prompt_text).strip()

    if user_input.lower() in exit_words:
        break

    # Ignore blank submissions instead of sending an empty message to the model.
    if not user_input:
        continue

    print("\n Human: ", user_input)
    agent.invoke({'messages': [HumanMessage(content=user_input)]})



 Human:  what is 1+1

 AI:  This is beyond the scope of my research capabilities. I am designed to analyze documents and provide insights from text.

 Human:  explain Nigeria's economic growth projections

 AI:  
Calling this tool:  retrieval_tool
Total documents found:  100

 AI:  Nigeria's economic growth is projected to moderate to 3.2% in 2025 and 3.1% in 2026, influenced by increased global uncertainty. The economy is expected to be driven by services and industrial expansion as inflation moderates and oil production increases. However, these projections are subject to downside risks, including rising geopolitical tensions, policy uncertainty, volatile commodity prices, lower oil prices, slowdown in reform momentum, insecurity, and adverse weather events.

 Human:  what is Ghana's response to the debt issues

 AI:  
Calling this tool:  retrieval_tool
Total documents found:  100

 AI:  Ghana's response to debt issues includes:

1.  **Default and Restructuring:** In December 2022, 