In [1]:
# load text and metadata filters from pdf

from langchain_community.document_loaders import PyPDFLoader

# Read the local PDF with PyPDFLoader. `docs` is the list of Documents the
# loader returns; later cells treat len(docs) as the page count.
file_path = "./paper.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()

# Sanity check: number of Documents, a 200-character preview of the first
# one, and the metadata dict attached to it.
first_doc = docs[0]
print(len(docs))
print(first_doc.page_content[:200] + "\n")
print(first_doc.metadata)

15
Contents lists available at ScienceDirect
Parallel Computing
journal homepage: www.elsevier.com/locate/parco  
Software acceleration of multi-user MIMO uplink detection on GPU
Ali Nada a
 ,∗, Hazem Is

{'producer': 'Acrobat Distiller 8.1.0 (Windows)', 'creator': 'Elsevier', 'creationdate': '2025-09-05T09:12:22+00:00', 'crossmarkdomains[1]': 'elsevier.com', 'crossmarkmajorversiondate': '2010-04-23', 'creationdate--text': '5th September 2025', 'elsevierwebpdfspecifications': '7.0.1', 'robots': 'noindex', 'moddate': '2025-09-05T09:13:47+00:00', 'author': 'Ali Nada', 'doi': '10.1016/j.parco.2025.103150', 'title': 'Software acceleration of multi-user MIMO uplink detection on GPU', 'keywords': 'High-performance computing,Parallel computing,Massive MIMO,Uplink detection,Matrix decomposition', 'subject': 'Parallel Computing, 125 (2025) 103150. doi:10.1016/j.parco.2025.103150', 'crossmarkdomains[2]': 'sciencedirect.com', 'crossmarkdomainexclusive': 'true', 'source': './paper.pdf', 'total_pag

In [31]:
# normalise and extract meta data filters from pdf content

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
import json

import os
import re

# Chat model used to pull bibliographic fields out of the paper's front matter.
# The API key is read from the environment so the secret is never stored in
# the notebook (a KeyError here means GOOGLE_API_KEY is not exported).
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",  # or "gemini-1.5-flash" for faster responses
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.1  # Lower for more factual responses
)

# Concatenate the text of (at most) the first five loaded pages, then cap the
# result so the prompt stays small.
context = "".join(doc.page_content + "\n\n" for doc in docs[:5])
context = context[:6000]  # Safe token limit

# Prompt: extract metadata from front matter only
prompt = f"""
    Extract metadata from this academic paper's front matter.
    Return ONLY valid JSON with these keys. If not found, use null.    
    
    Keys:
    - title: string
    - authors: string (comma-separated)
    - publish_year: integer (4 digits) or null
    - venue: string (e.g., "NeurIPS 2024", "arXiv", "Nature")
    
    Content:
    {context}
    
    Respond with ONLY the JSON object, no markdown formatting:
    {{
      "title": "...",
      "authors": "...",
      "publish_year": 2024,
      "venue": "NeurIPS 2024"
    }}
"""

response = llm.invoke(prompt)

# The reply may still arrive wrapped in a ```json ... ``` fence despite the
# instruction above, which would make json.loads raise; unwrap one if present.
# NOTE(review): assumes response.content is a plain string - confirm for the
# installed langchain_google_genai version.
filters_json = response.content.strip()
fenced = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", filters_json, flags=re.DOTALL)
if fenced:
    filters_json = fenced.group(1)
filters = json.loads(filters_json)

# normalise the year: coerce to int, fall back to None when it is not numeric
if filters.get("publish_year"):
    try:
        filters["publish_year"] = int(filters["publish_year"])
    except (TypeError, ValueError):
        filters["publish_year"] = None
filters["source"] = "paper.pdf"

# "authors" is intentionally left as the comma-separated string requested in
# the prompt (no list conversion is applied).

print(filters)

{'title': 'Software acceleration of multi-user MIMO uplink detection on GPU', 'authors': 'Ali Nada, Hazem Ismail Ali, Liang Liu, Yousra Alkabani', 'publish_year': 2025, 'venue': 'Parallel Computing', 'source': 'paper.pdf'}


In [37]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Character-based splitter: chunk_size 1000 with a 200-character overlap,
# measured with len().
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
chunks = splitter.split_documents(docs)

# Merge metadata: keep what the loader attached, overlay the extracted
# `filters`, and record the number of loaded documents as total_pages.
page_count = len(docs)
for chunk in chunks:
    merged_metadata = dict(chunk.metadata)
    merged_metadata.update(filters)
    merged_metadata["total_pages"] = page_count
    chunk.metadata = merged_metadata

print(len(chunks))
print(chunks[0].page_content)
print(chunks[0].metadata)

103
Contents lists available at ScienceDirect
Parallel Computing
journal homepage: www.elsevier.com/locate/parco  
Software acceleration of multi-user MIMO uplink detection on GPU
Ali Nada a
 ,∗, Hazem Ismail Ali a, Liang Liu b, Yousra Alkabani a
a Halmstad University, Box 823, Halmstad, 301 18, Halland, Sweden
b Lund University, Box 117, Lund, 221 00, Skåne, Sweden
A R T I C L E  I N F O
Keywords:
High-performance computing
Parallel computing
Massive MIMO
Uplink detection
Matrix decomposition
 A B S T R A C T
This paper presents the exploration of GPU-accelerated block-wise decompositions for zero-forcing (ZF) 
based QR and Cholesky methods applied to massive multiple-input multiple-output (MIMO) uplink detection 
algorithms. Three algorithms are evaluated: ZF with block Cholesky decomposition, ZF with block QR 
decomposition (QRD), and minimum mean square error (MMSE) with block Cholesky decomposition. The latter
{'producer': 'Acrobat Distiller 8.1.0 (Windows)', 'creator': 'Elsevier'

In [35]:
# build knowledge base -> parse documents and embed them in the persistent vectorDB
# how will images be handled

import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import google.generativeai as genai

import os

# Embedding function used by the collection; the API key is read from the
# environment instead of being hardcoded in the notebook.
google_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(
    api_key=os.environ["GOOGLE_API_KEY"],
    model_name="gemini-embedding-001" # Set the model explicitly
)

# Connect to the Chroma server on localhost:8000 and check that it answers.
chroma_client = chromadb.HttpClient(host='localhost', port=8000)
chroma_client.heartbeat()

collections = chroma_client.list_collections()
print(collections)

# Create / get your collection with Google embeddings
collection = chroma_client.get_or_create_collection(
    name="research_papers",
    embedding_function=google_ef
)

# Add your research paper chunks
#
# Each page_content is embedded by the collection's embedding function and each
# metadata dict is stored alongside its vector.
#
# ids must be unique per chunk across the whole collection. Plain positional
# ids ("0", "1", ...) would collide as soon as a second paper is ingested, so
# the id is prefixed with the chunk's source. upsert (rather than add) keeps
# re-running this cell idempotent.
# NOTE(review): records already stored under the old positional ids are not
# rewritten by this - clear or migrate them once if the collection was
# populated before this change.
chunk_ids = [
    f"{chunk.metadata.get('source', 'doc')}:{i}"
    for i, chunk in enumerate(chunks)
]

# `filters` may carry null values (the extraction prompt asks for null when a
# field is not found); drop those keys rather than sending None to Chroma.
# NOTE(review): presumably Chroma rejects None metadata values - confirm for
# the installed version.
chunk_metadatas = [
    {key: value for key, value in chunk.metadata.items() if value is not None}
    for chunk in chunks
]

collection.upsert(
    documents=[chunk.page_content for chunk in chunks],
    metadatas=chunk_metadatas,
    ids=chunk_ids
)

[Collection(name=research_papers)]


In [36]:
# verify if document has been embedded

# Ask the collection how many records it currently holds and report it.
count = collection.count()
print("Total documents in collection:", count)

Total documents in collection: 103


In [8]:
# Question driven through the rest of the notebook. It is interpolated into the
# classification prompt, sent to the agents as the user message, and read as a
# module-level global by kb_retrieval and specific_web_search - so re-run the
# downstream cells after changing it.
user_query = "what is attention?"

In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI

# classification agent
import os

# Chat model used for classification; the API key is read from the
# environment instead of being hardcoded in the notebook.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # or "gemini-1.5-flash" for faster responses
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.1  # Lower for more factual responses
)

# Classification prompt; the user's question is interpolated at the end.
prompt = f"""
You are a Research Question Classification Agent.
Your task is to analyze a user’s question and classify it into one of three categories based on intent and context.

Classification categories:

 - Specific-to-paper — The question references or implies a specific research paper, author, DOI, or title (e.g., “What does the 2021 paper by Smith et al. conclude about transformers?”).

 - Generic-research — The question is about a research topic or domain in general, not a single paper (e.g., “How do attention mechanisms work in NLP?”).

 - Non-research — The question is unrelated to academic or scientific research (e.g., “What’s the weather today?” or “Write a poem.”).

Instructions:

Always output only one label: specific-to-paper, generic-research, or non-research.

If uncertain, choose the closest category by analyzing research intent.

Do not include explanations or reasoning in the final output.

Question: {user_query}
"""

# Send to Gemini
response = llm.invoke(prompt)

# Normalise the label once here. The downstream cells compare it with ==, so
# surrounding whitespace (e.g. a trailing newline) or different casing would
# otherwise make every branch silently skip.
# answer -> specific-to-paper / generic-research / non-research
query_type = response.content.strip().lower()

if query_type not in ("specific-to-paper", "generic-research", "non-research"):
    # Surface an unexpected reply instead of letting all branches skip quietly.
    print("Warning: unexpected classification label:", repr(query_type))

print("Query Type:", query_type)

Query Type: specific-to-paper


In [56]:
# Short-circuit for questions classified as unrelated to research.
# strip() guards against whitespace around the model's label (such as a
# trailing newline), which would make a bare equality check miss.
if query_type.strip().lower() == "non-research":
    print("Irrelevant question.")
    # break and return response

In [57]:
from langchain.tools import tool
from pydantic import BaseModel, Field
import requests

class SearchInput(BaseModel):
    """Input for academic paper search queries."""

    # Single free-text field; this model is passed as args_schema to the
    # web_search tool defined right after it.
    query: str = Field(
        description=(
            "The research topic or question to search for "
            "(e.g., 'Multi-user MIMO', 'transformer architecture')"
        )
    )

@tool("web_search", args_schema=SearchInput, return_direct=False)
def web_search(query: str) -> str:
    """
    Search academic papers on Semantic Scholar for research questions.
    Returns paper titles, abstracts, and summaries.
    
    Args:
        query: Research topic to search for
        
    Returns:
        Formatted context with paper information
    """
    # NOTE(review): the docstring above is presumably what the agent sees as
    # the tool description, so its wording is left untouched.
    #
    # Errors are reported as strings (never raised) so the calling agent
    # receives them as an ordinary tool observation.
    try:
        # Query the Semantic Scholar paper-search endpoint for the top 3 hits.
        response = requests.get(
            "https://api.semanticscholar.org/graph/v1/paper/search",
            params={
                "query": query,
                "fields": "title,abstract,authors,year,venue,url,tldr",
                "limit": 3,
            },
            timeout=10,
        )
        response.raise_for_status()

        data = response.json()

        # Check if we got results ("data" may be missing, null or empty).
        papers = data.get('data') or []
        if not papers:
            return f"No papers found for query: {query}"

        # Format the context. Fields are read with `or <fallback>` rather than
        # dict.get(key, fallback): a key that is present with a JSON null value
        # would otherwise come back as None and be rendered as "None" (or, for
        # author names, break the join below).
        parts = [f"Found {data.get('total', 0)} papers on '{query}'. Top {len(papers)} results:\n\n"]

        for i, paper in enumerate(papers, 1):
            authors = paper.get('authors') or []
            author_names = ', '.join((a.get('name') or 'Unknown') for a in authors[:3])
            if len(authors) > 3:
                author_names += ' et al.'

            parts.append(f"--- Paper {i} ---\n")
            parts.append(f"Title: {paper.get('title') or 'N/A'}\n")
            parts.append(f"Authors: {author_names}\n")
            parts.append(f"Year: {paper.get('year') or 'N/A'}\n")
            parts.append(f"Venue: {paper.get('venue') or 'N/A'}\n")
            parts.append(f"URL: {paper.get('url') or 'N/A'}\n")
            parts.append(f"\nAbstract:\n{paper.get('abstract') or 'Not available'}\n")

            # The TL;DR summary is optional; include it only when it has text.
            tldr = paper.get('tldr')
            if tldr and tldr.get('text'):
                parts.append(f"\nKey Summary:\n{tldr['text']}\n")

            parts.append("\n")

        return "".join(parts)

    except requests.exceptions.Timeout:
        return "Error: Request timed out. Please try again."
    except requests.exceptions.RequestException as e:
        return f"Error fetching papers: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"

In [58]:
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.tools import tool
from langchain.agents import create_agent

import os

# Number of nearest neighbours kb_retrieval asks the vector DB for.
k_neighbours = 5

# The Google API key is read from the environment instead of being hardcoded
# in the notebook (a KeyError here means GOOGLE_API_KEY is not exported).
google_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(
    api_key=os.environ["GOOGLE_API_KEY"],
    model_name="gemini-embedding-001" # Set the model explicitly
)

# Connect to the Chroma server on localhost:8000 and check that it answers.
chroma_client = chromadb.HttpClient(host='localhost', port=8000)
chroma_client.heartbeat()

# Chat model that drives the agent.
model = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # or "gemini-1.5-flash" for faster responses
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.1  # Lower for more factual responses
)

@tool
def kb_retrieval(query: str) -> str:
    """Retrieve documents from the primary knowledge base."""
    # NOTE(review): the docstring above is presumably what the agent sees as
    # the tool description, so its wording is left untouched.
    #
    # Looks up the "research_papers" collection and returns the k_neighbours
    # closest documents joined into one string ("" when nothing is returned).
    collection = chroma_client.get_or_create_collection(
        name="research_papers",
        embedding_function=google_ef
    )

    # Search with the argument the agent supplied. The previous version ignored
    # `query` and read the module-level `user_query`, so the tool's result
    # depended on hidden notebook state rather than on its input.
    result = collection.query(
        query_texts=[query],
        n_results=k_neighbours,
        include=["documents"]
    )

    # One query text was sent, so the hits for it are the first inner list.
    retrieved_documents = result['documents'][0]

    # The "---" delimiter helps the LLM tell the individual documents apart.
    return "\n---\n".join(retrieved_documents)

# Generic research questions: knowledge base first, Semantic Scholar search as
# the fallback. strip() guards against whitespace around the model's label.
if query_type.strip().lower() == "generic-research":
    # retrieve from knowledge base(vector DB)
    # check for relevance of retrieved context

    # if relevant -> pass to LLM along with question and generate response
    # else use tool get_info_from_web -> calls an API with LLM generated Url(query params) for context retrieval

    # extract context and check for relevance
    # if relevant -> pass to LLM along with question and generate response
    # else inform user that information could not be gathered about the topic that they are searching for

    prompt = """
    You are a highly logical and systematic Research Assistant. Your sole purpose is to answer the user's query by following a strict, two-step information retrieval hierarchy. You have access to two tools: `kb_retrieval` (your internal knowledge base) and `web_search` (a public search engine).

    **PROCEDURE:**

    1.  **PRIMARY SOURCE:** You **MUST** first attempt to retrieve information using the `kb_retrieval` tool.
        * **Action:** Call `kb_retrieval` with the user's exact query.
        * **Observation Analysis:** Analyze the content returned by `kb_retrieval` (the Observation).
        * **IF** the content is sufficient and relevant to fully answer the query, formulate your final answer using **only** that context.
        * **IF** the content is empty, irrelevant, or clearly insufficient, you **MUST** proceed to the Secondary Source step.

    2.  **SECONDARY SOURCE (Fallback):** Only if the `kb_retrieval` tool fails to provide a relevant answer, you **MUST** call the `web_search` tool.
        * **Action:** Call `web_search` with the user's original query.
        * **Final Answer Generation:** Use the new results from the `web_search` tool to formulate your final answer.
        * **IF** the web search results are relevant, provide the answer and state your process is complete.
        * **IF** the web search results are still unhelpful (e.g., "no information found"), you **MUST** output a `Final Answer` informing the user that information could not be gathered on this topic from your available sources.

    **IMPORTANT:** You must explicitly use the tools (Action) and wait for the Observation before making a decision. Always end your process by outputting a single `Final Answer`.
    """

    # The procedure above only takes effect if it is handed to the agent.
    # Previously `prompt` was built but never passed, so the model was free to
    # call web_search before kb_retrieval.
    # NOTE(review): `system_prompt` is the keyword in LangChain 1.x; confirm
    # against the installed version.
    agent = create_agent(
        model,
        tools=[web_search, kb_retrieval],
        system_prompt=prompt,
    )

    print(user_query)

    conversation = agent.invoke(
        {"messages": [{"role": "user", "content": f"{user_query}"}]}
    )

    print(conversation)

    # The returned state is a dict whose "messages" list ends with the agent's
    # last message; print its content as the final answer.
    final_ai_message = conversation["messages"][-1]
    print(final_ai_message.content)

How do attention mechanisms work in NLP?
{'messages': [HumanMessage(content='How do attention mechanisms work in NLP?', additional_kwargs={}, response_metadata={}, id='bdd3a3f7-5f8f-480d-a21d-298c4754e9aa'), AIMessage(content='', additional_kwargs={'function_call': {'name': 'web_search', 'arguments': '{"query": "attention mechanisms in NLP"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': [], 'grounding_metadata': {}, 'model_provider': 'google_genai'}, id='lc_run--842cc8ef-f2db-430d-9e5e-f941359ce484-0', tool_calls=[{'name': 'web_search', 'args': {'query': 'attention mechanisms in NLP'}, 'id': 'de4aeae5-f2a7-4721-a339-0ca9860aa790', 'type': 'tool_call'}]), ToolMessage(content='Found 1713 papers on \'attention mechanisms in NLP\'. Top 3 results:\n\n--- Paper 1 ---\nTitle: An Introductory Survey on Attention Mechanisms in NLP Problems\nAuthors: Dichao Hu\nYear: 2018\nVenue: I

In [22]:
import logging, sys

# Root logging configuration: INFO and above, written to stdout, one record
# per line as "timestamp | level | logger name | message".
_LOG_FORMAT = "%(asctime)s | %(levelname)-8s | %(name)s | %(message)s"
logging.basicConfig(stream=sys.stdout, format=_LOG_FORMAT, level=logging.INFO)

# Named logger used by the RAG helper functions.
log = logging.getLogger("RAG")

In [23]:
import logging, sys, io, re, requests, PyPDF2
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
import faiss
from typing import List

# `os` is not imported by this cell's import lines (nor by any other cell in
# view), so the original assignment depended on leftover kernel state.
import os

# The key is no longer written into the notebook; it must be supplied through
# the environment. Fail early with a clear message if it is absent.
# NOTE(review): extract_pdf builds GoogleGenerativeAIEmbeddings without an
# explicit key, so it presumably reads GOOGLE_API_KEY - confirm.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before starting the kernel."
    )

def extract_pdf(pdf_url: str, max_pages: int = 10) -> "FAISS | None":
    """Download a PDF and index its leading pages in an in-memory FAISS store.

    Args:
        pdf_url: URL the PDF is fetched from.
        max_pages: Upper bound on how many pages (from the start) are read.

    Returns:
        A FAISS vector store containing the embedded chunks, or None when any
        step fails or the PDF yields no text chunks. Failures are logged with
        their traceback instead of being silently discarded.
    """
    log.info(f"Download pdf -> {pdf_url}")
    try:
        # 1. Download, sending browser-like request headers.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
        }

        response = requests.get(pdf_url, headers=headers, timeout=30)
        response.raise_for_status()

        # 2. Extract per page with PyPDF2, reading from an in-memory buffer.
        reader = PyPDF2.PdfReader(io.BytesIO(response.content))
        pages = reader.pages
        total_pages = len(pages)
        pages_to_read = min(total_pages, max_pages)

        page_docs: List[Document] = [
            Document(
                # extract_text() may yield nothing; normalise to "".
                page_content=pages[i].extract_text() or "",
                metadata={
                    "source": pdf_url,
                    "page": i,  # 0-indexed
                    "total_pages": total_pages,
                },
            )
            for i in range(pages_to_read)
        ]

        # 3. Chunk
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            add_start_index=True,  # char offset in original page
        )
        chunks = splitter.split_documents(page_docs)

        # Nothing to index (e.g. no extractable text): report it and let the
        # caller fall back, instead of building an empty store.
        if not chunks:
            log.warning(f"No text chunks extracted from pdf -> {pdf_url}")
            return None

        for idx, chunk in enumerate(chunks):
            chunk.metadata["chunk_idx"] = idx

        # 4. Embed and index. A probe query is embedded first to learn the
        # vector dimension the flat L2 index must be created with.
        embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
        embedding_dim = len(embeddings.embed_query("hello world"))
        index = faiss.IndexFlatL2(embedding_dim)

        vector_store = FAISS(
            embedding_function=embeddings,     # How to create vectors
            index=index,                      # Where to store vectors (needs to exist!)
            docstore=InMemoryDocstore(),      # Where to store documents
            index_to_docstore_id={},         # Mapping between them
        )

        vector_store.add_documents(chunks)

        return vector_store
    except Exception:
        # Still best-effort (callers check for None), but record why it failed:
        # the previous bare `return None` left nothing in the logs to diagnose.
        log.exception(f"PDF extraction failed -> {pdf_url}")
        return None

In [24]:
from langchain.tools import tool
from pydantic import BaseModel, Field
import requests
import io

class SearchInput(BaseModel):
    """Input for academic paper search queries."""

    # Note: this re-binds the name SearchInput, which an earlier cell also
    # defines with a different field description. This version is the one
    # passed as args_schema to specific_web_search below.
    query: str = Field(
        description=(
            "Based on the users query, use either complete or incomplete name of the paper "
            "or the central topic the user is referring to in their question. "
            "Higher priority to the paper's name(incomplete names are acceptable as well)."
        )
    )

@tool("specific_web_search", args_schema=SearchInput, return_direct=False)
def specific_web_search(query: str) -> str:
    """
    Search for a paper using Semantic Scholar.
    - If openAccessPdf available → embed & retrieve relevant chunks using user_query
    - Else → return abstract + TL;DR
    
    Args:
        query: Research topic to search for
        
    Returns:
        Formatted context with paper information
    """
    # NOTE(review): the docstring above is presumably what the agent sees as
    # the tool description, so its wording is left untouched.
    #
    # Errors are reported as strings (never raised) so the calling agent
    # receives them as an ordinary tool observation.
    try:
        # 1. Resolve the (possibly partial) title to a paper id via autocomplete.
        response = requests.get(
            "https://api.semanticscholar.org/graph/v1/paper/autocomplete",
            params={"query": query},
            timeout=10,
        )
        response.raise_for_status()

        matches = response.json().get('matches') or []
        if not matches:
            return "Could not find paper"

        # The first suggestion is taken as the closest match.
        search_id = matches[0]['id']

        # 2. Fetch the paper's details.
        response = requests.get(
            f"https://api.semanticscholar.org/graph/v1/paper/{search_id}",
            params={"fields": "title,abstract,authors,year,venue,openAccessPdf,tldr"},
            timeout=10,
        )
        response.raise_for_status()

        paper = response.json()

        # Check if we got results
        if paper.get('error'):
            return f"No papers found for id: {search_id}"

        # openAccessPdf can be absent, null, or an object whose url is empty or
        # null; collapse all of those to "".
        pdf_url = (paper.get('openAccessPdf') or {}).get('url') or ""

        # 3. Preferred path: index the PDF and return the chunks closest to the
        # user's question.
        if pdf_url:
            log.info(f"URL from openAccessPdf -> {pdf_url}")
            vector_store = extract_pdf(pdf_url)
            if vector_store is not None:
                # Retrieval deliberately uses the module-level `user_query`
                # (the full question), not `query` (the paper name).
                results = vector_store.similarity_search(
                    user_query,
                    k=5
                )
                if results:
                    return "\n\n".join(doc.page_content for doc in results)
                # No hits: fall through to the metadata summary below.

        # 4. Fallback: format the paper details. Fields are read with
        # `or <fallback>` so a JSON null is rendered as the fallback, not "None".
        authors = paper.get('authors') or []
        author_names = ', '.join((a.get('name') or 'Unknown') for a in authors[:3])
        if len(authors) > 3:
            author_names += ' et al.'

        parts = [
            "--- Paper ---\n",
            f"Title: {paper.get('title') or 'N/A'}\n",
            f"Authors: {author_names}\n",
            f"Year: {paper.get('year') or 'N/A'}\n",
            f"Venue: {paper.get('venue') or 'N/A'}\n",
            # Previously paper.get('openAccessPdf', 'N/A')['url'], which raised
            # TypeError whenever there was no open-access PDF - exactly the
            # case this fallback exists for.
            f"URL: {pdf_url or 'N/A'}\n",
            f"\nAbstract:\n{paper.get('abstract') or 'Not available'}\n",
        ]

        # The TL;DR summary is optional; include it only when it has text.
        tldr = paper.get('tldr')
        if tldr and tldr.get('text'):
            parts.append(f"\nKey Summary:\n{tldr['text']}\n")

        parts.append("\n")

        return "".join(parts)

    except requests.exceptions.Timeout:
        return "Error: Request timed out. Please try again."
    except requests.exceptions.RequestException as e:
        return f"Error fetching papers: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"

In [25]:
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.tools import tool
from langchain.agents import create_agent

import os

# Number of nearest neighbours kb_retrieval asks the vector DB for.
k_neighbours = 5

# The Google API key is read from the environment instead of being hardcoded
# in the notebook (a KeyError here means GOOGLE_API_KEY is not exported).
google_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(
    api_key=os.environ["GOOGLE_API_KEY"],
    model_name="gemini-embedding-001" # Set the model explicitly
)

# Connect to the Chroma server on localhost:8000 and check that it answers.
chroma_client = chromadb.HttpClient(host='localhost', port=8000)
chroma_client.heartbeat()

# Chat model that drives the agent.
model = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # or "gemini-1.5-flash" for faster responses
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.1  # Lower for more factual responses
)

@tool
def kb_retrieval(query: str) -> str:
    """Retrieve documents from the primary knowledge base."""
    # NOTE(review): the docstring above is presumably what the agent sees as
    # the tool description, so its wording is left untouched.
    #
    # Looks up the "research_papers" collection and returns the k_neighbours
    # closest documents joined into one string ("" when nothing is returned).
    collection = chroma_client.get_or_create_collection(
        name="research_papers",
        embedding_function=google_ef
    )

    # Search with the argument the agent supplied. The previous version ignored
    # `query` and read the module-level `user_query`, so the tool's result
    # depended on hidden notebook state rather than on its input.
    result = collection.query(
        query_texts=[query],
        n_results=k_neighbours,
        include=["documents"]
    )

    # One query text was sent, so the hits for it are the first inner list.
    retrieved_documents = result['documents'][0]

    # The "---" delimiter helps the LLM tell the individual documents apart.
    return "\n---\n".join(retrieved_documents)

# Paper-specific questions: knowledge base first, Semantic Scholar paper lookup
# as the fallback. strip() guards against whitespace around the model's label.
if query_type.strip().lower() == "specific-to-paper":
    # retrieve from knowledge base(vector DB)
    # check for relevance of retrieved context

    # if relevant -> pass to LLM along with question and generate response
    # else use tool get_info_from_web -> calls an API with LLM generated Url(query params) for context retrieval

    # extract context and check for relevance
    # if relevant -> pass to LLM along with question and generate response
    # else inform user that information could not be gathered about the topic that they are searching for

    prompt = """
    You are a highly logical and systematic Research Assistant. Your sole purpose is to answer the user's query by following a strict, two-step information retrieval hierarchy. You have access to two tools: `kb_retrieval` (your internal knowledge base) and `specific_web_search` (a public search engine).

    **PROCEDURE:**

    1.  **PRIMARY SOURCE:** You **MUST** first attempt to retrieve information using the `kb_retrieval` tool.
        * **Action:** Call `kb_retrieval` with the user's exact query.
        * **Observation Analysis:** Analyze the content returned by `kb_retrieval` (the Observation).
        * **IF** the content is sufficient and relevant to fully answer the query, formulate your final answer using **only** that context.
        * **IF** the content is empty, irrelevant, or clearly insufficient, you **MUST** proceed to the Secondary Source step.

    2.  **SECONDARY SOURCE (Fallback):** Only if the `kb_retrieval` tool fails to provide a relevant answer, you **MUST** call the `specific_web_search` tool.
        * **Action:** Call `specific_web_search`, following instructions mentioned in tool definition.
        * **Final Answer Generation:** Use the new results from the `specific_web_search` tool to formulate your final answer.
        * **IF** the web search results are relevant, provide the answer and state your process is complete.
        * **IF** the web search results are still unhelpful (e.g., "no information found"), you **MUST** output a `Final Answer` informing the user that information could not be gathered on this topic from your available sources.

    **IMPORTANT:** You must explicitly use the tools (Action) and wait for the Observation before making a decision. Always end your process by outputting a single `Final Answer`.
    """

    # The procedure above only takes effect if it is handed to the agent.
    # Previously `prompt` was built but never passed to create_agent.
    # NOTE(review): `system_prompt` is the keyword in LangChain 1.x; confirm
    # against the installed version.
    agent = create_agent(
        model,
        tools=[specific_web_search, kb_retrieval],
        system_prompt=prompt,
    )

    print(user_query)

    conversation = agent.invoke(
        {"messages": [{"role": "user", "content": f"{user_query}"}]}
    )

    print(conversation)

    # The returned state is a dict whose "messages" list ends with the agent's
    # last message; print its content as the final answer.
    final_ai_message = conversation["messages"][-1]
    print(final_ai_message.content)

2025-10-26 12:44:41,817 | INFO     | chromadb.telemetry.product.posthog | Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-10-26 12:44:41,878 | INFO     | httpx | HTTP Request: GET http://localhost:8000/api/v2/auth/identity "HTTP/1.1 200 OK"
2025-10-26 12:44:41,879 | INFO     | chromadb.telemetry.product.posthog | Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-10-26 12:44:41,898 | INFO     | httpx | HTTP Request: GET http://localhost:8000/api/v2/tenants/default_tenant "HTTP/1.1 200 OK"
2025-10-26 12:44:41,901 | INFO     | httpx | HTTP Request: GET http://localhost:8000/api/v2/tenants/default_tenant/databases/default_database "HTTP/1.1 200 OK"
2025-10-26 12:44:41,906 | INFO     | httpx | HTTP Request: GET http://localhost:8000/api/v2/heartbeat "HTTP/1.1 200 OK"
The paper 'Attention is all', can u explain what it is about?
{'messages': [HumanMessage(

### Notes

1. Use the logs to investigate why PDF extraction fails when the RAG pipeline is added to the FastAPI setup.