Step-by-Step Guide: Full Code to Build Agentic RAG Using Function Calling

In [None]:
# Make the OpenAI and Tavily API keys available as environment variables.
# A key that is already set in the environment is reused as-is; otherwise it is
# requested through a hidden prompt, so the secret is never typed into (and
# saved with) the notebook source.
import os
from getpass import getpass

for _key_name in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"{_key_name}: ")

In [None]:
!pip3 install -r requirements.txt

In [16]:
from langchain_community.document_loaders import PyPDFLoader

def load_pdf(pdf_path: str, page1_on_second: bool = True) -> str:
    """Read a PDF and return its text with a ``[PAGE n]`` marker before each page.

    Args:
        pdf_path: Path handed to ``PyPDFLoader``.
        page1_on_second: When True, the ``page`` value found in each page's
            metadata is used as the label unchanged; when False, the label is
            that value plus one. (Presumably True suits PDFs whose printed
            page 1 is the second physical page - confirm for your document.)

    Returns:
        One string in which every page is rendered as ``[PAGE <label>]``, a
        newline and the stripped page text; pages are separated by a blank
        line and the overall result is stripped.
    """

    def _render(document) -> str:
        # A missing "page" metadata entry falls back to 0.
        raw_index = document.metadata.get("page", 0)
        label = raw_index if page1_on_second else (raw_index + 1)
        return f"[PAGE {label}]\n{document.page_content.strip()}"

    documents = PyPDFLoader(pdf_path).load()
    return "\n\n".join(_render(document) for document in documents).strip()

In [17]:
import spacy
from typing import List
from langchain.text_splitter import TokenTextSplitter
import re

def chunk_data(
    text: str,
    chunk_size: int = 1500,
    chunk_overlap: int = 225
) -> List[str]:
    """Split page-tagged text into token-bounded chunks prefixed with their page.

    The input is expected to contain ``[PAGE n]`` markers, each followed by a
    newline (or by the end of the text). Every page is sentence-segmented with
    spaCy's sentencizer, the sentences are re-joined with newlines and then
    split by ``TokenTextSplitter`` using the ``cl100k_base`` encoding.

    Args:
        text: Page-tagged text. Anything before the first marker is ignored.
        chunk_size: ``chunk_size`` passed to ``TokenTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``TokenTextSplitter``.

    Returns:
        Chunks formatted as ``"[Page <label>] <chunk text>"`` in document
        order. Pages without any non-blank sentence contribute no chunks.
    """
    # A blank pipeline ships without components, so the sentencizer is added
    # explicitly to get sentence boundaries.
    nlp = spacy.blank("en")
    if "sentencizer" not in nlp.pipe_names:
        nlp.add_pipe("sentencizer")

    splitter = TokenTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        encoding_name="cl100k_base",
    )

    # The capturing group makes re.split return
    # [preamble, label, page_text, label, page_text, ...].
    # Accepting end-of-text (\Z) after the marker matters when the last page is
    # empty and the trailing newline has been stripped: otherwise that marker is
    # not recognised and leaks into the previous page's text.
    parts = re.split(r"\n?\[PAGE\s+(\d+)\](?:\n|\Z)", text)

    chunks: List[str] = []
    for page_label, page_text in zip(parts[1::2], parts[2::2]):
        sentences = [s.text.strip() for s in nlp(page_text).sents if s.text.strip()]
        if not sentences:
            # Nothing to embed for an empty page.
            continue
        for chunk in splitter.split_text("\n".join(sentences)):
            chunks.append(f"[Page {page_label}] {chunk}")

    return chunks

In [18]:
from qdrant_client.models import VectorParams, Distance
from qdrant_client.models import PointStruct
from qdrant_client import QdrantClient

# Local, in-process Qdrant instance (":memory:"), so nothing is persisted.
qdrant_client = QdrantClient(":memory:")
collection_name = "vector_store"

# Vectors are compared with cosine distance; size=1536 has to match the length
# of the embeddings that are upserted into this collection.
qdrant_client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

True

In [19]:
import os
from openai import OpenAI

# Shared OpenAI client; the key is read from the OPENAI_API_KEY environment variable.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def create_upload_embeddings(chunks, batch_size: int = 100, start_id: int = 0):
    """Embed text chunks with OpenAI and upsert them into the Qdrant collection.

    Chunks are embedded in batches so that a large document does not end up in
    one oversized embeddings request. Each chunk is stored as a point whose
    payload is ``{"text": <chunk>}``.

    Args:
        chunks: Text chunks to embed. An empty input is a no-op.
        batch_size: Maximum number of chunks sent per embeddings request.
        start_id: Id given to the first chunk; later chunks count up from it.
            Point ids are positional, so pass a non-overlapping ``start_id``
            when adding a second document, otherwise existing points with the
            same ids are overwritten by the upsert.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        RuntimeError: If the embeddings response does not contain exactly one
            embedding per submitted chunk.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    chunks = list(chunks)
    if not chunks:
        # Nothing to embed; avoids sending an empty input list to the API.
        return

    model_name = "text-embedding-3-small"

    for offset in range(0, len(chunks), batch_size):
        batch = chunks[offset:offset + batch_size]
        response = openai_client.embeddings.create(input=batch, model=model_name)
        embeddings = [record.embedding for record in response.data]

        # zip() would silently drop chunks on a mismatch, so check explicitly.
        if len(embeddings) != len(batch):
            raise RuntimeError(
                f"Expected {len(batch)} embeddings but received {len(embeddings)}"
            )

        points = [
            PointStruct(
                id=start_id + offset + position,
                vector=vec,
                payload={"text": text},
            )
            for position, (vec, text) in enumerate(zip(embeddings, batch))
        ]

        qdrant_client.upsert(
            collection_name=collection_name,
            wait=True,
            points=points,
        )

In [20]:
# Build the index: extract the page-tagged text of the PDF, split it into
# token-bounded chunks, then embed the chunks and upsert them into Qdrant.
text = load_pdf("Veo-3-Model-Card.pdf")
chunks = chunk_data(text)

create_upload_embeddings(chunks)

In [21]:
def retrieve_document(query: str, top_k: int = 5) -> str:
    """Look up the stored chunks closest to ``query`` and wrap them in a prompt.

    The query is embedded with ``text-embedding-3-small`` and used to search
    the Qdrant collection for up to ``top_k`` points.

    Args:
        query: Question to search for.
        top_k: Maximum number of points requested from Qdrant.

    Returns:
        A prompt string holding the retrieved texts (separated by ``---``
        lines) inside ``<context>`` tags and the query inside ``<query>``
        tags. When no point carries a ``"text"`` payload, the context is
        ``"no relevant context found"``.
    """
    model_name = "text-embedding-3-small"
    embedding_response = openai_client.embeddings.create(
        input=[query], model=model_name
    )
    query_embedding = embedding_response.data[0].embedding

    search_result = qdrant_client.query_points(collection_name, query_embedding, limit=top_k, with_payload=True)

    # Keep only hits that actually carry a "text" payload entry.
    retrieved_texts = []
    for point in search_result.points:
        payload = point.payload
        if payload and "text" in payload:
            retrieved_texts.append(payload["text"])

    if retrieved_texts:
        context = "\n\n---\n\n".join(retrieved_texts)
    else:
        context = "no relevant context found"

    return f"""Based on the following context:
        <context>
        {context}
        </context>

        Provide a relevant response to:

        <query>
        {query}
        </query>
        """.strip()

In [22]:
from tavily import TavilyClient

# Shared Tavily client; the key is read from the TAVILY_API_KEY environment variable.
tavily = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))

def web_search(query: str, num_results: int = 10):
    """Run a basic-depth Tavily search and return the hits plus their sources.

    Args:
        query: Search query.
        num_results: Passed to Tavily as ``max_results``.

    Returns:
        On success, a dict with ``"query"``, ``"results"`` (the ``"results"``
        list of the Tavily response, or ``[]`` if absent) and ``"sources"``
        (one ``{"title", "url"}`` dict per result, missing fields become
        ``""``). If anything raises, the same keys with empty lists plus an
        ``"error"`` message are returned instead of propagating the exception.
    """
    try:
        response = tavily.search(
            query=query,
            search_depth="basic",
            max_results=num_results,
            include_answer=False,
            include_raw_content=False,
            include_images=False,
        )
        hits = response.get("results", [])

        sources = []
        for hit in hits:
            sources.append({"title": hit.get("title", ""), "url": hit.get("url", "")})

        return {"query": query, "results": hits, "sources": sources}

    except Exception as exc:
        # Best effort: report the failure to the caller as data.
        failure = {"error": f"Search error: {exc}"}
        failure.update(query=query, results=[], sources=[])
        return failure

In [23]:
# Function tools offered to the model. Each "name" must match the Python
# function that implements the tool, and each schema is strict: exactly one
# required string argument, "query", and no additional properties.
tool_schemas = [
    {
        "type": "function",
        "name": "retrieve_document",
        "description": """
        Search the internal PDF file containing Veo Model Card.
        Use this tool when the user requests information about Veo 3
        that only appears in this document and
        for every answer you give include page-number citations in the form [page. X].
        """,
        "strict": True,
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Query to be searched in the PDF corpus.",
                },
            },
            "required": ["query"],
            "additionalProperties": False
        },
    },

    {
        "type": "function",
        "name": "web_search",
        "description": """Execute a web search to fetch up to date information. Synthesize a concise,
        self-contained answer from the content of the results of the visited pages.
        Fetch pages, extract text, and provide the best available result while citing 1-3 sources (title + URL).
        If sources conflict, surface the uncertainty and prefer the most recent evidence.
        """,
        "strict": True,
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Query to be searched on the web.",
                },
            },
            "required": ["query"],
            "additionalProperties": False
        },
    },
]

In [24]:
from datetime import datetime, timezone
import json

# Id of the most recent model response; passing it back as previous_response_id
# lets each request continue the same stored conversation.
prev_response_id = None

# Function-call outputs waiting to be sent back to the model on the next turn.
tool_results = []

# Explicit allow-list of callable tools. Looking names up here (instead of in
# globals()) ensures the model can only trigger functions that were actually
# advertised in tool_schemas.
available_tools = {
    "retrieve_document": retrieve_document,
    "web_search": web_search,
}

while True:
    if len(tool_results) == 0:
        # No pending tool output: ask the user for the next message.
        user_message = input("User: ")

        # Commands for exiting the chat.
        if user_message.strip().lower() in {"exit", "q"}:
            print("Exiting chat. Goodbye!")
            break

        # Ignore blank input rather than sending an empty message to the API.
        if not user_message.strip():
            continue

    else:
        # Send the collected tool outputs to the model instead of user text,
        # then reset the list for the next round of function calls.
        user_message = tool_results.copy()
        tool_results = []

    # Current UTC date, passed as an instruction to help the model decide
    # whether it needs up-to-date information.
    today_date = datetime.now(timezone.utc).date().isoformat()

    response = openai_client.responses.create(
        model = "gpt-5-mini",
        input = user_message,
        instructions=f"Current date is {today_date}.",
        tools = tool_schemas,
        previous_response_id=prev_response_id,
        text = {"verbosity": "low"},
        reasoning={
            "effort": "low",
        },
        store=True,
        )

    prev_response_id = response.id

    # Handle every item of the model's output.
    for output in response.output:

        if output.type == "reasoning":
            print("Assistant: ","Reasoning ....")

            for reasoning_summary in output.summary:
                # Print the summary text when available rather than the object repr.
                print("Assistant: ", getattr(reasoning_summary, "text", reasoning_summary))

        elif output.type == "message":
            for item in output.content:
                # NOTE(review): refusal parts are expected to expose `.refusal`
                # instead of `.text` - confirm against the SDK version in use.
                print("Assistant: ", getattr(item, "text", None) or getattr(item, "refusal", ""))

        elif output.type == "function_call":
            tool = available_tools.get(output.name)

            if tool is None:
                tool_output = json.dumps({"error": f"Unknown tool: {output.name}"})
            else:
                try:
                    args = json.loads(output.arguments)
                    tool_output = json.dumps(tool(**args))
                except Exception as exc:
                    # Report the failure to the model instead of crashing: the
                    # stored conversation already contains this function call,
                    # so it should still receive a matching output.
                    tool_output = json.dumps({"error": f"{type(exc).__name__}: {exc}"})

            # Pair the result with the call id so the model can match it up.
            tool_results.append(
                {
                    "type": "function_call_output",
                    "call_id": output.call_id,
                    "output": tool_output
                }
            )

Assistant:  Reasoning ....
Assistant:  Reasoning ....
Assistant:  Veo 3 was trained on Google’s Tensor Processing Units (TPUs), including large TPU Pods and distributed training across multiple TPU devices [page. 2].
Assistant:  Reasoning ....
Assistant:  Reasoning ....
Assistant:  London (as of 2025-09-29 12:03 local): Sunny, 15°C (59°F); wind N ~5 km/h; humidity 77% — source: WeatherAPI (https://www.weatherapi.com/).
Assistant:  Reasoning ....
Assistant:  7/5 = 1.4
Exiting chat. Goodbye!
