# GenAI/RAG in Python 2025

## Session 05. The Foundations of Agentic AI

- How an LLM can propose a plan and write follow-up prompts to itself ("self-prompting").
- How to choose tools: either rely on the existing RAG (vectorized Italian recipes), or augment with Google Search when the RAG context looks weak or too narrow.
- How to log every step (intent → tool decisions → results → final answer) for transparent inspection.

In [None]:
import os
import requests
from datetime import datetime
import json
import ast
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine
from openai import OpenAI

### 1. Programmable Search Engine (PSE)

#### Create a Programmable Search Engine (PSE)

- 1. Go to Google’s Programmable Search Engine and create a search engine. For a general web search agent, configure it to search the entire web (not just selected sites); the engine gives you a Search engine ID (cx). 

- 2. Enable the Custom Search JSON API in your Google Cloud project and create an API key (standard key is fine). 

- 3. Quota & pricing: Typical baseline has been ~100 free queries/day, then $5 per 1,000 queries (and a site-restricted variant without daily limit).

Store credentials as env vars:

In [None]:
# Register the Google Custom Search credentials as environment variables.
# Replace both placeholder strings with your own values before running this cell.
os.environ.update(
    {
        "GOOGLE_CSE_API_KEY": "your API key here",
        "GOOGLE_CSE_CX": "your PSE cx id here",
    }
)

#### Minimal Google Search client

In [None]:
# Credentials are read once from the environment; a missing variable raises KeyError here.
GOOGLE_CSE_API_KEY = os.environ["GOOGLE_CSE_API_KEY"]
GOOGLE_CSE_CX = os.environ["GOOGLE_CSE_CX"]

def google_search(query: str, num: int = 5):
    """
    Query Google's Custom Search JSON API.

    `num` is clamped into the 1..10 range accepted by the API. Each hit is
    reduced to a dict with the keys `title`, `link` and `snippet` (a key the
    API did not return maps to None). An HTTP error status raises through
    `raise_for_status()`; a response without `items` yields an empty list.
    """
    result_count = max(num, 1)
    if result_count > 10:  # API caps num<=10
        result_count = 10

    response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={
            "key": GOOGLE_CSE_API_KEY,
            "cx": GOOGLE_CSE_CX,
            "q": query,
            "num": result_count,
        },
        timeout=20,
    )
    response.raise_for_status()

    # "items" may be absent or null when nothing matched.
    hits = response.json().get("items", []) or []
    wanted_fields = ("title", "link", "snippet")
    return [{field: hit.get(field) for field in wanted_fields} for hit in hits]


#### Test Google Search client

In [None]:
# Smoke-test the search client with a sample query, asking for three results.
q = "ragù alla napoletana"
receipts = google_search(query = q, num = 3)

In [None]:
# Show the search results (a list of title/link/snippet dicts) as the cell output.
receipts

### 2. Embedding Model

In [None]:
# Name of the OpenAI embedding model used to embed the user query.
# NOTE(review): presumably this must be the same model that produced the stored
# recipe embeddings, otherwise similarities are not comparable — confirm.
model_name = "text-embedding-3-small"  

### 3. OpenAI Client

In [None]:
# Read the API key from the environment; this cell does not set it.
# os.getenv returns None when OPENAI_API_KEY is not defined.
api_key = os.getenv("OPENAI_API_KEY")

# Instantiate the OpenAI client with that key; it is reused for both
# embeddings and chat completions below.
client = OpenAI(api_key=api_key)

#### 3.1 Google Search Tool for our OpenAI Client

We’ll expose google_search as a tool so the model can request it only when needed.

In [None]:
# Tool catalogue handed to the chat model: a single function tool whose
# JSON-schema parameters mirror google_search(query, num).
# Only `query` is required; `num` is optional and bounded to 1..10.
tools = [
    {
        "type": "function",
        "function": {
            "name": "google_search",
            "description": "Search the web for Italian cuisine info when RAG is insufficient.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query to send to Google"},
                    "num":   {"type": "integer", "description": "How many results (1..10)", "minimum": 1, "maximum": 10}
                },
                "required": ["query"]
            },
        }
    }
]

#### 3.2 Google Search Tool dispatcher

In [None]:
def tool_dispatch(tool_call):
    """
    Run the tool described by `tool_call` and return its result.

    Parameters
    ----------
    tool_call : list
        A one-element list whose item has the same nesting as an entry of
        `tools`. The call arguments are read from
        `tool_call[0]["function"]["parameters"]["properties"]`:
        `query` is required, `num` is optional.

    Returns
    -------
    list
        Whatever `google_search` returns for the given arguments.

    Raises
    ------
    ValueError
        If the tool name is anything other than "google_search".
    """
    spec = tool_call[0]["function"]
    name = spec["name"]
    if name != "google_search":
        # Use a local name in the f-string: reusing double quotes inside a
        # double-quoted f-string is a SyntaxError before Python 3.12.
        raise ValueError(f"Unknown tool {name}")

    args = spec["parameters"]["properties"]
    # `num` is optional in the tool schema, so fall back to 5 when it is absent.
    return google_search(args["query"], args.get("num", 5))

### 4. Load Embeddings: Italian Recipes

In [None]:
# Load the recipes table from CSV; the path is relative to the working directory.
embeddings = pd.read_csv("_data/italian_recipes_embedded.csv")

In [None]:
# Preview the first three rows.
embeddings.head(3)

In [None]:
# Inspect the Python type of the first value in the "embedding" column as loaded.
type(embeddings["embedding"][0])

In [None]:
# --- Parse string embeddings into numpy arrays ---
embeddings['embedding_vector'] = embeddings['embedding'].apply(
    lambda x: np.array(ast.literal_eval(x), dtype=np.float32)
)
embeddings.head(3)

In [None]:
# Inspect the Python type of the first parsed embedding vector.
type(embeddings["embedding_vector"][0])

#### Similarity Search

In [None]:
def rag_retrieve(user_query: np.ndarray, 
                 top_k: int = 5, 
                 df: pd.DataFrame = None) -> pd.DataFrame:
    """
    Retrieve the top-k most similar recipes from an in-memory embeddings
    DataFrame using cosine similarity (1 - cosine distance).

    The input DataFrame is left untouched: scores are attached to a copy.

    Parameters
    ----------
    user_query : np.ndarray
        Embedding vector of the user query (any 1-D sequence of floats that
        `scipy.spatial.distance.cosine` accepts).
    top_k : int, default=5
        Number of items to retrieve.
    df : pd.DataFrame
        DataFrame containing: 'title' (str), 'receipt' (str),
        'embedding_vector' (np.ndarray).

    Returns
    -------
    pd.DataFrame
        Columns: ['id', 'title', 'receipt', 'similarity'], sorted by
        descending similarity. 'id' is the 0-based rank position in the
        result, not the row label of `df`.

    Raises
    ------
    ValueError
        If `df` is None or empty.
    """
    if df is None or df.empty:
        raise ValueError("You must pass a DataFrame with embedded receipts.")

    def _similarity(emb):
        # Rows without a usable vector get -1 so they sort to the bottom.
        if isinstance(emb, np.ndarray) and emb.size > 0:
            return 1 - cosine(user_query, emb)  # cosine similarity
        return -1

    # Only the embedding column is needed, so iterate it directly.
    scores = [_similarity(emb) for emb in df["embedding_vector"]]

    # assign() returns a new frame, so the caller's DataFrame does not gain
    # (or have overwritten) a "similarity" column as a side effect.
    ranked = (
        df.assign(similarity=scores)
        .sort_values("similarity", ascending=False)
        .head(top_k)
        .reset_index(drop=True)
    )

    # Create consistent SQL-like view
    return pd.DataFrame({
        "id": ranked.index,
        "title": ranked["title"],
        "receipt": ranked["receipt"],
        "similarity": ranked["similarity"]
    })

#### Test Similarity Search

In [None]:
# Example user request; this text is embedded below and used as the retrieval query.
user_prompt = """
Hi! I’d like to cook a good Italian dish for lunch! I have potatoes, carrots, 
rosemary, and pork. Can you recommend a recipe and help me a bit with 
preparation tips?
"""

# Embed the user request with the selected embedding model.
resp = client.embeddings.create(        
        model=model_name,                   
        input=[user_prompt]                        
    )
# One input was sent, so the embedding is read from the first data item.
user_query = resp.data[0].embedding

# Retrieve and print the five most similar recipes.
prompt_recipes = rag_retrieve(user_query, top_k=5, df=embeddings)
print(prompt_recipes)

### 5. AI Agent

A tiny agent that:

1) Plans & decides whether to use Google Search (tool calling),
2) Always uses internal RAG first,
3) Optionally augments with web results,
4) Writes a final self-prompt and executes it,
5) Logs every step.

#### 5.0 Log

In [None]:
log = []  # each entry: {"ts": str, "event": str, "data": any}

def _log(event, data):
    """
    Append one timestamped entry to the module-level `log` list.

    Parameters
    ----------
    event : str
        Short label for the step being recorded (e.g. "rag.retrieve").
    data : any
        Payload to store with the entry; it is kept as-is, not copied.

    The entry's "ts" is an ISO-8601 string in UTC with an explicit "+00:00"
    offset.
    """
    # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
    # value; build a timezone-aware UTC timestamp instead.
    from datetime import timezone

    log.append({"ts": datetime.now(timezone.utc).isoformat(), "event": event, "data": data})

#### 5.1 Retrieval

In [None]:
# Step 1 of the agent: always query the internal RAG store first.
top_k = 5
rag = rag_retrieve(user_query = user_query, top_k = top_k, df = embeddings)
# Log plain Python lists rather than the pandas Series themselves, so the
# entry is a self-contained snapshot that prints in full and can be serialized.
_log("rag.retrieve", {"top_k": top_k,
                      "title": rag["title"].tolist(),
                      "score": rag["similarity"].tolist()})
display(rag)

In [None]:
# Inspect the log entries recorded so far.
log

#### 5.2 Execution Plan

In [None]:
# Ask the model to PLAN: Should we call Google Search?
instruction = (
    "You are a planning assistant. Decide if web search is needed to improve answer quality "
    "for the provided user question."
    "Return JSON with fields: need_search (true/false), search_query (string), rationale (string), "
    "and then propose a short step-by-step plan for how you'll compose the final answer. "
    "The RAG context needs to encompass A. five (5) recipes in order to be accepted as"
    "strong and specific and B. all five (5) recipes must encompass "
    "exactly the ingredients that are mentioned in the user questions."
)
user_plan = (instruction
    + f"### USER QUESTION ###: {user_prompt}\n\n" 
    + f"### RAG CONTEXT ###:\n{rag['receipt']}"
)

In [None]:
# Inspect the assembled planning prompt.
user_plan

Produce execution plan:

In [None]:
# Send the planning prompt as a single user message; temperature=0 keeps the
# output as repeatable as the API allows.
# NOTE(review): because `tools` is passed, the model may answer with a tool
# call instead of text, in which case `message.content` may be None — confirm
# before parsing the reply as JSON.
plan_resp = client.chat.completions.create(
    model="gpt-4",
    messages= [{"role": "user", "content": user_plan}],
    tools=tools,  # tools available if the model wants to call them later
    temperature=0,
)
# Keep the raw reply text and record it in the log.
plan_text = plan_resp.choices[0].message.content
_log("plan.draft", plan_text)

In [None]:
# Parse the planner's reply directly from `plan_text` rather than from a
# hard-coded position in `log`, which shifts whenever cells are re-run.
plan = json.loads(plan_text)
print(plan["need_search"])
print(plan["search_query"])
print(plan["rationale"])
# The prompt does not name "plan" as a required JSON field, so read it
# defensively instead of raising KeyError when it is missing.
print(plan.get("plan"))

#### 5.3 Search, if necessary:

Prepare tool:

In [None]:
# `tools.copy()` is shallow: the nested dicts would still be shared, so filling
# in the arguments below would overwrite the JSON schema inside `tools` itself
# and send a corrupted tool definition on any later API call.
import copy

tc = copy.deepcopy(tools)
# The dispatcher reads the call arguments from the "properties" slot.
tc[0]["function"]["parameters"]["properties"]["query"] = plan["search_query"]
tc[0]["function"]["parameters"]["properties"]["num"] = 10
tc

In [None]:
tool_outputs = []
# Always define `result`, so the following cells still run (with no web
# results) when the planner decided that no search is needed.
result = []
if plan["need_search"]:
    result = tool_dispatch(tc)
    tool_outputs.append({"name": tc[0]["function"]["name"], 
                         "args": tc[0]["function"]["parameters"]["properties"], 
                         "result": result})
    _log("tools.executed", tool_outputs)

In [None]:
# Show the raw search results from the search step as the cell output.
result

In [None]:
# Build the web context from `tool_outputs`, which always exists, so this cell
# yields an empty string instead of failing when no search was executed.
web_context = "".join(
    f"Title: {item['title']}\n"
    f"Link: {item['link']}\n"
    f"Snippet: {item['snippet']}\n\n"
    for output in tool_outputs
    for item in output["result"]
)
print(web_context)

In [None]:
# Concatenate the retrieved recipe texts, separated by blank lines, into one context string.
rag_context = "\n\n".join(map(str, rag["receipt"]))
print(rag_context)

### 6. Self-Prompting

In [None]:
# Ask the model to SELF-PROMPT: compose the prompt that a second LLM call will execute.
# Adjacent string literals are concatenated verbatim, so each fragment carries
# its own separating space. The placeholder names must match the keyword
# arguments used when the template is filled in: user_prompt, rag_context, web_context.
instruction = (
    "You are a prompt engineer. Compose the best possible prompt for "
    "a Large Language Model (LLM) "
    "to answer the provided user question in the ### USER QUESTION ### section."
    " The ### RAG CONTEXT ### section provides results obtained from the "
    "Retrieval Augmented Framework with similarity search in a vector database. "
    " The RAG CONTEXT resuls might be augmented by Google Search results in the "
    " ### WEB CONTEXT ### section."
    " Do not attempt to answer the user qestion; return only the prompt text. "
    "Be systematic, be detailed, introduce sections, and precise instructions for an LLM " 
    "on how to answer the user question." 
    " Assume that you have strings named user_prompt, web_context and rag_context in Python "
    " encompassing the user question and everything that is found under ### RAG CONTEXT ### " 
    "and ### WEB CONTEXT ###"
    "; produce your prompt as a Python string using user_prompt, web_context and rag_context as variables in curly brackets." 
    " Do not produce a prompt that asks the user for any interaction: explain the user question "
    " to an LLM, provide the context, and instruct it how to help the user prepare a meal."
    " Remember: you are not about to answer to the user question. Your task is to produce a "
    " prompt for another LLM to answer the user question."
    " Remember to use web_context and rag_context as variables in curly brackets in your final "
    "response - a Python string."
    " You must liteary use the variable rag_context and the variable web_context in your output "
    " ; place the variables in curly brackets in your output string."
    " Make no introductions, just return the prompt as a string, with variables in curly brackets in it."
    " Do not attempt to answer the user question: your task is to instruct another LLM on how "
    "to answer to it. Instruct the LLM to point towards the web resources (URLs) provided to it " 
    "in the web_context section."
    " Begin your prompt to another LLM with: The user is asking"
)
# NOTE: this reuses the name `user_plan` from the planning step.
user_plan = (instruction
    + "\n\n"
    + f"### USER QUESTION ###: {user_prompt}\n\n" 
    + f"### RAG CONTEXT ###:\n{rag_context}\n\n" 
    + f"### WEB CONTEXT ###:\n{web_context}" + 
    """
    ### OUTPUT FORMAT ### 
    - A plain string 
    - that is an instruction to another LLM and the answer to the user question,
    - **always** using the variables named user_prompt, web_context, rag_context which **must be sorrounded by curly brackets** in your output string, 
    - **always** beginning with the words: The user is asking"
    """
)

In [None]:
# Ask the model to write the final prompt template (no tools are offered here).
prompt_resp = client.chat.completions.create(
    model="gpt-4",
    messages= [{"role": "user", "content": user_plan}],
    temperature=0,
)
# The reply text is the template; its placeholders are filled in a later step.
final_prompt = prompt_resp.choices[0].message.content
_log("final_prompt", final_prompt)
print(final_prompt)

In [None]:
# Fill the three placeholders by literal replacement instead of str.format():
# the template is model-generated, and any other brace in it (or an unknown
# placeholder name) would make str.format() raise.
final_prompt = (
    final_prompt
    .replace("{user_prompt}", user_prompt)
    .replace("{rag_context}", rag_context)
    .replace("{web_context}", web_context)
)
print(final_prompt)

#### Execute the final prompt

In [None]:
# Execute the filled-in prompt as a single user message.
final_resp = client.chat.completions.create(
    model="gpt-4",
    messages= [{"role": "user", "content": final_prompt}],
    temperature=0,
)
# Record the final answer in the log and print it.
output = final_resp.choices[0].message.content
_log("output", output)
print(output)