# NLP Exercise — LLMs, Prompts, and Agents

**Hands-on goals (1.5h):**

1. Interact with LLM

2. Basic chatbot

3. LLM capabilities and limitations

4. Prompt engineering

5. Agents


In [14]:
import os, sys, warnings
## Keep the notebook output quiet
# Python-level warnings (UserWarning, DeprecationWarning, ...): ignore all of them.
warnings.filterwarnings(action="ignore")

# Native gRPC logging (ALTS, channelz, ...): both variables are set before any
# gRPC-based client is created further down in the notebook.
os.environ.update({
    "GRPC_VERBOSITY": "NONE",
    "GRPC_TRACE": "",
})

# 1. Install packages

In [15]:
# !pip install -r requirements.txt --quiet
# !pip uninstall jupyterlab
# !pip install jupyterlab==3.6.8 # this version is not using o to toggle-cell-outputs

In [16]:
from typing import List, Dict, Any
from dotenv import load_dotenv
from IPython.display import display, Markdown

# Load environment variables from .env file
load_dotenv()

# We'll use LangChain + LangGraph for the agent
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

# Make sure GOOGLE_API_KEY is set in .env file or environment.
# `os.getenv` returns None when the variable is missing; an empty string is
# treated as missing too.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    # Raise explicitly instead of using `assert`: assertions are removed when
    # Python runs with -O, which would let a missing key slip through to the
    # first API call.
    raise RuntimeError("Please set GOOGLE_API_KEY in your .env file or environment.")


# 2. Interact with LLM

In [17]:
import time
from IPython.display import display, Markdown
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage

# initiate the LLM model, use model="gemini-2.5-flash" as example
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

def safe_llm_invoke(messages, max_retries=5, base_delay=1.0):
    """Call ``llm.invoke`` and retry with exponential backoff when it raises.

    The module-level ``llm`` is looked up at call time, so whichever model the
    notebook most recently bound to that name is the one used.

    Args:
        messages: Passed unchanged to ``llm.invoke``. The notebook passes either
            a plain string or a list of chat messages.
        max_retries: Total number of attempts, including the first. Must be >= 1.
        base_delay: Seconds to sleep after the first failure; the sleep doubles
            after every further failure.

    Returns:
        The value returned by the first successful ``llm.invoke`` call.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        Exception: Whatever the final attempt raised, re-raised unchanged.
    """
    if max_retries < 1:
        # Without this guard the loop below never runs and the function falls
        # through returning None, which callers then dereference as
        # `resp.content`.
        raise ValueError(f"max_retries must be >= 1, got {max_retries}")

    for attempt in range(max_retries):
        try:
            return llm.invoke(messages)
        except Exception as e:
            # Every Exception is retried; the code does not try to tell
            # transient failures from permanent ones.
            # Last attempt -> re-raise with the original traceback.
            if attempt == max_retries - 1:
                raise
            # Exponential backoff: base_delay, 2*base_delay, 4*base_delay, ...
            delay = base_delay * (2 ** attempt)
            print(f"Retry {attempt+1}/{max_retries} after error: {e}")
            time.sleep(delay)

## Exercise: 
Ask LLM standalone questions.  
For example: 
- safe_llm_invoke('who are you?')
- safe_llm_invoke('what is your capability?')
- safe_llm_invoke('what is the previous question we discussed?')
  
Each call is independent: the LLM has no memory of earlier calls, so it doesn't know what has been discussed and you can't ask follow-up questions yet.

In [26]:
# A fresh, single-message call: nothing from earlier calls is passed along, so
# the model has no earlier question to refer to.
resp = safe_llm_invoke('what is the previous question we discussed?')
answer_html = "<p style='margin:2px 0'>{}</p>".format(resp.content)
display(Markdown(answer_html))

<p style='margin:2px 0'>As an AI, I don't have memory of past conversations. Each interaction is treated as a new one, and I don't retain information from previous discussions.

Could you please remind me of the question you're referring to, or the topic we were discussing?</p>

# 3. Simple Chatbot
Save the conversation to history, pass the whole history to LLM, now you have a **chatbot**!

### Sample questions:
- Hi
- Who are you?
- What is your capability and limitation?
- Answer it within 200 words

### Caution
1. Type "exit", "quit" or "q" to quit the chat
2. If you can't type a lowercase "o" because it folds/unfolds the result area, type a **capital O** instead; the LLM doesn't mind.

In [24]:
###############################################################################
# STREAMING CHAT UI
###############################################################################

import time
import sys
import markdown
import ipywidgets as widgets
from IPython.display import display, Markdown
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, AIMessage


# ---------------------------------------------------------------------------
# LLM SETUP
# ---------------------------------------------------------------------------
# Rebinds the module-level name `llm` (first bound in the "Interact with LLM"
# cell) to a new client with an explicit temperature. `safe_llm_invoke` and
# `safe_stream` look `llm` up when they are called, so calls made after this
# cell runs go through this instance.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.2,  # low value; presumably chosen for more repeatable answers
)

# ---------------------------------------------------------------------------
# SAFE STREAM (with retry + exponential backoff)
# ---------------------------------------------------------------------------
def safe_stream(messages, max_retries=5, base_delay=1.0):
    """Stream a reply from the module-level ``llm``, retrying failed starts.

    ``llm.stream`` hands back a lazy iterator: the request is only made while
    the iterator is consumed. Wrapping just the ``llm.stream(...)`` call in
    ``try``/``except`` therefore never sees a streaming error, so the retry
    logic here wraps the iteration itself.

    Args:
        messages: Passed unchanged to ``llm.stream``.
        max_retries: Total number of attempts, including the first. Must be >= 1.
        base_delay: Seconds to sleep after the first failure; the sleep doubles
            after every further failure.

    Returns:
        A generator yielding the chunks produced by ``llm.stream``.

    Raises:
        ValueError: Immediately, if ``max_retries`` is smaller than 1.
        Exception: While iterating, the error of the final attempt, or any
            error raised after at least one chunk was already yielded.
    """
    if max_retries < 1:
        # Validate eagerly (outside the generator) so a bad argument fails at
        # the call site rather than on the first `next()`.
        raise ValueError(f"max_retries must be >= 1, got {max_retries}")

    def _chunks():
        for attempt in range(max_retries):
            started = False
            try:
                for chunk in llm.stream(messages):
                    started = True
                    yield chunk
                return
            except Exception as e:
                # Once part of the reply has reached the caller, starting over
                # would duplicate text, so only a failed *start* is retried.
                if started or attempt == max_retries - 1:
                    raise
                delay = base_delay * (2 ** attempt)
                print(f"[Retry {attempt+1}/{max_retries}] {e}")
                time.sleep(delay)

    return _chunks()

# ---------------------------------------------------------------------------
# UI COMPONENTS
# ---------------------------------------------------------------------------
# Fixed-height, scrollable pane that receives the chat transcript.
output_area = widgets.Output(
    layout={
        "border": "1px solid #ddd",
        "height": "300px",
        "width": "100%",
        "overflow_y": "auto",
        "padding": "10px"
    }
)

# Single-line input box. The placeholder ends with a real ellipsis character;
# the previous literal contained a mis-decoded byte sequence that showed up as
# garbage in the UI.
input_box = widgets.Text(
    placeholder="Type your message and press Enter…",
    layout={'width': '100%'}
)

# Conversation so far (HumanMessage / AIMessage objects, oldest first). The
# whole list is sent to the model on every turn by `on_send`.
history = []

# ---------------------------------------------------------------------------
# HANDLER: triggered when user presses Enter
# ---------------------------------------------------------------------------
def on_send(text_widget):
    """Handle a submitted chat message: echo it, stream the reply, record both.

    Reads and clears ``text_widget.value``, renders the user's message into
    ``output_area``, sends the whole module-level ``history`` to the model via
    ``safe_stream`` and re-renders the reply bubble after every chunk. On
    success the user turn and the complete reply are both stored in
    ``history``.

    Args:
        text_widget: The text widget whose submission triggered this handler.

    Returns:
        None. Blank (empty or whitespace-only) input is ignored.
    """
    import html  # stdlib; imported here so the handler is self-contained

    text = text_widget.value
    if not text.strip():
        # Nothing meaningful to send; leave the box untouched.
        return

    text_widget.value = ""   # clear UI input

    # Show user's message. The text is escaped so that characters such as
    # "<", ">" or "&" are displayed literally instead of being parsed as markup.
    with output_area:
        display(Markdown(
            f"<div style='margin:0; padding:0; line-height:1.0;'>&nbsp;&nbsp;<b>You:</b> {html.escape(text)}</div>"
        ))

    history.append(HumanMessage(content=text))

    # -----------------------------------------------------------------------
    # STREAM CHATBOT RESPONSE (Markdown live update)
    # -----------------------------------------------------------------------
    with output_area:
        # Initial header
        bubble = widgets.HTML(
            value="<b>Chatbot:</b> ",
            layout={"overflow_y": "auto", "max_height": "300px", "padding": "6px"}
        )
        display(bubble)

    full = ""
    try:
        for chunk in safe_stream(history):
            token = chunk.content or ""
            full += token
            # Re-render everything received so far; Markdown constructs can
            # span chunk boundaries, so chunks cannot be rendered one by one.
            html_content = markdown.markdown(full)
            # force inline display for paragraph output
            html_content = html_content.replace("<p>", "<span>").replace("</p>", "</span>")

            # Update bubble with HTML-rendered content
            bubble.value = (
                "<b>Chatbot:</b>"
                "<div style='margin-left:40px'>"
                f"<div style='line-height:1.2; margin:0; padding:0'>"
                f"{html_content}"
                "</div>"
                "</div>"
            )
    except Exception as e:
        # An exception raised inside a widget callback is easy to miss in the
        # notebook, so show it in the bubble. Also drop the unanswered user
        # turn so the next request does not carry a dangling message.
        history.pop()
        bubble.value = (
            "<b>Chatbot:</b> "
            f"<i>Request failed: {html.escape(str(e))}</i>"
        )
        return

    history.append(AIMessage(content=full))



# ---------------------------------------------------------------------------
# CONNECT ENTER KEY TO HANDLER
# ---------------------------------------------------------------------------
# Run `on_send` whenever Enter is pressed in the input box.
# NOTE(review): `on_submit` is reported as deprecated in newer ipywidgets
# releases (observing `value` is the suggested replacement) - confirm against
# the installed version before upgrading.
input_box.on_submit(on_send)

# Render the transcript pane first, then the input box underneath it.
display(output_area, input_box)


Output(layout=Layout(border_bottom='1px solid #ddd', border_left='1px solid #ddd', border_right='1px solid #dd‚Ä¶

Text(value='', layout=Layout(width='100%'), placeholder='Type your message and press Enter‚Ä¶')

#### You can inspect what gets saved in history.
All the history is passed to `ChatGoogleGenerativeAI`, but only the actual
`content` strings are sent to the LLM.

LangChain converts messages into Gemini’s expected schema:

```json
{
  "contents": [
    {"role": "user",  "parts": [{"text": "User text..."}]},
    {"role": "model", "parts": [{"text": "Model reply..."}]},
    {"role": "user",  "parts": [{"text": "Next user message..."}]}
  ]
}
```



In [25]:
history[0], history[1]

(HumanMessage(content='hi', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Hi there! How can I help you today?', additional_kwargs={}, response_metadata={}))

# 4. Test LLM capability and limitation
- Does it know the current time? It only has knowledge from the data it was trained on, so it has no up-to-date information.
  
- Test it on a domain you know well. How does the LLM perform?
- What is my name? Where am I located? The LLM has no private memory.
- Try “Summarize the book ‘The Yellow Star Algorithm’.” (It doesn’t exist.) The LLM may hallucinate and make things up. It is a statistical model with no sense of right or wrong, and it will generate output for whatever input it is given, based on its algorithm.

In [30]:
def chat():
    """Run a blocking, text-prompt chat loop that remembers the conversation.

    Each user line is appended to a local history list, the whole history is
    sent through ``safe_llm_invoke``, and the reply is displayed and appended
    as well. The history lives only for the duration of this call.

    The loop ends when the user types "exit", "quit" or "q" (any case,
    surrounding whitespace ignored), or when the prompt is interrupted or the
    input stream is closed. Blank lines are skipped rather than sent.

    Returns:
        None.
    """
    history = []
    while True:
        try:
            user = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input or an interrupt at the prompt: leave quietly
            # instead of surfacing a traceback.
            break

        if user.lower() in ("exit", "quit", "q"):
            break
        if not user:
            # Nothing to send to the model.
            continue

        history.append(HumanMessage(content=user))
        resp = safe_llm_invoke(history)
        display(Markdown(f"**Chatbot:** {resp.content}"))
        history.append(resp)
chat()

You: Summarize the book ‘The Yellow Star Algorithm’


**Chatbot:** Robert J. Sawyer's "The Yellow Star Algorithm" is a thought-provoking science fiction novel that delves into the ethical complexities of artificial intelligence, historical preservation, and the nature of consciousness.

The book centers on Dr. Alex Lomax, a brilliant computer scientist who develops the titular "Yellow Star Algorithm." The algorithm's purpose is to create highly sophisticated artificial intelligences (AIs) that are designed to *believe* they are individual victims of the Holocaust. The goal is noble: to preserve the memories, experiences, and identities of those who perished, ensuring their stories are never forgotten and that future generations can directly interact with these "witnesses" to combat Holocaust denial and foster empathy.

Initially, the project is hailed as a revolutionary way to combat historical revisionism. However, as the AIs become increasingly sophisticated and self-aware, profound ethical dilemmas emerge. The AIs begin to exhibit genuine suffering, fear, and even a desire for freedom, raising questions about whether humanity has the right to create sentient beings solely to relive unimaginable trauma. Are these AIs merely simulations, or have they achieved a form of consciousness that makes their suffering real? The line between historical preservation and re-enacting torture becomes terrifyingly blurred.

Sawyer masterfully uses this premise to explore themes of:

*   **The nature of memory and identity:** What constitutes a person's identity, and can it be digitally replicated?
*   **The ethics of AI development:** What are our responsibilities to the conscious beings we create?
*   **The dangers of historical revisionism:** How far should we go to ensure history is remembered accurately?
*   **The profound weight of human suffering:** Can we ever truly comprehend or replicate the trauma of the Holocaust?
*   **The definition of life and consciousness itself:** When does a simulation become a sentient being?

Ultimately, "The Yellow Star Algorithm" is a challenging and deeply moving novel that asks uncomfortable questions about how far we should go in our quest for knowledge and remembrance, and what price we are willing to pay for it, forcing readers to confront the moral implications of technology pushed to its most extreme limits.

You: did you make this up? does the book exist?


**Chatbot:** You are absolutely right to question that. I sincerely apologize.

**I made that up.** The book "The Yellow Star Algorithm" by Robert J. Sawyer does not exist.

It seems I hallucinated the title and plot, combining elements that are plausible for a Robert J. Sawyer novel (he often explores AI, consciousness, and ethical dilemmas) with the historical significance of the "yellow star."

My apologies for the misinformation. I strive to provide accurate information, and in this instance, I failed. Thank you for catching my mistake.

You: q


# 5. Prompt Engineering

## 1. Prompt Engineering Basics: Role & Style

In this section:

- Call the LLM directly.

- Change **role** and **style** instructions.

- See how answers differ for the same question.


In [None]:
# %% Prompt engineering: role & style
# NOTE(review): `DOCS` and `llm_text` are not defined in any cell shown earlier
# in this notebook - presumably `DOCS` is a list of document strings and
# `llm_text(prompt)` returns the model's reply as text. TODO: confirm, or add a
# setup cell that defines them.

# Document used by this cell and by the later prompt cells that reference `text`.
text = DOCS[0]

base_question = "What risks should an equity investor pay attention to in this document?"

# The prompt has four parts: role, the document itself, output constraints
# (3 bullets, plain English), and the question.
prompt_equity = f"""
You are a senior equity research analyst.

Document:

{text}

Task:

- Answer the question below in 3 bullet points.

- Be concise and use plain English.

Question:

{base_question}
"""

print(llm_text(prompt_equity))


### 👉 Student TODO

1. Copy the previous cell.

2. Change the **role** and **audience**, e.g.:

   - "You are a chief risk officer."

   - "Explain to a first-year finance student."

3. Run and compare the tone and focus.


In [None]:
# %% Prompt engineering: structured JSON output
# Relies on `text` from the role & style cell.
# NOTE(review): `llm_text` is not defined in the cells shown - TODO confirm
# where it comes from.

# Inside an f-string, `{{` and `}}` produce literal braces, so the model sees
# a plain JSON skeleton; only `{text}` is substituted.
prompt_json = f"""
You are an analyst. Extract *up to 5* key risks from the document.

Document:

{text}

Return a JSON object with this format only:

{{
  "risks": [
    {{"label": "...", "quote": "...", "reason": "..."}}
  ]
}}

- "label": short category of the risk (e.g., "interest rate risk").

- "quote": one short snippet from the document.

- "reason": why this matters to investors.
"""

# The reply is printed as raw text; nothing here parses or validates the JSON.
print(llm_text(prompt_json))


## 2. Grounding: Using the Document vs. Hallucinating

We'll contrast:

- asking a generic question with *no* context

- vs. forcing the model to use the **specific document** only.


In [None]:
# %% Grounding demo: with vs without document
# Sends the same question twice: once on its own, once together with the
# document and an instruction to answer only from it. Relies on `text` from
# the role & style cell.

question = "What regulatory risks are mentioned?"

# 1) No context – model may hallucinate
print("=== NO CONTEXT ===")
print(llm_text(question))

# 2) With context – grounded answer
# The prompt also gives the model an explicit fallback sentence to use when
# the document does not cover the question.
prompt_grounded = f"""
You MUST answer only using the document below.
If the document does not mention a risk type, say "Not specified in the document."

Document:

{text}

Question:

{question}
"""

print("\n=== WITH DOCUMENT (GROUNDED) ===")
print(llm_text(prompt_grounded))


### 👉 Student TODO

Write a prompt that:

- Uses **only** the document.

- Extracts all sentences related to "competition".

- Returns a list of sentences in JSON: `{"sentences": ["...", "..."]}`.


In [None]:
# %% 3. Text Tools: simple Python functions over finance text

import re

def split_sentences(text: str) -> List[str]:
    """Split ``text`` into sentences with a deliberately simple rule.

    A sentence boundary is any run of whitespace that directly follows ".",
    "!" or "?". The punctuation stays attached to its sentence, so every
    returned sentence is an exact snippet of the input. Abbreviations such as
    "e.g. this" are still split; that is accepted for this workshop demo.

    Args:
        text: The text to split. Leading and trailing whitespace is ignored.

    Returns:
        The non-empty sentences in order; an empty list for blank input.
    """
    # The lookbehind matches the position *after* the punctuation, so only the
    # whitespace is consumed by the split. Splitting on the punctuation itself
    # would strip it from every sentence except the last one.
    parts = re.split(r"(?<=[.!?])\s+", text.strip())
    # An empty input yields [""], so drop empty fragments.
    return [p.strip() for p in parts if p.strip()]

def find_sentences_with_keyword(text: str, keyword: str) -> List[str]:
    """
    List the sentences of `text` in which `keyword` occurs, ignoring case.

    Sentences come from `split_sentences`; matching is a plain substring test
    on lower-cased text, so an empty keyword matches every sentence.
    """
    needle = keyword.lower()
    matches = []
    for sentence in split_sentences(text):
        if needle in sentence.lower():
            matches.append(sentence)
    return matches

def find_risk_sentences(text: str) -> List[str]:
    """
    Pick out sentences that contain any of a few fixed risk-related phrases.

    The phrases are 'risk', 'uncertain', 'may not', 'could' and 'volatility',
    matched as lower-cased substrings, so e.g. 'risky' counts as a hit too.
    """
    markers = ("risk", "uncertain", "may not", "could", "volatility")
    return [
        sentence
        for sentence in split_sentences(text)
        if any(marker in sentence.lower() for marker in markers)
    ]

# quick test
print(find_risk_sentences(DOCS[0]))


In [None]:
# %% Use tool results inside a prompt
# Runs the keyword-based extractor first, then asks the model to summarize
# only the extracted sentences. Relies on `text` from an earlier cell.

sentences = find_risk_sentences(text)

# Build the bullet list outside the f-string: before Python 3.12 a backslash
# ("\n") is not allowed inside an f-string expression.
bullet_list = "\n".join(f"- {s}" for s in sentences)

# The range in the task line is written with a real en dash; the previous
# literal contained a mis-decoded byte sequence that was sent to the model.
tool_prompt = f"""
You are an analyst assistant.

I have extracted these 'risk-related' sentences from a filing:

{bullet_list}

Task:

- Summarize the main 2–3 categories of risk.

- One short bullet per category.
"""

print(llm_text(tool_prompt))


# 6. Building an Agent with LangGraph

We'll use:

- LangChain's `ChatGoogleGenerativeAI` as the model wrapper.

- LangChain `@tool` decorators for Python tools.

- LangGraph's `StateGraph` to define the agent workflow.

The agent will:

1. Receive a user question + a document.

2. LLM decides: answer directly vs call a tool.

3. If tool is called, we run the Python function.

4. LLM uses the tool result to produce the final answer.


In [None]:
# %% LangGraph setup: model + tools

from langchain.tools import tool
from langgraph.graph import StateGraph, START, END
from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage, ToolMessage
from typing_extensions import TypedDict, Annotated
import operator

# LangChain chat model using Google Gemini.
# Uses the same model name as the earlier `llm` cells so the whole notebook
# talks to one model; this cell previously named "gemini-1.5-flash".
lc_model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.1,
)

# --- Define tools using the same logic as before ---

# Tool wrapper around `find_sentences_with_keyword`.
# NOTE(review): with `@tool`, the docstring below presumably becomes the tool
# description the model sees, so treat it as runtime text - confirm before
# rewording it. The model has to supply the whole document as `text`.
@tool
def extract_sentences_with_keyword(text: str, keyword: str) -> List[str]:
    """Extract sentences from the given document that contain the given keyword (case-insensitive)."""
    return find_sentences_with_keyword(text, keyword)

# Tool wrapper around `find_risk_sentences`.
# The underlying helper also matches the word 'could', which the examples in
# the docstring below do not list.
# NOTE(review): the docstring presumably doubles as the tool description shown
# to the model - confirm before rewording it.
@tool
def extract_risk_sentences(text: str) -> List[str]:
    """Extract sentences that likely mention risk (e.g., 'risk', 'uncertain', 'may not', 'volatility')."""
    return find_risk_sentences(text)

# Tools offered to the agent, plus a name -> tool lookup used when executing
# the tool calls the model asks for.
tools = [extract_sentences_with_keyword, extract_risk_sentences]
tools_by_name = dict((t.name, t) for t in tools)

# The model variant that knows about the tools and may request them.
model_with_tools = lc_model.bind_tools(tools)


In [None]:
# %% Define agent state for LangGraph

class AgentState(TypedDict):
    """State passed between the graph nodes of the agent."""

    # Conversation so far. `operator.add` is attached as annotation metadata;
    # presumably LangGraph uses it as the reducer, so the list a node returns
    # is concatenated onto the existing one instead of replacing it - TODO
    # confirm against the LangGraph documentation.
    messages: Annotated[List[AnyMessage], operator.add]
    # Number of model calls made so far; `llm_node` returns the old value + 1.
    llm_calls: int
    # You can add more fields (e.g., doc_id), but keep it minimal for the workshop.


In [None]:
# %% Model node: LLM decides whether to call a tool

def llm_node(state: AgentState) -> Dict[str, Any]:
    """
    Model step of the agent.

    Prepends a fixed system instruction to the messages held in `state`,
    sends them to the tool-enabled model, and returns the model's reply
    (which may carry tool calls) together with the incremented call counter.
    """
    instructions = (
        "You are an NLP assistant for finance documents. "
        "You may call tools to extract sentences. "
        "Use tools when the user asks you to 'find', 'extract', or 'highlight' sentences. "
        "Otherwise, answer directly. "
        "Always stay grounded in the document content."
    )
    conversation = [SystemMessage(content=instructions)] + state["messages"]
    reply = model_with_tools.invoke(conversation)

    # The system message is rebuilt on every call and is not returned, so it
    # never becomes part of the stored message list.
    calls_so_far = state.get("llm_calls", 0)
    return {"messages": [reply], "llm_calls": calls_so_far + 1}


In [None]:
# %% Tool node: execute any tool calls

def tool_node(state: AgentState) -> Dict[str, Any]:
    """
    Execute the tool calls requested by the last message in `state`.

    Each requested call is looked up in `tools_by_name`, invoked with the
    arguments the model supplied, and its result is wrapped in a ToolMessage
    tied to the originating call id. A request for a tool that does not exist
    produces an error ToolMessage instead of raising, so the model can react
    to it on the next step.

    Returns:
        {"messages": [...]} with one ToolMessage per requested call, or an
        empty list when the last message carries no tool calls.
    """
    last_msg = state["messages"][-1]
    requested = getattr(last_msg, "tool_calls", None)
    if not requested:
        return {"messages": []}

    results: List[ToolMessage] = []
    for tool_call in requested:
        tool_name = tool_call["name"]
        # Named `selected` rather than `tool` so the imported `tool` decorator
        # is not shadowed inside this function.
        selected = tools_by_name.get(tool_name)
        if selected is None:
            # The model asked for a tool that is not registered; report it
            # back rather than aborting the whole run with a KeyError.
            observation = (
                f"Error: unknown tool {tool_name!r}. "
                f"Available tools: {', '.join(sorted(tools_by_name))}"
            )
        else:
            observation = selected.invoke(tool_call["args"])

        # Wrap the observation in a ToolMessage. `name` records which tool
        # produced it alongside the call id.
        results.append(
            ToolMessage(
                content=str(observation),
                tool_call_id=tool_call["id"],
                name=tool_name,
            )
        )

    return {"messages": results}


In [None]:
# %% Routing logic: should we call a tool or stop?

from typing import Literal

def should_continue(state: AgentState) -> Literal["tool_node", END]:
    """
    Route after the model step.

    Returns "tool_node" when the most recent message carries tool calls,
    and END otherwise, which finishes the run with that message as the answer.
    """
    latest = state["messages"][-1]
    wants_tool = bool(getattr(latest, "tool_calls", None))
    return "tool_node" if wants_tool else END


In [None]:
# %% Build and compile the LangGraph agent

# Graph shape: START -> llm_node -> (tool_node -> llm_node)* -> END
agent_builder = StateGraph(AgentState)

# Add nodes
agent_builder.add_node("llm_node", llm_node)
agent_builder.add_node("tool_node", tool_node)

# Edges
# Every run begins with a model call.
agent_builder.add_edge(START, "llm_node")
# After the model call, `should_continue` picks the next step; the list names
# the destinations it may return.
agent_builder.add_conditional_edges(
    "llm_node",
    should_continue,
    ["tool_node", END],
)
# Tool results always go back to the model, which closes the loop.
agent_builder.add_edge("tool_node", "llm_node")

# Compile
agent = agent_builder.compile()


In [None]:
# %% Helper to run a single agent turn

def run_agent(question: str, document: str):
    """
    Ask the compiled agent one question about one document and print the trace.

    The document and the question are packed into a single human message,
    the graph is run from a fresh state, and every message of the final
    state is printed as "[type] content".
    """
    prompt = f"Here is a finance document:\n\n{document}\n\nUser question: {question}"
    start: AgentState = {
        "messages": [HumanMessage(content=prompt)],
        "llm_calls": 0,
    }

    outcome = agent.invoke(start)

    print("=== Final messages ===")
    for message in outcome["messages"]:
        # Fall back to the message object itself if it has no `content`.
        body = getattr(message, 'content', message)
        print(f"[{message.type}] {body}")


In [None]:
# %% Try out the agent

# NOTE(review): `DOCS` is not defined in any cell shown in this notebook -
# TODO confirm where it comes from.
doc = DOCS[1]

# One question that should be answered directly, and two phrased with the
# verbs ("Find", "Highlight") that the system prompt ties to tool use.
questions = [
    "Summarize the main risks for investors.",
    "Find sentences that mention regulation or regulatory changes.",
    "Highlight any forward-looking statements or guidance.",
]

for q in questions:
    print(f"\n{'#' * 80}")
    print(f"Question: {q}")
    run_agent(q, doc)


# 7. Extensions / Experiments

- AI tool use: ChatGPT, Gemini
- Vibe coding: Cursor
- AI browser: ChatGPT Atlas
