In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    temperature=0,
)

resp = llm.invoke("Reply with a single sentence: Azure test is working.")
print(resp.content)


The Azure test is working successfully.


In [3]:
pip install langchain-experimental


Collecting langchain-experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-community<0.4.0,>=0.3.0 (from langchain-experimental)
  Downloading langchain_community-0.3.31-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-core<0.4.0,>=0.3.28 (from langchain-experimental)
  Downloading langchain_core-0.3.79-py3-none-any.whl.metadata (3.2 kB)
INFO: pip is looking at multiple versions of langchain to determine which version is compatible with other requirements. This could take a while.
Collecting langchain<2.0.0,>=0.3.27 (from langchain-community<0.4.0,>=0.3.0->langchain-experimental)
  Downloading langchain-1.0.2-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain-1.0.1-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain-1.0.0-py3-none-any.whl.metadata (4.6 kB)
  Using cached langchain-0.3.27-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.9 (from langchain<2.0.0,>=0.3.27->lang

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-classic 1.0.0 requires langchain-core<2.0.0,>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.
langchain-classic 1.0.0 requires langchain-text-splitters<2.0.0,>=1.0.0, but you have langchain-text-splitters 0.3.11 which is incompatible.
langchain-openai 1.0.1 requires langchain-core<2.0.0,>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.
langgraph-prebuilt 1.0.2 requires langchain-core>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.


In [3]:
pip install -U langchain langchain-core langchain-community langchain-openai langchain-experimental pydantic python-dotenv


Collecting langchain
  Using cached langchain-1.0.3-py3-none-any.whl.metadata (4.7 kB)
Collecting langchain-core
  Using cached langchain_core-1.0.2-py3-none-any.whl.metadata (3.5 kB)
Collecting langchain-community
  Using cached langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-text-splitters<2.0.0,>=1.0.0 (from langchain-classic<2.0.0,>=1.0.0->langchain-community)
  Using cached langchain_text_splitters-1.0.0-py3-none-any.whl.metadata (2.6 kB)
INFO: pip is looking at multiple versions of langchain-experimental to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-experimental
  Using cached langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
  Downloading langchain_experimental-0.3.3-py3-none-any.whl.metadata (1.7 kB)
  Downloading langchain_experimental-0.3.2-py3-none-any.whl.metadata (1.7 kB)
  Downloading langchain_experimental-0.3.1.post1-py3-none-any.whl.metadata (1.7 kB)
INFO: 

In [2]:
pip install numexpr


Collecting numexpr
  Downloading numexpr-2.14.1-cp311-cp311-win_amd64.whl.metadata (9.3 kB)
Downloading numexpr-2.14.1-cp311-cp311-win_amd64.whl (160 kB)
Installing collected packages: numexpr
Successfully installed numexpr-2.14.1
Note: you may need to restart the kernel to use updated packages.


In [6]:
import re
import numexpr as ne

# Matches a sequence made of:
# - numbers: 12, 3.14, .75, 1e6, 3.2E-4
# - operators: + - * / %
# - parentheses: ( )
MATH_SEQ = re.compile(r"""
    (?:
        (?:\d+\.\d*|\.\d+|\d+)           # number: 12 | 3. | .75 | 12
        (?:[eE][+\-]?\d+)?               # optional exponent: e10 | E-3
      | [\+\-\*\/\%\(\)]                 # operators and parentheses
      | \s+                              # whitespace
    )+
""", re.VERBOSE)

def eval_math(expr: str) -> float:
    expr = expr.strip().replace("^", "**")
    # Safety: only numbers/operators/parens/whitespace allowed after our tokenizer
    if not re.fullmatch(r"[0-9\.\+\-\*\/\%\(\)\seE]+", expr):
        raise ValueError("Expression contains invalid characters.")
    result = ne.evaluate(expr)
    try:
        return float(result)
    except Exception:
        return float(result.item())

def run_math(query: str) -> dict:
    # normalize caret before extraction so “^” inside query becomes power
    q = query.replace("^", "**")
    # extract the LONGEST valid math sequence
    match_iter = list(MATH_SEQ.finditer(q))
    if not match_iter:
        raise ValueError("No math expression found.")
    expr = max((m.group(0) for m in match_iter), key=len).strip()
    value = eval_math(expr)
    return {"mode": "math", "expression": expr, "answer": value, "sources": []}

# Quick tests
print(run_math("What is (23 * 47) + 199?"))
print(run_math("Compute 19^3 + 47"))                 # should be 6866
print(run_math("CAGR approx: (210/120)**(1/3) - 1")) # ~0.200...


{'mode': 'math', 'expression': '(23 * 47) + 199', 'answer': 1280.0, 'sources': []}
{'mode': 'math', 'expression': '19**3 + 47', 'answer': 6906.0, 'sources': []}
{'mode': 'math', 'expression': '(210/120)**(1/3) - 1', 'answer': 0.20507113208761507, 'sources': []}


In [2]:
import os
from dotenv import load_dotenv
load_dotenv()


True

In [3]:
#Minimal search helper
from tavily import TavilyClient
from langchain_openai import AzureChatOpenAI

tavily = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

llm = AzureChatOpenAI(
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    temperature=0,
)

def run_search(query: str, max_results: int = 5) -> dict:
    """
    Searches the web and summarizes findings into 3 bullets with sources.
    """
    results = tavily.search(query=query, max_results=max_results)  # returns dict with 'results' list
    items = results.get("results", []) if isinstance(results, dict) else results

    # Keep top 3 sources for display
    sources = [it.get("url") for it in items[:3] if isinstance(it, dict) and it.get("url")]

    # Build a compact context for the LLM
    snippets = []
    for it in items[:3]:
        if not isinstance(it, dict):
            continue
        title = it.get("title", "")
        content = (it.get("content") or "")[:500]
        snippets.append(f"TITLE: {title}\nSNIPPET: {content}\nURL: {it.get('url','')}")

    prompt = (
        "You are a concise researcher. Using ONLY the information in the snippets below, "
        "write EXACTLY three bullet points summarizing the answer to the user's query. "
        "No speculation. No extra commentary.\n\n"
        f"USER QUERY:\n{query}\n\n"
        "SNIPPETS:\n" + "\n\n---\n\n".join(snippets)
    )

    summary = llm.invoke(prompt).content.strip()
    return {
        "mode": "search",
        "answer": summary,
        "sources": sources
    }

# Quick smoke tests (pick any topic)
print(run_search("Latest update on large language models in healthcare")["answer"])
print(run_search("Who won the 2024 Nobel Prize in Physics?")["sources"])


- Large language models (LLMs) are being developed, implemented, and evaluated in healthcare settings to improve patient care, accelerate medical research, and optimize healthcare system efficiency.  
- LLMs are being explored for tasks such as extraction, labeling, and interpretation in healthcare applications.  
- Research collections and studies are actively showcasing innovations and insights into the use of LLMs in healthcare.
['https://www.nobelprize.org/prizes/physics/2024/summary/', 'https://www.artsci.utoronto.ca/news/geoffrey-hinton-wins-2024-nobel-prize-physics', 'https://www.reddit.com/r/math/comments/1fyzz6t/the_nobel_prize_in_physics_2024_was_awarded_to/']


In [4]:
import re

def decide_mode(query: str) -> str:
    # simple heuristic: presence of math operators or keywords
    if re.search(r"[0-9]+\s*[\+\-\*\/\^\%]\s*[0-9]+", query) or re.search(r"\b(calc|compute|how many|what is)\b", query, re.IGNORECASE):
        return "math"
    return "search"


In [7]:
def answer_query(query: str) -> dict:
    mode = decide_mode(query)
    if mode == "math":
        return run_math(query)
    else:
        return run_search(query)

# Test both
print(answer_query("What is (23 * 47) + 199?"))
print(answer_query("Latest update on large language models in healthcare"))


{'mode': 'math', 'expression': '(23 * 47) + 199', 'answer': 1280.0, 'sources': []}
{'mode': 'search', 'answer': '- Large language models (LLMs) are being developed, implemented, and evaluated in healthcare settings to improve patient care, accelerate medical research, and optimize healthcare system efficiency.  \n- LLMs are being explored for tasks such as extraction, labeling, and interpretation in healthcare applications.  \n- Research collections and studies are showcasing innovations and insights into the use of LLMs in healthcare.', 'sources': ['https://www.cureus.com/collections/51-large-language-models-in-healthcare', 'https://www.mdpi.com/2673-7426/4/2/62', 'https://www.jmir.org/2025/1/e79379']}


In [9]:
from pydantic import BaseModel, Field, ValidationError, field_validator
from typing import List

class AgentResponse(BaseModel):
    mode: str = Field(description="math|search|error")
    answer: str
    sources: List[str] = Field(default_factory=list)

    @field_validator("mode")
    @classmethod
    def check_mode(cls, v):
        if v not in {"math", "search", "error"}:
            return "error"
        return v

def safe_return(payload: dict) -> dict:
    try:
        parsed = AgentResponse(**payload)
        return parsed.model_dump()
    except ValidationError as e:
        # Fallback so your client never breaks
        return {"mode": "error", "answer": f"Invalid payload: {e}", "sources": []}


In [10]:
def answer_query(query: str) -> dict:
    mode = decide_mode(query)
    out = run_math(query) if mode == "math" else run_search(query)
    return safe_return(out)

# test
print(answer_query("What is (23*47)+199?"))
print(answer_query("Latest updates on LLMs in healthcare"))


{'mode': 'error', 'answer': 'Invalid payload: 1 validation error for AgentResponse\nanswer\n  Input should be a valid string [type=string_type, input_value=1280.0, input_type=float]\n    For further information visit https://errors.pydantic.dev/2.12/v/string_type', 'sources': []}
{'mode': 'search', 'answer': '- LLMs have significant untapped potential in healthcare, as analyzed by Stanford.  \n- They could address critical gaps, such as helping patients interpret the severity of health issues.  \n- A study highlighted concerns about LLMs suggesting inferior treatments based on patient race.', 'sources': ['https://research.aimultiple.com/large-language-models-in-healthcare/', 'https://www.insideprecisionmedicine.com/topics/informatics/can-large-language-models-transform-healthcare/', 'https://www.forbes.com/sites/janicegassam/2025/10/27/new-healthcare-study-warns-about-the-hidden-dangers-of-ai-at-work/']}


In [11]:
import time

def with_retries(fn, *, tries=3, delay=0.6, backoff=1.8, exceptions=(Exception,), **kwargs):
    last_err = None
    for i in range(tries):
        try:
            return fn(**kwargs)
        except exceptions as e:
            last_err = e
            if i < tries - 1:
                time.sleep(delay)
                delay *= backoff
    raise last_err


In [12]:
def run_search(query: str, max_results: int = 5) -> dict:
    try:
        results = with_retries(
            tavily.search,
            tries=3,
            delay=0.6,
            backoff=1.8,
            exceptions=(Exception,),
            query=query,
            max_results=max_results,
            include_answer=False,
            include_raw_content=True,
            timeout=30,  # seconds
        )
        items = results.get("results", []) if isinstance(results, dict) else results
        # de-dup URLs and keep top 3
        seen, sources = set(), []
        for it in items:
            url = (it.get("url") or "").strip()
            if url and url not in seen:
                seen.add(url)
                sources.append(url)
            if len(sources) == 3:
                break

        snippets = []
        for it in items[:3]:
            if not isinstance(it, dict):
                continue
            title = (it.get("title") or "")[:120]
            content = (it.get("content") or "").replace("\n", " ")[:600]
            snippets.append(f"TITLE: {title}\nSNIPPET: {content}\nURL: {it.get('url','')}")

        prompt = (
            "You are a concise researcher. Using ONLY the information in the snippets below, "
            "write EXACTLY three bullet points answering the user's query. "
            "No speculation. If insufficient evidence, say so explicitly.\n\n"
            f"USER QUERY:\n{query}\n\nSNIPPETS:\n" + "\n\n---\n\n".join(snippets)
        )

        summary = llm.invoke(prompt).content.strip()
        return {"mode": "search", "answer": summary, "sources": sources}

    except Exception as e:
        return {"mode": "error", "answer": f"Search failed: {e}", "sources": []}


In [13]:
print(answer_query("Who won the 2024 Nobel Prize in Physics?"))
print(answer_query("Compare GPT style models in healthcare in 3 bullets with sources"))


{'mode': 'search', 'answer': '- The 2024 Nobel Prize in Physics was awarded jointly to John J. Hopfield and Geoffrey Hinton.  \n- The award recognized their foundational discoveries and inventions enabling machine learning with artificial neural networks.  \n- Geoffrey Hinton is a University Professor Emeritus at the University of Toronto.', 'sources': ['https://www.reuters.com/science/hopfield-hinton-win-2024-nobel-prize-physics-2024-10-08/', 'https://www.artsci.utoronto.ca/news/geoffrey-hinton-wins-2024-nobel-prize-physics', 'https://www.nobelprize.org/prizes/physics/2024/summary/']}
{'mode': 'search', 'answer': '- ChatGPT-3.5 and ChatGPT-4 were evaluated for their ability to address complex clinical and ethical dilemmas, with performance analyzed across emergency medicine, internal medicine, and ethical questions. (Source: https://pmc.ncbi.nlm.nih.gov/articles/PMC11240076/)\n\n- A comparison of ChatGPT 3.5, Claude 3.5 Sonnet, and Gem models showed varying strengths in clinical text 