In [1]:
# TASK-7.1: Import core framework classes
# Ensure the local `src` directory is on sys.path so notebook kernels can import
# the package. The directory is located by walking up from the current working
# directory (the same strategy the SimpleScorer demo cell uses), so the notebook
# works whether the kernel starts in the repo root or in a sub-directory.
import sys
from pathlib import Path

_cwd = Path.cwd().resolve()
src = next(
    (
        (parent / "src").resolve()
        for parent in (_cwd, *_cwd.parents)
        if (parent / "src" / "research_agent_framework").exists()
    ),
    # Fall back to the historical relative location when the package directory
    # cannot be found, so behavior is unchanged in that case.
    Path("../src").resolve(),
)
# Insert only once so re-running the cell never duplicates the entry.
if str(src) not in sys.path:
    sys.path.insert(0, str(src))

from research_agent_framework.llm.client import MockLLM, LLMConfig
from research_agent_framework.adapters.search.mock_search import MockSearchAdapter

# Expose demo imports for following cells
# NOTE(review): `__all__` only influences `from <module> import *`; inside a
# notebook it does not change which names later cells can see, so it acts purely
# as a readable list of the names imported above.
__all__ = ["MockLLM", "LLMConfig", "MockSearchAdapter"]


In [2]:
# TASK-7.1: Bootstrap environment
# Use the project's bootstrap to configure logging and settings for demos.
# Assumes the package is already importable (the first cell puts `src` on sys.path).
# NOTE(review): a later cell calls `bootstrap(force=False)` again — presumably a
# no-op once configured; confirm against the bootstrap implementation.
from research_agent_framework.bootstrap import bootstrap
bootstrap()


In [3]:
# TASK-7.1: sys.path fix for local imports (kept minimal)
# Idempotent: the entry is added only when it is missing, so re-running this
# cell never duplicates it. The path is resolved against the kernel's current
# working directory.
import sys
from pathlib import Path
src = (Path("..") / "src").resolve()
if not any(entry == str(src) for entry in sys.path):
    # Prepend so the local checkout is searched before anything else on the path.
    sys.path[:0] = [str(src)]


# Consolidated Research Agent Demo

This notebook demonstrates the use of the `research_agent_framework` package. Each section will be updated as new features are implemented.

---


# Consolidated Research Agent Demo Notebook

This notebook demonstrates the use of the `research_agent_framework` package and its components. Each section is marked for traceability to the corresponding PRD task.

---


In [4]:
# TASK-7.1: Bootstrap environment (safe to call multiple times in a notebook)
# `force=False` is passed explicitly — presumably this keeps whatever the earlier
# `bootstrap()` call already configured; TODO confirm against the function's signature.
from research_agent_framework.bootstrap import bootstrap
bootstrap(force=False)


In [5]:
# TASK-2.3: models demo
from research_agent_framework.config import Settings, get_settings
from research_agent_framework.models import Scope, ResearchTask, EvalResult, SerpResult
from pydantic import TypeAdapter, HttpUrl
from assertpy import assert_that

# Construct model instances
# Each model is built with keyword arguments only; `eval_result` reuses the id
# of `task` so the two records refer to the same task.
scope = Scope(topic='Coffee Shops', description='Find coffee shops in SF', constraints=['no paid sources'])
task = ResearchTask(id='t-001', query='best coffee in soma')
eval_result = EvalResult(task_id=task.id, success=True, score=0.95, feedback='Looks good')
# Use TypeAdapter to validate/construct an HttpUrl (pydantic v2)
url_adapter = TypeAdapter(HttpUrl)
validated_url = url_adapter.validate_python('https://example.com')
serp = SerpResult(title='Cafe Example', url=validated_url, snippet='Great coffee', raw={'id': 1})

# Example asserts using assertpy
# These double as inline sanity checks: a failing assertion stops the cell.
assert_that(scope.topic).is_equal_to('Coffee Shops')
assert_that(scope.constraints).contains('no paid sources')
assert_that(task.id).is_equal_to('t-001')
assert_that(eval_result.success).is_true()
assert_that(serp.url).is_instance_of(HttpUrl)

from rich.console import Console
from typing import cast
from research_agent_framework.logging import LoggingProtocol
# cast() only informs static type checkers; it performs no runtime conversion.
s = get_settings()
c = cast(Console, s.console)
# NOTE(review): `logger` is assigned here but not used in this cell.
logger = cast(LoggingProtocol, s.logger)

assert_that(c).is_not_none()

# Use the bootstrap-provided console/logger for output
# The `f"{name=}"` form prints the variable name followed by its repr.
c.print(f"{scope=}")
c.print(f"{task=}")
c.print(f"{eval_result=}")
c.print(f"{serp=}")


In [6]:
# TASK-3: renderer example
from pydantic import TypeAdapter, HttpUrl
from research_agent_framework.prompts import renderer
from research_agent_framework.models import (
    SerpResult, Location, Address, Coordinates, Rating, PriceLevel, ProviderMeta,
)
from research_agent_framework.config import get_settings
from typing import cast
from rich.console import Console
from research_agent_framework.logging import LoggingProtocol
# Console and logger are taken from the settings object; cast() is a typing aid
# only and does nothing at runtime.
settings = get_settings()
c = cast(Console, settings.console)
logger = cast(LoggingProtocol, settings.logger)

# Build nested models (full demo)
# Leaf models are built first and then composed into `Location` and `SerpResult`.
coords = Coordinates(lat=37.7749, lon=-122.4194)
addr = Address(street='123 Example St', city='San Francisco', region='CA', postal_code='94103', country='US')
loc = Location(name='Cafe Nested', address=addr, coords=coords)
rating = Rating(score=4.6, count=128)
provider = ProviderMeta(provider='mock', id=42, raw={'provider_field': 'value'})
url_adapter = TypeAdapter(HttpUrl)
u = url_adapter.validate_python('https://example.com/nested')
serp_model = SerpResult(title='Nested Cafe', url=u, snippet='A nested example', raw={'id': 'nested-1'}, location=loc, rating=rating, price_level=PriceLevel.MODERATE, categories=['cafe','coffee'], provider_meta=provider)

# Render templates
# The date is a fixed literal, so the context passed to the templates is the same on every run.
clarify_context = {"messages": "User: What are the best coffee shops in SF?", "date": "2025-09-05"}
clarify_rendered = renderer.render_template("clarify_with_user_instructions.j2", clarify_context)
agent_context = {"date": "2025-09-05"}
agent_rendered = renderer.render_template("research_agent_prompt.j2", agent_context)

# Output using bootstrap-provided console (as `c`) and logger
# NOTE(review): the logger call uses %-style lazy arguments — assumes the
# LoggingProtocol implementation interpolates them like stdlib logging; confirm.
logger.info('research_agent_prompt.j2 output:\n%s', agent_rendered)
c.print('clarify_with_user_instructions.j2 output:\n')
c.print(clarify_rendered)
c.print('Nested SerpResult:')
c.print(serp_model.model_dump())


In [7]:
# TASK-4.3: Import and use MockLLM and MockSearchAdapter for deterministic demo
from research_agent_framework.llm.client import MockLLM, LLMConfig
from research_agent_framework.adapters.search.mock_search import MockSearchAdapter
from research_agent_framework.adapters.search.schema import SerpReply
from research_agent_framework.config import get_settings
from typing import cast, List, Union, Sequence
from rich.console import Console
from research_agent_framework.logging import LoggingProtocol
from research_agent_framework.models import SerpResult
# Console/logger come from the object returned by get_settings(); cast() only
# informs static type checkers and performs no runtime conversion.
s = get_settings()
c = cast(Console, s.console)
logger = cast(LoggingProtocol, s.logger)
import asyncio

# Module-level fixtures read by `demo_llm_and_search` further down in this cell.
mock_config = LLMConfig(api_key="test", model="mock-model")
mock_llm = MockLLM(mock_config)
searcher = MockSearchAdapter()

# Typed helper to normalize search adapter outputs to a list-like sequence of results
def ensure_results_list(results) -> Sequence[Union[SerpResult, tuple[object, ...]]]:
    """Normalize different adapter return shapes to a uniform sequence.

    Shapes are checked in this order:
    - ``None`` -> empty list (nothing to show)
    - `SerpReply` (typed Pydantic model) -> its ``.results``
    - legacy ``list[SerpResult]`` -> returned as is (same object, not copied)
    - an object exposing a list-valued ``.results`` attribute -> that list
    - ``str`` / ``bytes`` -> single-item list (a scalar payload, not an
      iterable of characters)
    - any other iterable -> ``list(results)``
    - anything else -> single-item list

    Args:
        results: Whatever a search adapter's ``search`` call returned.

    Returns:
        A sequence that callers can iterate over uniformly.
    """
    if results is None:
        # Previously this fell through to the single-item fallback and yielded [None].
        return []
    if isinstance(results, SerpReply):
        return results.results
    if isinstance(results, list):
        return results
    maybe = getattr(results, "results", None)
    if isinstance(maybe, list):
        return maybe
    if isinstance(results, (str, bytes)):
        # Strings are iterable, but splitting them into characters is never
        # the intent here; treat them like any other scalar.
        return [results]
    try:
        return list(results)
    except TypeError:
        # Not iterable at all: wrap the single object.
        return [results]

async def demo_llm_and_search():
    """Send one fixed prompt to the mock LLM and the mock search adapter.

    The LLM output goes to the logger; each search result is printed to the
    console as ``- title (url) - snippet``.
    """
    prompt = "What are the best coffee shops in SF?"
    llm_out = await mock_llm.generate(prompt)
    results = await searcher.search(prompt)
    logger.info('MockLLM output: %s', llm_out)
    c.print('MockSearchAdapter results:')

    for item in ensure_results_list(results):
        if isinstance(item, tuple):
            # Positional layout (title, url, snippet); missing trailing fields become ''.
            title, url, snippet = (tuple(item) + ('', '', ''))[:3]
        else:
            # Attribute layout; absent attributes become ''.
            title, url, snippet = (getattr(item, field, '') for field in ('title', 'url', 'snippet'))
        c.print(f'- {title} ({url}) - {snippet}')

# Run the demo in a notebook-friendly way: use `await` inside Jupyter, otherwise use asyncio.run()
# NOTE(review): the top-level `await` below only compiles where top-level await is
# supported (e.g. IPython/Jupyter cells). As a plain Python script this cell is a
# SyntaxError, so the `else` branch is presumably never reached there — confirm
# whether the fallback is still needed.
from IPython.core.getipython import get_ipython
if get_ipython() is not None:
    # An IPython shell is active: patch asyncio so nested loop runs are permitted, then await directly.
    import nest_asyncio; nest_asyncio.apply()
    await demo_llm_and_search()
else:
    import asyncio  # already imported earlier in this cell; harmless repeat
    asyncio.run(demo_llm_and_search())


In [8]:
# DEMO-6: SerpAPI adapter `from_raw` and raw-preservation demo
from research_agent_framework.adapters.search.serpapi_search import SerpAPISearchAdapter
from research_agent_framework.adapters.search.mock_search import MockSearchAdapter
from research_agent_framework.adapters.search.schema import SerpRequest
from research_agent_framework.config import get_settings
from typing import cast
from rich.console import Console
import asyncio

# Console exposed by get_settings(); cast() is a static-typing hint only (no runtime conversion).
s = get_settings()
c = cast(Console, s.console)

async def demo_adapter():
    """Exercise the SerpAPI stub and the mock adapter, printing what each returns.

    Three searches are made: a one-result SerpAPI request, a zero-limit SerpAPI
    request, and a plain-string query against the mock adapter.
    """
    stub_adapter = SerpAPISearchAdapter.from_raw({'provider':'serpapi-stub'})

    # One-result request: show the provider recorded in the reply metadata and,
    # when a result came back, the raw payload kept on it.
    single_reply = await stub_adapter.search(SerpRequest(query='demo', limit=1))
    c.print('SerpAPI stub reply meta provider:', single_reply.meta.provider)
    if len(single_reply.results) > 0:
        first_hit = single_reply.results[0]
        c.print('SerpAPI stub first result raw:', first_hit.raw)

    # Zero-limit request: only the number of results is reported.
    zero_reply = await stub_adapter.search(SerpRequest(query='demo', limit=0))
    c.print('Empty reply results length:', len(zero_reply.results))

    # Mock adapter built via from_raw, queried with a bare string (back-compat call shape).
    mock_adapter = MockSearchAdapter.from_raw({'provider':'mock'})
    found = await mock_adapter.search('test')
    # The return value may be an object carrying `.results` or the results themselves.
    entries = getattr(found, 'results', found)
    entries = entries if isinstance(entries, list) else list(entries)
    for entry in entries:
        # Print the preserved raw payload when present, otherwise the entry itself.
        c.print('mock result raw:', getattr(entry, 'raw', entry))

# Run the demo in a notebook-friendly way: use `await` inside Jupyter, otherwise use asyncio.run()
# NOTE(review): the top-level `await` below only compiles where top-level await is
# supported (e.g. IPython/Jupyter cells). As a plain Python script this cell is a
# SyntaxError, so the `else` branch is presumably never reached there — confirm
# whether the fallback is still needed.
from IPython.core.getipython import get_ipython
if get_ipython() is not None:
    # An IPython shell is active: patch asyncio so nested loop runs are permitted, then await directly.
    import nest_asyncio; nest_asyncio.apply()
    await demo_adapter()
else:
    import asyncio  # already imported earlier in this cell; harmless repeat
    asyncio.run(demo_adapter())


In [9]:
# TASK-4A.3: Property-based example for MockLLM (kept as demonstration)
from research_agent_framework.llm.client import LLMConfig, MockLLM
from hypothesis import given, strategies as st
import asyncio
import pytest
from assertpy import assert_that
from research_agent_framework.config import get_settings
from typing import cast
from rich.console import Console
from research_agent_framework.logging import LoggingProtocol
# Console and logger exposed by get_settings(); cast() is a static-typing hint only (no runtime conversion).
s = get_settings()
c = cast(Console, s.console)
logger = cast(LoggingProtocol, s.logger)

# Example: deterministic output for random prompt/config
# NOTE(review): this function is only defined here; no cell in the notebook calls
# it. The pytest marker matters only when the function is collected by pytest
# with an asyncio plugin — confirm an equivalent test exists in the test suite.
@pytest.mark.asyncio
@given(
    prompt=st.text(min_size=1, max_size=200),
    api_key=st.text(min_size=1, max_size=20),
    model=st.text(min_size=1, max_size=20),
)
async def demo_mockllm_property_valid(prompt: str, api_key: str, model: str) -> None:
    """Property: `MockLLM.generate` echoes the prompt behind a fixed prefix.

    For every generated non-empty `prompt`, `api_key` and `model`, the result
    must equal ``"mock response for: "`` followed by the prompt.
    """
    config = LLMConfig(api_key=api_key, model=model)
    client = MockLLM(config)
    result = await client.generate(prompt)
    assert_that(result).is_equal_to(f"mock response for: {prompt}")

# Run a single example for demonstration
async def run_demo():
    """Generate one response for a fixed prompt and report it via logger and console."""
    demo_client = MockLLM(LLMConfig(api_key="demo-key", model="demo-model"))
    answer = await demo_client.generate("Show me the best coffee shops in SF")
    # Same value goes to both sinks: the log record and the console.
    logger.info('MockLLM property-based output: %s', answer)
    c.print('MockLLM property-based output:', answer)

# Run the demo in a notebook-friendly way: use `await` inside Jupyter, otherwise use asyncio.run()
# NOTE(review): the top-level `await` below only compiles where top-level await is
# supported (e.g. IPython/Jupyter cells). As a plain Python script this cell is a
# SyntaxError, so the `else` branch is presumably never reached there — confirm
# whether the fallback is still needed.
from IPython.core.getipython import get_ipython
if get_ipython() is not None:
    # An IPython shell is active: patch asyncio so nested loop runs are permitted, then await directly.
    import nest_asyncio; nest_asyncio.apply()
    await run_demo()
else:
    import asyncio  # already imported earlier in this cell; harmless repeat
    asyncio.run(run_demo())


In [10]:
# TASK-5.1: Demonstrate ResearchAgent plan() and run() using MockLLM
from research_agent_framework.agents.base import ResearchAgent
from research_agent_framework.models import Scope
from research_agent_framework.llm.client import MockLLM, LLMConfig
from research_agent_framework.adapters.search.mock_search import MockSearchAdapter
from research_agent_framework.config import get_settings
from typing import cast
from rich.console import Console
from research_agent_framework.logging import LoggingProtocol
# Console and logger exposed by get_settings(); cast() is a static-typing hint only.
s = get_settings()
c = cast(Console, s.console)
logger = cast(LoggingProtocol, s.logger)
import asyncio

# Agent wired entirely with mock collaborators (mock LLM client + mock search adapter).
agent = ResearchAgent(llm_client=MockLLM(LLMConfig(api_key='demo', model='demo')),
                        search_adapter=MockSearchAdapter())

# NOTE: rebinds the notebook-level name `scope` that the models demo cell also assigns.
scope = Scope(topic='Coffee Shops', description='Find notable coffee shops in SF', constraints=['no paid sources'])
# `plan` is called synchronously; its result is iterated here and indexed by `run_first` in this cell.
plans = agent.plan(scope)

c.print('Planned tasks:')
for t in plans:
    c.print('-', t.id, t.query)

async def run_first():
    """Run the first planned task and report the result via logger and console.

    Reads the notebook-level `plans`, `agent`, `logger` and `c`. When the plan
    is empty there is nothing to run, so a notice is printed instead of letting
    `plans[0]` raise IndexError (the end-to-end demo cell guards the same way).
    """
    if not plans:
        c.print('Plan was empty; check ResearchAgent.plan behavior.')
        return
    res = await agent.run(plans[0])
    logger.info('Run result: %s', res)
    c.print('Run result:', res)

# Run the demo in a notebook-friendly way: use `await` inside Jupyter, otherwise use asyncio.run()
# NOTE(review): the top-level `await` below only compiles where top-level await is
# supported (e.g. IPython/Jupyter cells). As a plain Python script this cell is a
# SyntaxError, so the `else` branch is presumably never reached there — confirm
# whether the fallback is still needed.
from IPython.core.getipython import get_ipython
if get_ipython() is not None:
    # An IPython shell is active: patch asyncio so nested loop runs are permitted, then await directly.
    import nest_asyncio; nest_asyncio.apply()
    await run_first()
else:
    import asyncio  # already imported earlier in this cell; harmless repeat
    asyncio.run(run_first())


# Notebook Status

This notebook contains runnable demos that reflect the current test suite and mock implementations in `src/research_agent_framework`.

Sections included above:

- Models demonstration
- Prompt renderer example
- MockLLM + MockSearchAdapter demo
- SerpAPI adapter `from_raw` and raw-preservation demo
- Minimal property-based demonstration for MockLLM
- ResearchAgent `plan()` / `run()` demo

Status and instruction text has been removed; the cells focus on runnable demos and examples.


---

This consolidated demo notebook is focused on runnable examples that match the code and tests in the repository. Use the demo cells to validate the deterministic mock implementations and renderer output.

---

In [11]:
# Demo: SimpleScorer usage (robust imports and validation)
import sys
from pathlib import Path
from typing import Optional

# Robustly find the repository root by searching for 'src/research_agent_framework'
# Walk from the current working directory up through every parent and stop at
# the first directory that contains `src/research_agent_framework`.
repo_cwd = Path.cwd().resolve()
found_src: Optional[Path] = None
for candidate in [repo_cwd] + list(repo_cwd.parents):
    if (candidate / "src" / "research_agent_framework").exists():
        found_src = (candidate / "src").resolve()
        break

# Fallback to relative ../src (kept for compatibility)
# NOTE(review): `repo_cwd / ".." / "src"` appears to be the same location the loop
# already probed for `repo_cwd.parent`, so this branch looks redundant — confirm
# before removing.
if found_src is None:
    candidate = (repo_cwd / ".." / "src").resolve()
    if (candidate / "research_agent_framework").exists():
        found_src = candidate

if found_src is None:
    # Nothing we can do programmatically; show helpful message
    # NOTE(review): SystemExit is raised inside a notebook cell here; consider
    # whether an ordinary exception would give a clearer failure.
    print("Could not locate 'src/research_agent_framework' relative to notebook cwd:", repo_cwd)
    raise SystemExit("Please ensure the notebook is running with the repo root as CWD or add src to sys.path manually.")

# Prepend once; the membership test keeps re-runs from adding duplicates.
if str(found_src) not in sys.path:
    sys.path.insert(0, str(found_src))

from pydantic import TypeAdapter, HttpUrl
from research_agent_framework.agents.scoring import SimpleScorer
from research_agent_framework.models import SerpResult, Location, Coordinates, Rating
from research_agent_framework.config import get_settings
from typing import cast
from rich.console import Console

# use the project's console for nicer output
# cast() is a static-typing hint only; no runtime conversion happens.
s = get_settings()
c = cast(Console, s.console)

scorer = SimpleScorer()
# NOTE(review): the unit of `distance=500` is not visible from this notebook — confirm against `Location`.
loc = Location(coords=Coordinates(lat=37.77, lon=-122.42), distance=500)
# Validate URL as HttpUrl to satisfy SerpResult typing
url_adapter = TypeAdapter(HttpUrl)
validated_url = url_adapter.validate_python("https://example.com/demo")
# `price_level=None` and an empty `raw` are passed explicitly for this result.
serp = SerpResult(title="Demo Place", url=validated_url, snippet="demo", raw={}, location=loc, rating=Rating(score=4.2, count=123), price_level=None)
res = scorer.score(serp)

c.print("Scoring result:")
c.print(res)


## Additional Demos to match test coverage

The following demo cells exercise adapters and flows that are referenced by tests but were not shown in the earlier sections of this notebook:

- `TavilySearchAdapter` (from_raw + raw-preservation + empty-limit)
- Supervisor / policy demo (deterministic example, guarded)
- Discovery smoke demo (guarded)
- End-to-end flow demo (run an agent through a small plan and print the results, guarded)

These cells are intentionally defensive (try/except) so the notebook remains runnable across different repo layouts.

In [12]:
# Tavily adapter demo: from_raw, normal search, empty-limit and malformed-raw preservation
# The whole demo, imports included, sits inside one try block so a missing
# module or a failing call is reported as text instead of stopping the notebook.
# NOTE(review): the top-level `await` calls require a kernel with top-level await
# support (e.g. IPython/Jupyter).
try:
    from research_agent_framework.adapters.search.tavily_search import TavilySearchAdapter
    from research_agent_framework.adapters.search.schema import SerpRequest
    from research_agent_framework.config import get_settings
    from typing import cast
    from rich.console import Console

    s = get_settings()
    c = cast(Console, s.console)

    # The raw config deliberately carries a value that is not a valid URL.
    adapter = TavilySearchAdapter.from_raw({'provider': 'tavily-stub', 'url': 'not-a-url'})
    c.print('Created TavilySearchAdapter via from_raw:', adapter)

    # Normal search
    reply = await adapter.search(SerpRequest(query='espresso', limit=1))
    # getattr with a default keeps the print working when `provider` is absent.
    c.print('Tavily reply meta provider:', getattr(reply.meta, 'provider', None))
    if getattr(reply, 'results', None):
        c.print('Tavily first result raw:', reply.results[0].raw)

    # Empty-limit behavior
    empty = await adapter.search(SerpRequest(query='x', limit=0))
    c.print('Tavily empty results length:', len(getattr(empty, 'results', [])))

except Exception as e:
    # Plain print() is used because `c` may not exist if the imports failed.
    print('Tavily demo: import/run failed or class missing. Error:', e)
    print('If the import path differs, adapt the import to the actual adapter module.')


In [13]:
# Supervisor demo (simplified & guarded): prefer deep_research_from_scratch state tools
# This cell only imports and prints; it never invokes the supervisor itself.
# NOTE(review): a later cell ("Corrected Supervisor demo") prints much of the
# same information — consider keeping only one of the two.
try:
    from deep_research_from_scratch.state_multi_agent_supervisor import ConductResearch, ResearchComplete, SupervisorState
    # `SupervisorAgent` is just a local alias for the imported `supervisor_agent` object.
    from deep_research_from_scratch.multi_agent_supervisor import supervisor_agent as SupervisorAgent
    from research_agent_framework.config import get_settings
    from rich.console import Console
    from typing import cast

    s = get_settings()
    c = cast(Console, s.console)

    c.print('Supervisor state tools available (deep_research_from_scratch):', ConductResearch, ResearchComplete)
    c.print('SupervisorAgent from deep_research_from_scratch.multi_agent_supervisor:', SupervisorAgent)

    # The `@tool` decorator wraps the BaseModel into a StructuredTool-like object; do not call model_validate on the wrapper.
    # Instead, construct a tool-call dictionary that matches the supervisor's expected input shape (see tests).
    tool_call = {"name": "ConductResearch", "id": "demo-1", "args": {"research_topic": "Test coffee shops in SF"}}
    c.print('Example ConductResearch tool_call dict (use this to exercise supervisor_tools):', tool_call)

    # Example SupervisorState structure (matches tests):
    # The annotation is for readers and type checkers; at runtime this is a plain dict.
    example_state: SupervisorState = {
        "supervisor_messages": [],
        "research_brief": "demo brief",
        "research_iterations": 0,
        "notes": [],
        "raw_notes": [],
    }
    c.print('Example SupervisorState (dict):', example_state)

    c.print('To run the supervisor path, tests call `supervisor_tools(state)` with a SupervisorState like the above.')

except Exception as e:
    # Plain print() is used because `c` may not exist if the imports failed.
    print('Supervisor demo (simplified): import or execution failed. Error:', e)
    print('This demo prefers deep_research_from_scratch.* modules; adapt if your layout differs.')


In [14]:
# Discovery smoke demo (guarded). Tests sometimes expect discovery helpers to run without error.
# Looks up an optional `discover_plugins` attribute on the bootstrap module and
# calls it when it is callable; otherwise falls back to calling `bootstrap()`.
try:
    import research_agent_framework.bootstrap as rb
    # getattr with a None default: a missing helper is treated as "not available", not as an error.
    discover = getattr(rb, 'discover_plugins', None)
    if callable(discover):
        print('Running discover_plugins()...')
        plugins = discover()
        print('discover_plugins returned:', plugins)
    else:
        print('No discover_plugins helper; running rb.bootstrap() as a safe fallback to exercise wiring.')
        rb.bootstrap()
        print('rb.bootstrap completed (check logs for details).')

except Exception as e:
    # Any import or runtime failure is reported as text so the notebook keeps running.
    print('Discovery smoke demo: bootstrap/discovery failed or missing. Error:', e)
    print('If tests rely on a specific discovery helper, add a small wrapper function that is safe to call from notebooks.')


No discover_plugins helper; running rb.bootstrap() as a safe fallback to exercise wiring.
rb.bootstrap completed (check logs for details).


In [15]:
# End-to-end flow demo (guarded): run a ResearchAgent through a small plan and print results.
# Two levels of guarding: the outer try reports import/setup failures with plain
# print(); the inner try reports a failing `agent.run` through the console.
try:
    from research_agent_framework.agents.base import ResearchAgent
    from research_agent_framework.llm.client import MockLLM, LLMConfig
    from research_agent_framework.adapters.search.mock_search import MockSearchAdapter
    from research_agent_framework.models import Scope
    from research_agent_framework.config import get_settings
    from rich.console import Console
    from typing import cast

    s = get_settings()
    c = cast(Console, s.console)
    # Agent wired with mock collaborators only.
    agent = ResearchAgent(llm_client=MockLLM(LLMConfig(api_key='demo', model='demo')), search_adapter=MockSearchAdapter())
    scope = Scope(topic='Coffee Shops', description='End-to-end demo: find one coffee shop', constraints=[])
    c.print('Planning...')
    plan = agent.plan(scope)
    c.print('Plan:', plan)

    # Only the first planned task is executed; an empty plan is reported instead of indexed.
    if plan:
        c.print('Running first planned task (notebooks should guard nested event loops).')
        try:
            # run in a notebook-friendly way
            # `get_ipython` is looked up in the cell's globals rather than imported,
            # so the check also works when no earlier cell imported it.
            # NOTE(review): the `await` below needs a kernel with top-level await
            # support, so the `else` branch is presumably unreachable as a plain script.
            if (get_ip := globals().get('get_ipython')) and get_ip() is not None:
                import nest_asyncio; nest_asyncio.apply()
                res = await agent.run(plan[0])
            else:
                import asyncio
                res = asyncio.run(agent.run(plan[0]))
            c.print('End-to-end run result:', res)
        except Exception as run_e:
            c.print('agent.run failed in demo; adapt call to your ResearchAgent API. Error:', run_e)
    else:
        c.print('Plan was empty; check ResearchAgent.plan behavior.')

except Exception as e:
    # Plain print() is used because `c` may not exist if the imports failed.
    print('End-to-end demo: import/run failed. Error:', e)
    print('Adapt the import paths or function calls to match your repo layout.')


In [16]:
# Corrected Supervisor demo (uses deep_research_from_scratch.* module path which exists in this repo)
# Like the earlier supervisor cell, this one only imports and prints. The
# difference is that `supervisor_tools` is imported optionally and its
# availability is reported; it is not called here.
try:
    # Prefer the actual package used by tests and source files
    from deep_research_from_scratch.state_multi_agent_supervisor import ConductResearch, ResearchComplete, SupervisorState
    # Some supervisor logic lives in deep_research_from_scratch.multi_agent_supervisor; import if available
    try:
        from deep_research_from_scratch.multi_agent_supervisor import supervisor_tools
        supervisor_available = True
    except Exception:
        # Record the failure as a flag instead of aborting the cell.
        supervisor_tools = None
        supervisor_available = False

    from research_agent_framework.config import get_settings
    from rich.console import Console
    from typing import cast

    s = get_settings()
    c = cast(Console, s.console)

    c.print('Supervisor state tools available:', ConductResearch, ResearchComplete)
    if supervisor_available:
        c.print('supervisor_tools is importable from deep_research_from_scratch.multi_agent_supervisor')
    else:
        c.print('No supervisor_tools found at deep_research_from_scratch.multi_agent_supervisor; only state tools are available to demo.')

    # Construct an example tool_call dict rather than attempting to construct the wrapped tool object directly
    tool_call = {"name": "ConductResearch", "id": "demo-1", "args": {"research_topic": "Test coffee shops in SF"}}
    c.print('Example ConductResearch tool_call dict to feed into supervisor_tools:', tool_call)

    # Demonstrate constructing a SupervisorState-like structure matching tests (use dict form)
    # The annotation is for readers and type checkers; at runtime this is a plain dict.
    example_state: SupervisorState = {
        "supervisor_messages": [],
        "research_brief": "demo brief",
        "research_iterations": 0,
        "notes": [],
        "raw_notes": [],
    }
    c.print('Example SupervisorState (dict):', example_state)

    if supervisor_available:
        c.print('You may call: await supervisor_tools(example_state) to exercise the supervisor flow (guarded).')

except Exception as e:
    # Plain print() is used because `c` may not exist if the imports failed.
    print('Corrected Supervisor demo: import or execution failed. Error:', e)
    print('This demo targets the `deep_research_from_scratch` package where supervisor state is defined in this repo.')


## Adapter & SerpResult.from_raw demos

This section demonstrates the behavior covered by the test suite:

- `SerpResult.from_raw` normalization and raw-preservation (happy path).
- `SerpResult.from_raw` rejects non-dict raw payloads.
- `MockSearchAdapter` behavior for empty query (back-compat) and `SerpRequest(limit=0)` returning an empty `SerpReply`.


In [17]:
# Demo: SerpResult.from_raw happy path and error behavior
from research_agent_framework.models import SerpResult
from research_agent_framework.adapters.search.mock_search import MockSearchAdapter
from research_agent_framework.adapters.search.schema import SerpRequest, SerpReply
from pydantic import TypeAdapter, HttpUrl
from rich.console import Console
from research_agent_framework.config import get_settings
from typing import cast
s = get_settings()
c = cast(Console, s.console)

# Happy path: normalize common keys and preserve raw
raw = {'title': 'Demo Cafe', 'url': 'https://example.com/demo', 'snippet': 'Demo snippet', 'provider': 'mock', 'id': 100}
r = SerpResult.from_raw(raw)
c.print('SerpResult.from_raw created:', r.model_dump())

# Error path: passing a non-dict should raise. The exception is caught below and
# only its type name and message are printed, so the cell keeps running.
try:
    # use cast to bypass static type warning in notebook analysis tools
    bad = cast(dict, 'not-a-dict')
    SerpResult.from_raw(bad)  # raises TypeError at runtime
except Exception as e:
    c.print('SerpResult.from_raw(non-dict) raised:', type(e).__name__, str(e))

# Adapter demos: empty query back-compat and SerpRequest(limit=0)
# NOTE(review): the top-level `await` calls below require a kernel with top-level
# await support (e.g. IPython/Jupyter).
searcher = MockSearchAdapter()
# Back-compat empty string -> should return empty list or SerpReply with empty results
res = await searcher.search('')
c.print('MockSearchAdapter.search(\'\') returned type:', type(res))
if isinstance(res, list):
    c.print('Empty-list back-compat result length:', len(res))
else:
    # Normalize SerpReply or objects exposing .results
    items = getattr(res, 'results', res)
    items_list = list(items)
    # Nested getattr: tolerate a reply without `.meta` or a meta without `.provider`.
    c.print('Empty SerpReply meta provider:', getattr(getattr(res, 'meta', None), 'provider', None))
    c.print('Empty SerpReply results length:', len(items_list))

# SerpRequest(limit=0) -> SerpReply with empty results
reply = await searcher.search(SerpRequest(query='x', limit=0))
if isinstance(reply, SerpReply):
    c.print('SerpRequest(limit=0) reply.meta.provider:', getattr(reply.meta, 'provider', None))
    c.print('SerpRequest(limit=0) results length:', len(list(reply.results)))
else:
    # Fallback for any other return shape: use `.results` when present, else the value itself.
    items = getattr(reply, 'results', reply)
    c.print('SerpRequest(limit=0) results length (fallback):', len(list(items)))
