# © Artur Czarnecki. All rights reserved.
# Intergrax framework – proprietary and confidential.
# Use, modification, or distribution without written permission is prohibited.

# 04_websearch_context_demo.ipynb

This notebook demonstrates how to use **RuntimeEngine** with:

- session-based chat,
- optional RAG (attachments ingested into a vector store),
- **live web search** via `WebSearchExecutor`,

to achieve a "ChatGPT-like" experience with browsing.

The core configuration (LLM adapter, session manager, web search executor,
runtime config) is initialized in a single cell, while the rest of the notebook
focuses on testing and inspecting the web search behavior.


In [None]:
# 1. Imports and environment setup
# --------------------------------

import sys, os
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "..", "..")))

import os
from dotenv import load_dotenv

# Drop-in knowledge runtime pieces
from intergrax.runtime.drop_in_knowledge_mode.engine.runtime import RuntimeEngine
from intergrax.runtime.drop_in_knowledge_mode.config import RuntimeConfig

# Web search integration
from intergrax.websearch.service.websearch_executor import WebSearchExecutor
from intergrax.websearch.providers.google_cse_provider import GoogleCSEProvider
# from intergrax.websearch.providers.bing_provider import BingWebProvider  # optional


# 1.1 Load environment variables (API keys, etc.) from a local .env file, if any.
load_dotenv()

# Normalise the web search credentials: every key below ends up present in
# os.environ, and a missing or empty value becomes "" (same as before).
# Unlike a silent fallback, missing Google CSE keys are reported right here so
# a misconfigured environment is visible before the first search is attempted.
# NOTE(review): assumes GoogleCSEProvider reads these two variables — confirm.
# The Bing key is optional; it only matters if you enable the Bing provider.
REQUIRED_WEBSEARCH_ENV_VARS = ("GOOGLE_CSE_API_KEY", "GOOGLE_CSE_CX")
OPTIONAL_WEBSEARCH_ENV_VARS = ("BING_SEARCH_V7_API_KEY",)

for _env_name in REQUIRED_WEBSEARCH_ENV_VARS + OPTIONAL_WEBSEARCH_ENV_VARS:
    os.environ[_env_name] = os.getenv(_env_name) or ""

_missing_env_vars = [
    _name for _name in REQUIRED_WEBSEARCH_ENV_VARS if not os.environ[_name]
]
if _missing_env_vars:
    print(
        "WARNING: missing web search credentials: "
        + ", ".join(_missing_env_vars)
        + " (set them in your environment or in a .env file)."
    )





# Core runtime configuration (web search)

In [None]:
# 2. Core runtime configuration (web search)
# -----------------------------------------------------------------------------

from intergrax.llm_adapters.llm_provider import LLMProvider
from intergrax.llm_adapters.llm_provider_registry import LLMAdapterRegistry
from intergrax.runtime.drop_in_knowledge_mode.engine.runtime_context import RuntimeContext
from intergrax.runtime.drop_in_knowledge_mode.session.in_memory_session_storage import InMemorySessionStorage
from intergrax.runtime.drop_in_knowledge_mode.session.session_manager import SessionManager

# 2.1 LLM adapter – obtained from the adapter registry for the Ollama provider.
llm_adapter = LLMAdapterRegistry.create(LLMProvider.OLLAMA)

# 2.2 Session manager – backed by in-memory session storage.
session_storage = InMemorySessionStorage()
session_manager = SessionManager(storage=session_storage)

# 2.3 Web search executor – wraps one or more providers (only Google CSE here).
search_providers = [GoogleCSEProvider()]
websearch_executor = WebSearchExecutor(
    providers=search_providers,
    max_text_chars=None,
)

# 2.4 RuntimeConfig – single source of truth for the drop-in knowledge runtime.
config = RuntimeConfig(
    llm_adapter=llm_adapter,
    # RAG is disabled in this notebook.
    enable_rag=False,
    # Web search is the feature under test.
    enable_websearch=True,
    websearch_executor=websearch_executor,
    max_docs_per_query=4,  # how many docs we inject into the system prompt
    # Tools are switched off; user-profile memory stays enabled.
    tools_mode="off",
    enable_user_profile_memory=True,
)

# 2.5 Runtime context – bundles the config with the session manager.
context = RuntimeContext(config=config, session_manager=session_manager)

# 2.6 Runtime engine – the object every request in this notebook goes through.
runtime = RuntimeEngine(context=context)
print(runtime)

<intergrax.runtime.drop_in_knowledge_mode.engine.runtime.DropInKnowledgeRuntime object at 0x000002550055CB30>


### 3. Create a fresh chat session for this web search demo

We create a brand new session in the in-memory store.
All questions in this notebook will be sent under the same `session_id`
so that the runtime can keep short-term chat history (like ChatGPT).

Helper: `ask(question: str)` for interactive testing

This helper:
1. Builds a `RuntimeRequest` bound to the current session.
2. Sends it through `RuntimeEngine.run(...)`.
3. Prints:
   - The user question
   - The model answer (optionally truncated)
   - Routing info (was RAG used? was web search used?)
   - Basic debug info (web search, RAG).

In [None]:
import json
import uuid

from intergrax.runtime.drop_in_knowledge_mode.prompts.websearch_prompt_builder import DefaultWebSearchPromptBuilder
from intergrax.runtime.drop_in_knowledge_mode.responses.response_schema import RuntimeAnswer, RuntimeRequest
from intergrax.runtime.drop_in_knowledge_mode.tracing.trace_models import TraceComponent
from intergrax.websearch.service.websearch_config import WebSearchConfig, WebSearchStrategyType
from intergrax.websearch.service.websearch_context_generator import create_websearch_context_generator

# 3.1 Create a fresh session for this demo.
session = await session_manager.create_session(
    session_id=str(uuid.uuid4()),
    user_id="demo-user-websearch",
    tenant_id="demo-tenant",
    workspace_id="demo-workspace",
    metadata={"notebook": "04_websearch_drop_in_web_demo"},
)

print("Session created:")
print(f"- id: {session.id}")
print(f"- user_id: {session.user_id}")
print(f"- tenant_id: {session.tenant_id}")
print(f"- workspace_id: {session.workspace_id}")


def _print_trace_section(title, events, component, empty_message):
    """Print the payloads of all trace events emitted by ``component``.

    Args:
        title: Label shown in the ``--- TRACE: <title> ---`` header.
        events: Trace events to filter; each is expected to expose
            ``component`` and ``payload`` attributes.
        component: Value compared against ``event.component``.
        empty_message: Line printed when no event matches.
    """
    print(f"\n--- TRACE: {title} ---")
    matching = [event for event in events if event.component == component]
    if not matching:
        print(empty_message)
        return
    for event in matching:
        # default=str: this is a debug dump, so a payload value that json
        # cannot encode is printed via str() instead of raising TypeError.
        print(json.dumps(event.payload or {}, indent=2, ensure_ascii=False, default=str))


async def ask(
    question: str,
    *,
    show_full_answer: bool = True,
    max_answer_chars: int = 700,
) -> RuntimeAnswer:
    """
    Convenience helper for this notebook.

    Builds a RuntimeRequest bound to the notebook's session, sends it through
    ``runtime.run(...)`` and prints the answer, the routing flags and the
    RAG / web search trace payloads.

    Args:
        question: User message to send.
        show_full_answer: Print the whole answer when True; otherwise print
            only the first ``max_answer_chars`` characters followed by "...".
        max_answer_chars: Truncation length used when ``show_full_answer`` is
            False. Defaults to 700.

    Returns:
        The RuntimeAnswer returned by the runtime, unchanged.
    """
    separator = "=" * 80
    print("\n" + separator)
    print(f"USER QUESTION:\n{question}")
    print(separator)

    request = RuntimeRequest(
        session_id=session.id,
        user_id=session.user_id,
        tenant_id=session.tenant_id,
        workspace_id=session.workspace_id,
        message=question,
        attachments=[],          # In this notebook we focus on web search (no files)
        metadata={"source": "04_websearch_demo"},
    )

    answer = await runtime.run(request)

    # 1) Assistant answer. Fall back to "" so a missing answer text cannot
    #    break the slicing / len() below.
    answer_text = answer.answer or ""
    print("\nASSISTANT ANSWER:\n")
    if show_full_answer:
        print(answer_text)
    else:
        suffix = "..." if len(answer_text) > max_answer_chars else ""
        print(answer_text[:max_answer_chars] + suffix)

    # 2) Routing information (did RAG / websearch fire?)
    route = answer.route
    print("\n--- ROUTING INFO ---")
    print(f"strategy:           {route.strategy}")
    print(f"used_rag:           {route.used_rag}")
    print(f"used_websearch:     {route.used_websearch}")
    print(f"used_tools:         {route.used_tools}")
    print(f"used_user_profile:  {route.used_user_profile}")

    # 3) Debug trace: show only the relevant slices (RAG / websearch).
    events = answer.trace_events or []
    _print_trace_section("RAG", events, TraceComponent.RAG, "(no RAG trace events)")
    _print_trace_section(
        "WEBSEARCH", events, TraceComponent.WEBSEARCH, "(no websearch trace events)"
    )

    return answer


web_q = (
    "What are the most recent major changes to the OpenAI API regarding the Responses API "
    "and tool calling? Provide a concise technical summary with the date of the change."
)

# Enable websearch for runtime and choose strategy for context generation.
config.websearch_config = WebSearchConfig(
    strategy=WebSearchStrategyType.MAP_REDUCE
)
config.websearch_config.llm.map_adapter = config.llm_adapter
config.websearch_config.llm.reduce_adapter = config.llm_adapter

# Run full RuntimeEngine pipeline (this is the real test).
_ = await ask(web_q, show_full_answer=True)


Session created:
- id: 9e5c1c73-b9cc-4be4-915e-bb8872f978de
- user_id: demo-user-websearch
- tenant_id: demo-tenant
- workspace_id: demo-workspace

USER QUESTION:
What are the most recent major changes to the OpenAI API regarding the Responses API and tool calling? Provide a concise technical summary with the date of the change.

ASSISTANT ANSWER:

According to the OpenAI API documentation, here are some recent major changes:

1. **Improved Response Formatting** (February 2022): The Responses API now returns more detailed response objects, including metadata about the generated text.
2. **Async Support for Text Generation** (April 2022): The Completions API now supports asynchronous responses for text generation requests, allowing for improved performance and scalability.
3. **New `max_tokens` Parameter** (May 2022): A new parameter was introduced to control the maximum number of tokens in generated text, improving flexibility and reducing potential security risks.
4. **Enhanced Error 