# © Artur Czarnecki. All rights reserved.
# Intergrax framework – proprietary and confidential.
# Use, modification, or distribution without written permission is prohibited.

# Notebook 11 — ChatGPT-like E2E (behavior-level)

This notebook is an **integration / behavior** test that exercises the nexus Runtime end-to-end, in a way that resembles a real ChatGPT usage pattern.

## What we test (behavior)
- Multi-session behavior (A/B/C) with **isolated session history** and **shared user LTM**
- User LTM persistence + recall across sessions
- Session-level consolidation (history → summary) without cross-session leakage
- RAG ingestion + Q&A over a document
- Websearch as a context layer that affects the final answer
- Tools execution (tool + LLM) without breaking the **user-last invariant**
- Reasoning enabled for observability, but **not persisted into user-visible history**

## What we do NOT test (intentionally out of scope for this notebook)
- Retry / fallback logic for empty LLM outputs
- Formal “adapter contract” beyond `generate_messages(...) -> str`
- Tools contract v1 (final answer vs context-only)
- Prompt tuning, quality scoring, reranking
- Debug refactors (e.g., removing getattr) unless strictly required to run the notebook

## Hard invariants
- **User-last invariant:** the last message sent to the core LLM must always be `role="user"`
- No empty assistant answers in the final output
- No memory leakage between sessions (history isolation)

## Requirements
This notebook expects your local environment to provide:
- Intergrax sources on PYTHONPATH
- LLM credentials (e.g., `OPENAI_API_KEY`) if using OpenAI adapters
- Vectorstore backend deps (Chroma/Qdrant) if enabling RAG/LTM vector retrieval


In [1]:
import sys, os
# Put the directory two levels above the current working directory at the
# front of the import path.
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir)),
)

In [2]:
from dotenv import load_dotenv

# Populate the process environment via python-dotenv.
load_dotenv()

# Make sure the key is always present in the environment; it falls back to an
# empty string when unset or empty.
os.environ["GOOGLE_CSE_API_KEY"] = os.environ.get("GOOGLE_CSE_API_KEY") or ""

In [None]:
from intergrax.runtime.nexus.responses.response_schema import RuntimeAnswer
from intergrax.runtime.nexus.tracing.trace_query import TraceQuery
from intergrax.runtime.nexus.tracing.adapters.llm_usage_snapshot import LLMUsageSnapshotDiag
from intergrax.runtime.nexus.tracing.adapters.llm_usage_finalize import LLMUsageFinalizeDiag


def get_llm_usage_snapshot(answer: RuntimeAnswer) -> LLMUsageSnapshotDiag:
    """Fetch the ``LLMUsageSnapshotDiag`` payload from ``answer.trace_events``.

    The payload is whatever ``TraceQuery.first_payload`` yields for that type.

    Raises:
        AssertionError: if no such payload is present in the trace.
    """
    snapshot = TraceQuery.from_iter(answer.trace_events).first_payload(LLMUsageSnapshotDiag)
    assert snapshot is not None, "Missing LLMUsageSnapshotDiag in trace_events."
    return snapshot


def get_llm_usage_finalize(answer: RuntimeAnswer) -> LLMUsageFinalizeDiag:
    """Fetch the ``LLMUsageFinalizeDiag`` payload from ``answer.trace_events``.

    The payload is whatever ``TraceQuery.first_payload`` yields for that type.

    Raises:
        AssertionError: if no such payload is present in the trace.
    """
    finalize_diag = TraceQuery.from_iter(answer.trace_events).first_payload(LLMUsageFinalizeDiag)
    assert finalize_diag is not None, "Missing LLMUsageFinalizeDiag in trace_events."
    return finalize_diag


def assert_llm_usage_basic(
    answer: RuntimeAnswer,
    *,
    require_adapters: list[str] | None = None,
) -> LLMUsageSnapshotDiag:
    """Assert that ``answer`` carries a sane LLM usage snapshot and return it.

    Checks performed on the snapshot obtained via ``get_llm_usage_snapshot``:
    - ``calls >= 1``
    - ``total_tokens > 0``
    - ``total_tokens == input_tokens + output_tokens``
    - ``errors >= 0``
    - every name in ``require_adapters`` (when given) is reported as an adapter

    Args:
        answer: Runtime answer whose trace events are inspected.
        require_adapters: Optional adapter labels that must appear in the
            snapshot. ``None`` or an empty list skips this check, which keeps
            the original single-argument call form working.

    Returns:
        The validated usage snapshot.

    Raises:
        AssertionError: if the snapshot is missing or any check fails.
    """
    snap = get_llm_usage_snapshot(answer)

    assert snap.calls >= 1, f"Expected calls >= 1, got {snap.to_dict()}"
    assert snap.total_tokens > 0, f"Expected total_tokens > 0, got {snap.to_dict()}"
    assert snap.total_tokens == snap.input_tokens + snap.output_tokens, (
        f"Expected total_tokens == input_tokens + output_tokens, got {snap.to_dict()}"
    )
    assert snap.errors >= 0, f"Expected errors >= 0, got {snap.to_dict()}"

    if require_adapters:
        # NOTE(review): assumes the snapshot's dict form exposes per-adapter
        # usage under the "adapters" key (the websearch cell reads that key) —
        # confirm against LLMUsageSnapshotDiag.to_dict().
        adapters = snap.to_dict().get("adapters") or {}
        missing = [name for name in require_adapters if name not in adapters]
        assert not missing, (
            f"Missing required adapters {missing}. present={list(adapters)}, got {snap.to_dict()}"
        )

    return snap


In [None]:
import os
from datetime import datetime
from pathlib import Path
import uuid

from intergrax.llm_adapters.llm_provider import LLMProvider
from intergrax.llm_adapters.llm_provider_registry import LLMAdapterRegistry
from intergrax.runtime.nexus.config import RuntimeConfig
from intergrax.runtime.nexus.engine.runtime import RuntimeEngine
from intergrax.runtime.nexus.engine.runtime_context import RuntimeContext
from intergrax.runtime.nexus.session.in_memory_session_storage import InMemorySessionStorage
from intergrax.runtime.nexus.session.session_manager import SessionManager
from intergrax.memory.user_profile_manager import UserProfileManager
from intergrax.memory.stores.in_memory_user_profile_store import InMemoryUserProfileStore
from intergrax.rag.embedding_manager import EmbeddingManager
from intergrax.rag.vectorstore_manager import VSConfig, VectorstoreManager
from intergrax.runtime.user_profile.session_memory_consolidation_service import SessionMemoryConsolidationService
from intergrax.runtime.user_profile.user_profile_instructions_service import UserProfileInstructionsService
from intergrax.utils.time_provider import SystemTimeProvider
from intergrax.websearch.providers.google_cse_provider import GoogleCSEProvider
from intergrax.websearch.service.websearch_executor import WebSearchExecutor

# =====================================================================
# Global test identifiers / paths (no tests executed in this cell)
# =====================================================================

# One user shared by every session in this notebook.
USER_ID = "user_chatgpt_like_001"

# Three independent session ids (A/B/C).
SESSION_A = "sess_chatgpt_like_A"
SESSION_B = "sess_chatgpt_like_B"
SESSION_C = "sess_chatgpt_like_C"

# Per-run identifier: timestamp plus a short random suffix, so artifacts and
# collection names from different runs do not collide.
RUN_ID = "_".join(
    [
        SystemTimeProvider.utc_now().strftime("%Y%m%d_%H%M%S"),
        uuid.uuid4().hex[:8],
    ]
)

# All artifacts of this run live under <cwd>/_artifacts/notebook_11_chatgpt_like/<RUN_ID>.
BASE_DIR = Path.cwd().resolve()
ARTIFACTS_DIR = BASE_DIR.joinpath("_artifacts", "notebook_11_chatgpt_like", RUN_ID)
ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)

# Separate vectorstore collections (1 instance = 1 collection_name)
# - RAG_DOCS: document ingestion/retrieval
# - USER_LTM: user long-term memory retrieval
RAG_DIR = ARTIFACTS_DIR.joinpath("vs_rag_docs")
LTM_DIR = ARTIFACTS_DIR.joinpath("vs_user_ltm")
RAG_DIR.mkdir(parents=True, exist_ok=True)
LTM_DIR.mkdir(parents=True, exist_ok=True)

# ---------------------------------------------------------------------
# LLM adapter (real adapter, no wrappers)
# - the provider selected here is Ollama; whatever that provider needs
#   (or OPENAI_API_KEY etc. if you switch to an OpenAI adapter) must
#   already be configured in the environment.
# ---------------------------------------------------------------------
llm_adapter = LLMAdapterRegistry.create(LLMProvider.OLLAMA)

# ---------------------------------------------------------------------
# Embeddings + vectorstore (real managers)
# Pick providers you actually use in your repo/env.
# ---------------------------------------------------------------------
embed_manager = EmbeddingManager(
    provider="ollama",
)

# RAG documents vectorstore (own collection and persist directory per run)
rag_vs = VectorstoreManager(
    config=VSConfig(
        provider="chroma",
        collection_name=f"rag_docs_{RUN_ID}",
        chroma_persist_directory=str(RAG_DIR),
    )
)

# User LTM vectorstore (kept separate from the RAG collection)
ltm_vs = VectorstoreManager(
    config=VSConfig(
        provider="chroma",
        collection_name=f"user_ltm_{RUN_ID}",
        chroma_persist_directory=str(LTM_DIR),
    )
)

# ---------------------------------------------------------------------
# Stores
# ---------------------------------------------------------------------
session_store = InMemorySessionStorage()
user_profile_store = InMemoryUserProfileStore()

# ---------------------------------------------------------------------
# Managers
# ---------------------------------------------------------------------
# The user profile manager is wired to the LTM vectorstore, not the RAG one.
user_profile_manager = UserProfileManager(
    store=user_profile_store,
    embedding_manager=embed_manager,
    vectorstore_manager=ltm_vs,
)

user_profile_instructions_service = UserProfileInstructionsService(
    llm=llm_adapter,
    manager=user_profile_manager,
)

session_memory_consolidation_service = SessionMemoryConsolidationService(
    llm=llm_adapter,
    profile_manager=user_profile_manager,
    instructions_service=user_profile_instructions_service,
)

session_manager = SessionManager(
    storage=session_store,
    user_profile_manager=user_profile_manager,
    session_memory_consolidation_service=session_memory_consolidation_service,
)

websearch_executor = WebSearchExecutor(
    providers=[
        GoogleCSEProvider(),
    ],
    max_text_chars=None,
)

# ---------------------------------------------------------------------
# Base Runtime config (DO NOT mutate this object in build_runtime)
# ---------------------------------------------------------------------
base_config = RuntimeConfig(
    llm_adapter=llm_adapter,
    embedding_manager=embed_manager,
    vectorstore_manager=rag_vs,
    websearch_executor=websearch_executor,
    enable_user_profile_memory=True,
    enable_org_profile_memory=False,
    enable_user_longterm_memory=True,
    enable_rag=True,
    enable_websearch=True,
    tools_mode="off",
)

def _clone_runtime_config(cfg: RuntimeConfig) -> RuntimeConfig:
    """
    Create a shallow clone of RuntimeConfig.
    Supports either dataclass-like configs or plain attribute containers.
    """
    if hasattr(cfg, "copy") and callable(getattr(cfg, "copy")):
        try:
            return cfg.copy()
        except TypeError:
            pass

    data = dict(cfg.__dict__)
    return RuntimeConfig(**data)

# =====================================================================
# Runtime factory
# =====================================================================
def build_runtime(*, override_config: dict | None = None, **runtime_kwargs) -> RuntimeEngine:
    """
    Create a fresh RuntimeEngine on top of a clone of ``base_config``.

    - override_config: mapping of RuntimeConfig attribute names to values;
      each pair is applied to the cloned config with ``setattr`` (shallow).
    - runtime_kwargs: optional RuntimeContext collaborators. Only
      ``ingestion_service``, ``context_builder``, ``rag_prompt_builder``,
      ``websearch_prompt_builder`` and ``history_prompt_builder`` are read;
      any that are not supplied are passed as ``None``.
    """
    cfg = _clone_runtime_config(base_config)
    for attr_name, attr_value in (override_config or {}).items():
        setattr(cfg, attr_name, attr_value)

    # Collect the optional collaborators by name; missing ones become None.
    optional_parts = {
        part_name: runtime_kwargs.get(part_name)
        for part_name in (
            "ingestion_service",
            "context_builder",
            "rag_prompt_builder",
            "websearch_prompt_builder",
            "history_prompt_builder",
        )
    }

    return RuntimeEngine(
        context=RuntimeContext(
            config=cfg,
            session_manager=session_manager,
            **optional_parts,
        )
    )

# Report the identifiers of this run, one per line.
print(
    "BOOTSTRAP OK",
    f"RUN_ID: {RUN_ID}",
    f"ARTIFACTS_DIR: {ARTIFACTS_DIR}",
    f"USER_ID: {USER_ID}",
    f"SESSION_A: {SESSION_A}",
    f"SESSION_B: {SESSION_B}",
    f"SESSION_C: {SESSION_C}",
    sep="\n",
)





  RUN_ID = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + "_" + uuid.uuid4().hex[:8]


[intergraxVectorstoreManager] Initialized provider=chroma, collection=rag_docs_20251226_091323_5de869a1
[intergraxVectorstoreManager] Existing count: 0
[intergraxVectorstoreManager] Initialized provider=chroma, collection=user_ltm_20251226_091323_5de869a1
[intergraxVectorstoreManager] Existing count: 0
BOOTSTRAP OK
RUN_ID: 20251226_091323_5de869a1
ARTIFACTS_DIR: D:\Projekty\intergrax\notebooks\nexus\_artifacts\notebook_11_chatgpt_like\20251226_091323_5de869a1
USER_ID: user_chatgpt_like_001
SESSION_A: sess_chatgpt_like_A
SESSION_B: sess_chatgpt_like_B
SESSION_C: sess_chatgpt_like_C


## Cell 2 — Session A: onboarding + LTM write

Goal:
- Simulate a real onboarding turn (user introduces themselves and preferences).
- Run the full runtime pipeline via `runtime.run(...)`.
- Close the session to trigger **session → LTM consolidation**.
- Minimal asserts:
  1) Assistant answer is non-empty
  2) Session history is persisted
  3) User LTM contains at least one entry after closing the session


In [5]:
from intergrax.runtime.nexus.responses.response_schema import RuntimeRequest

# ---------------------------------------------------------------------
# Build runtime (no special overrides yet)
# ---------------------------------------------------------------------
runtime = build_runtime()

# ---------------------------------------------------------------------
# Session A — onboarding message (behaves like real ChatGPT)
# ---------------------------------------------------------------------
onboarding_message = (
    "Hi. I am Artur. I build Integrax and Mooff. "
    "I prefer concise, technical answers. Never use emojis in code or technical docs. "
    "Please remember this for future sessions."
)

request_a = RuntimeRequest(
    user_id=USER_ID,
    session_id=SESSION_A,
    message=onboarding_message,
)

answer_a = await runtime.run(request_a)

# ---------------------------------------------------------------------
# Minimal assert #1: non-empty assistant answer
# ---------------------------------------------------------------------
assert isinstance(answer_a.answer, str) and answer_a.answer.strip(), "Empty assistant answer in Session A."

# ---------------------------------------------------------------------
# Minimal assert #2: session history persisted (via SessionManager storage)
# ---------------------------------------------------------------------
history_a = await session_manager.get_history(session_id=SESSION_A)
assert len(history_a) >= 2, f"Expected >=2 messages in history, got {len(history_a)}."

# ---------------------------------------------------------------------
# Trigger consolidation to LTM by closing the session
# (This is the behavior boundary: Session -> LTM)
# ---------------------------------------------------------------------
await session_manager.close_session(session_id=SESSION_A)

# ---------------------------------------------------------------------
# Minimal assert #3: LTM entries were created (semantic recall evidence)
# We do a vector search against the user's LTM store.
# ---------------------------------------------------------------------
ltm_search = await user_profile_manager.search_longterm_memory(
        user_id=USER_ID,
        query="Artur Integrax Mooff preferences concise technical no emojis",
        top_k=5,
        score_threshold=0.0,
    )

assert ltm_search.get("used_longterm") is True, "Expected long-term memory retrieval to be enabled."
assert (ltm_search.get("debug") or {}).get("hits_count", 0) > 0, "Expected at least one LTM entry after closing Session A."

print("SESSION A OK")
print("Answer length:", len(answer_a.answer))
print("History messages:", len(history_a))
print("LTM hits_count:", ltm_search.get("hits_count"))
print("LTM debug:", ltm_search.get("debug"))
print("LTM hits:", len(ltm_search.get("hits") or []))


[intergraxVectorstoreManager] Upserting 1 items (dim=1536) to provider=chroma...
[intergraxVectorstoreManager] Upsert complete. New count: 1
[intergraxVectorstoreManager] Upserting 1 items (dim=1536) to provider=chroma...
[intergraxVectorstoreManager] Upsert complete. New count: 2
[intergraxVectorstoreManager] Upserting 1 items (dim=1536) to provider=chroma...
[intergraxVectorstoreManager] Upsert complete. New count: 3
[intergraxVectorstoreManager] Upserting 1 items (dim=1536) to provider=chroma...
[intergraxVectorstoreManager] Upsert complete. New count: 4
SESSION A OK
Answer length: 335
History messages: 2
LTM hits_count: None
LTM debug: {'enabled': True, 'used': True, 'reason': 'hits', 'where': {'user_id': 'user_chatgpt_like_001', 'deleted': 0}, 'top_k': 5, 'threshold': 0.0, 'raw_ids': ['8ae1799e8a50424ebaa502498b1b51b8', '6662c8be36c84eb3bb60790ba15e4924', '5f18da4f7b8e4548bdaffe0ca684855a', '3a9d6f3525cc4819aa8cdfab88560317'], 'raw_scores': [0.23801147937774658, -0.192785859107971

## Cell 3 — Session B: recall (ChatGPT behavior)

Goal:
- Start a new session_id (fresh history).
- Ask the system to recall facts from Session A using User LTM.
- Minimal asserts:
  1) Non-empty assistant answer
  2) trace shows User LTM was used
  3) Answer contains recalled facts/preferences
  4) Session B history is isolated from Session A


In [None]:
from intergrax.runtime.nexus.responses.response_schema import RuntimeRequest
from intergrax.runtime.nexus.tracing.memory.user_longterm_memory_summary import UserLongtermMemorySummaryDiagV1
from intergrax.runtime.nexus.tracing.trace_query import TraceQuery

runtime = build_runtime()

recall_prompt = (
    "Before we continue: remind me who I am and what I build. "
    "Also remind me what answer style I prefer."
)

request_b = RuntimeRequest(
    user_id=USER_ID,
    session_id=SESSION_B,
    message=recall_prompt,
)

answer_b = await runtime.run(request_b)

# 1) Non-empty answer
assert isinstance(answer_b.answer, str) and answer_b.answer.strip(), "Empty assistant answer in Session B."

# 1b) LLM usage must be present and non-empty
assert_llm_usage_basic(answer_b, require_adapters=["core_adapter"])

# 2) Debug evidence: User LTM used
q = TraceQuery.from_iter(answer_b.trace_events)

ltm = q.one_payload(UserLongtermMemorySummaryDiagV1)

# NOTE:
# This assertion is valid only if earlier cells have already persisted something to user LTM.
# If you run this cell standalone, it may fail.
assert ltm.enabled, f"Expected LTM enabled in Session B. diag={ltm.to_dict()}"
assert ltm.used_user_longterm_memory, f"Expected LTM to be used in Session B. diag={ltm.to_dict()}"
assert ltm.hits_count > 0, f"Expected LTM hits_count > 0 in Session B. diag={ltm.to_dict()}"

# 3) Behavior evidence: answer contains recalled facts/preferences
ans_norm = answer_b.answer.lower()
expected_any = [
    "artur",
    "intergrax",
    "mooff",
    "concise",
    "technical",
    "never use emojis",
    "no emojis",
]
assert any(k in ans_norm for k in expected_any), (
    "Expected the answer to include recalled facts/preferences from Session A. "
    f"Answer was:\n{answer_b.answer}"
)

# 4) Session history isolation sanity check
history_b = await session_manager.get_history(session_id=SESSION_B)
assert len(history_b) >= 2, f"Expected >=2 messages in Session B history, got {len(history_b)}."

# First user message in Session B should be the recall prompt (not Session A onboarding)
first_user_b = next((m for m in history_b if getattr(m, "role", None) == "user"), None)
assert first_user_b is not None, "Expected a user message in Session B history."
assert recall_prompt.strip() in (first_user_b.content or ""), "Session B history isolation issue."

print("SESSION B OK")
print("Answer length:", len(answer_b.answer))
print("LTM debug:", ltm_dbg)
print("History messages:", len(history_b))


SESSION B OK
Answer length: 294
LTM debug: {'enabled': True, 'used': True, 'reason': 'hits', 'where': {'user_id': 'user_chatgpt_like_001', 'deleted': 0}, 'top_k': 8, 'threshold': None, 'raw_ids': ['5f18da4f7b8e4548bdaffe0ca684855a', '3a9d6f3525cc4819aa8cdfab88560317', '8ae1799e8a50424ebaa502498b1b51b8', '6662c8be36c84eb3bb60790ba15e4924'], 'raw_scores': [-0.2620260715484619, -0.44957518577575684, -0.49666833877563477, -0.5566085577011108], 'raw_metadatas': [{'entry_id': '5f18da4f7b8e4548bdaffe0ca684855a', 'user_id': 'user_chatgpt_like_001', 'kind': 'session_summary', 'source': 'session_consolidation', 'deleted': 0, 'tags': 'session_summary'}, {'entry_id': '3a9d6f3525cc4819aa8cdfab88560317', 'deleted': 0, 'kind': 'preference', 'user_id': 'user_chatgpt_like_001', 'source': 'session_consolidation', 'tags': 'communication,tone'}, {'deleted': 0, 'tags': 'user,project', 'kind': 'user_fact', 'user_id': 'user_chatgpt_like_001', 'source': 'session_consolidation', 'entry_id': '8ae1799e8a50424eba

## Cell 4 — Ingestion + RAG (document Q&A)

Goal:
- Ingest a real document into the **RAG vectorstore** via the runtime attachment ingestion flow.
- Ask a question that can be answered only from the document.
- Ask a second question that requires **RAG + User LTM** at the same time.

Minimal asserts:
1) Ingestion step completed (debug evidence)
2) RAG was used (trace evidence: `used_rag` is true and `chunks_count > 0` in the RAG summary)
3) Answer is non-empty and includes document facts
4) For the combined question: both RAG and LTM were used


In [None]:
from intergrax.runtime.nexus.responses.response_schema import RuntimeRequest
from intergrax.llm.messages import AttachmentRef
from intergrax.runtime.nexus.ingestion.attachments import FileSystemAttachmentResolver
from intergrax.runtime.nexus.ingestion.ingestion_service import AttachmentIngestionService
from intergrax.runtime.nexus.tracing.attachments.attachments_context_summary import AttachmentsContextSummaryDiagV1
from intergrax.runtime.nexus.tracing.memory.user_longterm_memory_summary import UserLongtermMemorySummaryDiagV1
from intergrax.runtime.nexus.tracing.rag.rag_summary import RagSummaryDiagV1
from intergrax.runtime.nexus.tracing.trace_query import TraceQuery

# ---------------------------------------------------------------------
# Create a real document file to ingest
# The file is written under ARTIFACTS_DIR; later turns in this cell ask
# questions about its contents (key modules, codename).
# ---------------------------------------------------------------------
DOC_SESSION = "sess_chatgpt_like_RAG"

doc_path = ARTIFACTS_DIR / "rag_doc_001.md"
doc_text = """# Integrax — RAG Demo Document

This document is used for an E2E test of nexus Runtime.

Key modules:
- nexus Runtime
- User Long-Term Memory (LTM)
- Websearch

Important constants:
- The default max entries per LTM query is 8.
- The project codename for the demo is "NEBULA-11".

Behavior requirement:
- Answers must be concise and technical.
"""
doc_path.write_text(doc_text, encoding="utf-8")

attachment = AttachmentRef(
    id="rag_doc_001",
    type="md",
    uri=doc_path.as_uri(),          # file:// URI of the document; NOTE(review): an earlier comment said FileSystemAttachmentResolver also accepts a raw path — confirm
    metadata={"label": "RAG Demo Document"}
)

# ---------------------------------------------------------------------
# Build ingestion service (indexes into rag_vs)
# ---------------------------------------------------------------------
resolver = FileSystemAttachmentResolver()

ingestion_service = AttachmentIngestionService(
    resolver=resolver,
    embedding_manager=embed_manager,
    vectorstore_manager=rag_vs,     # IMPORTANT: documents go to RAG vectorstore
)

# The ingestion service is handed to the runtime through build_runtime's kwargs.
runtime = build_runtime(ingestion_service=ingestion_service)

# ---------------------------------------------------------------------
# Turn 1: Upload + ingestion
# ---------------------------------------------------------------------
request_ingest = RuntimeRequest(
    user_id=USER_ID,
    session_id=DOC_SESSION,
    message="I uploaded a document. Please ingest it and confirm.",
    attachments=[attachment],
)

answer_ingest = await runtime.run(request_ingest)

assert isinstance(answer_ingest.answer, str) and answer_ingest.answer.strip(), "Empty assistant answer after ingestion."

# LLM usage must be present and non-empty
assert_llm_usage_basic(answer_ingest, require_adapters=["core_adapter"])

q = TraceQuery.from_iter(answer_ingest.trace_events)

# All attachment-context payloads found in the trace.
# NOTE(review): this and `summary` below query the same payload type, so
# `summary` is expected to be one of `results` — confirm whether a separate
# per-attachment payload type was intended here.
results = q.all_payloads(AttachmentsContextSummaryDiagV1)
assert len(results) > 0, "Expected at least one AttachmentsContextSummaryDiagV1 payload."

# First payload of the same type, used as the summary.
summary = q.first_payload(AttachmentsContextSummaryDiagV1)
assert summary is not None, "Expected AttachmentsContextSummaryDiagV1 payload."

print("INGEST OK")
print("Ingestion summary:", summary.to_dict())
print("Ingestion results:")
for r in results:
    print("-", r.to_dict())
# ---------------------------------------------------------------------
# Turn 2: Pure document Q&A (RAG must be used)
# ---------------------------------------------------------------------
q_doc = "In the uploaded document: what is the demo codename and what are the three key modules?"

request_doc_qa = RuntimeRequest(
    user_id=USER_ID,
    session_id=DOC_SESSION,
    message=q_doc,
)

answer_doc = await runtime.run(request_doc_qa)

assert isinstance(answer_doc.answer, str) and answer_doc.answer.strip(), "Empty assistant answer in document Q&A."

# LLM usage must be present and non-empty
assert_llm_usage_basic(answer_doc, require_adapters=["core_adapter"])

q = TraceQuery.from_iter(answer_doc.trace_events)

rag = q.one_payload(RagSummaryDiagV1)
assert rag.rag_enabled, f"Expected RAG enabled. diag={rag.to_dict()}"
assert rag.used_rag, f"Expected RAG to be used. diag={rag.to_dict()}"
assert rag.chunks_count > 0, f"Expected chunks_count > 0. diag={rag.to_dict()}"

ans_doc_norm = answer_doc.answer.lower()
assert "nebula-11" in ans_doc_norm, "Expected the answer to include the codename from the document."
assert ("drop-in" in ans_doc_norm) or ("long-term" in ans_doc_norm) or ("websearch" in ans_doc_norm), (
    "Expected the answer to include at least one key module from the document."
)

print("DOC QA OK")
print("Answer length:", len(answer_doc.answer))
print("RAG chunks:", rag.chunks_count)

# ---------------------------------------------------------------------
# Turn 3: Combined question (RAG + LTM)
# - Must use RAG (document facts) and LTM (user preference / identity).
# ---------------------------------------------------------------------
q_combined = (
    "Using the uploaded document AND what you remember about me: "
    "write a concise technical answer that (1) states who I am and what I build, "
    "(2) lists the document's key modules, and (3) includes the codename."
)

request_combined = RuntimeRequest(
    user_id=USER_ID,
    session_id=DOC_SESSION,
    message=q_combined,
)

answer_combined = await runtime.run(request_combined)

assert isinstance(answer_combined.answer, str) and answer_combined.answer.strip(), "Empty assistant answer in combined RAG+LTM question."

# LLM usage must be present and non-empty
assert_llm_usage_basic(answer_combined, require_adapters=["core_adapter"])

q = TraceQuery.from_iter(answer_combined.trace_events)

# --- RAG evidence (typed) ---
rag = q.one_payload(RagSummaryDiagV1)
assert rag.rag_enabled, f"Expected RAG enabled. diag={rag.to_dict()}"
assert rag.used_rag, f"Expected RAG to be used in combined question. diag={rag.to_dict()}"
assert rag.chunks_count > 0, f"Expected chunks_count > 0. diag={rag.to_dict()}"

# --- LTM evidence (typed) ---
ltm = q.one_payload(UserLongtermMemorySummaryDiagV1)
assert ltm.enabled, f"Expected LTM enabled. diag={ltm.to_dict()}"
assert ltm.used_user_longterm_memory, f"Expected LTM to be used in combined question. diag={ltm.to_dict()}"
assert ltm.hits_count > 0, f"Expected LTM hits_count > 0. diag={ltm.to_dict()}"
assert ltm.context_blocks_count > 0, f"Expected LTM context_blocks_count > 0. diag={ltm.to_dict()}"
assert ltm.context_preview.strip(), f"Expected non-empty LTM context_preview. diag={ltm.to_dict()}"

# --- Behavior evidence: contains memory facts + doc codename ---
ans2 = answer_combined.answer.lower()
assert "artur" in ans2, "Expected the combined answer to include 'Artur' from LTM."
assert ("intergrax" in ans2) or ("mooff" in ans2), "Expected the combined answer to include Integrax/Mooff from LTM."
assert "nebula-11" in ans2, "Expected the combined answer to include the document codename."

print("COMBINED RAG+LTM OK")
print("Answer length:", len(answer_combined.answer))
print("RAG chunks:", rag.chunks_count)
print("LTM hits:", ltm.hits_count)
print("LTM blocks:", ltm.context_blocks_count)


[intergraxVectorstoreManager] Upserting 1 items (dim=1536) to provider=chroma...
[intergraxVectorstoreManager] Upsert complete. New count: 1
INGEST OK
Ingestion debug: [{'attachment_id': 'rag_doc_001', 'attachment_type': 'md', 'num_chunks': 1, 'vector_ids_count': 1, 'metadata': {'source_path': 'D:\\Projekty\\intergrax\\notebooks\\nexus\\_artifacts\\notebook_11_chatgpt_like\\20251226_091323_5de869a1\\rag_doc_001.md', 'session_id': 'sess_chatgpt_like_RAG', 'user_id': 'user_chatgpt_like_001', 'tenant_id': None, 'workspace_id': None}}]
DOC QA OK
Answer length: 281
RAG debug: {'enabled': True, 'used': True, 'hits_count': 1, 'where_filter': {'session_id': 'sess_chatgpt_like_RAG', 'user_id': 'user_chatgpt_like_001'}, 'top_k': 8, 'score_threshold': None, 'hits': [{'id': 'rag_doc_001-0', 'score': 0.9068, 'metadata': {'user_id': 'user_chatgpt_like_001', 'parent_id': '69bb64d5a70e4b5b', 'ext': '.md', 'attachment_id': 'rag_doc_001', 'session_id': 'sess_chatgpt_like_RAG', 'source_path': 'D:\\Projek

## Cell 5 — Websearch (current knowledge, no documents)

Goal:
- Ask a question that requires up-to-date knowledge.
- Ensure there are no documents in context (disable RAG for this cell).
- Verify that:
  1) Websearch was used (debug evidence)
  2) Websearch produced context blocks / sources
  3) The final answer references the retrieved web context (behavior evidence)

Minimal asserts:
- Non-empty assistant answer
- `used_websearch` is True in both the route and the websearch trace summary
- `context_blocks_count > 0` and `results_count > 0` in the websearch trace summary


In [None]:
from intergrax.runtime.nexus.responses.response_schema import RuntimeRequest
from intergrax.runtime.nexus.tracing.adapters.llm_usage_snapshot import LLMUsageSnapshotDiag
from intergrax.runtime.nexus.tracing.trace_query import TraceQuery
from intergrax.runtime.nexus.tracing.websearch.websearch_summary import WebsearchSummaryDiagV1
from intergrax.websearch.service.websearch_config import WebSearchConfig, WebSearchStrategyType

WEB_SESSION = "sess_chatgpt_like_WEB"

# Build a fully configured websearch config (no touching private runtime._config)
ws_cfg = WebSearchConfig(strategy=WebSearchStrategyType.MAP_REDUCE)
ws_cfg.llm.map_adapter = llm_adapter
ws_cfg.llm.reduce_adapter = llm_adapter
# optional:
# ws_cfg.llm.rerank_adapter = llm_adapter

runtime_web = build_runtime(
    override_config={
        "enable_rag": False,
        "enable_websearch": True,
        "websearch_config": ws_cfg,
    }
)

web_q = (
    "What are the most recent major changes to the OpenAI API regarding the Responses API "
    "and tool calling? Provide a concise technical summary with the date of the change."
)

request_web = RuntimeRequest(
    user_id=USER_ID,
    session_id=WEB_SESSION,
    message=web_q,
)

answer_web = await runtime_web.run(request_web)

# 1) Non-empty answer (runtime answered)
assert isinstance(answer_web.answer, str) and answer_web.answer.strip(), "Empty assistant answer in Websearch session."

# 1b) LLM usage must be present and non-empty
usage = assert_llm_usage_basic(answer_web, require_adapters=["core_adapter"])

adapters = usage.get("adapters") or {}
assert "web_map_adapter" in adapters, f"Missing web_map_adapter. present={list(adapters.keys())}"
assert "web_reduce_adapter" in adapters, f"Missing web_reduce_adapter. present={list(adapters.keys())}"
assert int((adapters["web_map_adapter"] or {}).get("calls", 0) or 0) >= 1, f"web_map_adapter.calls invalid: {adapters['web_map_adapter']}"
assert int((adapters["web_reduce_adapter"] or {}).get("calls", 0) or 0) >= 1, f"web_reduce_adapter.calls invalid: {adapters['web_reduce_adapter']}"

# 2) Routing says websearch was used
route = answer_web.route
assert route is not None, "Missing route in RuntimeAnswer."
assert route.used_websearch is True, f"Expected used_websearch=True. route={route}"

q = TraceQuery.from_iter(answer_web.trace_events)

# --- Websearch summary (typed) ---
ws = q.one_payload(WebsearchSummaryDiagV1)

assert ws.enabled is True, f"Expected websearch enabled. diag={ws.to_dict()}"
assert ws.configured is True, f"Expected websearch configured. diag={ws.to_dict()}"
assert ws.used_websearch is True, f"Expected used_websearch True. diag={ws.to_dict()}"

# 3) Websearch produced context blocks + preview
ctx_blocks = int(ws.context_blocks_count)
assert ctx_blocks > 0, f"Expected websearch context blocks > 0. diag={ws.to_dict()}"

preview = (ws.context_preview or "").strip()
assert preview, f"Expected non-empty websearch context preview. diag={ws.to_dict()}"

# Raw results preview is no longer available as a typed field.
# Replace it with a stable assertion on results_count.
assert int(ws.results_count) > 0, f"Expected websearch results_count > 0. diag={ws.to_dict()}"

# No evidence / errors should be false/empty in the happy path.
assert not ws.no_evidence, f"Expected no_evidence=False. diag={ws.to_dict()}"
assert (ws.error_type or "") == "", f"Expected no error_type. diag={ws.to_dict()}"
assert (ws.error_message or "") == "", f"Expected no error_message. diag={ws.to_dict()}"

# --- LLM usage (typed) ---
usage = q.first_payload(LLMUsageSnapshotDiag)
assert usage is not None, "Expected LLMUsageSnapshotDiag in trace events."

assert usage.calls >= 1, f"Expected usage.calls >= 1. diag={usage.to_dict()}"
assert usage.total_tokens == usage.input_tokens + usage.output_tokens, (
    f"Expected total_tokens == input_tokens + output_tokens. diag={usage.to_dict()}"
)

assert answer_web.answer is not None and answer_web.answer.strip() != "", "Expected a non-empty final answer"

print("WEBSEARCH OK")
print("Answer length:", len(answer_web.answer))
print("used_websearch (route):", route.used_websearch)
print("Websearch results_count:", ws.results_count)
print("Websearch blocks:", ctx_blocks)
print("Websearch preview chars:", ws.context_preview_chars)
print("Websearch no_evidence:", ws.no_evidence)
print("LLM usage calls:", usage.calls)
print("LLM usage total_tokens:", usage.total_tokens)

WEBSEARCH OK
Answer length: 393
used_websearch: True
Websearch blocks: 1
Websearch preview chars: 587
Websearch no_evidence: False
Websearch docs preview: [{'title': 'Strategie projektowania promptów \xa0|\xa0 Gemini API \xa0|\xa0 Google AI for Developers', 'url': 'https://ai.google.dev/gemini-api/docs/prompting-strategies?hl=pl'}, {'title': 'Java Jobs for December 2025 | Freelancer', 'url': 'https://www.freelancer.pl/jobs/java'}, {'title': 'Relation 2025 - Data&AI Warsaw Tech Summit | Data&AI Warsaw Tech Summit', 'url': 'https://dataiwarsaw.tech/relation-2025/'}, {'title': 'Promocja urodzinowa Ebookpoint 2025 – Informatyka | Świat Czytników', 'url': 'https://swiatczytnikow.pl/ebookpoint-informatyka/'}, {'title': 'Reddit - The heart of the internet', 'url': 'https://www.reddit.com/r/apple/comments/13ajz6z/boring_report_a_news_app_that_uses_ai_language/'}]
Websearch raw preview top urls: ['https://ai.google.dev/gemini-api/docs/prompting-strategies?hl=pl', 'https://www.freelancer.pl/jobs

In [9]:
# Display the debug object as the cell result.
# NOTE(review): `dbg` is not assigned in any cell shown above — presumably a
# leftover from an earlier kernel state; confirm or define it before this cell.
dbg

{'session_id': 'sess_chatgpt_like_WEB',
 'user_id': 'user_chatgpt_like_001',
 'memory_layer': {'implemented': True,
  'has_user_profile_instructions': True,
  'has_org_profile_instructions': False,
  'enable_user_profile_memory': True,
  'enable_org_profile_memory': False},
 'steps': [{'timestamp': '2025-12-26T09:14:12.772073+00:00',
   'component': 'engine',
   'step': 'memory_layer',
   'message': 'Profile-based instructions loaded for session.',
   'data': {'has_user_profile_instructions': True,
    'has_org_profile_instructions': False,
    'enable_user_profile_memory': True,
    'enable_org_profile_memory': False}},
  {'timestamp': '2025-12-26T09:14:12.772073+00:00',
   'component': 'engine',
   'step': 'history',
   'message': 'Conversation history built for LLM.',
   'data': {'history_length': 1,
    'base_history_length': 1,
    'history_includes_current_user': True}},
  {'timestamp': '2025-12-26T09:14:12.772073+00:00',
   'component': 'reasoning',
   'step': 'apply_reasoning_t