# © Artur Czarnecki. All rights reserved.
# Intergrax framework – proprietary and confidential.
# Use, modification, or distribution without written permission is prohibited.

# 06 — Session & Memory Roundtrip (Configurable, Real Adapters)

Goals:
- Verify that DropInKnowledgeRuntime can:
  - create a new session (when session_id is None),
  - reuse an existing session (when session_id is provided),
  - persist and load conversation history via SessionManager.get_history(...),
  - produce a consistent debug_trace["steps"].

Notes:
- This notebook uses real adapters (no fakes, no fallbacks).
- Edit only the "Runtime configuration" cell to switch LLM / embeddings / vector store implementations.
- RAG / websearch / tools are disabled in this notebook to keep the baseline stable.


In [1]:
import os
import sys

# Prepend the directory two levels above the current working directory to the
# import path (presumably the repository root, so that `intergrax` can be
# imported without installing it — TODO confirm).
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir)),
)

In [None]:
# ==========================================================
# RUNTIME CONFIGURATION
# ==========================================================

from intergrax.llm_adapters.base import LLMAdapterRegistry, LLMProvider
from intergrax.rag.embedding_manager import EmbeddingManager
from intergrax.rag.vectorstore_manager import VSConfig, VectorstoreManager
from intergrax.runtime.drop_in_knowledge_mode.config import RuntimeConfig
from intergrax.runtime.drop_in_knowledge_mode.engine.runtime import DropInKnowledgeRuntime
from intergrax.runtime.drop_in_knowledge_mode.session.in_memory_session_storage import InMemorySessionStorage
from intergrax.runtime.drop_in_knowledge_mode.session.session_manager import SessionManager

# --- Adapter ---
# LLM adapter obtained from the adapter registry for the Ollama provider.
llm_adapter = LLMAdapterRegistry.create(LLMProvider.OLLAMA)


# --- Embeddings ---
# Embedding manager configured with provider="ollama" and verbose=True.
embed_manager = EmbeddingManager(
    verbose=True,
    provider="ollama")


# --- Vector store ---
# Vector store manager configured for the "chroma" provider and the
# "intergrax_docs" collection, with verbose=True.
vectorstore_manager = VectorstoreManager(
    config = VSConfig(
        provider="chroma",
        collection_name="intergrax_docs"
    ),
    verbose=True
)


# In-memory session storage for notebook-level tests
# (the same `session_manager` instance is later queried directly via get_history).
storage = InMemorySessionStorage()
session_manager = SessionManager(storage=storage)

# Runtime config (baseline: optional layers disabled)
# RAG and websearch are switched off and tools_mode is "off"; the embedding and
# vector store managers are still handed to the config.
config = RuntimeConfig(
    llm_adapter=llm_adapter,
    embedding_manager=embed_manager,
    vectorstore_manager=vectorstore_manager,
    enable_rag=False,
    enable_websearch=False,
    tools_mode="off"
)

# Drop-In runtime (baseline wiring)
# Every optional collaborator is passed as None — presumably the runtime then
# falls back to its built-in defaults (TODO confirm against DropInKnowledgeRuntime).
runtime = DropInKnowledgeRuntime(
    config=config,
    session_manager=session_manager,
    ingestion_service=None,
    context_builder=None,
    rag_prompt_builder=None,
    websearch_prompt_builder=None,
    history_prompt_builder=None,
)



[intergraxVectorstoreManager] Initialized provider=chroma, collection=intergrax_docs
[intergraxVectorstoreManager] Existing count: 0


## Helpers

Utility functions used in this notebook:
- printing session history,
- printing runtime debug trace,
- basic assertions for sanity checks.

These helpers do not depend on runtime configuration.


In [3]:
from intergrax.llm.messages import ChatMessage


def print_history(messages: list[ChatMessage], title: str) -> None:
    """Print a titled, numbered dump of a conversation history.

    Args:
        messages: Messages to display, in order. Each item must expose
            ``role`` and ``content``; ``created_at`` is optional.
        title: Label shown in the section banner.
    """
    print(f"\n=== {title} ===")
    print("Messages count:", len(messages))
    for i, msg in enumerate(messages, start=1):
        # created_at is optional in some message implementations: the attribute
        # may be missing altogether, not merely empty, so look it up defensively.
        created_str = getattr(msg, "created_at", None) or "n/a"
        print(f"  [{i}] role={msg.role!r}, created_at={created_str}")
        print(f"      content={msg.content!r}")


def print_debug_steps(answer, limit: int = 100) -> None:
    """Print the ``steps`` entries recorded in ``answer.debug_trace``.

    Args:
        answer: Object exposing a ``debug_trace`` attribute (a dict, or a
            falsy value when no trace is available).
        limit: Maximum number of steps to list; the printed count always
            reflects the full number of recorded steps.
    """
    recorded = (answer.debug_trace or {}).get("steps", []) or []
    print("\n=== DEBUG TRACE STEPS ===")
    print("Steps count:", len(recorded))

    position = 0
    for entry in recorded[:limit]:
        position += 1
        # timestamp / component / step are mandatory keys; message is optional.
        row = f"  [{position}] {entry['timestamp']} | {entry['component']} | {entry['step']} | {entry.get('message', '')}"
        print(row)


def assert_true(cond: bool, message: str) -> None:
    """Raise ``AssertionError(message)`` unless ``cond`` is truthy."""
    if cond:
        return
    raise AssertionError(message)


## Test 1 — First request (session creation)

Expected behavior:
- A new session is created when session_id is None.
- The response contains a valid answer.
- debug_trace contains a session_id.
- Session history contains at least:
  - one user message,
  - one assistant message.


In [4]:
from intergrax.runtime.drop_in_knowledge_mode.responses.response_schema import RuntimeRequest

async def run_first_request():
    """Run the opening turn of the roundtrip test.

    Builds a RuntimeRequest for ``user_demo`` with ``session_id=None`` and
    returns whatever ``runtime.run`` produces for it.
    """
    opening_request = RuntimeRequest(
        user_id="user_demo",
        session_id=None,
        message="Hello. This is a basic session and memory roundtrip test.",
        instructions=None,
        attachments=None,
        metadata={},
    )
    answer = await runtime.run(opening_request)
    return answer


answer_1 = await run_first_request()

print("=== ANSWER 1 ===")
print(answer_1.answer)

print_debug_steps(answer_1)

# Validate session creation
session_id = answer_1.debug_trace["session_id"]
assert_true(bool(session_id), "Expected session_id in debug_trace.")
print("\nSession ID:", session_id)


=== ANSWER 1 ===
Sounds like you're testing the limits of our conversation capabilities. I'm ready when you are. What's the first part of the test?

=== DEBUG TRACE STEPS ===
Steps count: 5
  [1] 2025-12-12T13:32:48.606544+00:00 | engine | memory_layer | Profile-based instructions loaded for session.
  [2] 2025-12-12T13:32:50.888019+00:00 | engine | history | Conversation history built for LLM.
  [3] 2025-12-12T13:32:56.374440+00:00 | engine | core_llm | Core LLM adapter returned a plain string.
  [4] 2025-12-12T13:32:56.374440+00:00 | engine | persist_and_build_answer | Assistant answer persisted and RuntimeAnswer built.
  [5] 2025-12-12T13:32:56.374440+00:00 | engine | run_end | DropInKnowledgeRuntime.run() finished.

Session ID: 31bafa1b-aa81-4a1c-8eba-42508f35a807


## Test 2 — Reload session history

Goal:
- Reload conversation history using an existing `session_id`
- Verify that the history was correctly persisted
- Assert a minimum number of messages (>= 2)

Scope:
- SessionManager.get_history(session_id)
- No runtime.run invocation
- Pure session storage validation


In [5]:
# Test 2 — Reload session history

# Ensure session_id exists from Test 1
assert session_id is not None, "session_id must not be None after Test 1"

# Load history directly from SessionManager
history = await session_manager.get_history(session_id)

# --- Assertions ---

assert history is not None, "History must not be None"
assert isinstance(history, list), "History must be a list"
assert len(history) >= 2, f"Expected at least 2 messages, got {len(history)}"

# --- Debug output ---

print(f"Session ID: {session_id}")
print(f"History length: {len(history)}")

for index, message in enumerate(history):
    print(
        f"[{index}] "
        f"role={message.role} "
        f"content={message.content[:60]!r}"
    )


Session ID: 31bafa1b-aa81-4a1c-8eba-42508f35a807
History length: 2
[0] role=user content='Hello. This is a basic session and memory roundtrip test.'
[1] role=assistant content="Sounds like you're testing the limits of our conversation ca"


## Test 3 — Second request using the same session

Goal:
- Send a second request using the existing `session_id`
- Verify that the conversation history is extended within the same session

Scope:
- runtime.run(RuntimeRequest)
- Validate history growth (>= 2 messages added)
- Baseline only (no RAG, tools, websearch, CoT)


In [9]:
# Test 3 — Second request using the same session (RuntimeRequest-based)

# Load history length before the second request
history_before = await session_manager.get_history(session_id)

assert history_before is not None, "Preloaded history must not be None"
assert isinstance(history_before, list), "Preloaded history must be a list"
assert len(history_before) >= 2, f"Expected at least 2 messages before Test 3, got {len(history_before)}"

before_len = len(history_before)

# Build a RuntimeRequest using the existing session_id
request = RuntimeRequest(
    user_id="user_demo",
    session_id=session_id,
    message="This is the second message in the same session.",
)

# Run the second request
second_answer = await runtime.run(request)

# Reload history after the second request
history_after = await session_manager.get_history(session_id)

assert history_after is not None, "Reloaded history must not be None"
assert isinstance(history_after, list), "Reloaded history must be a list"

after_len = len(history_after)

# We expect at least 2 new messages: user + assistant
assert after_len >= before_len + 2, (
    f"Expected history length to increase by at least 2. "
    f"Before={before_len}, After={after_len}"
)

# Sanity check for the last two messages
last_user = history_after[-2]
last_assistant = history_after[-1]

assert last_user.role == "user", f"Expected the penultimate message to be 'user', got {last_user.role!r}"
assert last_assistant.role == "assistant", f"Expected the last message to be 'assistant', got {last_assistant.role!r}"

# Debug output
print(f"Session ID (input): {session_id}")
print(f"History length: before={before_len}, after={after_len}")
print("Last 2 messages:")
print(f"[-2] role={last_user.role} content={last_user.content[:80]!r}")
print(f"[-1] role={last_assistant.role} content={last_assistant.content[:80]!r}")

# Debug trace output (if your RuntimeAnswer includes it as a known field)
# If your RuntimeAnswer has a different field name, adjust it here.
if hasattr(second_answer, "debug_trace") and second_answer.debug_trace:
    print("Debug trace (second request):")
    for step in second_answer.debug_trace:
        print(step)


Session ID (input): 31bafa1b-aa81-4a1c-8eba-42508f35a807
History length: before=2, after=4
Last 2 messages:
[-2] role=user content='This is the second message in the same session.'
[-1] role=assistant content='And this is my response to that second message. How does the session look so far'
Debug trace (second request):
session_id
user_id
config
memory_layer
steps
base_history_length
history_tokens
history_length
instructions
rag_chunks


## Test 4 — Debug trace integrity (dict-based)

Goal:
- Validate that `debug_trace` is a dictionary and contains required diagnostic keys
- Ensure the baseline runtime emits a stable debug contract

Scope:
- Validate `RuntimeAnswer.debug_trace` structure
- Assert presence of required keys
- Perform basic type checks for critical fields
- Baseline only (no RAG, tools, websearch, CoT)


In [13]:
# Test 4 — Debug trace integrity (dict-based)

# Preconditions: Test 3 must have produced an answer carrying a non-empty dict trace
assert second_answer is not None, "second_answer must not be None"
assert second_answer.debug_trace is not None, "debug_trace must not be None"
assert isinstance(second_answer.debug_trace, dict), "debug_trace must be a dict"
assert len(second_answer.debug_trace) > 0, "debug_trace must not be empty"

trace = second_answer.debug_trace

# Minimal debug contract the baseline runtime is required to emit
required_keys = [
    "session_id",
    "user_id",
    "config",
    "memory_layer",
    "steps",
    "base_history_length",
    "history_tokens",
    "history_length",
    "instructions",
    "rag_chunks",
]

missing = [name for name in required_keys if name not in trace]
assert not missing, f"Missing required debug_trace keys: {missing}"

# Type checks for critical fields (strict but reasonable).
# Identifier fields must be non-empty strings.
for id_field in ("session_id", "user_id"):
    assert isinstance(trace[id_field], str) and trace[id_field], f"debug_trace['{id_field}'] must be a non-empty string"

assert isinstance(trace["steps"], list), "debug_trace['steps'] must be a list"

# Length counters must be integers.
for counter_field in ("base_history_length", "history_length"):
    assert isinstance(trace[counter_field], int), f"debug_trace['{counter_field}'] must be an int"

# NOTE: history_tokens and rag_chunks are checked for presence only; their
# value types are not asserted in this cell.

print("Debug trace integrity: PASS")
print(f"Keys count: {len(trace)}")
print("Key summary:")
for key in required_keys:
    value = trace[key]
    type_name = type(value).__name__
    # Scalars are shown verbatim, containers by size, anything else by type name.
    if value is None or isinstance(value, (str, int)):
        summary = value
    elif isinstance(value, list):
        summary = "list(len={})".format(len(value))
    elif isinstance(value, dict):
        summary = "dict(keys={})".format(len(value))
    else:
        summary = type_name
    print("- {}: {} ({})".format(key, summary, type_name))


Debug trace integrity: PASS
Keys count: 10
Key summary:
- session_id: 31bafa1b-aa81-4a1c-8eba-42508f35a807 (str)
- user_id: user_demo (str)
- config: dict(keys=3) (dict)
- memory_layer: dict(keys=6) (dict)
- steps: list(len=5) (list)
- base_history_length: 3 (int)
- history_tokens: dict(keys=9) (dict)
- history_length: 3 (int)
- instructions: dict(keys=2) (dict)
- rag_chunks: 0 (int)


## Test 5 — Trace vs session history consistency (safe)

Goal:
- Compare `debug_trace["history_length"]` with the persisted session history length
- Detect whether `history_length` refers to persisted session messages or to model-input messages

Scope:
- session_manager.get_history(session_id)
- debug_trace dictionary
- No hard failure on mismatch (report + TODO), because semantics may differ by design


In [14]:
# Test 5 — Trace vs session history consistency (safe)

assert second_answer is not None, "second_answer must not be None"
assert second_answer.debug_trace is not None, "debug_trace must not be None"
assert isinstance(second_answer.debug_trace, dict), "debug_trace must be a dict"

trace = second_answer.debug_trace

# Reload persisted history
persisted_history = await session_manager.get_history(session_id)

assert persisted_history is not None, "Persisted history must not be None"
assert isinstance(persisted_history, list), "Persisted history must be a list"
assert len(persisted_history) > 0, "Persisted history must not be empty"

persisted_len = len(persisted_history)
trace_history_len = trace.get("history_length")

assert isinstance(trace_history_len, int), "debug_trace['history_length'] must be an int"

print(f"Persisted session history length: {persisted_len}")
print(f"debug_trace['history_length']:     {trace_history_len}")

if persisted_len == trace_history_len:
    print("Consistency check: PASS (history_length matches persisted session history length)")
else:
    print("Consistency check: MISMATCH (not failing)")
    print("TODO: Confirm the intended meaning of debug_trace['history_length'].")
    print("      It may represent the number of messages sent to the model rather than persisted session messages.")

# Optional: show a compact tail of persisted history for manual verification
tail = persisted_history[-4:] if persisted_len >= 4 else persisted_history
print("Persisted history tail:")
for i, msg in enumerate(tail, start=persisted_len - len(tail)):
    print(f"[{i}] role={msg.role} content={msg.content[:80]!r}")


Persisted session history length: 4
debug_trace['history_length']:     3
Consistency check: MISMATCH (not failing)
TODO: Confirm the intended meaning of debug_trace['history_length'].
      It may represent the number of messages sent to the model rather than persisted session messages.
Persisted history tail:
[0] role=user content='Hello. This is a basic session and memory roundtrip test.'
[1] role=assistant content="Sounds like you're testing the limits of our conversation capabilities. I'm read"
[2] role=user content='This is the second message in the same session.'
[3] role=assistant content='And this is my response to that second message. How does the session look so far'


## Test 6 — Determine the meaning of debug_trace["history_length"]

Goal:
- Determine what `debug_trace["history_length"]` represents by comparing it against
  the persisted session history length and simple derived candidates.

Hypotheses:
- H1: history_length == persisted_len - 1  (history sent to the model before persisting the new assistant message)
- H2: history_length == persisted_len      (persisted session message count)
- H3: history_length == persisted_len - 2  (history before adding the current user message)


In [15]:
# Test 6 — Determine the meaning of debug_trace["history_length"]

trace = second_answer.debug_trace
assert isinstance(trace, dict), "debug_trace must be a dict"
assert "history_length" in trace, "debug_trace must contain 'history_length'"
assert isinstance(trace["history_length"], int), "debug_trace['history_length'] must be an int"

persisted_history = await session_manager.get_history(session_id)
assert isinstance(persisted_history, list), "Persisted history must be a list"

persisted_len = len(persisted_history)
trace_len = trace["history_length"]

candidates = {
    "persisted_len": persisted_len,
    "persisted_len_minus_1": persisted_len - 1,
    "persisted_len_minus_2": persisted_len - 2,
}

print(f"Persisted length: {persisted_len}")
print(f"trace['history_length']: {trace_len}")

matches = [name for name, value in candidates.items() if value == trace_len]

if matches:
    print(f"Match: {matches}")
else:
    print("No direct match found.")
    print("TODO: Inspect how history_length is computed inside HistoryLayer / prompt builder.")

# Also print base_history_length for context
if "base_history_length" in trace:
    print(f"trace['base_history_length']: {trace['base_history_length']}")


Persisted length: 4
trace['history_length']: 3
Match: ['persisted_len_minus_1']
trace['base_history_length']: 3


## Test 7 — Strict invariants: trace vs persisted history

Goal:
- Assert strict invariants between persisted session history and debug trace

Invariants (baseline):
- persisted_len == trace["history_length"] + 1
  (persisted history includes the newly generated assistant message)
- trace["base_history_length"] == trace["history_length"]
  (baseline uses base history as the model input history)


In [16]:
# Test 7 — Strict invariants: trace vs persisted history

trace = second_answer.debug_trace
assert isinstance(trace, dict), "debug_trace must be a dict"

required = ["history_length", "base_history_length"]
for key in required:
    assert key in trace, f"debug_trace must contain '{key}'"
    assert isinstance(trace[key], int), f"debug_trace['{key}'] must be an int"

persisted_history = await session_manager.get_history(session_id)
assert isinstance(persisted_history, list), "Persisted history must be a list"
assert len(persisted_history) > 0, "Persisted history must not be empty"

persisted_len = len(persisted_history)
history_len = trace["history_length"]
base_len = trace["base_history_length"]

# Invariant 1: persisted history includes the new assistant message
assert persisted_len == history_len + 1, (
    f"Invariant failed: persisted_len must equal history_length + 1. "
    f"persisted_len={persisted_len}, history_length={history_len}"
)

# Invariant 2: baseline uses base history as model input history
assert base_len == history_len, (
    f"Invariant failed: base_history_length must equal history_length in baseline. "
    f"base_history_length={base_len}, history_length={history_len}"
)

print("Strict invariants: PASS")
print(f"persisted_len={persisted_len}, history_length={history_len}, base_history_length={base_len}")


Strict invariants: PASS
persisted_len=4, history_length=3, base_history_length=3
