# Context Engineering Dashboard



```bash
# Install from GitHub

pip install git+https://github.com/cp71-dlai/context-engineering-dashboard.git@v0.2.0
```

In [None]:
# context engineering dashboard 

from context_engineering_dashboard import (
    ComponentType,
    ContextComponent,
    ContextTrace,
    ContextBuilder,
    ContextResource, 
    ResourceType,
    ContextDiff
)

# 3rd party integrations

from context_engineering_dashboard import (
    trace_openai, 
    trace_litellm
)

In [None]:
# Load environment variables from a local .env file so that later cells can
# pick up credentials (e.g. the OPENAI_API_KEY used by the OpenAI client).

import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file and adds its entries to the process environment

## 1 | Build a trace by hand

A **ContextTrace** is the core data structure. It holds a list of
**ContextComponents** (system prompt, user message, RAG docs, etc.)
and the model's context-window limit.

In [None]:
from context_engineering_dashboard import (
    ComponentType,
    ContextComponent,
    ContextTrace,
    ContextBuilder,
)

# Describe each piece of the context window as its own named component.
system_prompt = ContextComponent(
    "sys",
    ComponentType.SYSTEM_PROMPT,
    "You are a helpful coding assistant.",
    token_count=500,
)
rag_embeddings = ContextComponent(
    "rag1",
    ComponentType.RAG,
    "ChromaDB stores embeddings for semantic search.",
    token_count=4200,
    metadata={"score": 0.93},
)
rag_collections = ContextComponent(
    "rag2",
    ComponentType.RAG,
    "Collections group related documents together.",
    token_count=2800,
    metadata={"score": 0.85},
)
chat_history = ContextComponent(
    "hist",
    ComponentType.CHAT_HISTORY,
    "User previously asked about installation.",
    token_count=1100,
)
user_message = ContextComponent(
    "user",
    ComponentType.USER_MESSAGE,
    "How do I query a Chroma collection?",
    token_count=350,
)

# The order of this list is the order the components are handed to the trace.
components = [system_prompt, rag_embeddings, rag_collections, chat_history, user_message]

# total_tokens is derived from the components rather than typed in by hand.
token_total = sum(component.token_count for component in components)
trace = ContextTrace(
    context_limit=128_000,
    components=components,
    total_tokens=token_total,
)

print(f"Tokens: {trace.total_tokens:,} / {trace.context_limit:,}  ({trace.utilization:.1f}% used)")

## 2 | Visualize the context window

`ContextBuilder` renders an interactive HTML widget right inside the notebook.
Each colored block represents one component, sized proportionally to its token count.

In [None]:
# Visualize the trace -- hover blocks for details, click to view content.
# The call is left as the cell's final bare expression so that the notebook
# displays the object it returns.
ContextBuilder(trace=trace)

## 3 | Trace an OpenAI call

Wrap any `openai` call in `trace_openai()`. The tracer captures messages,
token usage, latency, and the response -- then builds the trace for you.

In [None]:
from openai import OpenAI

client = OpenAI()  # uses OPENAI_API_KEY from environment

# The OpenAI call is made inside the `with` block so the tracer can record it.
with trace_openai() as tracer:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a concise technical writer."},
            {"role": "user",   "content": "Explain what context engineering is and why it matters for LLM applications. Keep it to 3 sentences."},
        ],
        temperature=0.7,
    )

print("Response:", response.choices[0].message.content)
print()

# The captured trace is read from the tracer once the `with` block has exited.
openai_trace = tracer.result

# Read usage with .get() and a '?' fallback -- the same way the LiteLLM cell
# does -- so a missing usage field prints a placeholder instead of raising
# KeyError in the middle of the walkthrough.
print(f"Prompt tokens: {openai_trace.trace.usage.get('prompt_tokens', '?')}")
print(f"Completion tokens: {openai_trace.trace.usage.get('completion_tokens', '?')}")
print(f"Latency: {openai_trace.trace.latency_ms:.0f} ms")

In [None]:
# Visualize the captured trace -- click components to view, click text to edit.
# The call is left as the cell's final bare expression so that the notebook
# displays the object it returns.
ContextBuilder(trace=openai_trace)

---
## 4 | Trace a LiteLLM call

LiteLLM provides a unified API for 100+ LLM providers. The `trace_litellm()` 
tracer captures calls regardless of which backend you use.

In [None]:
import litellm

# The conversation is defined up front so the traced call below stays short.
messages = [
    {"role": "system", "content": "You are an expert on context engineering."},
    {"role": "user", "content": "What are the key components of an LLM context window? Answer in 2-3 sentences."},
]

# Using OpenAI via LiteLLM (bare model name defaults to OpenAI)
with trace_litellm() as tracer:
    response = litellm.completion(model="gpt-4o", messages=messages, temperature=0.7)

reply = response.choices[0].message.content
print("Response:", reply)

In [None]:
# Pull the captured trace out of the tracer and report the call's metadata
litellm_trace = tracer.result
call_info = litellm_trace.trace
token_usage = call_info.usage

print(f"Provider:    {call_info.provider}")
print(f"Model:       {call_info.model}")
# '?' is printed when a usage field was not recorded
print(f"Prompt:      {token_usage.get('prompt_tokens', '?')} tokens")
print(f"Completion:  {token_usage.get('completion_tokens', '?')} tokens")
print(f"Latency:     {call_info.latency_ms:.0f} ms")

In [None]:
# Visualize the context window of the LiteLLM call.
# The call is left as the cell's final bare expression so that the notebook
# displays the object it returns.
ContextBuilder(trace=litellm_trace)

LiteLLM uses the `provider/model-name` format to route to different backends:

| Model String | Provider |
|---|---|
| `gpt-4o` | OpenAI (default) |
| `anthropic/claude-3-opus` | Anthropic |
| `azure/gpt-4` | Azure OpenAI |
| `bedrock/anthropic.claude-v2` | AWS Bedrock |
| `gemini/gemini-pro` | Google |

The tracer automatically extracts the provider name for visualization.

## 5 | Build a resource by hand

**ContextResource** represents a pool of items (RAG documents, examples, tools, etc.)
that can be selected for inclusion in the context window. Use it to manage
what content is available vs. what actually goes into the LLM call.

In [None]:
# Create a trace that holds only the non-RAG parts of the prompt:
# (id, component type, content, token count) for each component.
base_specs = [
    ("sys",  ComponentType.SYSTEM_PROMPT, "You are a helpful coding assistant.", 500),
    ("hist", ComponentType.CHAT_HISTORY,  "User previously asked about installation.", 1100),
    ("user", ComponentType.USER_MESSAGE,  "How do I query a Chroma collection?", 350),
]

components = [
    ContextComponent(component_id, kind, text, token_count=tokens)
    for component_id, kind, text, tokens in base_specs
]

# total_tokens is derived from the components rather than typed in by hand.
token_total = sum(component.token_count for component in components)
trace = ContextTrace(
    context_limit=128_000,
    components=components,
    total_tokens=token_total,
)


In [None]:
# Candidate documents: each entry carries an id, its content, and a score.
doc_items = [
    {"id": "doc_1", "content": "ChromaDB is an open-source embedding database for AI applications.", "score": 0.95},
    {"id": "doc_2", "content": "Collections in Chroma store documents with their embeddings.", "score": 0.88},
    {"id": "doc_3", "content": "Query with collection.query(query_texts=['...'], n_results=10).", "score": 0.82},
    {"id": "doc_4", "content": "Metadata filtering: use where={'field': 'value'} in queries.", "score": 0.75},
    {"id": "doc_5", "content": "Chroma supports persistent storage with PersistentClient.", "score": 0.70},
]

# Create a resource pool from the list of documents
rag_docs = ContextResource.from_items(
    items=doc_items,
    resource_type=ResourceType.RAG,
    name="Documentation",
)

# Select the top 3 documents for inclusion
chosen_ids = ["doc_1", "doc_2", "doc_3"]
rag_docs.select(chosen_ids)

# Report the size of the pool versus what was selected from it
item_count = len(rag_docs.items)
selected_count = len(rag_docs.selected_ids)
print(f"Resource: {rag_docs.name}")
print(f"Total items: {item_count}")
print(f"Selected: {selected_count}")
print(f"Selected tokens: {rag_docs.total_selected_tokens}")

In [None]:
# Turn the selected pool items into ContextComponents that a trace can hold
rag_components = rag_docs.to_components()

# One line per component: id, token count, and component type
for component in rag_components:
    kind = component.type.value
    print(f"  {component.id}: {component.token_count} tokens, type={kind}")

In [None]:
# Visualize with resources panel showing available vs. selected
# The left panel shows ALL items; the right shows what's in the context
# The call is left as the cell's final bare expression so that the notebook
# displays the object it returns.
ContextBuilder(trace=trace, resources=[rag_docs])

---
## 5 | Chroma Integration

`ContextResource.from_chroma()` wraps a Chroma collection so you can query it
and manage document selection for your context window.

In [None]:
import chromadb

# NOTE: this rebinds `client`, which earlier in the notebook held the OpenAI client.
client = chromadb.Client()

collection_name = "context_eng_docs"
collection_metadata = {"description": "Context engineering reference docs"}
collection = client.get_or_create_collection(
    name=collection_name,
    metadata=collection_metadata,
)

# Populate with realistic documentation chunks
doc_data = [
    {
        "id": "ce_overview",
        "text": (
            "Context engineering is the discipline of designing and optimizing the information "
            "provided to a large language model within its context window. Unlike prompt engineering, "
            "which focuses on instruction phrasing, context engineering considers the entire input."
        ),
        "meta": {"section": "overview", "page": 1},
    },
    {
        "id": "ce_rag_best",
        "text": (
            "RAG best practices: (1) Retrieve more than you need, then re-rank and prune. "
            "(2) Prefer smaller, focused chunks (200-400 tokens) over large passages. "
            "(3) Include metadata (source, date, score) so the model can weigh relevance."
        ),
        "meta": {"section": "rag", "page": 7},
    },
    {
        "id": "ce_tools",
        "text": (
            "Tool integration patterns: Function calling lets the model invoke external APIs. "
            "Each tool definition consumes tokens from the context window. Best practices: "
            "Only include tools relevant to the current task. Keep descriptions concise."
        ),
        "meta": {"section": "tools", "page": 18},
    },
]

# Split the records into the parallel lists that collection.add() takes
doc_ids = [entry["id"] for entry in doc_data]
doc_texts = [entry["text"] for entry in doc_data]
doc_metas = [entry["meta"] for entry in doc_data]
collection.add(ids=doc_ids, documents=doc_texts, metadatas=doc_metas)

print(f"Collection '{collection.name}' has {collection.count()} documents")

In [None]:
# The question that drives retrieval in this cell
user_question = "What are the best practices for RAG?"

# Create a ContextResource from a Chroma collection
rag_resource = ContextResource.from_chroma(
    collection=collection,
    resource_type=ResourceType.RAG,
    name="Documentation",
)

# Query the resource (queries the underlying Chroma collection)
rag_resource.query(query_texts=[user_question], n_results=3)

print(f"Query: '{user_question}'\n")

# List what came back: id, score, and token count per document
retrieved = rag_resource.items
print(f"Retrieved {len(retrieved)} documents:")
for doc in retrieved:
    print(f"  [{doc.id}] score={doc.score:.3f}, {doc.token_count} tokens")

In [None]:
# Select top 2 documents and visualize with resource pool
top_ids = [doc.id for doc in rag_resource.items[:2]]
rag_resource.select(top_ids)

# Fixed parts of the prompt; their token counts (5 + 10) are summed below
# rather than written out as a separate literal.
base_components = [
    ContextComponent("sys", ComponentType.SYSTEM_PROMPT, "You are helpful.", token_count=5),
    ContextComponent("user", ComponentType.USER_MESSAGE, user_question, token_count=10),
]
base_tokens = sum(component.token_count for component in base_components)

# Build a trace with the selected documents
chroma_trace = ContextTrace(
    context_limit=128_000,
    components=base_components + rag_resource.to_components(),
    total_tokens=base_tokens + rag_resource.total_selected_tokens,
)

# Show available pool (left) vs context (right)
ContextBuilder(trace=chroma_trace, resources=[rag_resource])

## 6 | Explore the effect of compaction with a Sankey diff

Imagine you refactored a prompt: trimmed chat history and dropped a RAG doc.
`ContextDiff` shows token flow between the two versions.

In [None]:
# "Before" -- verbose prompt (38,400 tokens in total)
before_components = [
    ContextComponent("sys",  ComponentType.SYSTEM_PROMPT, "...", token_count=3000),
    ContextComponent("hist", ComponentType.CHAT_HISTORY,  "...", token_count=18000),
    ContextComponent("rag1", ComponentType.RAG,  "...", token_count=12000),
    ContextComponent("rag2", ComponentType.RAG,  "...", token_count=5000),
    ContextComponent("user", ComponentType.USER_MESSAGE,  "...", token_count=400),
]
before = ContextTrace(
    context_limit=128_000,
    components=before_components,
    total_tokens=sum(component.token_count for component in before_components),
)

# "After" -- compacted prompt (summarized history, dropped low-score doc;
# 21,400 tokens in total)
after_components = [
    ContextComponent("sys",  ComponentType.SYSTEM_PROMPT, "...", token_count=3000),
    ContextComponent("hist", ComponentType.CHAT_HISTORY,  "...", token_count=6000),
    ContextComponent("rag1", ComponentType.RAG,  "...", token_count=12000),
    ContextComponent("user", ComponentType.USER_MESSAGE,  "...", token_count=400),
]
after = ContextTrace(
    context_limit=128_000,
    components=after_components,
    total_tokens=sum(component.token_count for component in after_components),
)

# Compare the two traces; sankey() stays the cell's final expression so the
# notebook displays what it returns.
diff = ContextDiff(
    before=before,
    after=after,
    before_label="Verbose",
    after_label="Compacted",
)
diff.sankey()

In [None]:
# Summarize the Verbose -> Compacted diff built in the previous cell.
# NOTE(review): presumably returns or prints a per-component breakdown --
# confirm against the library's documentation.
diff.summary()

## 7 | Save & reload traces

Traces serialize to JSON for reproducibility and sharing.

In [None]:
# Write the OpenAI trace to a JSON file, then load it back from the same path
trace_path = "quick_start_trace.json"
openai_trace.to_json(trace_path)

reloaded = ContextTrace.from_json(trace_path)
component_count = len(reloaded.components)
print(f"Reloaded: {component_count} components, {reloaded.total_tokens:,} tokens")