## Setup

In [None]:
# Install the SDK if needed
# %pip install docu-devs-api-client pydantic

In [None]:
import os
import json
from pydantic import BaseModel, Field
from docudevs import DocuDevsClient

# Read the API key from the environment; the placeholder is used when the
# DOCUDEVS_API_KEY variable is not set.
API_KEY = os.environ.get("DOCUDEVS_API_KEY", "your-api-key-here")

# Client used by every cell below
client = DocuDevsClient(token=API_KEY)

## The Use Case

Imagine you receive a supplier spreadsheet with just SKU codes and quantities:

```
SKU-12345, 50 units
SKU-67890, 25 units
```

But you need the **full product names**, **prices**, and **compliance info** that live
in your product catalog. Knowledge Search handles this by looking up each SKU during extraction.

## Step 1: Create a Knowledge Base

First, create a Case and upload your reference documents (product catalog, policy docs, etc.).
Then promote it to a knowledge base so it gets indexed for search.

In [None]:
from docudevs.models.cases_controller_create_case_request import CasesControllerCreateCaseRequest
from docudevs.models.upload_case_document_body import UploadCaseDocumentBody
from docudevs.types import File

def get_case_id(response):
    """Return the case ID carried by a create-case response.

    Uses the SDK-parsed model when one is present; otherwise decodes the raw
    response body as JSON and reads its "id" key (the SDK doesn't parse 201
    responses yet, so those arrive with ``parsed`` set to ``None``).
    """
    parsed = response.parsed
    if parsed is None:
        # No parsed model available: fall back to the raw JSON payload
        payload = json.loads(response.content.decode())
        return payload["id"]
    return parsed.id

# Create a case for our product catalog
case_body = CasesControllerCreateCaseRequest(name="Product Catalog Q4 2024")
case_response = await client.create_case(body=case_body)

if case_response.status_code not in [200, 201]:
    print(f"Error creating case: {case_response.status_code}")
    print(f"Response: {case_response.content.decode()}")
else:
    case_id = get_case_id(case_response)
    print(f"Created case: {case_id}")

In [None]:
# Upload reference documents to the case
# In real usage, you'd upload your actual product catalog, specs, etc.

# Example: Create a simple product catalog as Markdown text.
# The three SKUs below are the same ones that appear in the sample purchase
# order processed later in this notebook, so each lookup has a catalog entry.
catalog_content = """
# Product Catalog

## SKU-12345: Premium Widget Pro
- Full Name: Premium Widget Pro X2000
- Price: $149.99
- Category: Electronics
- Compliance: CE, FCC, RoHS
- Description: High-performance widget with advanced features

## SKU-67890: Basic Gadget
- Full Name: Basic Gadget Standard Edition
- Price: $29.99
- Category: Accessories
- Compliance: CE
- Description: Entry-level gadget for everyday use

## SKU-11111: Deluxe Gizmo
- Full Name: Deluxe Gizmo Ultimate
- Price: $299.99
- Category: Professional
- Compliance: CE, FCC, RoHS, ISO 9001
- Description: Professional-grade gizmo with premium build quality
"""

# Upload as a text file
file_obj = File(payload=catalog_content.encode(), file_name="catalog.md", mime_type="text/markdown")
upload_body = UploadCaseDocumentBody(document=file_obj)

upload_response = await client.upload_case_document(case_id=case_id, body=upload_body)
print(f"Uploaded catalog document")

In [None]:
import asyncio

# Promote the case to a knowledge base
kb_response = await client.promote_knowledge_base(case_id=case_id)
print(f"Case {case_id} is now a knowledge base!")

# Wait for indexing - the knowledge base needs time to index documents
print("Waiting for indexing to complete...")
await asyncio.sleep(15)
print("Ready!")

## Step 2: Process with Knowledge Search

Now when we process a document, we attach a knowledge search tool that points to our knowledge base.

In [None]:
# Define what we want to extract
# NOTE: the Field descriptions (and, in Pydantic, the class docstring) end up
# in the JSON schema generated from these models, so treat their wording as
# part of the extraction instructions rather than as plain comments.
class EnrichedProduct(BaseModel):
    """Product info enriched with knowledge base data."""
    # Present in the order document itself
    sku: str = Field(description="Product SKU code")
    # Looked up in the catalog (not present in the order)
    full_name: str = Field(description="Full product name from catalog")
    # Present in the order document itself
    quantity: int = Field(description="Quantity ordered")
    # Looked up in the catalog (not present in the order)
    unit_price: float = Field(description="Price per unit from catalog")
    # Looked up in the catalog; kept as one free-form string
    compliance: str = Field(description="Compliance certifications from catalog")

class OrderEnrichment(BaseModel):
    """Order with enriched product information."""
    # Enriched products extracted from the order
    products: list[EnrichedProduct]

# Serialize the model's JSON schema to a string; it is passed as the `schema`
# argument when documents are submitted for processing.
schema_json = json.dumps(OrderEnrichment.model_json_schema())

In [None]:
# Create a simple order document (just SKUs and quantities)
order_content = """
Purchase Order #2024-001

Items:
- SKU-12345: 50 units
- SKU-67890: 100 units
- SKU-11111: 10 units
"""

# Process with knowledge search enabled
job_id = await client.submit_and_process_document(
    document=order_content.encode(),
    document_mime_type="text/plain",
    prompt="""
    Extract products from this order. For each SKU, use the search tool
    to look up the full product name, price, and compliance certifications.
    """,
    schema=schema_json,
    tools=[
        {
            "type": "KNOWLEDGE_BASE_SEARCH",
            "config": {
                "caseId": str(case_id),
                "topK": 5  # Return top 5 matches per search
            }
        }
    ]
)

print(f"Job submitted: {job_id}")

In [None]:
# Get enriched results
result = await client.wait_until_ready(job_id, result_format="json")

# Parse with Pydantic
enriched = OrderEnrichment.model_validate(result)

print("\n=== Enriched Order ===")
for product in enriched.products:
    print(f"\n{product.sku}: {product.full_name}")
    print(f"  Quantity: {product.quantity}")
    print(f"  Unit Price: ${product.unit_price:.2f}")
    print(f"  Total: ${product.quantity * product.unit_price:.2f}")
    print(f"  Compliance: {product.compliance}")

## Step 3: Enable Tracing to See Lookups

Want to see what the AI looked up? Enable tracing to get detailed logs.

In [None]:
# Process with tracing enabled
job_id = await client.submit_and_process_document(
    document=order_content.encode(),
    document_mime_type="text/plain",
    prompt="Look up each SKU and extract full product details.",
    schema=schema_json,
    tools=[
        {
            "type": "KNOWLEDGE_BASE_SEARCH",
            "config": {"caseId": str(case_id), "topK": 3}
        }
    ],
    trace=True  # Enable tracing!
)

result = await client.wait_until_ready(job_id, result_format="json")

# Get the trace
trace = await client.get_trace(job_id)
if trace:
    print(f"Total LLM calls: {trace.get('total_llm_calls', 0)}")
    print(f"Total tool calls: {trace.get('total_tool_calls', 0)}")
    print(f"Total tokens: {trace.get('total_tokens', 0)}")
    
    # Show tool calls (knowledge searches)
    for event in trace.get('events', []):
        if event.get('type') == 'tool_end':
            print(f"\nTool: {event.get('name')}")
            print(f"  Duration: {event.get('duration_ms', 0)}ms")

## Managing Knowledge Bases

In [None]:
# List all knowledge bases
kb_list = await client.list_knowledge_bases()
print("Your knowledge bases:")
for kb in kb_list.parsed or []:
    print(f"  - Case {kb.get('caseId')}: {kb.get('name', 'Unnamed')}")

In [None]:
# Clean up: Remove knowledge base status (keeps the case)
# Uncomment to run:
# await client.delete_knowledge_base(case_id)
# print(f"Removed knowledge base status from case {case_id}")

## Best Practices

1. **Keep knowledge bases focused**: One per domain (products, policies, customers)

2. **Use structured reference docs**: Clear headings and consistent formatting help

3. **Adjust topK based on need**: Higher = more context but slower. Start with 5.

4. **Update knowledge bases**: Add new documents to cases as your data changes

5. **Enable tracing during development**: See exactly what's being looked up

## Next Steps

- **[Operations](04-operations.ipynb)**: Run error analysis and ask follow-up questions on processed documents