In [1]:
%load_ext autoreload
%autoreload 2

In [16]:
"""
Simple API client for PDF form processing.
Copy-paste these cells into a notebook and run one at a time.
"""

import requests
import json
from pathlib import Path

BASE_URL = "http://localhost:5000"

# =============================================================================
# STEP 1: Add PDFs (run this cell multiple times with different files)
# =============================================================================

def add_pdf(file_path: str, opp_id: str):
    """Register a local PDF with the form-processing server.

    Sends ``file_path`` and ``opp_id`` (as ``opportunity_id``) to the
    ``/api/pdf`` endpoint, prints the HTTP status code followed by the
    pretty-printed JSON reply, and returns the decoded reply.

    Args:
        file_path: Path of the PDF, forwarded to the server as-is.
        opp_id: Opportunity identifier to associate with the form.

    Returns:
        The decoded JSON body of the server's response.
    """
    endpoint = f"{BASE_URL}/api/pdf"
    payload = {"file_path": file_path, "opportunity_id": opp_id}

    reply = requests.post(endpoint, json=payload, timeout=120)
    print(f"Status: {reply.status_code}")

    body = reply.json()
    print(json.dumps(body, indent=2))
    return body

# Example usage - run these one at a time:
# add_pdf("C:/pp2/od_map/examples/opp1.pdf", "opp1")
# add_pdf("C:/pp2/od_map/examples/opp2.pdf", "opp2")
# add_pdf("C:/pp2/od_map/examples/opp3.pdf", "opp3")


# =============================================================================
# STEP 2: Create canonical questions (maps all existing forms)
# =============================================================================
def create_canonical():
    """Ask the server to build canonical questions from all forms.

    Posts to ``/api/canonical/create`` and passes the server's own
    ``/api/webhook/canonical`` URL as the webhook target. Prints the HTTP
    status code and the pretty-printed JSON reply, then returns the decoded
    reply. Watch the server logs for progress.

    Returns:
        The decoded JSON body of the server's response.
    """
    endpoint = f"{BASE_URL}/api/canonical/create"
    payload = {"webhook_url": f"{BASE_URL}/api/webhook/canonical"}

    reply = requests.post(endpoint, json=payload, timeout=300)
    print(f"Status: {reply.status_code}")

    body = reply.json()
    print(json.dumps(body, indent=2))
    return body


# Run: create_canonical()
# Then watch server logs - it will print when webhook is called


# =============================================================================
# STEP 3: Check canonical version exists
# =============================================================================

def get_versions():
    """Fetch the canonical versions and pick the ``version_id`` to use.

    Prints the HTTP status code and the pretty-printed JSON reply from
    ``/api/canonical/versions``.

    Returns:
        The ``version_id`` of the first entry flagged ``is_latest``; if no
        entry carries that flag, the ``version_id`` of the first entry;
        ``None`` when the reply lists no versions.
    """
    reply = requests.get(f"{BASE_URL}/api/canonical/versions", timeout=10)
    print(f"Status: {reply.status_code}")

    body = reply.json()
    print(json.dumps(body, indent=2))

    versions = body.get("versions", [])
    if not versions:
        return None

    # Prefer an entry explicitly marked as latest; otherwise fall back to
    # whatever the server listed first.
    flagged = [entry for entry in versions if entry.get("is_latest")]
    chosen = flagged[0] if flagged else versions[0]
    return chosen["version_id"]

# Run: version_id = get_versions()


# =============================================================================
# STEP 4: Add a new PDF (will auto-map to canonical)
# =============================================================================

# Use add_pdf() again - now it will auto-map to canonical
# add_pdf("C:/pp2/od_map/examples/opp4.pdf", "opp4")
# Save the checksum from the output!


# =============================================================================
# STEP 5: Get form metadata
# =============================================================================

def get_metadata(opp_id: str, checksum: str, version_id: str):
    """Request metadata (with canonical mappings) for a single form.

    Posts one ``{"opp_id", "checksum"}`` entry to ``/api/forms/metadata``
    together with ``version_id`` and a fixed ``min_similarity`` of 3. Prints
    the HTTP status code and the pretty-printed JSON reply.

    Args:
        opp_id: Opportunity identifier of the form.
        checksum: Checksum reported when the PDF was added.
        version_id: Canonical version to resolve mappings against.

    Returns:
        The decoded JSON body of the server's response.
    """
    endpoint = f"{BASE_URL}/api/forms/metadata"
    form_ref = {"opp_id": opp_id, "checksum": checksum}
    payload = {
        "forms": [form_ref],
        "version_id": version_id,
        "min_similarity": 3,
    }

    reply = requests.post(endpoint, json=payload, timeout=30)
    print(f"Status: {reply.status_code}")

    body = reply.json()
    print(json.dumps(body, indent=2))
    return body

# Example: get_metadata("opp4", "YOUR_CHECKSUM_HERE", version_id)


# =============================================================================
# STEP 6: Fill the form
# =============================================================================

def fill_form(opp_id: str, checksum: str, version_id: str, answers: dict,
              dest_dir: str = "C:/pp2/od_map/examples/filled"):
    """Fill a PDF form with answers and copy the result into ``dest_dir``.

    Posts the answers to ``/api/forms/fill`` (with
    ``truncate_on_char_limit`` enabled), prints the HTTP status code and the
    pretty-printed JSON reply, and, when the reply reports ``success`` and an
    ``output_path``, copies that file into ``dest_dir``.

    Args:
        opp_id: Opportunity identifier of the form to fill.
        checksum: Checksum reported when the PDF was added.
        version_id: Canonical version the answer keys refer to.
        answers: Mapping of question id to answer value, sent as-is.
        dest_dir: Directory that receives a copy of the filled PDF. Defaults
            to the examples/filled folder this notebook has always used.

    Returns:
        The decoded JSON body of the server's response.

    Raises:
        OSError: If the filled PDF cannot be copied. ``output_path`` is read
            from the local filesystem, so the copy only works when this
            process can see the path the server reported.
    """
    response = requests.post(
        f"{BASE_URL}/api/forms/fill",
        json={
            "opp_id": opp_id,
            "checksum": checksum,
            "version_id": version_id,
            "answers": answers,
            "options": {"truncate_on_char_limit": True},
        },
        timeout=300,
    )
    print(f"Status: {response.status_code}")
    result = response.json()
    print(json.dumps(result, indent=2))

    # Copy the filled PDF next to the examples only if the server produced one.
    if result.get("success") and result.get("output_path"):
        import shutil

        src = Path(result["output_path"])
        target_dir = Path(dest_dir)
        # parents=True: create missing intermediate folders too, instead of
        # raising FileNotFoundError when the parent directory is absent.
        target_dir.mkdir(parents=True, exist_ok=True)
        dest = target_dir / src.name
        shutil.copy2(src, dest)
        print(f"\nCopied to: {dest}")

    return result

# Example answers - use canonical question IDs (cq_001) or question numbers (q_1)
# answers = {
#     "cq_001": "John",        # First Name
#     "cq_002": "Smith",       # Last Name
#     "cq_004": "01/15/1985",  # DOB
#     "q_5": "Some answer",    # Question #5 (non-canonical)
# }
# fill_form("opp4", "YOUR_CHECKSUM_HERE", version_id, answers)



In [39]:
# STEP 1: register the first sample form under opportunity "opp1".
add_pdf("C:/pp2/od_map/examples/fillable_form1.pdf", "opp1")

Status: 200
{
  "canonical_version_id": null,
  "checksum": "17df98fda81cd93bcddf6b669e5c528303819f27c1d29ab36e9538258adae840",
  "from_cache": false,
  "mapping_stats": null,
  "message": "New form inserted (no canonical version)",
  "opportunity_id": "opp1",
  "success": true
}


{'canonical_version_id': None,
 'checksum': '17df98fda81cd93bcddf6b669e5c528303819f27c1d29ab36e9538258adae840',
 'from_cache': False,
 'mapping_stats': None,
 'message': 'New form inserted (no canonical version)',
 'opportunity_id': 'opp1',
 'success': True}

In [40]:
# STEP 1 (continued): register two more sample forms. Each call prints its own
# status and JSON; only the last call's return value is echoed by the cell.
add_pdf("C:/pp2/od_map/examples/fillable_form2.pdf", "opp2")
add_pdf("C:/pp2/od_map/examples/fillable_form3.pdf", "opp3")

Status: 200
{
  "canonical_version_id": null,
  "checksum": "7a2504e7b9f5ba559ccd4808d6040eb9a8a905a56db1361625a03e30ea4e89cf",
  "from_cache": false,
  "mapping_stats": null,
  "message": "New form inserted (no canonical version)",
  "opportunity_id": "opp2",
  "success": true
}
Status: 200
{
  "canonical_version_id": null,
  "checksum": "440c6609b34d03dcde1af1cb0849631632645043351a2d8ba5f61f2a8db151ae",
  "from_cache": false,
  "mapping_stats": null,
  "message": "New form inserted (no canonical version)",
  "opportunity_id": "opp3",
  "success": true
}


{'canonical_version_id': None,
 'checksum': '440c6609b34d03dcde1af1cb0849631632645043351a2d8ba5f61f2a8db151ae',
 'from_cache': False,
 'mapping_stats': None,
 'message': 'New form inserted (no canonical version)',
 'opportunity_id': 'opp3',
 'success': True}

In [41]:
# STEP 2: build canonical questions from the forms added so far.
create_canonical()

Status: 200
{
  "message": "Canonical creation started in background",
  "run_id": "fbffd948",
  "status": "started",
  "success": true
}


{'message': 'Canonical creation started in background',
 'run_id': 'fbffd948',
 'status': 'started',
 'success': True}

In [45]:
# STEP 3: keep the chosen canonical version id for the metadata/fill calls;
# the bare name on the last line echoes it as the cell result.
version_id = get_versions()
version_id

Status: 200
{
  "latest_version_id": "c5d0133dd1cd",
  "success": true,
  "versions": [
    {
      "created_at": "2026-01-25T15:55:59.400295",
      "is_latest": true,
      "model_used": "gemini-2.5-flash",
      "num_questions": 3,
      "version_id": "c5d0133dd1cd"
    }
  ]
}


'c5d0133dd1cd'

In [43]:
# STEP 4: add a new form and keep its checksum, which get_metadata() and
# fill_form() both take as an argument.
result = add_pdf("C:/pp2/od_map/examples/fillable_form4.pdf", "opp4")
checksum = result["checksum"]

Status: 200
{
  "canonical_version_id": "c5d0133dd1cd",
  "checksum": "8e7b1c0fa5df2ec084902b84a06a72161a5733db6b84517e5ebe4a990b27f7f1",
  "from_cache": false,
  "mapping_stats": {
    "avg_score": "3.20",
    "canonical_ids_matched": 3,
    "match_rate": "60.0%",
    "opportunity_id": "opp4",
    "questions_mapped": 5,
    "questions_no_match": 2,
    "questions_with_match": 3,
    "score_distribution": {
      "1": 2,
      "2": 0,
      "3": 0,
      "4": 1,
      "5": 2
    },
    "total_questions": 5,
    "unmapped_question_numbers": []
  },
  "message": "New form inserted and mapped",
  "opportunity_id": "opp4",
  "success": true
}


In [46]:
# STEP 5: fetch the metadata and canonical mappings for the newly added form.
get_metadata("opp4", checksum, version_id)


Status: 200
{
  "forms": [
    {
      "canonical_count": 3,
      "canonical_coverage": 0.6,
      "checksum": "8e7b1c0fa5df2ec084902b84a06a72161a5733db6b84517e5ebe4a990b27f7f1",
      "checksum_valid": true,
      "form_description": "A simple form to collect basic contact information including name, date of birth, phone number, and preferred language.",
      "opportunity_id": "opp4",
      "original_count": 2,
      "questions": [
        {
          "canonical_question_id": "cq_001",
          "char_limit": null,
          "debug": {
            "canonical": {
              "canonical_question_id": "cq_001",
              "char_limit": null,
              "is_agreement": false,
              "question_text": "Date of Birth",
              "question_type": "date",
              "sub_question_of": null,
              "unique_structure": false
            },
            "original": {
              "char_limit": null,
              "is_agreement": false,
              "is_required": f

{'forms': [{'canonical_count': 3,
   'canonical_coverage': 0.6,
   'checksum': '8e7b1c0fa5df2ec084902b84a06a72161a5733db6b84517e5ebe4a990b27f7f1',
   'checksum_valid': True,
   'form_description': 'A simple form to collect basic contact information including name, date of birth, phone number, and preferred language.',
   'opportunity_id': 'opp4',
   'original_count': 2,
   'questions': [{'canonical_question_id': 'cq_001',
     'char_limit': None,
     'debug': {'canonical': {'canonical_question_id': 'cq_001',
       'char_limit': None,
       'is_agreement': False,
       'question_text': 'Date of Birth',
       'question_type': 'date',
       'sub_question_of': None,
       'unique_structure': False},
      'original': {'char_limit': None,
       'is_agreement': False,
       'is_required': False,
       'mapped_pdf_fields': ['birth_date'],
       'question_id': 'fq_0',
       'question_number': '1',
       'question_text': 'Date of Birth:',
       'question_type': 'date',
       'sub

In [47]:
# STEP 6: fill the form.
# Keys are question ids; the trailing notes name the question and the PDF
# field each answer is expected to land in.
answers = {
    "cq_001": "1985-03-22",    # Date of Birth → birth_date
    "cq_002": "Jane",          # First Name → first
    "cq_003": "Doe",           # Last Name → surname
    "fq_3": "555-123-4567",    # Phone Number → phone
    "fq_4": "English",         # Preferred Language → language
}
fill_form("opp4", checksum, version_id, answers)

Status: 200
{
  "checksum": "8e7b1c0fa5df2ec084902b84a06a72161a5733db6b84517e5ebe4a990b27f7f1",
  "error": null,
  "opp_id": "opp4",
  "output_path": "C:\\pp2\\od_map\\data\\filled\\opp4-8e7b1c-1769385495.pdf",
  "success": true,
  "validation_report": {
    "stages": [
      {
        "issues": [],
        "name": "input",
        "passed": true
      },
      {
        "issues": [],
        "name": "llm_transform",
        "passed": true
      },
      {
        "issues": [
          {
            "canonical_question_id": null,
            "field": null,
            "message": "Output PDF has 5 fields",
            "severity": "info"
          }
        ],
        "name": "pdf_write",
        "passed": true
      }
    ],
    "success": true,
    "summary": {
      "errors": 0,
      "fields_filled": 5,
      "fields_skipped": 0,
      "total_fields": 5,
    }
  }
}

Copied to: C:\pp2\od_map\examples\filled\opp4-8e7b1c-1769385495.pdf


{'checksum': '8e7b1c0fa5df2ec084902b84a06a72161a5733db6b84517e5ebe4a990b27f7f1',
 'error': None,
 'opp_id': 'opp4',
 'output_path': 'C:\\pp2\\od_map\\data\\filled\\opp4-8e7b1c-1769385495.pdf',
 'success': True,
 'validation_report': {'stages': [{'issues': [],
    'name': 'input',
    'passed': True},
   {'issues': [], 'name': 'llm_transform', 'passed': True},
   {'issues': [{'canonical_question_id': None,
      'field': None,
      'message': 'Output PDF has 5 fields',
      'severity': 'info'}],
    'name': 'pdf_write',
    'passed': True}],
  'success': True,
  'summary': {'errors': 0,
   'fields_filled': 5,
   'fields_skipped': 0,
   'total_fields': 5,

In [None]:
# Debug cell: call the project's loader functions directly instead of going
# through the HTTP API, and print how many records each one returns.
import sys
# Put the relative 'src' directory first on the import path so the two
# project modules below can be imported; this must run before those imports.
sys.path.insert(0, 'src')

# Simulate what the server endpoints do
from form_mapping import load_form_records
from canonical_questions import list_canonical_versions

print('=== /api/forms/list ===')
forms = load_form_records()
print(f'Forms: {len(forms)}')

print()
print('=== /api/canonical/versions ===')
versions = list_canonical_versions()
print(f'Versions: {len(versions)}')
# One line per version entry, indented under the count.
for v in versions:
    print(f'  {v}')