# Testing PyCharter

This notebook demonstrates how to test the **PyCharter** package and all 5 core services.

## Setup

First, make sure the package is installed in development mode:

```bash
# From the project root directory
pip install -e .
# or with dev dependencies
pip install -e ".[dev]"
```

If you're using a Jupyter kernel in a virtual environment, make sure the kernel is using the same environment where you installed the package.


In [1]:
# Import pycharter and all core services
import pycharter
from pycharter import (
    # Service 1: Contract Parser
    parse_contract,
    parse_contract_file,
    ContractMetadata,
    # Service 2: Metadata Store
    MetadataStoreClient,
    # Service 3: Pydantic Generator
    from_dict,
    from_json,
    from_file,
    from_url,
    generate_model,
    # Service 4: JSON Schema Converter
    to_dict,
    to_json,
    to_file,
    model_to_schema,
    # Service 5: Runtime Validator
    validate,
    validate_batch,
    ValidationResult,
)
import json
import os
from pathlib import Path

# Report which PyCharter build this kernel actually imported.
print(f"✓ PyCharter v{pycharter.__version__} imported successfully")
print(f"✓ Package location: {os.path.dirname(pycharter.__file__)}")
# Heuristic only: a regular install lives under site-packages, or under
# dist-packages on Debian/Ubuntu system Pythons. Anything else is treated as
# an editable (source checkout) install.
print(
    "✓ Is editable install: "
    f"{not any(d in pycharter.__file__ for d in ('site-packages', 'dist-packages'))}"
)


✓ PyCharter v0.0.2 imported successfully
✓ Package location: /home/auscheng/statfyi/pycharter/pycharter
✓ Is editable install: True


## Service 3: Pydantic Generator - Basic Usage


In [2]:
# Field definitions for the demo schema; only "name" and "age" are listed as required.
person_properties = {
    "name": {"type": "string"},
    "age": {"type": "integer"},
    "email": {"type": "string"},
}
schema = {"type": "object", "properties": person_properties, "required": ["name", "age"]}

# Build the model class from the schema, then create one record from a plain mapping.
Person = from_dict(schema, "Person")
person_input = {"name": "Alice", "age": 30, "email": "alice@example.com"}
person = Person(**person_input)

print(f"✓ Created person: {person.name}, age {person.age}")


✓ Created person: Alice, age 30


## Service 3: Pydantic Generator - Standard JSON Schema Keywords


In [3]:
# Test standard JSON Schema keywords (length bounds, pattern, enum, numeric range)
schema = {
    "type": "object",
    "properties": {
        "code": {
            "type": "string",
            "minLength": 3,
            "maxLength": 10,
            "pattern": "^[a-z]+$"
        },
        "status": {
            "type": "string",
            "enum": ["active", "inactive", "pending"]
        },
        "score": {
            "type": "number",
            "minimum": 0,
            "maximum": 100
        }
    },
    "required": ["code", "status"]
}

Model = from_dict(schema, "TestModel")

# Positive case: every constraint declared above is satisfied.
instance = Model(code="abc", status="active", score=50)
print(f"✓ Valid instance: code={instance.code}, status={instance.status}, score={instance.score}")

# Negative case: "ab" is shorter than minLength=3, so construction is expected to raise.
try:
    Model(code="ab", status="active", score=50)
except Exception as e:
    print(f"✓ Validation caught error: {type(e).__name__}")
else:
    # Without this branch an unenforced constraint would print nothing and
    # the cell would look like it passed.
    print("✗ Validation did not reject code='ab' even though minLength is 3")


✓ Valid instance: code=abc, status=active, score=50.0
✓ Validation caught error: ValidationError


## Service 3: Pydantic Generator - Nested Objects


In [4]:
# Nested-object schema: "address" is itself an object with its own required list.
address_schema = {
    "type": "object",
    "properties": {
        "street": {"type": "string"},
        "city": {"type": "string"},
        "zipcode": {"type": "string"},
    },
    "required": ["street", "city"],
}
schema = {
    "type": "object",
    "properties": {"name": {"type": "string"}, "address": address_schema},
    "required": ["name", "address"],
}

Person = from_dict(schema, "Person")

# The nested value goes in as a plain dict and is read back through attribute access below.
home_address = {"street": "123 Main St", "city": "New York", "zipcode": "10001"}
person = Person(name="Alice", address=home_address)

resident, home = person.name, person.address
print(f"✓ Nested object: {resident} lives at {home.street}, {home.city}")


✓ Nested object: Alice lives at 123 Main St, New York


## Service 3: Pydantic Generator - Coercion and Validation (PyCharter Extensions)


In [5]:
# Rules attached to "destination" through the "validations" extension key.
destination_rules = {
    "min_length": {"threshold": 3},
    "max_length": {"threshold": 3},
    "no_capital_characters": None,
    "only_allow": {"allowed_values": ["abc", "def", "ghi"]},
}

# Each property names a "coercion" rule; two of them also carry "validations".
flight_properties = {
    "flight_number": {"type": "integer", "coercion": "coerce_to_integer"},
    "destination": {
        "type": "string",
        "coercion": "coerce_to_string",
        "validations": destination_rules,
    },
    "distance": {
        "type": "number",
        "coercion": "coerce_to_float",
        "validations": {"greater_than_or_equal_to": {"threshold": 0}},
    },
}
schema = {
    "type": "object",
    "properties": flight_properties,
    "required": ["flight_number", "destination", "distance"],
}

Flight = from_dict(schema, "Flight")

# The numeric fields are deliberately supplied as strings so the coercion rules have work to do.
raw_flight = {"flight_number": "123", "destination": "abc", "distance": "100.5"}
flight = Flight(**raw_flight)

print(f"✓ Coercion: flight_number={flight.flight_number} (was string), distance={flight.distance} (was string)")
print(f"✓ Validation: destination={flight.destination} passed all checks")


✓ Coercion: flight_number=123 (was string), distance=100.5 (was string)
✓ Validation: destination=abc passed all checks


## Service 1: Contract Parser

Test parsing data contract files and decomposing them into metadata components.


In [9]:
# Test 1: Parse contract from dictionary
contract_data = {
    "schema": {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"}
        },
        "required": ["name"]
    },
    "ownership": {
        "owner": "data-team",
        "team": "engineering"
    },
    "governance_rules": {
        "pii": True,
        "retention_days": 365
    },
    "metadata": {
        "version": "1.0",
        "description": "User contract"
    }
}

# The parsed result exposes the four contract sections as attributes.
metadata = parse_contract(contract_data)
print("✓ Contract parsed successfully")
print(f"  Schema keys: {list(metadata.schema.keys())}")
print(f"  Owner: {metadata.ownership.get('owner')}")
print(f"  Governance rules: {metadata.governance_rules}")
print(f"  Metadata version: {metadata.metadata.get('version')}")

# Test 2: Parse contract file (if available)
# The path is relative to the kernel's working directory, so this branch is
# skipped with a note when the notebook is run from somewhere else.
data_dir = Path("../data/contracts")
if data_dir.exists():
    # glob() yields entries in arbitrary, filesystem-dependent order. Sort each
    # group so the same file is chosen on every run; YAML files still come first.
    contract_files = sorted(data_dir.glob("*.yaml")) + sorted(data_dir.glob("*.json"))
    if contract_files:
        contract_file = contract_files[0]
        print(f"\n✓ Found contract file: {contract_file.name}")
        try:
            file_metadata = parse_contract_file(str(contract_file))
            print("  Parsed contract file successfully")
            print(f"  Schema type: {file_metadata.schema.get('type', 'N/A')}")
        except Exception as e:
            # Best effort: a malformed sample file is reported, not fatal.
            print(f"  Note: Could not parse file ({e})")
    else:
        print("\n  Note: No contract files found in data/contracts/")
else:
    print("\n  Note: data/contracts/ directory not found")


✓ Contract parsed successfully
  Schema keys: ['type', 'properties', 'required']
  Owner: data-team
  Governance rules: {'pii': True, 'retention_days': 365}
  Metadata version: 1.0

✓ Found contract file: user_contract.yaml
  Parsed contract file successfully
  Schema type: object


In [10]:
# Service 2 (Metadata Store): dictionary-backed client used by the test cells.
class InMemoryMetadataStore(MetadataStoreClient):
    """Metadata store that keeps everything in plain dictionaries (testing only)."""

    def __init__(self):
        super().__init__()
        # One dict per kind of record; schema ids are handed out sequentially.
        self._schemas, self._ownership, self._governance = {}, {}, {}
        self._next_id = 1

    def connect(self):
        """Mark the store as connected and announce it."""
        self._connection = "connected"
        print("  ✓ Connected to metadata store")

    def disconnect(self):
        """Drop the connection marker."""
        self._connection = None

    def store_schema(self, schema_name: str, schema: dict, version: str = None):
        """Save a schema under a newly generated id and return that id."""
        schema_id = f"schema_{self._next_id}"
        record = {
            "id": schema_id,
            "name": schema_name,
            "version": version,
            "schema": schema,
        }
        self._schemas[schema_id] = record
        self._next_id += 1
        print(f"  ✓ Stored schema '{schema_name}' v{version} (ID: {schema_id})")
        return schema_id

    def get_schema(self, schema_id: str):
        """Return the stored schema dict for the id, or None when the id is unknown."""
        record = self._schemas.get(schema_id)
        return None if record is None else record["schema"]

    def store_ownership(self, resource_id: str, owner: str, team: str = None, additional_info: dict = None):
        """Record owner/team details for a resource and return the resource id."""
        extra = additional_info if additional_info else {}
        self._ownership[resource_id] = {
            "owner": owner,
            "team": team,
            "additional_info": extra,
        }
        print(f"  ✓ Stored ownership for {resource_id}")
        return resource_id

    def get_ownership(self, resource_id: str):
        """Return the ownership record for the resource, or None when there is none."""
        return self._ownership.get(resource_id)

# Exercise the store end to end: connect, write, read back, disconnect.
store = InMemoryMetadataStore()
store.connect()

# Persist a two-field product schema under version "1.0".
product_properties = {
    "product_id": {"type": "string"},
    "price": {"type": "number"},
}
schema = {"type": "object", "properties": product_properties}
schema_id = store.store_schema("product", schema, version="1.0")

# Ownership is keyed by the id that store_schema returned.
store.store_ownership(schema_id, owner="product-team", team="engineering")

# Read the schema back and summarise it.
retrieved_schema = store.get_schema(schema_id)
property_count = len(retrieved_schema.get('properties', {}))
print(f"\n✓ Retrieved schema: {retrieved_schema.get('type')} with {property_count} properties")

# Read the ownership record back.
ownership = store.get_ownership(schema_id)
owner_name, owner_team = ownership.get('owner'), ownership.get('team')
print(f"✓ Retrieved ownership: {owner_name} from {owner_team}")

store.disconnect()


  ✓ Connected to metadata store
  ✓ Stored schema 'product' v1.0 (ID: schema_1)
  ✓ Stored ownership for schema_1

✓ Retrieved schema: object with 2 properties
✓ Retrieved ownership: product-team from engineering


## Service 4: JSON Schema Converter

Test converting Pydantic models back to JSON Schema format (reverse conversion).


In [11]:
# Create a Pydantic model first
from pydantic import BaseModel, Field

# Hand-written model used as the input for the model → schema conversion below.
# NOTE: the class docstring is surfaced as the schema's top-level 'description'
# (see the schema_dict output further down), so editing it changes the schema.
class Product(BaseModel):
    """Product model for testing reverse conversion."""
    # Required; the description text is carried into the converted schema.
    product_id: str = Field(description="Unique product identifier")
    # Length bounds; reported as minLength / maxLength in the converted schema.
    name: str = Field(min_length=1, max_length=100)
    # ge=0 is reported as 'minimum': 0 in the converted schema.
    price: float = Field(ge=0, description="Product price in USD")
    # Has a default, so it is left out of the schema's 'required' list.
    in_stock: bool = Field(default=True)
    # Built via default_factory; the converted schema shows no 'default' entry for it.
    tags: list[str] = Field(default_factory=list)

# Convert model to JSON Schema
schema_dict = to_dict(Product)
print("✓ Converted Pydantic model to JSON Schema")
print(f"  Schema type: {schema_dict.get('type')}")
print(f"  Properties: {list(schema_dict.get('properties', {}).keys())}")

# Check specific field constraints
price_prop = schema_dict.get('properties', {}).get('price', {})
print(f"  Price type: {price_prop.get('type')}")
print(f"  Price minimum: {price_prop.get('minimum')}")

# Convert to JSON string
schema_json = to_json(Product)
print(f"\n✓ Converted to JSON string ({len(schema_json)} characters)")

# Round-trip test: schema → model → schema
print("\n✓ Round-trip test:")
ProductModel2 = from_dict(schema_dict, "Product2")
schema_dict2 = to_dict(ProductModel2)
original_props = schema_dict.get('properties', {})
round_trip_props = schema_dict2.get('properties', {})
print(f"  Original schema has {len(original_props)} properties")
print(f"  Round-trip schema has {len(round_trip_props)} properties")
# Compare property names rather than counts: equal counts alone would also
# be reported for a round trip that renamed or swapped fields.
print(f"  Round-trip successful: {set(original_props) == set(round_trip_props)}")


✓ Converted Pydantic model to JSON Schema
  Schema type: object
  Properties: ['product_id', 'name', 'price', 'in_stock', 'tags']
  Price type: number
  Price minimum: 0

✓ Converted to JSON string (787 characters)

✓ Round-trip test:
  Original schema has 5 properties
  Round-trip schema has 5 properties
  Round-trip successful: True


## Service 4: JSON Schema Converter - Nested Models

Test converting Pydantic models with nested structures back to JSON Schema format.


In [18]:
# Example 1: Simple nested models
banner = "=" * 70
print(banner)
print("Example 1: Simple Nested Models")
print(banner)

# Two leaf models, then a parent model that embeds both.
# (Model docstrings are left untouched: they feed the schema description.)
class Address(BaseModel):
    """Address information."""
    street: str = Field(..., min_length=1)
    city: str = Field(..., min_length=1)
    state: str = Field(..., min_length=2, max_length=2)
    zipcode: str = Field(..., pattern="^\\d{5}(-\\d{4})?$")

class Contact(BaseModel):
    """Contact information."""
    email: str = Field(..., description="Email address")
    phone: str = Field(default="", description="Phone number")

class Person(BaseModel):
    """Person with nested address and contact."""
    name: str = Field(..., min_length=1)
    age: int = Field(..., ge=0, le=150)
    address: Address  # embedded model
    contact: Contact  # embedded model

# Convert once and keep a handle on the top-level property map.
schema = to_dict(Person)
top_level = schema.get('properties', {})

print("\n✓ Converted Person model with nested Address and Contact")
print(f"  Top-level properties: {list(top_level)}")

# Nested address schema
address_prop = top_level.get('address', {})
address_fields = address_prop.get('properties', {})
print("\n  Address (nested) schema:")
print(f"    Type: {address_prop.get('type')}")
print(f"    Properties: {list(address_fields)}")
print(f"    Required: {address_prop.get('required', [])}")

# Nested contact schema
contact_prop = top_level.get('contact', {})
contact_fields = contact_prop.get('properties', {})
print("\n  Contact (nested) schema:")
print(f"    Type: {contact_prop.get('type')}")
print(f"    Properties: {list(contact_fields)}")

# Field-level constraints inside the nested Address schema
address_street = address_fields.get('street', {})
address_zipcode = address_fields.get('zipcode', {})
print("\n  Nested constraints preserved:")
print(f"    Address.street minLength: {address_street.get('minLength')}")
print(f"    Address.zipcode pattern: {address_zipcode.get('pattern')}")


Example 1: Simple Nested Models

✓ Converted Person model with nested Address and Contact
  Top-level properties: ['name', 'age', 'address', 'contact']

  Address (nested) schema:
    Type: object
    Properties: ['street', 'city', 'state', 'zipcode']
    Required: ['street', 'city', 'state', 'zipcode']

  Contact (nested) schema:
    Type: object
    Properties: ['email', 'phone']

  Nested constraints preserved:
    Address.street minLength: 1
    Address.zipcode pattern: ^\d{5}(-\d{4})?$


In [24]:
# Inspection cell: show the raw schema fragment for Address.street.
# Relies on `address_street` having been assigned by the Example 1 cell.
address_street

{'minLength': 1, 'title': 'Street', 'type': 'string'}

In [None]:
# Example 2: Arrays of nested models
rule = "=" * 70
print("\n" + rule)
print("Example 2: Arrays of Nested Models")
print(rule)

# Element model first, then the container model that holds a list of it.
# (Model docstrings are left untouched: they feed the schema description.)
class OrderItem(BaseModel):
    """Order item with product details."""
    product_id: str = Field(..., description="Product identifier")
    quantity: int = Field(..., ge=1, description="Quantity ordered")
    price: float = Field(..., ge=0, description="Unit price")

class Order(BaseModel):
    """Order with array of items."""
    order_id: str = Field(..., description="Order identifier")
    customer_id: str = Field(..., description="Customer identifier")
    items: list[OrderItem] = Field(..., min_length=1, description="Order items")
    total: float = Field(..., ge=0, description="Total order amount")

# Convert once and keep a handle on the top-level property map.
schema = to_dict(Order)
order_props = schema.get('properties', {})

print("\n✓ Converted Order model with array of OrderItem")
print(f"  Properties: {list(order_props)}")

# The array property itself
items_prop = order_props.get('items', {})
print("\n  Items (array) schema:")
print(f"    Type: {items_prop.get('type')}")
print(f"    Min items: {items_prop.get('minItems')}")

# The schema of each array element
item_schema = items_prop.get('items', {})
item_fields = item_schema.get('properties', {})
print("\n  OrderItem (nested in array) schema:")
print(f"    Type: {item_schema.get('type')}")
print(f"    Properties: {list(item_fields)}")
print(f"    Required: {item_schema.get('required', [])}")

# A constraint declared on the element model
item_quantity = item_fields.get('quantity', {})
print("\n  Nested array item constraints preserved:")
print(f"    OrderItem.quantity minimum: {item_quantity.get('minimum')}")


In [None]:
# Example 3: Deeply nested structures
divider = "=" * 70
print("\n" + divider)
print("Example 3: Deeply Nested Structures")
print(divider)

# Four levels: Product → list of Warehouse → Location → Coordinates.
# (Model docstrings are left untouched: they feed the schema description.)
class Coordinates(BaseModel):
    """Geographic coordinates."""
    latitude: float = Field(..., ge=-90, le=90)
    longitude: float = Field(..., ge=-180, le=180)

class Location(BaseModel):
    """Location with coordinates."""
    name: str = Field(..., description="Location name")
    coordinates: Coordinates  # embedded model

class Warehouse(BaseModel):
    """Warehouse information."""
    warehouse_id: str = Field(..., description="Warehouse identifier")
    location: Location  # embedded model that embeds another model
    capacity: int = Field(..., ge=0)

class Product(BaseModel):
    """Product with warehouse information."""
    product_id: str = Field(..., description="Product identifier")
    name: str = Field(..., min_length=1)
    warehouses: list[Warehouse] = Field(default_factory=list)  # list of embedded models


def _child_schema(node, field_name):
    """Return the sub-schema of `field_name` under node['properties'], or {} when absent."""
    return node.get('properties', {}).get(field_name, {})


# Convert to schema
schema = to_dict(Product)

print("\n✓ Converted Product model with deeply nested structures")
print(f"  Top-level properties: {list(schema.get('properties', {}))}")

# Walk down one level at a time: array → element → location → coordinates.
warehouses_prop = _child_schema(schema, 'warehouses')
warehouse_item = warehouses_prop.get('items', {})
warehouse_location = _child_schema(warehouse_item, 'location')
location_coords = _child_schema(warehouse_location, 'coordinates')

print("\n  Deep nesting structure:")
print("    Product.warehouses (array)")
print("      → Warehouse.location (nested)")
print("        → Location.coordinates (nested)")
print("          → Coordinates.latitude/longitude")

print("\n  Deeply nested constraints preserved:")
lat_prop = _child_schema(location_coords, 'latitude')
lon_prop = _child_schema(location_coords, 'longitude')
print(f"    Coordinates.latitude range: [{lat_prop.get('minimum')}, {lat_prop.get('maximum')}]")
print(f"    Coordinates.longitude range: [{lon_prop.get('minimum')}, {lon_prop.get('maximum')}]")


## Service 4: JSON Schema Converter - Nested Models from Separate Modules

**Important**: Nested models do NOT need to be in the same file! The converter works with any BaseModel class that's accessible in the Python runtime, regardless of where it's defined.


In [None]:
# Example: Nested models from separate modules
separator = "=" * 70
print(separator)
print("Example: Nested Models from Separate Modules")
print(separator)

import tempfile
import os
import importlib.util

# Source text of a throwaway module that defines the nested model.
# Backslashes are doubled twice: once for this literal, once for the string
# literal inside the generated file.
address_module_source = '''from pydantic import BaseModel, Field

class Address(BaseModel):
    """Address model from separate module."""
    street: str = Field(..., min_length=1, description="Street address")
    city: str = Field(..., min_length=1, description="City name")
    state: str = Field(..., min_length=2, max_length=2, description="State code")
    zipcode: str = Field(..., pattern="^\\\\d{5}(-\\\\d{4})?$", description="ZIP code")
'''

# delete=False keeps the file on disk after the handle closes so it can be
# imported by path; the finally clause below removes it.
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as module_file:
    module_file.write(address_module_source)
    temp_module_path = module_file.name

try:
    # Load the file as a module without touching sys.path.
    spec = importlib.util.spec_from_file_location("temp_address_module", temp_module_path)
    address_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(address_module)
    Address = address_module.Address

    # A model defined here that embeds the dynamically imported one.
    class Person(BaseModel):
        """Person model using Address from separate module."""
        name: str = Field(..., min_length=1)
        age: int = Field(..., ge=0, le=150)
        address: Address  # embedded model from the temporary module

    # Convert once and keep a handle on the top-level property map.
    schema = to_dict(Person)
    person_props = schema.get('properties', {})

    print("\n✓ Converted Person model with Address from separate module")
    print(f"  Top-level properties: {list(person_props)}")

    # Nested address schema
    address_prop = person_props.get('address', {})
    address_fields = address_prop.get('properties', {})
    print("\n  Address (from separate module) schema:")
    print(f"    Type: {address_prop.get('type')}")
    print(f"    Properties: {list(address_fields)}")
    print(f"    Required: {address_prop.get('required', [])}")

    # Constraints declared in the temporary module
    address_street = address_fields.get('street', {})
    address_zipcode = address_fields.get('zipcode', {})
    print("\n  Constraints from separate module preserved:")
    print(f"    Address.street minLength: {address_street.get('minLength')}")
    print(f"    Address.zipcode pattern: {address_zipcode.get('pattern')}")

    print("\n" + separator)
    for summary_line in (
        "✓ Key Point: Nested models can be defined in separate files/modules!",
        "  The converter works with any BaseModel class accessible in Python runtime.",
        "  It doesn't matter where the model is defined:",
        "    - Same file",
        "    - Different file in same package",
        "    - Different package",
        "    - Dynamically imported modules",
    ):
        print(summary_line)
    print(separator)

except Exception as e:
    # Report the failure with a traceback instead of aborting the notebook run.
    print(f"\n✗ Error testing separate modules: {type(e).__name__}: {e}")
    import traceback
    traceback.print_exc()
finally:
    # Remove the temporary module file whether or not the demo succeeded.
    if os.path.exists(temp_module_path):
        os.unlink(temp_module_path)


In [None]:
# Example 4: Round-trip with nested models
line = "=" * 70
print("\n" + line)
print("Example 4: Round-Trip with Nested Models")
print(line)

# Parent model with one embedded model and one list field.
# (No docstrings are added: a model docstring would appear in the schema.)
class Category(BaseModel):
    name: str = Field(..., min_length=1)
    description: str = Field(default="")

class Product(BaseModel):
    product_id: str
    name: str = Field(..., min_length=1)
    category: Category
    tags: list[str] = Field(default_factory=list)

# Step 1: model → schema
print("\n1. Pydantic model → JSON Schema")
original_schema = to_dict(Product)
original_props = original_schema.get('properties', {})
print("   ✓ Converted Product model to schema")
print(f"     Properties: {list(original_props)}")

# Nested category sub-schema
category_prop = original_props.get('category', {})
print(f"     Category (nested) properties: {list(category_prop.get('properties', {}))}")

# Step 2: schema → model
print("\n2. JSON Schema → Pydantic model")
ProductModel2 = from_dict(original_schema, "Product2")
print("   ✓ Generated ProductModel2 from schema")

# Step 3: build an instance of the regenerated model from plain data
print("\n3. Create instance with nested data")
product_input = {
    "product_id": "prod-123",
    "name": "Widget",
    "category": {"name": "Electronics", "description": "Electronic items"},
    "tags": ["popular", "new"],
}
product = ProductModel2(**product_input)
product_category = product.category
print(f"   ✓ Created product: {product.name}")
print(f"   ✓ Category: {product_category.name} - {product_category.description}")
print(f"   ✓ Tags: {product.tags}")

# Step 4: regenerated model → schema again
print("\n4. Pydantic model → JSON Schema (round-trip)")
round_trip_schema = to_dict(ProductModel2)
round_trip_props = round_trip_schema.get('properties', {})
print("   ✓ Round-trip conversion successful")
print(f"     Original has {len(original_props)} properties")
print(f"     Round-trip has {len(round_trip_props)} properties")

# The nested field should still be described as an object after the round trip.
rt_category = round_trip_props.get('category', {})
print(f"     Nested category preserved: {rt_category.get('type') == 'object'}")


In [17]:
# Inspection cell: show the full schema held in `schema_dict`, which was assigned
# from to_dict(Product) in the first Service 4 cell. Later cells redefine
# `Product` but do not reassign `schema_dict`.
schema_dict

{'type': 'object',
 'title': 'Product',
 'description': 'Product model for testing reverse conversion.',
 'properties': {'product_id': {'description': 'Unique product identifier',
   'title': 'Product Id',
   'type': 'string'},
  'name': {'maxLength': 100,
   'minLength': 1,
   'title': 'Name',
   'type': 'string'},
  'price': {'description': 'Product price in USD',
   'minimum': 0,
   'title': 'Price',
   'type': 'number'},
  'in_stock': {'default': True, 'title': 'In Stock', 'type': 'boolean'},
  'tags': {'items': {'type': 'string'}, 'title': 'Tags', 'type': 'array'}},
 'required': ['product_id', 'name', 'price']}

## Service 5: Runtime Validator

Test validating data against generated Pydantic models in production scenarios.


In [None]:
# Generate a model for validation: three required fields, each with a constraint.
validation_schema = {
    "type": "object",
    "properties": {
        "user_id": {"type": "string", "minLength": 1},
        "email": {"type": "string", "pattern": "^[^@]+@[^@]+\\.[^@]+$"},
        "age": {"type": "integer", "minimum": 0, "maximum": 150}
    },
    "required": ["user_id", "email", "age"]
}

UserModel = from_dict(validation_schema, "User")

# Test 1: Validate single valid record
print("Test 1: Validate single valid record")
valid_data = {
    "user_id": "user123",
    "email": "alice@example.com",
    "age": 30
}
result = validate(UserModel, valid_data)
if result.is_valid:
    print(f"  ✓ Validation passed: {result.data.user_id} ({result.data.email})")
else:
    print(f"  ✗ Validation failed: {result.errors}")

# Test 2: Validate single invalid record
print("\nTest 2: Validate single invalid record")
invalid_data = {
    "user_id": "user123",
    "email": "invalid-email",  # Does not match the email pattern
    "age": 200  # Above maximum=150
}
result = validate(UserModel, invalid_data)
if not result.is_valid:
    print("  ✓ Validation correctly failed")
    print(f"  Errors: {len(result.errors)} error(s)")
    for error in result.errors[:2]:  # Show first 2 errors
        print(f"    - {error}")
else:
    # Without this branch an invalid record that slipped through would print
    # nothing and the test would look like it passed.
    print("  ✗ Validation unexpectedly passed for invalid data")

# Test 3: Validate batch
print("\nTest 3: Validate batch of records")
batch_data = [
    {"user_id": "user1", "email": "user1@example.com", "age": 25},
    {"user_id": "user2", "email": "user2@example.com", "age": 35},
    {"user_id": "user3", "email": "invalid", "age": 40},  # Invalid email
    {"user_id": "user4", "email": "user4@example.com", "age": -5},  # Invalid age
]

# One result per input record; each is tallied by its is_valid flag.
results = validate_batch(UserModel, batch_data)
valid_count = sum(1 for r in results if r.is_valid)
invalid_count = sum(1 for r in results if not r.is_valid)

print("  ✓ Batch validation complete:")
print(f"    Valid records: {valid_count}/{len(batch_data)}")
print(f"    Invalid records: {invalid_count}/{len(batch_data)}")

# Show valid records
valid_records = [r.data for r in results if r.is_valid]
print(f"    Valid user IDs: {[r.user_id for r in valid_records]}")


## Complete Workflow: All 5 Services Together

Demonstrate the complete data production journey from contract specification to runtime validation.



In [None]:
# End-to-end run: contract → parse → store → generate model → validate.
# Depends on InMemoryMetadataStore from the metadata store cell above.
print("=" * 70)
print("COMPLETE DATA PRODUCTION JOURNEY")
print("=" * 70)

# Step 1: Contract Specification (create a contract)
print("\n[Step 1] Contract Specification")
contract = {
    "schema": {
        "type": "object",
        "properties": {
            "order_id": {"type": "string"},
            "customer_id": {"type": "string"},
            "total": {"type": "number", "minimum": 0},
            "items": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "product_id": {"type": "string"},
                        "quantity": {"type": "integer", "minimum": 1},
                        "price": {"type": "number", "minimum": 0}
                    },
                    "required": ["product_id", "quantity", "price"]
                }
            }
        },
        "required": ["order_id", "customer_id", "total", "items"]
    },
    "ownership": {
        "owner": "orders-team",
        "team": "data-engineering"
    },
    "metadata": {
        "version": "1.0",
        "description": "Order data contract"
    }
}
print("  ✓ Contract defined")

# Step 2: Contract Parsing
print("\n[Step 2] Contract Parsing")
metadata = parse_contract(contract)
print(f"  ✓ Contract parsed: {len(metadata.schema.get('properties', {}))} properties")
print(f"  ✓ Owner: {metadata.ownership.get('owner')}")

# Step 3: Metadata Storage
print("\n[Step 3] Metadata Storage")
store = InMemoryMetadataStore()
store.connect()
schema_id = store.store_schema("order", metadata.schema, version="1.0")
store.store_ownership(schema_id, owner=metadata.ownership.get("owner"), 
                     team=metadata.ownership.get("team"))
print(f"  ✓ Metadata stored (ID: {schema_id})")

# Step 4: Pydantic Model Generation (from the stored copy, not the original dict)
print("\n[Step 4] Pydantic Model Generation")
stored_schema = store.get_schema(schema_id)
OrderModel = from_dict(stored_schema, "Order")
print(f"  ✓ Model generated: {OrderModel.__name__}")

# Step 5: Runtime Validation
print("\n[Step 5] Runtime Validation")
test_order = {
    "order_id": "ORD-123",
    "customer_id": "CUST-456",
    "total": 99.99,
    "items": [
        {"product_id": "PROD-1", "quantity": 2, "price": 49.99}
    ]
}

result = validate(OrderModel, test_order)
if result.is_valid:
    print("  ✓ Order validated successfully")
    print(f"    Order ID: {result.data.order_id}")
    print(f"    Customer: {result.data.customer_id}")
    print(f"    Total: ${result.data.total}")
    print(f"    Items: {len(result.data.items)}")
else:
    print(f"  ✗ Validation failed: {result.errors}")

# Bonus: JSON Schema Converter (round-trip)
print("\n[Bonus] JSON Schema Converter (Round-trip)")
converted_schema = to_dict(OrderModel)
print("  ✓ Model converted back to schema")
print(f"    Schema has {len(converted_schema.get('properties', {}))} properties")

store.disconnect()
print("\n" + "=" * 70)
# Only claim success when Step 5 actually validated the order; previously the
# success banner was printed even after the failure branch had run.
if result.is_valid:
    print("✓ Complete workflow test finished successfully!")
else:
    print("✗ Complete workflow test finished with validation errors")
print("=" * 70)
