# Schema Utilities

> Dataclass-to-JSON-schema conversion utilities for form generation

In [None]:
#| default_exp core.schemas

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import types
from dataclasses import fields, MISSING
from typing import Dict, Any, Union, get_type_hints, get_origin, get_args

## Schema Metadata Constants

Constants for field metadata keys used in dataclass-to-JSON-schema conversion. Use these in dataclass field `metadata` dicts to control JSON schema generation.

In [None]:
#| export
# Keys recognised in a dataclass field's `metadata` mapping. Each constant's
# value is the JSON schema keyword that the metadata entry is copied to,
# unchanged, on the generated property.
SCHEMA_TITLE = "title"        # Display title for the field
SCHEMA_DESC = "description"   # Help text description
SCHEMA_MIN = "minimum"        # Minimum value for numbers
SCHEMA_MAX = "maximum"        # Maximum value for numbers
SCHEMA_ENUM = "enum"          # Allowed values for dropdowns
SCHEMA_MIN_LEN = "minLength"  # Minimum string length
SCHEMA_MAX_LEN = "maxLength"  # Maximum string length
SCHEMA_PATTERN = "pattern"    # Regex pattern for strings
SCHEMA_FORMAT = "format"      # String format (email, uri, date, etc.)

## Type Conversion

In [None]:
#| export
def _python_type_to_json_type(
    python_type: type  # Python type annotation to convert
) -> Dict[str, Any]:  # JSON schema type definition
    """Convert Python type to JSON schema type."""
    origin = get_origin(python_type)
    args = get_args(python_type)
    
    # Handle List[X] -> array with items
    if origin is list:
        item_type = args[0] if args else str
        return {
            "type": "array",
            "items": _python_type_to_json_type(item_type)
        }
    
    # Handle Optional[X] -> nullable type
    if origin is type(None) or (origin and type(None) in args):
        non_none_types = [a for a in args if a is not type(None)]
        if non_none_types:
            base_schema = _python_type_to_json_type(non_none_types[0])
            base_schema["type"] = [base_schema["type"], "null"]
            return base_schema
        return {"type": "null"}
    
    # Handle basic types
    type_mapping = {
        str: {"type": "string"},
        int: {"type": "integer"},
        float: {"type": "number"},
        bool: {"type": "boolean"},
    }
    
    return type_mapping.get(python_type, {"type": "string"})

In [None]:
# Test type conversion
from typing import List, Optional

# Primitive annotations map directly onto JSON schema primitives
assert _python_type_to_json_type(str) == {"type": "string"}
assert _python_type_to_json_type(int) == {"type": "integer"}
assert _python_type_to_json_type(float) == {"type": "number"}
assert _python_type_to_json_type(bool) == {"type": "boolean"}

# Lists become arrays whose items are converted recursively
assert _python_type_to_json_type(List[str]) == {"type": "array", "items": {"type": "string"}}
assert _python_type_to_json_type(List[int]) == {"type": "array", "items": {"type": "integer"}}

# Optional[X] keeps X's schema and adds "null" to its type
assert _python_type_to_json_type(Optional[int]) == {"type": ["integer", "null"]}
assert _python_type_to_json_type(Optional[List[str]]) == {
    "type": ["array", "null"],
    "items": {"type": "string"},
}
print("Type conversion tests passed")

Type conversion tests passed


## Dataclass to JSON Schema Conversion

In [None]:
#| export
def dataclass_to_jsonschema(
    cls: type  # Dataclass with field metadata
) -> Dict[str, Any]:  # JSON schema dictionary
    """Convert a dataclass to a JSON schema for form generation.

    The top-level `name`, `title` and `description` come from the class
    attributes `__schema_name__`, `__schema_title__` and
    `__schema_description__`, defaulting to the class name (for both name and
    title) and the class `__doc__` (or `""`).

    Every dataclass field becomes an entry in `properties`, built from:

    - its type annotation, converted with `_python_type_to_json_type`;
    - any of the `SCHEMA_*` keys present in the field's `metadata`;
    - its `default`, or the result of calling its `default_factory`.

    Raises `TypeError` if `cls` has no `__dataclass_fields__` attribute.
    """
    if not hasattr(cls, "__dataclass_fields__"):
        raise TypeError(f"{cls} is not a dataclass")

    # Get class-level schema metadata
    schema = {
        "name": getattr(cls, "__schema_name__", cls.__name__),
        "title": getattr(cls, "__schema_title__", cls.__name__),
        "description": getattr(cls, "__schema_description__", cls.__doc__ or ""),
        "type": "object",
        "properties": {}
    }

    # Resolving hints is best-effort: one unresolvable annotation makes
    # `get_type_hints` fail for the whole class. In that case each field falls
    # back to its own `Field.type` below instead of losing all type info.
    try:
        type_hints = get_type_hints(cls)
    except Exception:
        type_hints = {}

    # Metadata keys copied verbatim onto the property schema (loop-invariant).
    metadata_keys = (
        SCHEMA_TITLE, SCHEMA_DESC, SCHEMA_MIN, SCHEMA_MAX,
        SCHEMA_ENUM, SCHEMA_MIN_LEN, SCHEMA_MAX_LEN,
        SCHEMA_PATTERN, SCHEMA_FORMAT,
    )

    # Process each field
    for f in fields(cls):
        if f.name in type_hints:
            python_type = type_hints[f.name]
        elif isinstance(f.type, str):
            # Unresolved string annotation: nothing usable, treat as string.
            python_type = str
        else:
            # Hint resolution failed, but the raw annotation is a real type.
            python_type = f.type
        prop_schema = _python_type_to_json_type(python_type)

        # Add metadata from field
        metadata = f.metadata or {}
        for key in metadata_keys:
            if key in metadata:
                prop_schema[key] = metadata[key]

        # Add default value; a factory is invoked so the schema holds a
        # concrete value rather than the callable.
        if f.default is not MISSING:
            prop_schema["default"] = f.default
        elif f.default_factory is not MISSING:
            prop_schema["default"] = f.default_factory()

        schema["properties"][f.name] = prop_schema

    return schema

In [None]:
# Test dataclass_to_jsonschema
from dataclasses import dataclass, field
from typing import ClassVar

@dataclass
class TestConfig:
    __schema_name__: ClassVar[str] = "test_config"
    __schema_title__: ClassVar[str] = "Test Configuration"
    __schema_description__: ClassVar[str] = "A test configuration"
    
    name: str = field(
        default="default",
        metadata={SCHEMA_TITLE: "Name", SCHEMA_DESC: "The name"}
    )
    count: int = field(
        default=10,
        metadata={SCHEMA_TITLE: "Count", SCHEMA_MIN: 1, SCHEMA_MAX: 100}
    )
    tags: list = field(
        default_factory=list,
        metadata={SCHEMA_TITLE: "Tags"}
    )

schema = dataclass_to_jsonschema(TestConfig)

# Class-level attributes override the name/title/description defaults
assert schema["name"] == "test_config"
assert schema["title"] == "Test Configuration"
assert schema["description"] == "A test configuration"
assert schema["type"] == "object"

# ClassVar attributes are not dataclass fields, so only real fields appear
assert set(schema["properties"]) == {"name", "count", "tags"}

# Field types, metadata and defaults are carried onto each property
assert schema["properties"]["name"]["type"] == "string"
assert schema["properties"]["name"]["title"] == "Name"
assert schema["properties"]["name"]["description"] == "The name"
assert schema["properties"]["name"]["default"] == "default"
assert schema["properties"]["count"]["type"] == "integer"
assert schema["properties"]["count"]["minimum"] == 1
assert schema["properties"]["count"]["maximum"] == 100
assert schema["properties"]["count"]["default"] == 10
assert schema["properties"]["tags"]["title"] == "Tags"
assert schema["properties"]["tags"]["default"] == []

# Non-dataclass input is rejected
try:
    dataclass_to_jsonschema(int)
except TypeError:
    pass
else:
    raise AssertionError("expected TypeError for a non-dataclass")

print("dataclass_to_jsonschema tests passed")

dataclass_to_jsonschema tests passed


In [None]:
#| hide
# Run nbdev's export step so cells marked `#| export` are written out as library code.
import nbdev; nbdev.nbdev_export()