# Configuration Validation

> Validation helpers for plugin configuration dataclasses

In [None]:
#| default_exp utils.validation

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from dataclasses import fields, is_dataclass, asdict, MISSING
from typing import Dict, Any, Tuple, Optional, Type, TypeVar, get_type_hints, get_origin, get_args, Union

T = TypeVar('T')

## Schema Metadata Constants

Constants for field metadata keys used in dataclass configuration. These enable validation and are compatible with JSON schema generation for UI form builders.

In [None]:
#| export
# Keys looked up in a dataclass field's `metadata` mapping by the validation
# and schema helpers in this module. The string values are copied verbatim
# into the generated schema, so they double as the emitted property keywords.
SCHEMA_TITLE = "title"        # Display title for the field
SCHEMA_DESC = "description"   # Help text description
SCHEMA_MIN = "minimum"        # Minimum value for numbers
SCHEMA_MAX = "maximum"        # Maximum value for numbers
SCHEMA_ENUM = "enum"          # Allowed values for dropdowns
SCHEMA_MIN_LEN = "minLength"  # Minimum string length
SCHEMA_MAX_LEN = "maxLength"  # Maximum string length
SCHEMA_PATTERN = "pattern"    # Regex pattern for strings
SCHEMA_FORMAT = "format"      # String format (email, uri, date, etc.)

## Field Validation

Functions for validating field values against metadata constraints.

In [None]:
#| export
import re

def validate_field_value(
    value:Any, # Value to validate
    metadata:Dict[str, Any], # Field metadata containing constraints
    field_name:str="" # Field name for error messages
) -> Tuple[bool, Optional[str]]: # (is_valid, error_message)
    """Validate a value against field metadata constraints.

    Constraints are checked in a fixed order (enum, numeric range, string
    length, string pattern) and the first violation is reported. Constraints
    that do not apply to the value's type are ignored, as are metadata keys
    this function does not know about.
    """
    # Enum membership applies to values of any type.
    if SCHEMA_ENUM in metadata:
        allowed = metadata[SCHEMA_ENUM]
        if value not in allowed:
            return False, f"{field_name}: {value!r} is not one of {allowed}"

    # Numeric range. bool is a subclass of int, so it is excluded explicitly
    # to keep True/False from being range-checked as 1/0.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        if SCHEMA_MIN in metadata and value < metadata[SCHEMA_MIN]:
            return False, f"{field_name}: {value} is less than minimum {metadata[SCHEMA_MIN]}"
        if SCHEMA_MAX in metadata and value > metadata[SCHEMA_MAX]:
            return False, f"{field_name}: {value} is greater than maximum {metadata[SCHEMA_MAX]}"

    # String length and pattern.
    if isinstance(value, str):
        length = len(value)
        if SCHEMA_MIN_LEN in metadata and length < metadata[SCHEMA_MIN_LEN]:
            return False, f"{field_name}: string length {length} is less than minimum {metadata[SCHEMA_MIN_LEN]}"
        if SCHEMA_MAX_LEN in metadata and length > metadata[SCHEMA_MAX_LEN]:
            return False, f"{field_name}: string length {length} is greater than maximum {metadata[SCHEMA_MAX_LEN]}"
        if SCHEMA_PATTERN in metadata:
            pattern = metadata[SCHEMA_PATTERN]
            # JSON Schema's "pattern" keyword is not implicitly anchored, so
            # the regex may match anywhere in the string. Use search() rather
            # than match() so this check agrees with schema-driven validators
            # consuming the same metadata; authors wanting a full match add
            # ^ and $ themselves.
            if re.search(pattern, value) is None:
                return False, f"{field_name}: {value!r} does not match pattern {pattern!r}"

    return True, None

In [None]:
#| export
def validate_config(
    config:Any # Configuration dataclass instance to validate
) -> Tuple[bool, Optional[str]]: # (is_valid, error_message)
    """Check each field of a dataclass instance against its metadata, stopping at the first failure."""
    # Dataclass *types* also satisfy is_dataclass(), so rule them out explicitly.
    is_instance = is_dataclass(config) and not isinstance(config, type)
    if not is_instance:
        raise TypeError(f"Expected dataclass instance, got {type(config).__name__}")

    for spec in fields(config):
        ok, message = validate_field_value(
            getattr(config, spec.name),
            spec.metadata or {},
            spec.name,
        )
        if not ok:
            # Report only the first violation, in field declaration order.
            return False, message

    return True, None

## Dataclass Configuration Utilities

These functions provide utilities for working with dataclass-based plugin configurations.

In [None]:
#| export
def config_to_dict(
    config:Any # Configuration dataclass instance
) -> Dict[str, Any]: # Dictionary representation of the configuration
    """Return a dict for a dataclass instance; a dict argument is handed back unchanged."""
    # Dataclass *types* also satisfy is_dataclass(); only instances are converted.
    is_instance = is_dataclass(config) and not isinstance(config, type)
    if is_instance:
        return asdict(config)

    if isinstance(config, dict):
        # Passthrough: the very same object is returned, not a copy.
        return config

    raise TypeError(f"Expected dataclass instance or dict, got {type(config).__name__}")

Converts a dataclass configuration instance to a dictionary for serialization or passing to other systems. Also accepts dict input for passthrough convenience.

In [None]:
#| export
def dict_to_config(
    config_class:Type[T], # Configuration dataclass type
    data:Optional[Dict[str, Any]]=None, # Dictionary with configuration values
    validate:bool=False # Whether to validate against metadata constraints
) -> T: # Instance of the configuration dataclass
    """Create a configuration dataclass instance from a dictionary.

    Keys in `data` that are not constructor parameters of `config_class`
    (unknown names, or fields declared with `init=False`) are ignored.
    Fields absent from `data` fall back to the dataclass defaults.

    Raises `TypeError` if `config_class` is not a dataclass type and
    `ValueError` if `validate` is true and a constraint is violated.
    """
    # is_dataclass() is also true for instances, which cannot be called as a
    # constructor below, so require an actual class here.
    if not (isinstance(config_class, type) and is_dataclass(config_class)):
        raise TypeError(f"Expected dataclass type, got {type(config_class).__name__}")

    if data is None:
        data = {}

    # Only fields with init=True are accepted by the generated __init__;
    # passing an init=False field would raise an unexpected-keyword TypeError.
    init_fields = {f.name for f in fields(config_class) if f.init}

    # Drop everything the constructor would not accept
    filtered_data = {k: v for k, v in data.items() if k in init_fields}

    # Create the config instance
    config = config_class(**filtered_data)

    # Optionally validate
    if validate:
        is_valid, error = validate_config(config)
        if not is_valid:
            raise ValueError(error)

    return config

In [None]:
#| export
def extract_defaults(
    config_class:Type # Configuration dataclass type
) -> Dict[str, Any]: # Default values from the dataclass
    """Collect the declared default of every field that has one, keyed by field name."""
    if not is_dataclass(config_class):
        raise TypeError(f"Expected dataclass type, got {type(config_class).__name__}")

    collected = {}
    for spec in fields(config_class):
        has_value = spec.default is not MISSING
        has_factory = spec.default_factory is not MISSING
        if not (has_value or has_factory):
            # Required field: it has no default to report.
            continue
        # A plain default wins; otherwise call the factory for a fresh value.
        collected[spec.name] = spec.default if has_value else spec.default_factory()

    return collected

## JSON Schema Conversion

Functions for converting dataclass configurations to JSON Schema format, enabling automatic form generation in UIs.

In [None]:
#| export
def _python_type_to_json_type(
    python_type:type # Python type annotation to convert
) -> Dict[str, Any]: # JSON schema type definition
    """Convert Python type to JSON schema type."""
    origin = get_origin(python_type)
    args = get_args(python_type)
    
    # Handle List[X] -> array with items
    if origin is list:
        item_type = args[0] if args else str
        return {
            "type": "array",
            "items": _python_type_to_json_type(item_type)
        }
    
    # Handle Optional[X] / Union[X, None] -> nullable type
    if origin is Union:
        non_none_types = [a for a in args if a is not type(None)]
        if len(non_none_types) == 1:
            # This is Optional[X]
            base_schema = _python_type_to_json_type(non_none_types[0])
            base_schema["type"] = [base_schema["type"], "null"]
            return base_schema
        # Multiple non-None types - just use first one
        if non_none_types:
            return _python_type_to_json_type(non_none_types[0])
        return {"type": "null"}
    
    # Handle basic types
    type_mapping = {
        str: {"type": "string"},
        int: {"type": "integer"},
        float: {"type": "number"},
        bool: {"type": "boolean"},
    }
    
    return type_mapping.get(python_type, {"type": "string"})

In [None]:
#| export
def dataclass_to_jsonschema(
    cls:type # Dataclass with field metadata
) -> Dict[str, Any]: # JSON schema dictionary
    """Convert a dataclass to a JSON schema for form generation.

    The result has `name`, `title`, `description`, `type` and `properties`
    keys. Class-level values come from the optional `__schema_name__`,
    `__schema_title__` and `__schema_description__` attributes, falling back
    to the class name and docstring. Each property combines the JSON type
    derived from the field annotation, any recognised `SCHEMA_*` metadata
    keys, and the field default when one is declared.

    Raises `TypeError` if `cls` is not a dataclass.
    """
    if not hasattr(cls, "__dataclass_fields__"):
        raise TypeError(f"{cls} is not a dataclass")

    # Get class-level schema metadata
    schema = {
        "name": getattr(cls, "__schema_name__", cls.__name__),
        "title": getattr(cls, "__schema_title__", cls.__name__),
        "description": getattr(cls, "__schema_description__", cls.__doc__ or ""),
        "type": "object",
        "properties": {}
    }

    # Get type hints for the class. Resolution is best effort: one
    # unresolvable annotation makes get_type_hints() fail for the whole
    # class, so each field falls back to its own annotation below.
    try:
        type_hints = get_type_hints(cls)
    except Exception:
        type_hints = {}

    # Metadata keys copied into each property, in emission order
    schema_keys = (
        SCHEMA_TITLE, SCHEMA_DESC, SCHEMA_MIN, SCHEMA_MAX,
        SCHEMA_ENUM, SCHEMA_MIN_LEN, SCHEMA_MAX_LEN,
        SCHEMA_PATTERN, SCHEMA_FORMAT,
    )

    # Process each field
    for f in fields(cls):
        if f.name in type_hints:
            python_type = type_hints[f.name]
        elif isinstance(f.type, str):
            # Still an unresolved string annotation: treat as string
            python_type = str
        else:
            # Hints could not be resolved class-wide, but this field's
            # annotation is already a real type object, so use it.
            python_type = f.type
        prop_schema = _python_type_to_json_type(python_type)

        # Add metadata from field
        metadata = f.metadata or {}
        for key in schema_keys:
            if key in metadata:
                prop_schema[key] = metadata[key]

        # Add default value (a default_factory is called to produce it)
        if f.default is not MISSING:
            prop_schema["default"] = f.default
        elif f.default_factory is not MISSING:
            prop_schema["default"] = f.default_factory()

        schema["properties"][f.name] = prop_schema

    return schema

In [None]:
# Test _python_type_to_json_type
from typing import List, Optional

# Each annotation paired with the exact schema fragment it must produce
_expected_schemas = [
    (str, {"type": "string"}),
    (int, {"type": "integer"}),
    (float, {"type": "number"}),
    (bool, {"type": "boolean"}),
    (List[str], {"type": "array", "items": {"type": "string"}}),
]
for _annotation, _expected in _expected_schemas:
    assert _python_type_to_json_type(_annotation) == _expected

# Optional[X] keeps X's schema and widens "type" to include null
optional_result = _python_type_to_json_type(Optional[int])
assert optional_result["type"] == ["integer", "null"]

print("Type conversion tests passed")

Type conversion tests passed


In [None]:
# Test dataclass_to_jsonschema using ExampleConfig, which is defined further below.
# (This cell only defines the test function; it is called in a later cell, once ExampleConfig exists.)
def _test_jsonschema():
    """Generate the schema for ExampleConfig, assert its expected shape, and return it."""
    schema = dataclass_to_jsonschema(ExampleConfig)

    # Top-level structure
    assert schema["name"] == "ExampleConfig"
    assert schema["type"] == "object"
    assert "properties" in schema

    props = schema["properties"]

    # Enum-constrained string field
    model = props["model"]
    assert model["type"] == "string"
    assert model["title"] == "Model"
    assert model["enum"] == ["tiny", "base", "small", "medium", "large"]
    assert model["default"] == "base"

    # Range-constrained float field
    temperature = props["temperature"]
    assert temperature["type"] == "number"
    assert temperature["minimum"] == 0.0
    assert temperature["maximum"] == 1.0

    # Remaining fields: only the derived JSON type is checked
    for field_name, json_type in (
        ("batch_size", "integer"),
        ("enabled", "boolean"),
        ("tags", "array"),
    ):
        assert props[field_name]["type"] == json_type

    print("dataclass_to_jsonschema tests passed")
    return schema

### Example: Working with Configuration Dataclasses

In [None]:
from dataclasses import dataclass, field
from typing import List

# Sample configuration used by the test cells in this notebook. Each field
# carries SCHEMA_* metadata. The class docstring is left untouched on purpose:
# it is emitted as the "description" of the generated schema.
@dataclass
class ExampleConfig:
    """Example configuration dataclass with metadata constraints."""
    # Enum-constrained string: only the listed sizes pass validation
    model:str = field(
        default="base",
        metadata={
            SCHEMA_TITLE: "Model",
            SCHEMA_DESC: "Model size to use",
            SCHEMA_ENUM: ["tiny", "base", "small", "medium", "large"]
        }
    )
    # Float limited to the inclusive range 0.0 to 1.0
    temperature:float = field(
        default=0.0,
        metadata={
            SCHEMA_TITLE: "Temperature",
            SCHEMA_DESC: "Sampling temperature",
            SCHEMA_MIN: 0.0,
            SCHEMA_MAX: 1.0
        }
    )
    # Integer limited to the inclusive range 1 to 32
    batch_size:int = field(
        default=8,
        metadata={
            SCHEMA_TITLE: "Batch Size",
            SCHEMA_DESC: "Batch size for processing",
            SCHEMA_MIN: 1,
            SCHEMA_MAX: 32
        }
    )
    # Boolean with display metadata only (no constraints)
    enabled:bool = field(
        default=True,
        metadata={SCHEMA_TITLE: "Enabled", SCHEMA_DESC: "Whether feature is enabled"}
    )
    # List field: default_factory gives each instance its own empty list
    tags:List[str] = field(
        default_factory=list,
        metadata={SCHEMA_TITLE: "Tags", SCHEMA_DESC: "Optional tags"}
    )

# Confirm the class was built and show its field names in declaration order
print("ExampleConfig dataclass defined with metadata constraints")
print(f"Fields: {[f.name for f in fields(ExampleConfig)]}")

ExampleConfig dataclass defined with metadata constraints
Fields: ['model', 'temperature', 'batch_size', 'enabled', 'tags']


In [None]:
# Test extract_defaults
# Every ExampleConfig field declares a default, so all five names are listed;
# `tags` comes from calling its default_factory.
defaults = extract_defaults(ExampleConfig)
print("Default values extracted from ExampleConfig:")
for k, v in defaults.items():
    print(f"  {k}: {v!r}")

Default values extracted from ExampleConfig:
  model: 'base'
  temperature: 0.0
  batch_size: 8
  enabled: True
  tags: []


In [None]:
# Test dict_to_config with validation
print("Creating config from dictionary:")

# Valid config
config1 = dict_to_config(ExampleConfig, {"model": "large", "temperature": 0.7}, validate=True)
print(f"Valid config: {config1}")
assert (config1.model, config1.temperature) == ("large", 0.7)

# Config with defaults (all valid)
config2 = dict_to_config(ExampleConfig, {}, validate=True)
print(f"Config with defaults: {config2}")
assert config2 == ExampleConfig()

# Each failure case below has an `else` branch so that the cell fails loudly
# if validation stops rejecting the value, instead of passing silently.

# Test validation failure - invalid enum
try:
    config_bad = dict_to_config(ExampleConfig, {"model": "invalid"}, validate=True)
except ValueError as e:
    print(f"\n✓ Caught invalid enum: {e}")
else:
    raise AssertionError("invalid enum value was accepted")

# Test validation failure - value below minimum
try:
    config_bad = dict_to_config(ExampleConfig, {"temperature": -0.5}, validate=True)
except ValueError as e:
    print(f"✓ Caught below minimum: {e}")
else:
    raise AssertionError("value below minimum was accepted")

# Test validation failure - value above maximum
try:
    config_bad = dict_to_config(ExampleConfig, {"batch_size": 100}, validate=True)
except ValueError as e:
    print(f"✓ Caught above maximum: {e}")
else:
    raise AssertionError("value above maximum was accepted")

Creating config from dictionary:
Valid config: ExampleConfig(model='large', temperature=0.7, batch_size=8, enabled=True, tags=[])
Config with defaults: ExampleConfig(model='base', temperature=0.0, batch_size=8, enabled=True, tags=[])

✓ Caught invalid enum: model: 'invalid' is not one of ['tiny', 'base', 'small', 'medium', 'large']
✓ Caught below minimum: temperature: -0.5 is less than minimum 0.0
✓ Caught above maximum: batch_size: 100 is greater than maximum 32


In [None]:
# Test validate_config directly
print("Testing validate_config:")

# Valid config
valid_config = ExampleConfig(model="small", temperature=0.5, batch_size=16)
is_valid, error = validate_config(valid_config)
print(f"Valid config: is_valid={is_valid}, error={error}")
assert is_valid and error is None

# Invalid config - create without validation, then validate
invalid_config = ExampleConfig(model="invalid_model", temperature=0.5, batch_size=16)
is_valid, error = validate_config(invalid_config)
print(f"Invalid model: is_valid={is_valid}, error={error}")
# The message must name the offending value so users can fix their config
assert not is_valid and "invalid_model" in error

# Test config_to_dict
print("\nConverting config to dictionary:")
config_dict = config_to_dict(valid_config)
print(f"As dictionary: {config_dict}")
assert config_dict == {
    "model": "small",
    "temperature": 0.5,
    "batch_size": 16,
    "enabled": True,
    "tags": [],
}

Testing validate_config:
Valid config: is_valid=True, error=None
Invalid model: is_valid=False, error=model: 'invalid_model' is not one of ['tiny', 'base', 'small', 'medium', 'large']

Converting config to dictionary:
As dictionary: {'model': 'small', 'temperature': 0.5, 'batch_size': 16, 'enabled': True, 'tags': []}


In [None]:
#| eval: false
# Test JSON schema generation
import json

# _test_jsonschema() asserts the expected schema shape and returns the schema,
# which is then pretty-printed for inspection.
schema = _test_jsonschema()
print("\nGenerated JSON Schema:")
print(json.dumps(schema, indent=2))

dataclass_to_jsonschema tests passed

Generated JSON Schema:
{
  "name": "ExampleConfig",
  "title": "ExampleConfig",
  "description": "Example configuration dataclass with metadata constraints.",
  "type": "object",
  "properties": {
    "model": {
      "type": "string",
      "title": "Model",
      "description": "Model size to use",
      "enum": [
        "tiny",
        "base",
        "small",
        "medium",
        "large"
      ],
      "default": "base"
    },
    "temperature": {
      "type": "number",
      "title": "Temperature",
      "description": "Sampling temperature",
      "minimum": 0.0,
      "maximum": 1.0,
      "default": 0.0
    },
    "batch_size": {
      "type": "integer",
      "title": "Batch Size",
      "description": "Batch size for processing",
      "minimum": 1,
      "maximum": 32,
      "default": 8
    },
    "enabled": {
      "type": "boolean",
      "title": "Enabled",
      "description": "Whether feature is enabled",
      "default": tr

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()