In [2]:
import pydantic as pd2
import pydantic as pd
import pydantic.v1 as pd1
import sys

In order to understand the performance cost of using a docs schema, let's write some tests:

In [3]:
class MyBaseClass(pd.BaseModel):
    """
    Test documentation
    """
    
    a: str = ""
    """
    Example Description
    """
    
    b: str = pd.Field(default="", description="Example Description")

our_class = MyBaseClass()
our_class

MyBaseClass(a='', b='')

So the goal is that we want a function that can compile into a structure.

In [4]:
# Calculate the memory cost
a_class_attribute_memory_cost = sys.getsizeof(our_class.a)
b_class_attribute_memory_cost = sys.getsizeof(our_class.b)
print(f"a_class_attribute memory: {a_class_attribute_memory_cost}")
print(f"b_class_attribute memory: {b_class_attribute_memory_cost}")

a_class_attribute memory: 49
b_class_attribute memory: 49


In [5]:
our_class.__annotations__

{'a': str, 'b': str}

So there's a few options of what we can do here. We could create a higher-dimensional `pd.Field` that can be compiled into a better documentation representation which is more complete.

In [6]:
example_default = ""
example_basic_field = pd.Field(default="")

Let's analyse the memory aspect of this data representation:

In [7]:
# Calculate the memory cost
default_string_memory_cost = sys.getsizeof(example_default)
default_field_memory_cost = sys.getsizeof(example_basic_field)
print(f"a_class_attribute memory: {default_string_memory_cost}")
print(f"b_class_attribute memory: {default_field_memory_cost}")

a_class_attribute memory: 49
b_class_attribute memory: 216


So we can clearly tell there's a difference here. It's interesting because we can tell that even though the actual declaration on its own does have 4x higher memory cost, at the instantiation default level it is equivalent.

So if anything we can look inside annotations of each class and compile the documentation from there.

In [8]:
example_basic_field

FieldInfo(annotation=NoneType, required=False, default='')

In [9]:
example_basic_field.__annotations__

{'annotation': 'type[Any] | None',
 'default': 'Any',
 'default_factory': 'typing.Callable[[], Any] | None',
 'alias': 'str | None',
 'alias_priority': 'int | None',
 'validation_alias': 'str | AliasPath | AliasChoices | None',
 'serialization_alias': 'str | None',
 'title': 'str | None',
 'field_title_generator': 'typing.Callable[[str, FieldInfo], str] | None',
 'description': 'str | None',
 'examples': 'list[Any] | None',
 'exclude': 'bool | None',
 'discriminator': 'str | types.Discriminator | None',
 'deprecated': 'Deprecated | str | bool | None',
 'json_schema_extra': 'JsonDict | typing.Callable[[JsonDict], None] | None',
 'frozen': 'bool | None',
 'validate_default': 'bool | None',
 'repr': 'bool',
 'init': 'bool | None',
 'init_var': 'bool | None',
 'kw_only': 'bool | None',
 'metadata': 'list[Any]',
 'metadata_lookup': 'ClassVar[dict[str, typing.Callable[[Any], Any] | None]]'}

In [10]:
import tidy3d

In [11]:
tidy3d.Simulation.__annotations__

{'boundary_spec': 'BoundarySpec',
 'courant': 'float',
 'lumped_elements': 'Tuple[LumpedElementType, ...]',
 'grid_spec': 'GridSpec',
 'medium': 'MediumType3D',
 'normalize_index': 'Union[pydantic.NonNegativeInt, None]',
 'monitors': 'Tuple[annotate_type(MonitorType), ...]',
 'sources': 'Tuple[annotate_type(SourceType), ...]',
 'shutoff': 'pydantic.NonNegativeFloat',
 'structures': 'Tuple[Structure, ...]',
 'symmetry': 'Tuple[Symmetry, Symmetry, Symmetry]',
 'run_time': 'Union[pydantic.PositiveFloat, RunTimeSpec]'}

The problem with this is that it only contains the types of the annotations, it does not contain the actual meaning. We can verify if this is the same for the attribute declaration

In [12]:
# our_class.a.__annotations__ # Doesn't work
# our_class.b.__annotations__ # Doesn't work
our_class.__annotations__

{'a': str, 'b': str}

So it is concievable we can overwrite the annotations of a class attribute. However, it is interesting that the field description is not directly accessible from the class instantiation.

So we can improve the documentation based on assigning this type of elements. We could extract the class definition from there in principle. However, clearly we have an issue which is where does the actual documentation data get contained.

We can extract the json schema defaults and variables from the `model_json_schema` attribute of the pydantic class.

In [13]:
from pydantic.fields import FieldInfo
from pydantic import BaseModel

def check_json_schema_extra(field: FieldInfo) -> bool:
    """Check if the FieldInfo contains a 'json_schema_extra' parameter."""
    return hasattr(field, 'json_schema_extra') and field.json_schema_extra is not None



def determine_pydantic_version_from_base_model(model: BaseModel):
    """Determine if a BaseModel is from Pydantic v1 or v2."""
    if hasattr(model, 'model_fields'):
        return 2
    elif hasattr(model, '__fields__'):
        return 1
    else:
        raise ValueError("Unknown Pydantic version or incompatible BaseModel class.")


def get_field_infos(model: BaseModel):
    """Get all FieldInfo instances from a Pydantic model, compatible with v1 and v2."""
    version = determine_pydantic_version_from_base_model(model)

    field_infos = []

    # Handle Pydantic v2
    if version == 2:
        for field_name, field in model.model_fields.items():
            field_infos.append(field)

    # Handle Pydantic v1
    elif version == 1:
        for field_name, field in model.__fields__.items():
            field_infos.append(field)

    return field_infos

our_class.model_json_schema()

{'description': 'Test documentation',
 'properties': {'a': {'default': '', 'title': 'A', 'type': 'string'},
  'b': {'default': '',
   'description': 'Example Description',
   'title': 'B',
   'type': 'string'}},
 'title': 'MyBaseClass',
 'type': 'object'}

So when we go and define the parameters we want on our parameter table:

```
Units
Description
Type
Equation
```

We need to then evaluate how to define this in a standardized way, and where this gets extracted from.

So we've been analyzing the `__annotations__` and the provided json schema, it can be seen that a `pydantic.Field` contains hte example description and relevant information in the way we want. So it makes sense to create a higher dimensional version of the `pydantic.Field` and use this to annotate. We can then extract the information from there and compile each field in the particular format enabled by the extension. So we provide a full cycle of the implementation strategy.

In [51]:
# help(pd.fields.Field)
afield = pd.fields.Field(
    json_schema_extra={
        "unit": "ms",
        "math": "s + 1"
}
)

In [55]:
afield._extract_metadata

<function pydantic.fields.FieldInfo._extract_metadata(annotation: 'type[Any] | None') -> 'tuple[type[Any] | None, list[Any]]'>

In [57]:
our_class.model_fields

{'a': FieldInfo(annotation=str, required=False, default=''),
 'b': FieldInfo(annotation=str, required=False, default='', description='Example Description')}

In [58]:
our_class.__annotations__

{'a': str, 'b': str}

In [63]:
our_class.a.__doc__

"str(object='') -> str\nstr(bytes_or_buffer[, encoding[, errors]]) -> str\n\nCreate a new string object from the given object. If encoding or\nerrors is specified, then the object must expose a data buffer\nthat will be decoded using the given encoding and error handler.\nOtherwise, returns the result of object.__str__() (if defined)\nor repr(object).\nencoding defaults to sys.getdefaultencoding().\nerrors defaults to 'strict'."

In [15]:
test_extra_fieldinfo = pd.fields.FieldInfo(test=1)
dir(test_extra_fieldinfo)

['__annotations__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__pretty__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_args__',
 '__repr_name__',
 '__repr_str__',
 '__rich_repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_attributes_set',
 '_collect_metadata',
 '_extract_metadata',
 '_from_dataclass_field',
 'alias',
 'alias_priority',
 'annotation',
 'apply_typevars_map',
 'default',
 'default_factory',
 'deprecated',
 'deprecation_message',
 'description',
 'discriminator',
 'examples',
 'exclude',
 'field_title_generator',
 'from_annotated_attribute',
 'from_annotation',
 'from_field',
 'frozen',
 'get_default',
 'init',
 'init_var',
 'is_required',
 'json_schema_extra',
 'kw_only',
 'merge_field_infos',
 'meta

In [44]:
test_extra_fieldinfo

FieldInfo(annotation=NoneType, required=True)

In [17]:
help(pd.fields.FieldInfo)

Help on class FieldInfo in module pydantic.fields:

class FieldInfo(pydantic._internal._repr.Representation)
 |  FieldInfo(**kwargs: 'Unpack[_FieldInfoInputs]') -> 'None'
 |  
 |  This class holds information about a field.
 |  
 |  `FieldInfo` is used for any field definition regardless of whether the [`Field()`][pydantic.fields.Field]
 |  function is explicitly used.
 |  
 |      You generally shouldn't be creating `FieldInfo` directly, you'll only need to use it when accessing
 |      [`BaseModel`][pydantic.main.BaseModel] `.model_fields` internals.
 |  
 |  Attributes:
 |      annotation: The type annotation of the field.
 |      default: The default value of the field.
 |      default_factory: The factory function used to construct the default for the field.
 |      alias: The alias name of the field.
 |      alias_priority: The priority of the field's alias.
 |      validation_alias: The validation alias of the field.
 |      serialization_alias: The serialization alias of the fi

In [18]:
class PhysicalParameterV1(pd1.BaseModel):
    units: str = ""

class PhysicalParameterV2(pd2.BaseModel):
    units: str = ""

In [19]:
# help(pd.fields.Field)
# pd.fields.Field(extra=PhysicalParameter())

In [20]:
get_field_infos(PhysicalParameterV1)

[ModelField(name='units', type=str, required=False, default='')]

In [21]:
get_field_infos(PhysicalParameterV2)

[FieldInfo(annotation=str, required=False, default='')]

In [22]:
class PhysicalFieldInfo(pd.fields.FieldInfo):
    a: PhysicalParameterV1

In [23]:
PhysicalFieldInfo()

PhysicalFieldInfo(annotation=NoneType, required=True)

In [24]:
PhysicalFieldInfo()

PhysicalFieldInfo(annotation=NoneType, required=True)

In [68]:
from __future__ import annotations

import pydoc
import warnings
from collections import defaultdict
from typing import Any, Callable, Dict, List, Optional, Union, Type, TypeVar

from pydantic import BaseModel, ConfigDict, create_model, Field
from pydantic.fields import FieldInfo
from typing_extensions import TypeGuard

# Define constants
ASTERISK_FIELD_NAME = 'all fields'

# Define type aliases for clarity
SymbolicTypes = Union[str, Dict[str, Any]]
UnitTypes = Union[str, Dict[str, Any]]
PropertyTypes = Union[Dict[str, Any], Dict[str, Any]]  # Adjust as needed based on actual Property models

# ----------------------------
# Helper Functions and Types
# ----------------------------

def is_pydantic_model(obj: Any) -> TypeGuard[Type[BaseModel]]:
    """Determine if the object is a valid Pydantic model."""
    try:
        return issubclass(obj, BaseModel)
    except TypeError:
        return False


def is_pydantic_field(parent: Any, field_name: str) -> bool:
    """Determine if the given `field_name` is a Pydantic field of the `parent` model."""
    if not is_pydantic_model(parent):
        return False
    return field_name in parent.model_fields


def get_field_names(model: Type[BaseModel]) -> List[str]:
    """Return the field names of the Pydantic model while keeping ordering."""
    return list(model.model_fields.keys())


def get_field_info(model: Type[BaseModel], name: str) -> FieldInfo:
    """Get the instance of `FieldInfo` for the given field `name`."""
    return model.model_fields[name]


def get_alias_or_name(model: Type[BaseModel], field_name: str) -> str:
    """Get the alias of a Pydantic field if set; otherwise, return the field name."""
    if field_name == ASTERISK_FIELD_NAME:
        return field_name
    alias = get_field_info(model, field_name).alias
    return alias if alias is not None else field_name


def get_property_from_field_info(model: Type[BaseModel], field_name: str, property_name: str) -> Any:
    """Get a specific property value from Pydantic's `FieldInfo`."""
    field = get_field_info(model, field_name)
    return getattr(field, property_name, None)


def _get_meta_items(meta_class: Any) -> Dict[str, Any]:
    """Helper method to extract constraint names and values from different
    Pydantic Metadata objects such as `pydantic.types.Strict`."""
    try:
        return meta_class.__dataclass_fields__
    except AttributeError:
        return meta_class.__dict__


def get_constraints(model: Type[BaseModel], field_name: str) -> Dict[str, Any]:
    """Get constraints for the given `field_name`."""
    metadata = model.model_fields[field_name].metadata
    available = [meta for meta in metadata if meta is not None]

    constraints = {}
    for meta in available:
        meta_items = _get_meta_items(meta)
        for key in meta_items:
            attr = getattr(meta, key, None)
            if attr is not None:
                constraints[key] = attr
    return constraints


def is_required(model: Type[BaseModel], field_name: str) -> bool:
    """Check if the given Pydantic field is required/mandatory."""
    return get_field_info(model, field_name).is_required()


def has_default_factory(model: Type[BaseModel], field_name: str) -> bool:
    """Check if the field has a `default_factory` set."""
    return get_field_info(model, field_name).default_factory is not None


def _test_field_serializability(field: FieldInfo) -> bool:
    """Test JSON serializability for the given Pydantic `FieldInfo`."""
    model_config = ConfigDict(arbitrary_types_allowed=True)

    try:
        field_args = (field.annotation, field.default)
        model = create_model('_', __config__=model_config, test_field=field_args)
        model.model_json_schema()
    except Exception:  # noqa: BLE001
        return False
    else:
        return True


def _is_json_serializable(field: FieldInfo) -> bool:
    """Ensure JSON serializability for the given Pydantic `FieldInfo`."""
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        return _test_field_serializability(field)


def is_json_serializable(model: Type[BaseModel], field_name: str) -> bool:
    """Check if the given Pydantic field is JSON serializable."""
    field = get_field_info(model, field_name)
    return _is_json_serializable(field)


def get_non_json_serializable_fields(model: Type[BaseModel]) -> List[str]:
    """Get all fields that can't be safely JSON serialized."""
    return [name for name in get_field_names(model) if not is_json_serializable(model, name)]




def _extract_class_name(func: Callable) -> Optional[str]:
    """Extract the class name from a function's qualified name, if applicable."""
    qualname_parts = func.__qualname__.split('.')
    if len(qualname_parts) > 1:
        return qualname_parts[-2]
    return None


def get_field_validator_mapping(model: Type[BaseModel]) -> Dict[str, List[Dict[str, Any]]]:
    """Collect all available validators keyed by their corresponding fields including post/pre root validators."""
    mapping: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    validators = get_validators(model)

    for validator in validators:
        field_name = validator['field']
        mapping[field_name].append(validator)

    return mapping


def get_validator_field_maps(model: Type[BaseModel]) -> List[Dict[str, Any]]:
    """Get all validator-field mappings for the given Pydantic model."""
    mapping = get_field_validator_mapping(model)
    validator_field_maps = []

    for field_name, validators in mapping.items():
        for validator in validators:
            validator_field_map = {
                'field_name': field_name,
                'validator_name': validator['name'],
                'field_ref': f'{model.__name__}.model_fields.{field_name}',
                'validator_ref': validator['object_path']
            }
            validator_field_maps.append(validator_field_map)

    return validator_field_maps


def is_validator_by_name(name: str, obj: Any) -> bool:
    """Determine if a validator with the given `name` is present in the `obj` model."""
    if is_pydantic_model(obj):
        validators = get_validators(obj)
        return any(validator['name'] == name for validator in validators)
    return False



# ----------------------------
# Example Usage
# ----------------------------

# Assuming you have a Pydantic model like this:
class ExampleModel(BaseModel):
    name: str
    age: int = Field(..., ge=0)
    email: Optional[str] = None

# Retrieve field names
field_names = get_field_names(ExampleModel)
print("Field Names:", field_names)

# Get field info
field_info = get_field_info(ExampleModel, 'age')
print("Field Info for 'age':", field_info)

# Get alias or name
alias_or_name = get_alias_or_name(ExampleModel, 'age')
print("Alias or Name for 'age':", alias_or_name)

# Get constraints
constraints = get_constraints(ExampleModel, 'age')
print("Constraints for 'age':", constraints)

# Check if a field is required
required = is_required(ExampleModel, 'name')
print("Is 'name' required?", required)

# Check if a field has a default factory
default_factory = has_default_factory(ExampleModel, 'email')
print("Does 'email' have a default factory?", default_factory)

# Check JSON serializability
json_serializable = is_json_serializable(ExampleModel, 'email')
print("Is 'email' JSON serializable?", json_serializable)

# Get non-JSON serializable fields
non_serializable = get_non_json_serializable_fields(ExampleModel)
print("Non-JSON Serializable Fields:", non_serializable)

# Get validators
validators = get_validators(ExampleModel)
for validator in validators:
    print(f"Validator: {validator['name']}, Module: {validator['module']}")

# Get validator-field mappings
validator_field_maps = get_validator_field_maps(ExampleModel)
for mapping in validator_field_maps:
    print(f"Field: {mapping['field_name']}, Validator: {mapping['validator_name']}, "
          f"Field Ref: {mapping['field_ref']}, Validator Ref: {mapping['validator_ref']}")

# Check if a specific validator exists
has_name_validator = is_validator_by_name('name_must_not_be_empty', ExampleModel)
print("Has 'name_must_not_be_empty' validator:", has_name_validator)


TypeError: ExampleModel.__get_pydantic_core_schema__() takes 1 positional argument but 3 were given

In [79]:
from docutils import nodes
from typing import List
from autoflex.types import PropertyTypes

def create_property_table(properties: List[PropertyTypes]):
    # Define a list of table columns
    table_head = [
        ["Name", "Description"],
        ["Units", "Symbolic", "Types", "Default"]
    ]

    # Create the table node
    table = nodes.table()
    tgroup = nodes.tgroup(cols=4)
    table += tgroup

    # Define column specifications
    for _ in range(4):
        tgroup += nodes.colspec(colwidth=1)

    thead = nodes.thead()
    tgroup += thead
    tbody = nodes.tbody()
    tgroup += tbody

    # Fill in the header rows
    for head_row in table_head:
        row = nodes.row()
        for header in head_row:
            entry = nodes.entry()
            entry += nodes.paragraph(text=header)
            row += entry
        thead += row

    # Fill in the property rows
    for prop in properties:
        print(prop)
        row = nodes.row()
        # Add property name
        entry = nodes.entry()
        entry += nodes.paragraph(text=prop["name"])
        row += entry

        # Add property description
        entry = nodes.entry()
        entry += nodes.paragraph(text=prop["description"])
        row += entry

        # Add units (if it's a PhysicalProperty)
        # Check if it's a PhysicalProperty by checking for `math` and `unit` attributes
        if hasattr(prop, 'math') and hasattr(prop, 'unit'):
            # Add units
            entry = nodes.entry()
            entry += nodes.paragraph(text=str(prop.unit))
            row += entry

            # Add symbolic
            entry = nodes.entry()
            entry += nodes.paragraph(text=str(prop.math))
            row += entry
        else:
            # Add empty cells for units and symbolic if it's not a PhysicalProperty
            entry = nodes.entry()
            entry += nodes.paragraph(text="")
            row += entry

            entry = nodes.entry()
            entry += nodes.paragraph(text="")
            row += entry

        # Add types
        entry = nodes.entry()
        entry += nodes.paragraph(text=prop["types"])
        row += entry

        # Add default
        entry = nodes.entry()
        entry += nodes.paragraph(text=prop["default"])
        row += entry

        # Append row to the body
        tbody += row

    return table

# Example usage
properties = [
    PhysicalProperty(name="Mass", types="float", description="The mass of the object", default="1.0", math="m", unit="kg"),
    Property(name="Name", types="str", description="The name of the object", default="Unnamed")
]

table_node = create_property_table(properties)

{'name': 'Mass', 'types': 'float', 'description': 'The mass of the object', 'default': '1.0', 'math': 'm', 'unit': 'kg'}
{'name': 'Name', 'types': 'str', 'description': 'The name of the object', 'default': 'Unnamed'}


In [88]:
table_node

<table: <tgroup...>>