In [7]:
# scratch/test_extraction_item.py

from extraction_io.ExtractionItems import ExtractionItem

# 1) A valid single-item dict that we want to parse:
raw_item = {
    "field_name": "BorrowerName",
    "description": "Name of the borrower",
    "probable_pages": [1, 2],
    "type": "key-value",          # must match Literal exactly
    "multipage_value": False,
    "multiline_value": False,
    "extra_rules": {"regex": "\\w+"}
}

# 2) Parse/validate using Pydantic v2's `model_validate`:
item = ExtractionItem.model_validate(raw_item)

print("Parsed ExtractionItem successfully:")
print(f"  field_name       = {item.field_name!r}")
print(f"  description      = {item.description!r}")
print(f"  probable_pages   = {item.probable_pages!r}")
print(f"  type             = {item.type!r}")
print(f"  multipage_value  = {item.multipage_value!r}")
print(f"  multiline_value  = {item.multiline_value!r}")
print(f"  extra_rules      = {item.extra!r}")

Parsed ExtractionItem successfully:
  field_name       = 'BorrowerName'
  description      = 'Name of the borrower'
  probable_pages   = [1, 2]
  type             = 'key-value'
  multipage_value  = False
  multiline_value  = False
  extra_rules      = {}


In [2]:
# scratch/test_extraction_items.py

from extraction_io.ExtractionItems import ExtractionItem, ExtractionItems
from pydantic import ValidationError

# 1) Example list of two config items:
raw_config = [
    {
        "field_name": "BorrowerName",
        "description": "Name of the borrower",
        "probable_pages": [1],
        "type": "key-value",
        "multipage_value": False,
        "multiline_value": False,
        "extra_rules": {"regex": "\\w+"}
    },
    {
        "field_name": "benefits_list",
        "description": "List of benefits",
        # no probable_pages provided ⇒ defaults to []
        "type": "bullet-points",
        "multipage_value": False,
        "multiline_value": False,
        # omit extra_rules entirely, it will default to {}
    }
]

# 2) Attempt to parse the list as ExtractionItems:
try:
    config_model = ExtractionItems.model_validate(raw_config)
    print("Parsed ExtractionItems successfully. Contents:")
    for idx, item in enumerate(config_model):
        print(f"Item {idx}:")
        print(f"  field_name      = {item.field_name!r}")
        print(f"  description     = {item.description!r}")
        print(f"  probable_pages  = {item.probable_pages!r}")
        print(f"  type            = {item.type!r}")
        print(f"  multipage_value = {item.multipage_value!r}")
        print(f"  multiline_value = {item.multiline_value!r}")
        print(f"  extra_rules     = {item.extra_rules!r}")
        print()
except ValidationError as e:
    print("ValidationError while parsing ExtractionItems:")
    print(e)

Parsed ExtractionItems successfully. Contents:
Item 0:
  field_name      = 'BorrowerName'
  description     = 'Name of the borrower'
  probable_pages  = [1]
  type            = 'key-value'
  multipage_value = False
  multiline_value = False
  extra_rules     = {'regex': '\\w+'}

Item 1:
  field_name      = 'benefits_list'
  description     = 'List of benefits'
  probable_pages  = []
  type            = 'bullet-points'
  multipage_value = False
  multiline_value = False
  extra_rules     = {}

