# uniprot.core Pydantic Model Generation

This notebook generates Pydantic models from LinkML schema files exported from the main schema extraction workflow.

**Prerequisites:** Run `uniprot.core.ipynb` first to generate the LinkML YAML schema file.

In [None]:
# Configuration and setup
import os
from pathlib import Path

# Dataset parameters
# Must be a string literal: it is interpolated into the directory and file
# names below (a bare `uniprot.core` would raise NameError on a fresh kernel).
dataset_name = "uniprot.core"

# Setup paths
working_path = Path().absolute()
# use os.path to build paths
exports_path = Path(os.path.join(str(working_path), "..", "..", "data", dataset_name))
linkml_file = Path(os.path.join(str(exports_path), f"{dataset_name}_linkml_schema.yaml"))

# Printed rather than logged: the notebook logger is only configured in a later cell
print(f"Working directory: {working_path}")
print(f"Exports path: {exports_path}")
print(f"LinkML schema file: {linkml_file}")
print(f"LinkML file exists: {linkml_file.exists()}")

In [None]:
# Import required libraries
from linkml.generators.erdiagramgen import ERDiagramGenerator
from linkml_runtime.utils.schemaview import SchemaView
from linkml.generators.pydanticgen import PydanticGenerator
from IPython.display import display, Markdown

In [None]:
import logging
import sys

# Notebook logger for the pydantic generation steps; named after the dataset,
# with a generic fallback name when dataset_name is empty.
logger = logging.getLogger(dataset_name or "pydantic_notebook")
logger.setLevel(logging.INFO)

# The rdfsolve.parser logger is configured as well so parser output is shown too
parser_logger = logging.getLogger('rdfsolve.parser')
parser_logger.setLevel(logging.DEBUG)

# Attach the stdout handler only once, so re-running the cell does not
# duplicate every log line
if len(logger.handlers) == 0:
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.DEBUG)  # DEBUG so parser query details pass through
    stdout_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s", "%Y-%m-%d %H:%M:%S")
    )

    # One shared handler serves both the notebook logger and the parser logger
    for target_logger in (logger, parser_logger):
        target_logger.addHandler(stdout_handler)

logger.info("Pydantic notebook logger initialized. All logs will be displayed in notebook.")
parser_logger.info("Parser logger configured for query tracking")

In [None]:
# Stop early with an actionable message when the schema export is missing
schema_present = linkml_file.exists()
if not schema_present:
    missing_schema_message = (
        f"LinkML schema file not found: {linkml_file}\n"
        f"Please run {dataset_name}_schema.ipynb first to generate the schema."
    )
    raise FileNotFoundError(missing_schema_message)

logger.info("Found LinkML schema: %s", linkml_file)

## Load and Inspect LinkML Schema

In [None]:
# Parse the LinkML YAML into a SchemaView and keep a handle on the raw schema
sv = SchemaView(str(linkml_file))
linkml_schema = sv.schema

# Class names are sorted once and reused for the counts and the listing below
class_names = sorted(sv.all_classes())
class_count = len(class_names)
slot_count = len(sv.all_slots())

display(Markdown(f"**Parsed LinkML schema:** Classes = {class_count}, Slots = {slot_count}"))

# Basic schema metadata
logger.info("Schema ID: %s", linkml_schema.id)
logger.info("Schema name: %s", linkml_schema.name)
logger.info("Description: %s", linkml_schema.description)
logger.info("Classes (%d):", class_count)

# List only the first 10 class names to keep the output short
for class_name in class_names[:10]:
    logger.info("  - %s", class_name)
if class_count > 10:
    logger.info("  ... and %d more", class_count - 10)

## Generate Mermaid Diagram

In [None]:
# Build the ER diagram text from the schema file and render it as Markdown
erd_generator = ERDiagramGenerator(str(linkml_file))
mermaid_code = erd_generator.serialize()

display(Markdown(mermaid_code))

## Generate Pydantic Models

In [None]:
# Serialize the LinkML schema into Python source code defining Pydantic models
pydantic_generator = PydanticGenerator(str(linkml_file))
src = pydantic_generator.serialize()

# Report the size of the generated source and log its first 500 characters
generated_length = len(src)
source_preview = src[:500]
logger.info("Generated Pydantic code: %d characters", generated_length)
logger.info("Preview (first 500 chars):\n%s...", source_preview)

In [None]:
# Execute the generated Pydantic source inside a dedicated namespace dict so
# that every name it defines can be collected from `ns` afterwards.
# NOTE(review): exec() runs whatever code `src` contains. `src` is produced by
# PydanticGenerator from the local schema file - only run this on schemas you trust.
ns = {}
exec(src, ns)

# Find the Pydantic model classes
def _is_pydantic_model(name, val):
    """Check if this is likely a generated Pydantic model class"""
    # Must be a class (type) and have at least one model field
    if not isinstance(val, type):
        return False
    try:
        has_model_fields = 0 < len(getattr(val, "model_fields", {}))
    except:
        has_model_fields = False

    return has_model_fields

# Keep only the namespace entries that pass the model-class check
pydantic_models = {
    entry_name: entry_value
    for entry_name, entry_value in ns.items()
    if _is_pydantic_model(entry_name, entry_value)
}
model_count = len(pydantic_models)

display(Markdown(f"**Found {model_count} Pydantic model classes for schema.**"))

# Expose every model class under its own name for interactive use
globals().update(pydantic_models)

# Log at most the first 10 model names, alphabetically
sorted_model_names = sorted(pydantic_models)
logger.info("Available Pydantic models:")
for model_name in sorted_model_names[:10]:
    logger.info("  - %s", model_name)
if model_count > 10:
    logger.info("  ... and %d more", model_count - 10)

## Inspect Generated Models

In [None]:
# Show detailed information about generated Pydantic classes
def show_fields(cls):
    """Format the fields of a model class as a Markdown bullet list.

    Args:
        cls: Object expected to expose a ``model_fields`` mapping of field name
            to an object with an ``annotation`` attribute.

    Returns:
        str: One ``  - `name`: annotation`` line per field joined with
        newlines, or ``"  No fields found"`` when ``cls`` has no
        ``model_fields`` attribute.
    """
    # Guard clause: nothing to list when the attribute is absent
    if not hasattr(cls, 'model_fields'):
        return "  No fields found"

    return "\n".join(
        f"  - `{field_name}`: {field_info.annotation}"
        for field_name, field_info in cls.model_fields.items()
    )

# Render an overview of the generated classes (output is capped for readability)
if not pydantic_models:
    display(Markdown("**No pydantic_models found**"))
else:
    ordered_names = sorted(pydantic_models.keys())
    # First 5 classes are shown with their fields; the rest are listed by name only
    detailed_names, remaining = ordered_names[:5], ordered_names[5:]

    sections = [f"**Generated Pydantic classes ({len(pydantic_models)} total):**\n\n"]
    for model_name in detailed_names:
        sections.append(f"### {model_name}\n")
        sections.append(show_fields(pydantic_models[model_name]) + "\n\n")

    if remaining:
        sections.append(f"### Additional Classes ({len(remaining)})\n")
        # Up to 20 further names, then a count of whatever is left
        sections.extend(f"- {model_name}\n" for model_name in remaining[:20])
        if len(remaining) > 20:
            sections.append(f"- ... and {len(remaining) - 20} more\n")

    markdown_output = "".join(sections)
    display(Markdown(markdown_output))

## Export Generated Code

In [None]:
# Write the generated source, preceded by a module docstring, next to the schema export
pydantic_output_file = Path(os.path.join(str(exports_path), f"{dataset_name}_pydantic_models.py"))

module_header = f'"""\nPydantic models generated from LinkML schema for {dataset_name}\n\nGenerated from: {linkml_file.name}\n"""\n\n'
pydantic_output_file.write_text(module_header + src, encoding='utf-8')

output_size_kb = pydantic_output_file.stat().st_size / 1024
logger.info("Saved Pydantic models to: %s", pydantic_output_file)
logger.info("File size: %.1f KB", output_size_kb)

# Show only the first 2000 characters of the code inline; the full code is in the file
code_preview_markdown = f"### Generated Pydantic Code\n\n```python\n{src[:2000]}\n# ... (truncated, see full code in {pydantic_output_file.name})\n```"
display(Markdown(code_preview_markdown))

## Usage Examples

In [None]:
# Example usage of the generated Pydantic models.
# The snippets are emitted through the logger as copy-paste hints; nothing
# here imports or instantiates a model.
if pydantic_models:
    # Use the first collected model class for the demonstration
    example_class_name = next(iter(pydantic_models))
    example_class = pydantic_models[example_class_name]
    models_module = f"{dataset_name}_pydantic_models"

    logger.info("Example usage of %s:", example_class_name)
    logger.info("\n# Import the model")
    if models_module.isidentifier():
        logger.info("from %s import %s", models_module, example_class_name)
    else:
        # A module name such as "uniprot.core_pydantic_models" cannot be used in
        # an import statement (the dot is read as a package separator), so show
        # how to load the generated file by path instead.
        logger.info("import importlib.util, sys")
        logger.info(
            "spec = importlib.util.spec_from_file_location('pydantic_models', '%s.py')",
            models_module,
        )
        logger.info("module = importlib.util.module_from_spec(spec)")
        logger.info("sys.modules['pydantic_models'] = module")
        logger.info("spec.loader.exec_module(module)")
        logger.info("%s = module.%s", example_class_name, example_class_name)

    logger.info("\n# Create an instance")
    logger.info("# %s(...)", example_class_name)

    # Show model fields (first 5 only)
    if hasattr(example_class, 'model_fields'):
        field_names = list(example_class.model_fields.keys())
        logger.info("\n# Available fields: %s", ', '.join(field_names[:5]))
        if len(field_names) > 5:
            logger.info("# ... and %d more", len(field_names) - 5)

    logger.info("\n# Get model schema")
    logger.info("schema = %s.model_json_schema()", example_class_name)
else:
    logger.info("No Pydantic models available for examples")

## Summary

This notebook generates Pydantic models from the LinkML schema. Once every cell has run without errors, the models are available for:

- **Data validation**: Use the models to validate data structures
- **API development**: Use as request/response models in FastAPI or similar
- **Data serialization**: Convert between Python objects and JSON/dict formats
- **IDE support**: Get type hints and autocompletion in your IDE

### Output Files
- **Pydantic models**: `uniprot.core_pydantic_models.py` (written to the dataset's exports directory, next to the schema file)
- **Source LinkML schema**: `uniprot.core_linkml_schema.yaml`

### Next Steps
1. Import the generated models in your Python projects
2. Use the models for data validation and serialization
3. Integrate with web frameworks like FastAPI for API development