# Schema to Pydantic Model Generator

This notebook processes JSON schema files from the `schema_imp` directory and generates Pydantic v2 models that can be used as the basis for a PostgreSQL database schema.

In [None]:
%env UV_LINK_MODE=copy
!uv add datamodel-code-generator datamodel-code-generator[graphql,http] tomli tomli-w python-dotenv sqlalchemy
!uv sync

In [None]:
from pathlib import Path
import json
import tomli
import tomli_w
from datamodel_code_generator import InputFileType, generate, PythonVersion
from datamodel_code_generator import DataModelType
from datamodel_code_generator.model.pydantic_v2 import UnionMode
from pprint import pprint
import os
import re

## Configuration

Load the datamodel-codegen configuration from pyproject.toml

In [None]:
# Load the [tool.datamodel-codegen] table from pyproject.toml (empty dict if absent)
pyproject_path = Path('pyproject.toml')
# TOML files are UTF-8 by specification. tomli.load() takes a binary handle and does
# the decoding itself, so the result does not depend on the platform's default text
# encoding the way Path.read_text() does.
with pyproject_path.open('rb') as pyproject_fp:
    pyproject = tomli.load(pyproject_fp)
datamodel_codegen_config = pyproject.get('tool', {}).get('datamodel-codegen', {})

print("Using datamodel-codegen configuration:")
pprint(datamodel_codegen_config)

## Create Output Directory Structure

In [None]:
# Create output directory for models
models_dir = Path('models')
models_dir.mkdir(exist_ok=True)

# (Re)create __init__.py so the directory is a proper package. The file is reset to
# just the header on every run - whether or not it already existed - because the
# generation step appends one import line per model to it.
init_file = models_dir / '__init__.py'
init_file.write_text('# Generated Pydantic models\n', encoding='utf-8')

## Process Schema Files

Find all JSON schema files in the schema_imp directory and generate Pydantic models

In [None]:
# Get all JSON files in the schema_imp directory.
# glob() yields entries in filesystem order, which differs between machines and runs;
# sorting keeps the processing order (and so the order of the import lines appended
# to models/__init__.py) reproducible.
schema_dir = Path('schema_imp')
schema_files = sorted(schema_dir.glob('*.json'))

print(f"Found {len(schema_files)} schema files to process:")
for file in schema_files:
    print(f"  - {file.name}")

In [None]:
def clean_model_name(name):
    """Convert a schema file name to a PascalCase model name.

    Args:
        name: File name such as ``'player_cards.json'``.

    Returns:
        A PascalCase name usable as a Python class name, e.g. ``'PlayerCard'``.
        A trailing plural ``s`` is dropped, digit runs are kept, and the result is
        prefixed with ``'Model'`` when it would otherwise be empty or start with a
        digit (neither is a valid identifier).
    """
    # Strip only a trailing extension; str.replace would also remove '.json'
    # occurring in the middle of the name.
    if name.endswith('.json'):
        name = name[:-len('.json')]

    # Naive singularization: drop one trailing 's', except for endings that usually
    # belong to singular words ('class', 'status', 'analysis').
    if name.endswith('s') and not name.endswith(('ss', 'us', 'is')):
        name = name[:-1]

    # Split into lowercase runs, Capitalized words and digit runs; anything else
    # (underscores, dashes, dots) acts as a separator.
    words = re.findall(r'[a-z]+|[A-Z][a-z]*|[0-9]+', name)
    model_name = ''.join(word.capitalize() for word in words)

    # The name is used as a class name and, lowercased, as a module name.
    if not model_name or model_name[0].isdigit():
        model_name = f'Model{model_name}'
    return model_name

def generate_model(schema_file):
    """Generate a Pydantic v2 model module from one JSON schema file.

    The module is written to ``models_dir / '<model name, lowercased>.py'`` and an
    import line for the model is added to ``init_file`` unless it is already there.

    Args:
        schema_file: ``Path`` of the JSON schema to process.

    Returns:
        ``Path`` of the generated module.

    Raises:
        Whatever ``generate()`` or the file operations raise; nothing is caught here.
    """
    # Read schema file
    schema_content = schema_file.read_text(encoding='utf-8')

    # Determine model name and output path
    model_name = clean_model_name(schema_file.name)
    module_name = model_name.lower()
    output_path = models_dir / f"{module_name}.py"

    print(f"Generating model {model_name} from {schema_file.name}...")

    # pyproject.toml spells options in kebab-case, while generate() takes snake_case
    # keyword arguments.
    generate_kwargs = {
        key.replace('-', '_'): value
        for key, value in datamodel_codegen_config.items()
    }
    # The schema text is passed positionally, so a configured 'input' does not apply.
    generate_kwargs.pop('input', None)
    # Per-file settings are applied last so they win over pyproject values. Passing
    # both as keywords raised "got multiple values for keyword argument" whenever the
    # config repeated one of them.
    generate_kwargs.update(
        input_file_type=InputFileType.JsonSchema,
        input_filename=str(schema_file),
        output=output_path,
        output_model_type=DataModelType.PydanticV2BaseModel,
        target_python_version=PythonVersion.PY_311,
        union_mode=UnionMode.smart,
        # Name the root model explicitly so the import written to __init__.py below
        # refers to a class that actually exists in the generated module.
        class_name=model_name,
    )

    # Generate model
    generate(schema_content, **generate_kwargs)

    # Add model to __init__.py for easy importing; skip it when the line is already
    # present so re-running generation does not pile up duplicates.
    import_line = f"from .{module_name} import {model_name}"
    existing_lines = (
        init_file.read_text(encoding='utf-8').splitlines()
        if init_file.exists()
        else []
    )
    if import_line not in existing_lines:
        with open(init_file, 'a', encoding='utf-8') as f:
            f.write(f"{import_line}\n")

    return output_path

In [None]:
# Run generation for every schema file. A failure is reported and the loop moves on
# to the next file; only successful outputs are collected.
generated_models = []
for schema_file in schema_files:
    try:
        output_path = generate_model(schema_file)
    except Exception as exc:
        print(f"Error processing {schema_file.name}: {str(exc)}")
    else:
        generated_models.append(output_path)

# Summary of the modules that were written
print(f"\nSuccessfully generated {len(generated_models)} models:")
for model_path in generated_models:
    print(f"  - {model_path}")

## Create Database Model Relationships

Generate a `relationships.py` template inside the models package: it sets up a SQLAlchemy declarative base and contains commented-out example relationships to adapt for the PostgreSQL schema.

In [None]:
# Create a relationships.py template inside the models package.
# The generated file sets up a declarative Base and carries commented-out example
# relationships. Changes from the previous template:
#   * declarative_base is imported from sqlalchemy.orm (its location since
#     SQLAlchemy 1.4; the sqlalchemy.ext.declarative path is deprecated);
#   * the example classes have a primary key, without which SQLAlchemy cannot map
#     them once the example is uncommented.
relationships_file = models_dir / 'relationships.py'
relationships_content = """
# Generated relationships for PostgreSQL database models
from sqlalchemy import Column, ForeignKey, Integer, Table
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

# Define relationships between models
# This file should be customized based on your specific data relationships

# Example relationship definitions:
'''
# Many-to-many relationship between cards and packs
# (assumes a 'pack' table is defined as well)
card_pack_association = Table(
    'card_pack_association',
    Base.metadata,
    Column('card_id', ForeignKey('card.id'), primary_key=True),
    Column('pack_id', ForeignKey('pack.id'), primary_key=True)
)

# One-to-many relationship between factions and cards
class Card(Base):
    __tablename__ = 'card'
    id = Column(Integer, primary_key=True)
    faction_id = Column(ForeignKey('faction.id'))
    faction = relationship("Faction", back_populates="cards")

class Faction(Base):
    __tablename__ = 'faction'
    id = Column(Integer, primary_key=True)
    cards = relationship("Card", back_populates="faction")
'''
"""
relationships_file.write_text(relationships_content, encoding='utf-8')

print(f"Created relationships file at {relationships_file}")

## Create Database Setup Script

Generate a script to set up the PostgreSQL database with SQLAlchemy

In [None]:
# Create a db_setup.py file.
# The script text is delimited with ''' because it contains triple-double-quoted
# docstrings; wrapping it in triple double quotes ended the literal at the first
# docstring and made this cell a SyntaxError.
# Fixes inside the generated script:
#   * no star-imports inside init_db() (only legal at module level in Python 3);
#   * Base is the one declared in models/relationships.py, so create_all() sees the
#     tables defined there instead of an empty, unrelated metadata;
#   * credentials are percent-encoded before being placed in the URL.
db_setup_file = Path('db_setup.py')
db_setup_content = '''
# PostgreSQL database setup script
import os
from urllib.parse import quote_plus

from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Base class for models: shared with models/relationships.py so that the tables
# declared there are registered on the metadata used by init_db().
from models.relationships import Base

# Load environment variables from .env file
load_dotenv()

# Database connection settings
DB_USER = os.getenv("DB_USER", "postgres")
DB_PASSWORD = os.getenv("DB_PASSWORD", "postgres")
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "arkhamdb")

# Create SQLAlchemy engine
# Credentials are percent-encoded so characters such as @ : / cannot corrupt the URL
DATABASE_URL = (
    f"postgresql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)
engine = create_engine(DATABASE_URL)

# Create session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

def get_db():
    """Get database session"""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

def init_db():
    """Initialize database tables"""
    # Tables are registered on Base.metadata when models.relationships is imported
    # at the top of this module, so nothing more has to be imported here.
    Base.metadata.create_all(bind=engine)

if __name__ == "__main__":
    print("Initializing database...")
    init_db()
    print("Database initialized successfully!")
'''
db_setup_file.write_text(db_setup_content, encoding='utf-8')

print(f"Created database setup script at {db_setup_file}")

## Summary

This notebook has:
1. Used the datamodel-codegen configuration from pyproject.toml
2. Created Pydantic v2 models from all JSON schema files in schema_imp
3. Generated a relationships.py template for defining model relationships
4. Created a database setup script for PostgreSQL integration

Next steps:
1. Review and customize the generated models
2. Define relationships between models in relationships.py
3. Set up a PostgreSQL database and run the db_setup.py script
4. Create data import/export utilities