In [1]:
from marshmallow import Schema, fields, validate, ValidationError

# Define a schema for an individual object
class UserSchema(Schema):
    id = fields.Int(required=True)
    name = fields.Str(required=True)
    email = fields.Email(required=True)
    age = fields.Int(required=True, validate=validate.Range(min=18, max=65))
    isSubscribed = fields.Bool(required=True)

# Sample JSON object array with an invalid record
json_data = [
    {
        "id": 1,
        "name": "John Doe",
        "email": "johndoe@example.com",
        "age": 30,
        "isSubscribed": True
    },
    {
        "id": 2,
        "name": "Jane Smith",
        "email": "invalid-email",
        "age": 25,
        "isSubscribed": False
    },
    {
        "id": 3,
        "name": "Alice Johnson",
        "email": "alicejohnson@example.com",
        "age": -5,  # Invalid age
        "isSubscribed": True
    }
]

# Instantiate the schema
user_schema = UserSchema(many=True)  # `many=True` for arrays

# Validate and deserialize
try:
    valid_data = user_schema.load(json_data)
    print("Valid data:", valid_data)
except ValidationError as e:
    print("Validation error details:")
    for index, errors in e.messages.items():
        print(f"Record {index}: {errors}")


Validation error details:
Record 1: {'email': ['Not a valid email address.']}
Record 2: {'age': ['Must be greater than or equal to 18 and less than or equal to 65.']}


In [3]:
import json

with open('data.json', 'r') as file:
    data = json.load(file)


In [4]:
from marshmallow import Schema, fields

def generate_schema_from_dict(data_dict):
    field_mappings = {
        str: fields.Str,
        int: fields.Int,
        float: fields.Float,
        bool: fields.Bool,
        type(None): fields.Raw  # For null values
    }

    schema_fields = {}
    for key, value in data_dict.items():
        value_type = type(value)
        
        if value_type in field_mappings:
            schema_fields[key] = field_mappings[value_type](required=True)
        elif value_type == list:
            if value:  # If the list is not empty
                # Assume all elements are of the same type
                sub_type = type(value[0])
                if sub_type == dict:
                    # Nested list of dicts
                    NestedSchema = generate_schema_from_dict(value[0])
                    schema_fields[key] = fields.List(fields.Nested(NestedSchema), required=True)
                else:
                    field_type = field_mappings.get(sub_type, fields.Raw)
                    schema_fields[key] = fields.List(field_type(), required=True)
            else:
                # Empty list, default to List of Raw fields
                schema_fields[key] = fields.List(fields.Raw(), required=True)
        elif value_type == dict:
            # Nested dictionary
            NestedSchema = generate_schema_from_dict(value)
            schema_fields[key] = fields.Nested(NestedSchema, required=True)
        else:
            schema_fields[key] = fields.Raw(required=True)
    
    return type('DynamicSchema', (Schema,), schema_fields)


In [5]:
DynamicSchema = generate_schema_from_dict(data)
schema = DynamicSchema()
result = schema.load(data)  # Deserialization
print(result)


{'name': 'John Doe', 'age': 30, 'email': 'john.doe@example.com', 'address': {'street': '123 Main St', 'city': 'Anytown', 'state': 'CA', 'zip_code': '12345'}, 'phone_numbers': [{'type': 'home', 'number': '555-1234'}, {'type': 'work', 'number': '555-5678'}]}


In [6]:
import json
from marshmallow import Schema, fields, validate
from datetime import datetime

def get_field_type(field_path, value, enum_fields=None):
    if enum_fields is None:
        enum_fields = {}
        
    if field_path in enum_fields:
        # Enum field
        enum_values = enum_fields[field_path]
        return 'Str', {'validate': f'validate.OneOf({enum_values})'}
    
    if isinstance(value, str):
        # Check for date formats
        date_formats = [
            '%Y-%m-%d', '%Y/%m/%d', '%d-%m-%Y', '%d/%m/%Y',
            '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S'
        ]
        for fmt in date_formats:
            try:
                datetime.strptime(value, fmt)
                # Decide between Date and DateTime based on format
                if 'H' in fmt:
                    return 'DateTime', {}
                else:
                    return 'Date', {}
            except ValueError:
                continue
        return 'Str', {}
    elif isinstance(value, int):
        return 'Int', {}
    elif isinstance(value, float):
        return 'Float', {}
    elif isinstance(value, bool):
        return 'Bool', {}
    elif value is None:
        return 'Raw', {}
    else:
        return 'Raw', {}

def generate_schema_code(class_name, data_dict, indent=0, enum_fields=None, field_path=''):
    if enum_fields is None:
        enum_fields = {}
    indent_str = '    ' * indent
    schema_lines = [f"{indent_str}class {class_name}(Schema):"]
    for key, value in data_dict.items():
        current_field_path = f"{field_path}.{key}" if field_path else key
        value_type = type(value)
        
        if value_type == dict:
            # Nested dictionary
            nested_class_name = f"{class_name}_{key.capitalize()}"
            nested_schema_code = generate_schema_code(
                nested_class_name, value, indent, enum_fields, current_field_path
            )
            schema_lines.append(f"{indent_str}    {key} = fields.Nested({nested_class_name}, required=True)")
            schema_lines.append(nested_schema_code)
        elif value_type == list:
            # List field
            if value:
                first_elem = value[0]
                elem_type = type(first_elem)
                if elem_type == dict:
                    # List of nested dictionaries
                    nested_class_name = f"{class_name}_{key.capitalize()}Item"
                    nested_schema_code = generate_schema_code(
                        nested_class_name, first_elem, indent, enum_fields, current_field_path
                    )
                    schema_lines.append(
                        f"{indent_str}    {key} = fields.List(fields.Nested({nested_class_name}), required=True)"
                    )
                    schema_lines.append(nested_schema_code)
                else:
                    field_type, field_args = get_field_type(current_field_path, first_elem, enum_fields)
                    args_str = ', '.join([f"{k}={v}" for k, v in field_args.items()])
                    args_str = f", {args_str}" if args_str else ''
                    schema_lines.append(
                        f"{indent_str}    {key} = fields.List(fields.{field_type}(){args_str}, required=True)"
                    )
            else:
                # Empty list
                schema_lines.append(f"{indent_str}    {key} = fields.List(fields.Raw(), required=True)")
        else:
            # Primitive field
            field_type, field_args = get_field_type(current_field_path, value, enum_fields)
            args_str = ', '.join([f"{k}={v}" for k, v in field_args.items()])
            args_str = f", {args_str}" if args_str else ''
            schema_lines.append(f"{indent_str}    {key} = fields.{field_type}(required=True{args_str})")
    return '\n'.join(schema_lines)

# Load the JSON data
# with open('data.json', 'r') as file:
#     data = json.load(file)

data = {
  "name": "John Doe",
  "age": 30,
  "email": "john.doe@example.com",
  "is_active": True,
  "roles": ["admin", "user"],
  "status": "active",
  "address": {
    "street": "123 Main St",
    "city": "Anytown",
    "zip": "12345"
  },
  "projects": [
    {
      "name": "Project A",
      "deadline": "2023-12-31",
      "status": "planned"
    },
    {
      "name": "Project B",
      "deadline": "6-30-24",
      "status": "in_progress"
    }
  ]
}



# Specify fields to be treated as enums
enum_fields = {
    'roles': ['admin', 'user', 'guest'],
    'status': ['active', 'inactive', 'pending'],
    'projects.status': ['planned', 'in_progress', 'completed'],
}

# Generate the schema code
schema_code = generate_schema_code('GeneratedSchema', data, enum_fields=enum_fields)

# Output the schema code
print('from marshmallow import Schema, fields, validate\n')
print(schema_code)

# # Optionally, write to a file
# with open('generated_schema.py', 'w') as f:
#     f.write('from marshmallow import Schema, fields, validate\n\n')
#     f.write(schema_code)


from marshmallow import Schema, fields, validate

class GeneratedSchema(Schema):
    name = fields.Str(required=True)
    age = fields.Int(required=True)
    email = fields.Str(required=True)
    is_active = fields.Int(required=True)
    roles = fields.List(fields.Str(), validate=validate.OneOf(['admin', 'user', 'guest']), required=True)
    status = fields.Str(required=True, validate=validate.OneOf(['active', 'inactive', 'pending']))
    address = fields.Nested(GeneratedSchema_Address, required=True)
class GeneratedSchema_Address(Schema):
    street = fields.Str(required=True)
    city = fields.Str(required=True)
    zip = fields.Str(required=True)
    projects = fields.List(fields.Nested(GeneratedSchema_ProjectsItem), required=True)
class GeneratedSchema_ProjectsItem(Schema):
    name = fields.Str(required=True)
    deadline = fields.Date(required=True)
    status = fields.Str(required=True, validate=validate.OneOf(['planned', 'in_progress', 'completed']))
