http://json-schema.org/implementations.html

Pypi https://pypi.org/project/jsonschema/ currently 2.6.0

Github tag 2.6.0 https://github.com/Julian/jsonschema/blob/v2.6.0/jsonschema/__init__.py

Docs https://python-jsonschema.readthedocs.io/en/v2.6.0/

Writing schemas:
- https://json-schema.org/understanding-json-schema/index.html 
- https://json-schema.org/understanding-json-schema/reference/index.html

Online validator (not for draft4 though) https://jsonschema.net/

In [1]:
import jsonschema
assert jsonschema.__version__ == '2.6.0'

In [2]:
from jsonschema import validate
from jsonschema import Draft4Validator as Validator
from jsonschema.exceptions import (
    RefResolutionError,
    SchemaError,
    UnknownType,
    ValidationError,
)

In [3]:
# Example schema, shaped like the dict json.load() would return for a schema file.
schema = {
    "type": "object",
    "properties": {
        field: {"type": json_type}
        for field, json_type in (
            ("name", "string"),
            ("price", "number"),
            ("stock", "number"),
        )
    },
}

In [4]:
# One instance that conforms to the schema, and one with two wrongly typed fields.
good_data = dict(name="Eggs", price=34.9, stock=34)
bad_data = dict(name="Eggs", price="invalid as price", stock="invalid as stock")

https://python-jsonschema.readthedocs.io/en/v2.6.0/validate/#jsonschema.validate

In [5]:
# validate() signals failure by raising; no output here means good_data is valid.
try:
    validate(good_data, schema, cls=Validator)
except ValidationError as exc:
    print(exc)

In [6]:
try:
    validate(bad_data, schema, cls=Validator)
except ValidationError as exc:
    # List every public attribute of the error to see what it exposes.
    public_names = (name for name in sorted(dir(exc)) if not name.startswith('_'))
    for name in public_names:
        print(name, ':', getattr(exc, name))

absolute_path : deque(['stock'])
absolute_schema_path : deque(['properties', 'stock', 'type'])
args : ("'invalid as stock' is not of type 'number'", <unset>, (), None, (), <unset>, <unset>, <unset>, (), None)
cause : None
context : []
create_from : <bound method type.create_from of <class 'jsonschema.exceptions.ValidationError'>>
instance : invalid as stock
message : 'invalid as stock' is not of type 'number'
parent : None
path : deque(['stock'])
relative_path : deque(['stock'])
relative_schema_path : deque(['properties', 'stock', 'type'])
schema : {'type': 'number'}
schema_path : deque(['properties', 'stock', 'type'])
validator : type
validator_value : number
with_traceback : <built-in method with_traceback of ValidationError object at 0x7f4c8efdfe88>


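`validate()` raises a single `ValidationError`, so it doesn't report all errors in this mode. To collect every error, iterate over `Validator.iter_errors()` as shown below.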

https://python-jsonschema.readthedocs.io/en/v2.6.0/errors/#handling-validation-errors

In [7]:
# iter_errors() yields each violation instead of raising, so all of them can be shown.
v = Validator(schema)
all_errors = sorted(v.iter_errors(bad_data), key=str)
for error in all_errors:
    print(error.message)

'invalid as price' is not of type 'number'
'invalid as stock' is not of type 'number'


In [8]:
# An array of at least three numbers, each drawn from {1, 2, 3}.
array_schema = dict(
    type="array",
    items=dict(type="number", enum=[1, 2, 3]),
    minItems=3,
)

In [9]:
# Wrong item type, item outside the enum, and too few items.
v = Validator(array_schema)
for message in (err.message for err in sorted(v.iter_errors(["spam", 2]), key=str)):
    print(message)

'spam' is not of type 'number'
'spam' is not one of [1, 2, 3]
['spam', 2] is too short


In [10]:
# Long enough now, but the first item is still the wrong type and outside the enum.
v = Validator(array_schema)
for message in (err.message for err in sorted(v.iter_errors(["spam", 2, 3]), key=str)):
    print(message)

'spam' is not of type 'number'
'spam' is not one of [1, 2, 3]


In [11]:
# All numbers, but 0 is not one of the allowed enum values.
v = Validator(array_schema)
for message in (err.message for err in sorted(v.iter_errors([0, 2, 3]), key=str)):
    print(message)

0 is not one of [1, 2, 3]


In [12]:
# A fully valid instance: the loop body never runs, so nothing is printed.
v = Validator(array_schema)
for message in (err.message for err in sorted(v.iter_errors([1, 2, 3]), key=str)):
    print(message)

In [13]:
# can traverse errors also using tree = ErrorTree(v.iter_errors(instance))

In [14]:
# best_match() selects a single, most relevant error from an iterable of errors.
# https://python-jsonschema.readthedocs.io/en/v2.6.0/errors/#best-match-and-relevance
from jsonschema.exceptions import best_match
most_relevant = best_match(v.iter_errors(["spam", 2]))
print(most_relevant.message)

['spam', 2] is too short


#### Use with SQLA constructor?

In [15]:
import sqlalchemy
sqlalchemy.__version__ 

'1.2.10'

In [16]:
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# In-memory SQLite database; append echo=True to log the emitted SQL.
engine = create_engine('sqlite:///:memory:') #, echo=True)
# Declarative base class that the model class below inherits from.
Base = declarative_base()

# Schema applied to the keyword arguments of User(...).
# JSON Schema keywords are case-sensitive: the length keyword is "minLength".
# A lower-case "minlength" is an unknown keyword and is silently ignored.
user_schema = {
    "type" : "object",
    "properties" : {
        "name" : {
            "type" : "string",
            "minLength": 1  # reject the empty string
        },
        "email" : {
            "type" : "string",
            "pattern": r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
        },
    },
    "additionalProperties": False,  # unknown kwargs are an error
    "required": ["name", "email"],
}

class User(Base):
    """ORM model whose constructor keyword arguments are checked against ``user_schema``."""

    # Required by plain SQLAlchemy; flask-sqlalchemy assigns it automatically if omitted.
    __tablename__ = 'users'

    # At least one primary key column is required.
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)
    email = Column(String, nullable=False)

    def __init__(self, *args, **kwargs):
        """Validate ``kwargs`` against ``user_schema``, then build the instance.

        Raises:
            ValidationError: if ``kwargs`` does not conform to ``user_schema``.
        """
        # Intercept the kwargs before the declarative constructor sees them.
        validate(kwargs, user_schema, cls=Validator)
        super().__init__(*args, **kwargs)

    def __repr__(self):
        """Return a short representation showing ``name`` and ``email``."""
        return "<User(name='%s', email='%s')>" % (self.name, self.email)

# Create the tables for every model registered on Base in the engine's database.
Base.metadata.create_all(engine)
# Session factory bound to the engine, plus the one session used by the cells below.
Session = sessionmaker(bind=engine)
session = Session()

In [17]:
user = User(name='ed', email='ed@gmail.com')

In [18]:
# An unexpected keyword argument is rejected by "additionalProperties": False.
try:
    user = User(name='ed', email='ed@gmail.com', other='foo')
except ValidationError as exc:
    failed_item = exc.path[-1] if exc.path else 'item'
    print('message:', exc.message)
    print('{0} fails {1}'.format(failed_item, exc.validator))

message: Additional properties are not allowed ('other' was unexpected)
item fails additionalProperties


In [19]:
# No keyword arguments at all: the "required" list is violated.
try:
    user = User()
except ValidationError as exc:
    failed_item = exc.path[-1] if exc.path else 'item'
    print('message:', exc.message)
    print('{0} fails {1}'.format(failed_item, exc.validator))

message: 'name' is a required property
item fails required


In [20]:
# A malformed email address fails the "pattern" constraint on that property.
try:
    user = User(name='ed', email='ed@gmail@com')
except ValidationError as exc:
    failed_item = exc.path[-1] if exc.path else 'item'
    print('message:', exc.message)
    print('{0} fails {1}'.format(failed_item, exc.validator))

message: 'ed@gmail@com' does not match '(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+$)'
email fails pattern


#### How to coordinate with the database?

Pretty difficult. Because the schema itself can be traversed, we can extract the constraints on each property, but translating them into SQL check constraints would be manual work; it is hard to see a programmatic way of doing it.

#### Integrate with WTForms?

Someone appears to be working on WTForms to jsonschema but 'To embed a JSON Schema as a form field' is 'TODO:':
```
class MyForm(forms.Form):
    subfield = JSONSchemaField(schema=schema)
```

Bingo? https://pythonhosted.org/Flask-Inputs/#json-validation Or maybe not: it could be that it's just using WTForms validators...

Perhaps custom validators and pull out the relevant part of the schema https://wtforms.readthedocs.io/en/stable/validators.html#custom-validators?

https://medium.com/@doobeh/posting-a-wtform-via-ajax-with-flask-b977782edeee

HTTP form encoding (hence WTForms?) cannot represent nested data (https://github.com/requests/requests/issues/2885), so I guess the 'API spec' for HTTP POSTs has to be a simple, flat schema.

#### Where could we hook in validation when modifying an instance?

decorators https://github.com/mikefromit/flask-jsonschema-example

In [21]:
from sqlalchemy import event

def my_name_set_listener(target, value, oldvalue, initiator):
    """Attribute 'set' hook for ``User.name``: report the instance being changed."""
    print('Have received an set event for User.name', target)


def my_user_update_listener(mapper, connection, target):
    """Mapper 'before_update' hook for ``User``: report that the event fired."""
    print('Have received an update event for User')


# Register both hooks: one on the name attribute, one on the mapped class.
event.listen(User.name, 'set', my_name_set_listener)
event.listen(User, 'before_update', my_user_update_listener)

In [22]:
user = User(name='ed', email='ed@gmail.com')

Have received an set event for User.name <User(name='None', email='ed@gmail.com')>


In [23]:
session.add(user)

In [24]:
session.flush()

In [25]:
session.commit()

In [26]:
a_user = session.query(User).first()

In [27]:
a_user

<User(name='ed', email='ed@gmail.com')>

In [28]:
a_user.name = 'foo'

Have received an set event for User.name <User(name='ed', email='ed@gmail.com')>


session.commit()

In [29]:
a_user = session.query(User).first()

Have received an update event for User


In [30]:
a_user

<User(name='foo', email='ed@gmail.com')>

In [31]:
# Fields that must be present on init.
# Intercept kwargs in __init__ and apply the following validator.
# JSON Schema keywords are case-sensitive: "minLength"/"maxLength" are enforced,
# whereas lower-case "minlength"/"maxlength" are unknown keywords and ignored.
user_model_instantiate_schema = {
    "type" : "object",
    "properties" : {
        "username" : {
            "type" : "string",
            "minLength": 1,
            "maxLength": 64,
        },
        "email" : {
            "type" : "string",
            "pattern": r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
        },
        "password" : {
            "type" : "string",
            # NOTE(review): this is the email regex, so only email-shaped passwords
            # validate. Looks like a copy-paste; replace with the real password policy.
            "pattern": r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
        },
        "about_me" : {
            "type": "string",
            "maxLength": 140,
        }
    },
    "additionalProperties": False,  # don't allow anything else
    "required": ["username", "email", "password"],  # require these
}


The update function is a bit more complicated.

We could set listeners for attribute set events on the model
http://docs.sqlalchemy.org/en/latest/orm/events.html#sqlalchemy.orm.events.AttributeEvents.set                              
but that would involve setting up a listener for each field mentioned in the schema.  They could all call the same validation routine.                                                                                                                    
                                                                                                                             
It does not seem that there is a way to listen for a change in __any__ attribute, or in any attribute from a given list.

However, the update is usually closely followed by a flush, so it would be simpler to have just one listener for that: http://docs.sqlalchemy.org/en/latest/orm/events.html#sqlalchemy.orm.events.MapperEvents.before_update . This could be put into a package-level validate routine (operating on instance.to_dict and the schema) that could be triggered manually as well.
                                                                                                                            
But what would we pass to the jsonschema.validate method?  I guess this needs to be the serialized instance, but not of all the fields since some (password_hash) are not appropriate. Hmm. I guess the validate change needs to be applied as in the ```__init__``` e.g. ```update(self, *args, **kwargs)``` and the user should use that to make a __validated__ change. This would avoid listeners entirely, which seems like a good thing.

In [32]:
# Schema for validated updates to an existing user.
# NOTE(review): password_hash is absent from the properties; the original note
# about it was cut off, presumably it must not be set directly. TODO confirm.
#
# No "required" list: an update is partial, so any subset of the properties is
# acceptable. The previous "required": ["username"] could never be satisfied,
# because "username" is not a declared property and additionalProperties is False.
user_model_update_schema = {
    "type" : "object",
    "properties" : {
        "email" : {
            "type" : "string",
            "pattern": r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
        },
        "password" : {
            "type" : "string",
            # NOTE(review): this is the email regex; looks like a copy-paste.
            "pattern": r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
        },
        "about_me" : {
            "type": "string",
            "maxLength": 140,  # keyword is case-sensitive; "maxlength" is ignored
        },
        "last_seen" : {
            "type": "string",
            # matches a datetime pattern
        },
        "token": {
            "type": "string",
            # matches some encoded length datetime pattern
        },
        "token_expiration" : {
            "type": "string",
            # matches a datetime pattern
        },
    },
    "additionalProperties": False,
}

Actually, all this overlooks quite a big problem: jsonschema validates JSON, so anything that is not JSON-serializable, like a date, cannot be validated when it is passed to a class constructor...

In [77]:
from datetime import datetime, timezone
# The schema expects "date" to be a string...
date_schema = {
    "type": "object",
    "properties": {"date": {"type": "string"}},
}
# ...but the instance holds a datetime object, so the "type" check fails.
data = {'date': datetime.utcnow()}
try:
    validate(data, date_schema, cls=Validator)
except ValidationError as exc:
    print(exc)

datetime.datetime(2018, 8, 13, 11, 41, 52, 178180) is not of type 'string'

Failed validating 'type' in schema['properties']['date']:
    {'type': 'string'}

On instance['date']:
    datetime.datetime(2018, 8, 13, 11, 41, 52, 178180)


Seems jsonschema can be asked to support additional types but this is limited to types that can be mapped to JSON easily e.g. integer, float -> "number" https://python-jsonschema.readthedocs.io/en/v2.6.0/validate/#validating-types 

Well, datetime is maybe the most problematic one. At the end of the day all the SQLA models represent things that go into a database so all the scalar items need to be basic, if not primitive string, number etc.

Working with JSON in python https://realpython.com/python-json/

In [67]:
import json
class MyEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``datetime`` values as ISO 8601 strings."""

    def default(self, z):
        """Return a JSON-serializable stand-in for ``z``.

        ``datetime`` objects become ISO 8601 strings with a ``+00:00`` offset.
        Anything else is delegated to the base class.

        Raises:
            TypeError: from ``json.JSONEncoder.default`` for unsupported types.
        """
        if isinstance(z, datetime):
            # replace() overwrites tzinfo with UTC without shifting the clock time.
            return z.replace(tzinfo=timezone.utc).isoformat()
        # Bound super() call: pass only ``z`` and propagate the result.
        return super().default(z)

In [68]:
# then
json.loads(json.dumps(data, cls=MyEncoder))

{'date': '2018-08-13T12:38:54.072321+00:00', 'email': 'joegmail.com'}

In [76]:
# Once the datetime is serialized to a string, it can be validated with a pattern.
date_schema = {
     "type" : "object",
     "properties" : {
         "date" : {
             "type" : "string",
             # ISO 8601 with microseconds and a numeric UTC offset
             "pattern": r"^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d{6}[+-]\d\d:\d\d$"
         },
     },
}
data = {
    'date': datetime.utcnow()
}
# Round-trip through JSON so that only JSON-native types reach the validator.
serializable_data = json.loads(json.dumps(data, cls=MyEncoder))
print(serializable_data)
try:
    validate(serializable_data, date_schema, cls=Validator)
except ValidationError as e:  # was the undefined name ValidateError (NameError on failure)
    print(str(e))

{'date': '2018-08-13T11:41:39.659715+00:00'}


In [73]:
# or there is a cool format defined here
# https://json-schema.org/understanding-json-schema/reference/string.html#format
# BUT see warning
from jsonschema import FormatChecker
date_schema = {
    "type": "object",
    "properties": {
        "date": {"type": "string", "format": "date-time"},
        "email": {"type": "string", "format": "email"},
    },
}
data = {'date': datetime.utcnow(), 'email': 'joegmail.com'}
serializable_data = json.loads(json.dumps(data, cls=MyEncoder))
print(serializable_data)
try:
    # WARNING: a format_checker MUST be specified, or "format" violations pass silently.
    validate(
        serializable_data,
        date_schema,
        cls=Validator,
        format_checker=FormatChecker(),
    )
except ValidationError as exc:
    print(exc)

{'email': 'joegmail.com', 'date': '2018-08-13T11:41:19.787115+00:00'}
'joegmail.com' is not a 'email'

Failed validating 'format' in schema['properties']['email']:
    {'format': 'email', 'type': 'string'}

On instance['email']:
    'joegmail.com'


In [78]:
# Also, point here would be to ensure a standard format throughout the app
# not necessarily the one decided by JSON or jsonschema.