# CMIP6 schema

* https://json-schema.org/
* https://github.com/python-jsonschema/jsonschema

In [1]:
from jsonschema import validate
from jsonschema import Draft202012Validator

In [2]:
# Allowed values for the enum-constrained handle record fields.
AGGREGATION_LEVELS = ["FILE", "DATASET"]
CHECKSUM_METHODS = ["MD5", "SHA256"]

# Base schema: the fields checked on every record, independent of the
# aggregation level. The level-specific rules are attached below.
schema = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "CMIP6",
    "description": "A handle record schema for CMIP6.",
    "type": "object",
    "properties": {
        "URL": {
            "description": "URL of the landing page",
            "type": "string",
            "format": "uri",
        },
        "FIXED_CONTENT": {"type": "boolean"},
        "AGGREGATION_LEVEL": {"enum": AGGREGATION_LEVELS},
    },
    "required": ["URL", "AGGREGATION_LEVEL"],
}

# Branch selector: records with AGGREGATION_LEVEL "FILE" are checked against
# "then", all other records against "else". The "if" carries no "required",
# so it would also match a record lacking AGGREGATION_LEVEL; the top-level
# "required" above rejects such records anyway.
schema["if"] = {
    "properties": {
        "AGGREGATION_LEVEL": {"const": "FILE"},
    },
}

# Rules applied to FILE records; only FILE_NAME is mandatory.
schema["then"] = {
    "properties": {
        "IS_PART_OF": {"type": "string"},
        "FILE_NAME": {"type": "string", "minLength": 1, "maxLength": 200},
        "FILE_SIZE": {"type": "integer", "minimum": 0},
        "FILE_VERSION": {"type": "integer", "minimum": 0},
        "CHECKSUM": {"type": "string"},
        "CHECKSUM_METHOD": {"enum": CHECKSUM_METHODS},
    },
    "required": ["FILE_NAME"],
}

# Rules applied to every non-FILE record; only DRS_ID is mandatory.
schema["else"] = {
    "properties": {
        "DRS_ID": {"type": "string"},
        "VERSION_NUMBER": {"type": "string"},
        "HAS_PARTS": {"type": "string"},
    },
    "required": ["DRS_ID"],
}

## Validate a file record

In [3]:
# Sample handle record with AGGREGATION_LEVEL "FILE".
file_data = dict(
    URL="http://test",
    AGGREGATION_LEVEL="FILE",
    IS_PART_OF="blas",
    FILE_NAME="temperature.nc",
    FILE_SIZE=10,
    FILE_VERSION=1,
    CHECKSUM="abc123",
    CHECKSUM_METHOD="MD5",
)

In [4]:
# Check the file record against the schema; validate() raises on an invalid
# record. The format checker is passed so that "format" keywords (the URL's
# "uri") are asserted rather than ignored.
validate(
    instance=file_data,
    schema=schema,
    format_checker=Draft202012Validator.FORMAT_CHECKER,
)

## Validate a dataset record

In [5]:
# Sample handle record with AGGREGATION_LEVEL "DATASET".
ds_data = dict(
    URL="http://test",
    AGGREGATION_LEVEL="DATASET",
    DRS_ID="cmip6.tas.123",
    HAS_PARTS="one;two;three",
)

In [6]:
# Check the dataset record against the schema; validate() raises on an
# invalid record. The format checker is passed so that "format" keywords
# (the URL's "uri") are asserted rather than ignored.
validate(
    instance=ds_data,
    schema=schema,
    format_checker=Draft202012Validator.FORMAT_CHECKER,
)