In [2]:
#!pip install -e .. --upgrade

In [3]:
from pyjsg.validate_json import JSGPython

# JSG Syntax
The names of the various components defined in [Introducing JSON](https://json.org/) are referenced in ***bold italics*** in the document below.  Example: A member definition defines the ***string***/***value*** pairs that may appear as ***members*** in an ***object*** definition.

A JSG Syntax definition consists of the following components:
1. [Type](#type) and/or [Ignore](#ignore) directives.  The `.TYPE` directive identifies the ***string*** (if any) that identifies the JSG "type" of an ***object***.  It also names any JSG objects that do not have a `.TYPE` identifier.  The `.IGNORE` directive identifies a set of ***strings*** that may appear in any JSON object.
2. [Grammar rules](#grammar).  These define valid (conformant) JSON ***objects*** and ***arrays***.
3. Lexer rules.  Lexer rules define the regular expressions that can be used to constrain the range of a JSON ***value***.

<hr/>

## Directives
<a id='type'></a>
### The `.TYPE` directive:

1. Names a unique property that identifies the JSG object being represented
2. (Optional) lists one or more production types that do not use the `.TYPE` discriminator

### Syntax
`.TYPE <type> [ - <type> [<type>...]] ;`

### No type directive

In [20]:
# A conforming document must have an element named "a" that can have any value. 
# The trailing comma indicates that any other elements are allowed
jsg = 'doc { a:., }'

p1 = '{"a":"hello"}'
p2 = '{"a":17.2, "b": true}'
f1 = '{"b":"test"}'

x = JSGPython(jsg)
print(x.conforms(p1, 'p1'))
print(x.conforms(p2, 'p2'))
print(x.conforms(f1, 'f1'))

p1: Conforms to doc
p2: Conforms to doc
f1: FAIL - doc: Missing required field: 'a'


### Simple type directive
`TYPE <type> ;`

In [30]:
# The JSON element name 't' determines the type of the object.
# "t": "doc" says that it is a doc object, "t": "id" an id object
jsg = '''
.TYPE t ;
doc {a:.,}
id {b: @int}
'''

# P1 is a 'doc', with element 'a' referencing an object of type 'id'
p1 = '''
{"t": "doc", 
 "a": {
     "t":"id",
     "b": 173
  }
}'''

f1 = '{"t": "id", "b":"text"}'           # Fails because "b" isn't an integer
f2 = '{"t": "doc", "b": 173}'            # Fails because no "a" element
f3 = '{"t": "missing", "val": -143}'     # Fails because no "missing" JSG object definition

x = JSGPython(jsg)
print(x.conforms(p1, 'p1'))
print(x.conforms(f1, 'f1'))
print(x.conforms(f2, 'f2'))
print(x.conforms(f3, 'f3'))

p1: Conforms to doc
f1: FAIL - Wrong type for b: 'text' - expected: <class 'pyjsg.jsglib.jsg_strings.Integer'> got str
f2: FAIL - doc: Missing required field: 'a'
f3: FAIL - Unknown reference type: "t": "missing"


### Type directive with one exception
`.TYPE <type> - <type>;`

In [31]:
# Everything has a type identifier of "t" except the 'id' object
jsg = '''
.TYPE t - id ;

doc {a:.,}
id  {b: @int}
val {t: @number}
'''

p1 = '{"t": "doc", "a": "Fred"}'    # Explicit type identifier
p2 = '{"b": 140}'                   # Matches id exception
p3 = '{"t": 42}'                    # Impossible to instantiate "val" object
f1 = '{"t": "val"}'                 # Impossible to instantiate "val" object
f2 = '{"a": "Fred"}'                # Type required
f3 = '{"t": "ele"}'                 # Type references an unknown object

x = JSGPython(jsg)
print(x.conforms(p1, 'p1'))
print(x.conforms(p2, 'p2'))
print(x.conforms(p3, 'p3'))
print(x.conforms(f1, 'f1'))
print(x.conforms(f2, 'f2'))
print(x.conforms(f2, 'f3'))

p1: Conforms to doc
p2: Conforms to id
p3: FAIL - Unknown reference type: "t": "42"
f1: FAIL - Wrong type for t: 'val' - expected: <class 'pyjsg.jsglib.jsg_strings.Number'> got str
f2: FAIL - Missing "t" element
f3: FAIL - Missing "t" element


### Type directive with two exceptions
`.TYPE <type> - <type> <type>;`

In [32]:
# Same as the above except 'val' doesn't need an id as well
# Note that "t" references a number rather than a type in the 'f2' case
jsg = '''
.TYPE t - id val;

doc {a:.,}
id  {b: @int}
val {t: @number}
'''

x = JSGPython(jsg)
print(x.conforms(p3, 'p3'))         # P3 now words as expected

p3: Conforms to val


<a id='ignore'></a>
### The `.IGNORE` directive
The `.IGNORE` directive identifies a list of property names to be globally ignored

#### Syntax
`.IGNORE <type> [<type> ...] ;`

### No ignore directive

In [33]:
# Exactly one element, "a", of type string
jsg = 'doc {a:@string}'

x = JSGPython(jsg)
print(x.conforms('{"a":"hello"}'))
print(x.conforms('{"a":"hello", "target":"earthling"}'))   # Additional element

Conforms to doc
FAIL - Unknown attribute: target=earthling


### With ignore directive

In [34]:
# Same definition except 'target' is ignored
jsg = '''
.IGNORE target;
doc {a:@string}
'''
x = JSGPython(jsg)
print(x.conforms('{"a":"hello"}'))
print(x.conforms('{"a":"hello", "target":"earthling"}'))
print(x.conforms('{"a":"hello", "target":"earthling", "mode": "formal"}'))

Conforms to doc
Conforms to doc
FAIL - Unknown attribute: mode=formal


<hr/>
<a id='grammar'></a>
## Grammar Rules

<a id='objectdef'></a>
### Object Definitions
A JSG object definition consists of the definition name followed by the definition enclosed in curly braces ({ ... })

#### Syntax
`<identifier> : { [<element definitions>] }` 

Where identifier must either be a single, upper case character or, if there are two or more characters, at least one of them must be lower case alphabetic.

* valid identifiers:  `A`,  `Type`,  `type`, `a17`, `a_abc`, `DOCx`
* invalid identifiers: `AA`, `A17`, `DOCX`, `A_`

### Simple object definition
An object definition with no element definitions describes an empty object

In [35]:
jsg = 'mtdoc {}'          # The only thing that passes is an empty object

x = JSGPython(jsg)
print(x.conforms('{}', 'empty object'))
print(x.conforms('{"name":"Fred"}', 'non-empty object'))

# .IGNORE elements can always appear - even in empty documents
x = JSGPython('.IGNORE name; mtdoc {}')
print(x.conforms('{}', 'empty object'))
print(x.conforms('{"name":"Fred"}', 'object with ignored name'))
print(x.conforms('{"location":"Spain"}', 'object w/o ignored name'))

empty object: Conforms to mtdoc
non-empty object: FAIL - Unknown attribute: name=Fred
empty object: Conforms to mtdoc
object with ignored name: Conforms to mtdoc
object w/o ignored name: FAIL - Unknown attribute: location=Spain


A trailing comma in an object definition indicates that the definition is "open", meaning that additional elements are allowed

In [36]:
jsg = 'anydoc {,}'                # Trailing comma means anything is allowed
jsg2 = 'doc {"name": @string, }'  # Object must have a name element, but anything else is still allowed

x = JSGPython(jsg)
print(x.conforms('{}', "empty document"))
print(x.conforms('{"a": 17, "b": true, "c" : {"x": -12, "y": 143}}', "complex JSON"))
x = JSGPython(jsg2)
print(x.conforms('{}', "empty document"))
print(x.conforms('{"name": "Fred", "attitude": ["bad", "worse", "off the scale"]}', "with name"))

empty document: Conforms to anydoc
complex JSON: Conforms to anydoc
empty document: FAIL - doc: Missing required field: 'name'
with name: Conforms to doc


### Element definitions
An **`element definition`** consist of either:

 1. a list of one or more `[member definitions](#memberdef)` or ...
 2. ... a `mapping definition`

<a id='memberdef'></a>
### member definition
A **`member definition`** identifies the name (string), type, possible values and cardinality for a JSON object member.

#### Syntax
`<name> : <valueType> [<cardinality>]`<br/>
&nbsp;&nbsp;&nbsp;&nbsp;
or<br/>
`(<name> <name> ...) : <valuetype> [<cardinality>]`<br/>
&nbsp;&nbsp;&nbsp;&nbsp;or<br/>
`<objectId>`<br/>


**`<name> : <valueType> [<cardinality]`** format:<br/>
* `<name>` - can be any valid JSON *string* as defined by the ECMA-404 standard<br/>
* `<valuetype>` and `<cardinality>` are defined later in this document<br/>

#### Example
The following example defines four elements:

1) A required string, '`last_name`'

2) A non-empty list of required strings, '`first name`' (note that this identifier is a valid JSON string but NOT a valid python identifier)

3) An optional integer, '`age`'

4) A (possibly empty) list of numbers named '`weight`'

In [37]:
jsg = '''
doc {
    last_name : @string,       # exactly one last name of type string
    "first name" : @string+    # array or one or more first names
    age : @int?,               # optional age of type int
    weight : @number*          # required array of zero or more weights
}
'''
# last_name, first name and empty weight array passes
p1 = '''
{ "last_name" : "snooter",
  "first name" : ["grunt", "peter"],
  "weight" : []
}'''
# First name is missing
f1 = '''
{ "last_name" : "snooter", "weight" : []
}'''

x = JSGPython(jsg)
print(x.conforms(p1, 'p1'))
print(x.conforms(f1, 'f1'))

p1: Conforms to doc
f1: FAIL - doc: Missing required field: 'first name'


Note that the `weight` array above is required, even if empty.  An optional array is defined as:

In [53]:
jsg = '''
doc {
    weight : @number*           # required array of zero or more weights
    weight2 : [@number]         # required array of zero or more weights (alternative format)
    heights : [@number]?        # optional array of zero or more heights
    heights2 : [@number+]?      # optional array or one or more heights
}
'''
p1 = '{"weight": [], "weight2": [], "heights": [], "heights2": [1]}'   # Everything present
p2 = '{"weight": [], "weight2": []}'                                   # Heights arrays missing
f1 = '{"heights": []}'                                                 # Weights arrays missing
f2 = '{"weight": [], "weight2": [], "heights2": []}'                    # Empty heights2 array

x = JSGPython(jsg)
print(x.conforms(p1, 'p1'))
print(x.conforms(p2, 'p2'))
print(x.conforms(f1, 'f1'))
print(x.conforms(f2, 'f2'))

p1: Conforms to doc
p2: Conforms to doc
f1: FAIL - doc: Missing required field: 'weight'
doc: Missing required field: 'weight2'
f2: FAIL - Wrong type for heights2: [] - expected: typing.Union[pyjsg.jsglib.jsg_array.heights2, NoneType] got list


<a id='memberdeflist'></a>
### member definition as a list
A shorthand format exists to allow multiple members of the same type to be defined together.

#### Syntax
`(<name> <name> ...) : <valuetype> [<cardinality>]`<br/>


In [57]:
x = JSGPython('doc { (t b l r) : @int}')   # 4 required entries of type integer
print(x.conforms('{"t": 17, "b": -142, "l": 12048, "r": 0}'))


Conforms to doc


### **`<valueType>`**:

valueType can be:

1) [A built in value type](#builtin types):
* @string   - a JSON string
* @number   - a JSON number
* @int      - a JSON number with no decimal value
* @bool     - `true` or `false`
* @null     - the JSON null value
* @array    - any JSON array (`[ ... ]`)
* @object   - any JSON object (`{ ... }`)

2) [The identifier of a pattern defined in the lexer section](#lexer id)

3) [A JSON string enclosed in quotes](#string types)

4) [A JSG object definition of the form](#embedded object): `{ ... }`

5) A JSG array definition of the form: `[ ... ]`

6) A dot ('.') indicating any value

7) One or more type choices enclosed in parenthesis: `(<type> [| <type]...)`

<a id='builtin types'></a>
### Built In  Types

In [13]:
x = JSGPython(
'''
doc {
    v1: @string,
    v2: @number,
    v3: @int,
    v4: @bool,
    v5: @null,
    v6: @array,
    v7: @object 
}

obj {a: . , }
''')

print(x.conforms('''
{ "v1": "This is text!",
  "v2": -117.432e+2,
  "v3": -100173,
  "v4": false,
  "v5": null,
  "v6": [12, "text", null],
  "v7": {"q": "life", "a": 42}
}'''))

doc = getattr(x.module, 'doc')          # Reference to the class named 'doc'
d1 = doc()                              # Instance

try:
    d1.v1 = True
except ValueError as e:
    print(str(e))
try:
    d2 = doc(v4=17)
except ValueError as e:
    print(str(e))

Conforms to doc
Wrong type for v1: True - expected: <class 'pyjsg.jsglib.jsg_strings.String'> got bool
Wrong type for v4: 17 - expected: <class 'pyjsg.jsglib.jsg_strings.Boolean'> got int


<a id='lexer id'></a>
### Lexer Patterns

Lexer patterns can be used to constrain the possible values for strings, numbers, integers or boolean types.

Lexer patterns follow the JSG type definitions and begin with:
`@terminals` followed by a list of lexer patterns.

A lexer pattern takes the form:
`<LEXER_ID> : (pattern) ;`

Where <LEXER_ID> must be at least two characters in length, where the first is a capital letter and the succeeding characters are capital letters, numbers or an underscore (_).

Valid Lexer ID's:

'`AA`', '`A01B`', '`A_LEXER\_ID\_`', '`INT`'

Invalid Lexer ID's
* '`1A`'     -- starts with a digit
* '`A`'      -- single character
* '`A_LeXER_ID`' -- lower case 'e'
* '`A-ID`'   -- illegal character

Examples:

In [58]:
jsg = """
doc {a: POSINT,
     b: TRUE?
     c: ID}
     
@terminals
POSINT: [1-9][0-9]* @number ;
TRUE: [Tt] 'rue' @bool ;
ID: [A-Z][A-Z0-9_]+ ;
"""

x = JSGPython(jsg)
print(x.conforms('{"a": 1, "b": true, "c": "AN_ID1"}'))
print(x.conforms('{"a": 0, "b": false, "c": "An_ID1"}'))
print(x.conforms('{"a": 1, "b": false, "c": "An_ID1"}'))
print(x.conforms('{"a": 1, "c": "An_ID1"}'))

Conforms to doc
FAIL - Wrong type for a: 0 - expected: <class 'JSGPython.POSINT'> got int
FAIL - Wrong type for b: False - expected: typing.Union[JSGPython.TRUE, NoneType] got bool
FAIL - Wrong type for c: 'An_ID1' - expected: <class 'JSGPython.ID'> got str


We will go into more depth on lexer patterns later in this document.  For the time being, 
however, what we have serves to show the second form of a value type definition.

<a id='string types'></a>
### String Types
The third form of a `<valueType>` definition is in the form of a JSON string, and identifies a fixed value.

##### Examples:

In [59]:
# A valid document consists of:
#  1) Exactly one element "a" that just have the value "Fred"
#  2) An optional element, "b", that, if present, must be "Penguins"
#  3) A list of zero of more elements, "c", that must all be "Albatross"
jsg = """
doc {a: "Fred",
     b: "Penguins"?,
     c: "Albatross"*
}"""
p1 = '{"a": "Fred", "c": ["Albatross", "Albatross"]}'
f1 = '{"a": "John"}'
f2 = '{"a": "Fred", "b": "Penguins", "c": ["Albatross", "Aardvark"]}'

x = JSGPython(jsg)
print(x.conforms(p1, 'p1'))
print(x.conforms(f1, 'f1'))
print(x.conforms(f2, 'f2'))


p1: Conforms to doc
f1: FAIL - Wrong type for a: 'John' - expected: <class 'JSGPython._Anon1'> got str
f2: FAIL - Wrong type for c: ['Albatross', 'Aardvark'] - expected: <class 'pyjsg.jsglib.jsg_array.c'> got list


<a id='embedded object'></a>
### Embedded Object Definitions
Object definitions can be nested, which leads to the fourth form of `<valueType>`


In [16]:
# A document that consists of zero or more entries, each of which has an 8 digit 'id',
# a name consisting of a first, last and zero or more middle initials
# and one or more addresses
jsg = """
directory {
    entries: {
        id: ID,
        name: {
            first: @string,
            middle: [@string]?,
            last: @string
        },
        address: {
            city: @string,
            state: @string,
            zip: @int?
        }+
    }*
}

@terminals
ID: [1-9][0-9]{7} @int;
"""


p1 = '''
{
"entries": [
    {"id": 11725433,
      "name": {
          "first": "Sam",
          "last": "Sneed"
      },
      "address": [
        {
          "city": "Southwark",
          "state": "Bliss"
        }
      ]
    },
    {"id": 10000001,
      "name": {
          "first": "Julie",
          "middle": ["Mary", "Elizabeth"],
          "last": "Sneed"
      },
      "address": [
        {
          "city": "Southwark",
          "state": "Agony"
        }
      ]
    }
]
}  
'''

x = JSGPython(jsg, print_python=False)
print(x.conforms(p1, 'p1'))


p1: Conforms to directory


<a id='embedded array'></a>
### Embedded Array Definition
A `<valueType>` can be take the form of an embedded array definition.

#### Example

In [19]:
# A sequence document consists of list of sequences
jsg = '''
doc {
    sequences: [(RNASEQ|DNASEQ)]
}

@terminals
RNASEQ: [ACGU]+ ;
DNASEQ: [ACGT]+ ;
'''

p1 = '''
{ "sequences": [
    "GCUACGGAGCUUGGAGCUAG",
    "ATTTTGCGAGGTCCC"
   ]
}'''

x = JSGPython(jsg, print_python=False)
print(x.conforms(p1, 'P1'))

P1: Conforms to doc
