Permalink
Browse files

Adding a JSON validator (for use in future status messages).

git-svn-id: http://google-mobwrite.googlecode.com/svn/trunk@96 6a03f37a-134a-0410-852f-bbb3195577bb
  • Loading branch information...
1 parent 428f850 commit 3d3aab509486a5e2228e67086f549032d471cdf4 @NeilFraser NeilFraser committed Dec 8, 2009
Showing with 361 additions and 0 deletions.
  1. +256 −0 lib/json_validator.py
  2. +105 −0 lib/json_validator_test.py
View
@@ -0,0 +1,256 @@
+"""JSON Validator
+
+Copyright 2009 Google Inc.
+http://code.google.com/p/google-mobwrite/
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+__author__ = 'fraser@google.com (Neil Fraser)'
+
# States of the character-level scanner.
ERROR = -1        # Dead state: no rule leaves it, so validation fails.
GROUND = 0        # Between values; structural characters are read here.
STRING = 1        # Inside a double-quoted string.
STRINGSLASH = 2   # Just read a backslash inside a string.
UNICODE1 = 3      # Inside a \u escape, expecting hex digit 1 of 4.
UNICODE2 = 4      # ... expecting hex digit 2 of 4.
UNICODE3 = 5      # ... expecting hex digit 3 of 4.
UNICODE4 = 6      # ... expecting hex digit 4 of 4.
TRUE1 = 7         # Read "t" of true.
TRUE2 = 8         # Read "tr".
TRUE3 = 9         # Read "tru".
FALSE1 = 10       # Read "f" of false.
FALSE2 = 11       # Read "fa".
FALSE3 = 12       # Read "fal".
FALSE4 = 13       # Read "fals".
NULL1 = 14        # Read "n" of null.
NULL2 = 15        # Read "nu".
NULL3 = 16        # Read "nul".
NEGATIVE = 17     # Read a leading "-"; a digit must follow.
ZERO = 18         # Integer part is exactly "0"; no further digit may follow.
NUMBER = 19       # Integer part starting with 1-9; more digits may follow.
DECIMALBAD = 20   # Read the decimal point but no fraction digit yet.
DECIMALOK = 21    # Read at least one fraction digit.
EXPONENT1 = 22    # Read "e" or "E"; a sign or digit must follow.
EXPONENT2 = 23    # Read the exponent sign; a digit must follow.
EXPONENT3 = 24    # Read at least one exponent digit.

# Tokens emitted by the scanner for the structural check.
OBJECTSTART = 1
OBJECTEND = 2
ARRAYSTART = 3
ARRAYEND = 4
COLON = 5
COMMA = 6
STRVALUE = 7
VALUE = 8  # true, false, null, number

# Transformations: maps (state, character) to (next state, token).
# A character of None is a wildcard consulted when no exact rule matches.
TRANSFORMATIONS = {}


def _add_rule(startState, characters, endState, token):
    """Add a rule to the transformations map.

    Args:
      startState: This rule only applies if the parser is in this state.
      characters: This rule only applies if the current character is one of
          these.  None registers a wildcard rule for the state.
      endState: When applied, this rule changes the state to this.
      token: When applied, this rule emits this token (None emits nothing).
    """
    if characters is None:
        # None is treated as a wildcard character.
        TRANSFORMATIONS[(startState, None)] = (endState, token)
        return
    # Create a rule for every character.
    for char in characters:
        TRANSFORMATIONS[(startState, char)] = (endState, token)


# Character classes shared by several rules.
_WHITESPACE = " \t\r\n"  # JSON insignificant whitespace, including tab.
_DIGITS = "0123456789"
_NONZERO_DIGITS = "123456789"
_HEX_DIGITS = "0123456789abcdefABCDEF"
# Every character below U+0020; JSON requires these to be escaped in strings.
_CONTROL_CHARS = "".join([chr(_code) for _code in range(0x20)])

# Whitespace and structural characters.
_add_rule(GROUND, _WHITESPACE, GROUND, None)
_add_rule(GROUND, "[", GROUND, ARRAYSTART)
_add_rule(GROUND, "]", GROUND, ARRAYEND)
_add_rule(GROUND, "{", GROUND, OBJECTSTART)
_add_rule(GROUND, "}", GROUND, OBJECTEND)
_add_rule(GROUND, ",", GROUND, COMMA)
_add_rule(GROUND, ":", GROUND, COLON)

# Strings, including backslash and \uXXXX escapes.
_add_rule(GROUND, "\"", STRING, None)
_add_rule(STRING, "\"", GROUND, STRVALUE)
_add_rule(STRING, "\\", STRINGSLASH, None)
_add_rule(STRINGSLASH, "\"\\/bfnrt", STRING, None)
_add_rule(STRINGSLASH, "u", UNICODE1, None)
_add_rule(UNICODE1, _HEX_DIGITS, UNICODE2, None)
_add_rule(UNICODE2, _HEX_DIGITS, UNICODE3, None)
_add_rule(UNICODE3, _HEX_DIGITS, UNICODE4, None)
_add_rule(UNICODE4, _HEX_DIGITS, STRING, None)
# Raw control characters are illegal inside a string.
_add_rule(STRING, _CONTROL_CHARS, ERROR, None)
# Anything else is an ordinary string character.
_add_rule(STRING, None, STRING, None)

# Literals: true, false, null.
_add_rule(GROUND, "t", TRUE1, None)
_add_rule(TRUE1, "r", TRUE2, None)
_add_rule(TRUE2, "u", TRUE3, None)
_add_rule(TRUE3, "e", GROUND, VALUE)
_add_rule(GROUND, "f", FALSE1, None)
_add_rule(FALSE1, "a", FALSE2, None)
_add_rule(FALSE2, "l", FALSE3, None)
_add_rule(FALSE3, "s", FALSE4, None)
_add_rule(FALSE4, "e", GROUND, VALUE)
_add_rule(GROUND, "n", NULL1, None)
_add_rule(NULL1, "u", NULL2, None)
_add_rule(NULL2, "l", NULL3, None)
_add_rule(NULL3, "l", GROUND, VALUE)

# Numbers.  The VALUE token is emitted on the first digit.
_add_rule(GROUND, "-", NEGATIVE, None)
_add_rule(GROUND, "0", ZERO, VALUE)
_add_rule(GROUND, _NONZERO_DIGITS, NUMBER, VALUE)
# "-0" must land in ZERO (not NUMBER) so that "-007" is rejected like "007".
_add_rule(NEGATIVE, "0", ZERO, VALUE)
_add_rule(NEGATIVE, _NONZERO_DIGITS, NUMBER, VALUE)
_add_rule(NUMBER, _DIGITS, NUMBER, None)
_add_rule(NUMBER, ".", DECIMALBAD, None)
_add_rule(ZERO, ".", DECIMALBAD, None)
_add_rule(DECIMALBAD, _DIGITS, DECIMALOK, None)
_add_rule(DECIMALOK, _DIGITS, DECIMALOK, None)
_add_rule(NUMBER, "eE", EXPONENT1, None)
_add_rule(ZERO, "eE", EXPONENT1, None)
_add_rule(DECIMALOK, "eE", EXPONENT1, None)
_add_rule(EXPONENT1, "+-", EXPONENT2, None)
_add_rule(EXPONENT1, _DIGITS, EXPONENT3, None)
_add_rule(EXPONENT2, _DIGITS, EXPONENT3, None)
_add_rule(EXPONENT3, _DIGITS, EXPONENT3, None)

# A complete number has no closing delimiter of its own, so the character
# that ends it is handled here exactly as GROUND would handle it.
for _number_state in (ZERO, NUMBER, DECIMALOK, EXPONENT3):
    _add_rule(_number_state, _WHITESPACE, GROUND, None)
    _add_rule(_number_state, ",", GROUND, COMMA)
    _add_rule(_number_state, ":", GROUND, COLON)
    _add_rule(_number_state, "]", GROUND, ARRAYEND)
    _add_rule(_number_state, "}", GROUND, OBJECTEND)

# List of states which are acceptable to end in: GROUND plus every state in
# which a number is already complete.
EXITSTATES = (GROUND, NUMBER, ZERO, DECIMALOK, EXPONENT3)
+
def is_valid(string):
    """Returns true if the string is valid syntax for a JSON array or object.

    The check runs in two passes.  First the characters are fed through the
    TRANSFORMATIONS state machine, which rejects malformed strings, numbers
    and literals and produces a flat list of tokens.  Then the token list is
    checked for correct nesting and separators by _pop_value.

    Args:
      string: JSON string to check.

    Returns:
      True iff JSON string is valid.
    """
    state = GROUND
    tokens = []
    for char in string:
        # Prefer an exact (state, character) rule; otherwise fall back to the
        # state's wildcard rule, if it has one.
        rule = TRANSFORMATIONS.get((state, char))
        if rule is None:
            rule = TRANSFORMATIONS.get((state, None))
            if rule is None:
                # This character is not allowed in this state.
                return False
        state, token = rule
        if token is not None:
            tokens.append(token)

    if state not in EXITSTATES:
        # Input ended inside a half-defined value.
        return False
    if not tokens or tokens[0] not in (ARRAYSTART, OBJECTSTART):
        # Root value must be array or object.
        return False
    if not _pop_value(tokens):
        # The tokens do not end with a well-formed value.
        return False
    # Any tokens left over lie outside the single root value.
    return not tokens
+
def _pop_value(tokens):
    """Check whether the token list ends with one complete JSON value.

    Tokens are consumed from the end of the list as they are matched; tokens
    that are not part of the value stay on the list.  Arrays and objects are
    handled by calling this function again for each contained value, so the
    nesting depth that can be checked is bounded by the interpreter's
    recursion limit.

    Args:
      tokens: List of JSON tokens.  Modified in place.

    Returns:
      True iff a JSON value was found at the end of the list.
    """
    if not tokens:
        # Nothing left to read.
        return False
    # Consume from the tail: list.pop() is far cheaper than del tokens[0].
    closer = tokens.pop()

    if closer in (VALUE, STRVALUE):
        # A scalar is a complete value on its own.
        return True

    # Arrays and objects share one loop; an object additionally needs a
    # colon and a string key in front of every value.
    if closer == ARRAYEND:
        opener = ARRAYSTART
        keyed = False
    elif closer == OBJECTEND:
        opener = OBJECTSTART
        keyed = True
    else:
        # Must be a comma, colon or an opening bracket: not a value.
        return False

    seen_item = False
    while tokens:
        if tokens[-1] == opener:
            # Reached the matching opening bracket.
            tokens.pop()
            return True
        if seen_item:
            if tokens[-1] != COMMA:
                # Items not comma separated.
                return False
            tokens.pop()
        if not _pop_value(tokens):
            # Container holds something that is not a value.
            return False
        seen_item = True
        if keyed:
            # Reading backwards, a value is preceded by ":" and then its name.
            for expected in (COLON, STRVALUE):
                if not tokens or tokens[-1] != expected:
                    # Out of tokens, missing colon, or non-string name.
                    return False
                tokens.pop()
    # Ran out of tokens looking for the opening bracket.
    return False
+
View
@@ -0,0 +1,105 @@
+#!/usr/bin/python2.4
+
+"""Test harness for json_validator.py
+
+Copyright 2009 Google Inc.
+http://code.google.com/p/google-mobwrite/
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+__author__ = 'fraser@google.com (Neil Fraser)'
+
+import unittest
+import json_validator
+# Force a module reload to make debugging easier (at least in PythonWin).
+reload(json_validator)
+
class JsonValidatorTest(unittest.TestCase):
    """Checks json_validator.is_valid against accepted and rejected inputs."""

    def assertValid(self, json):
        """Fail unless the validator accepts the given text."""
        verdict = json_validator.is_valid(json)
        self.assertTrue(verdict)

    def assertInvalid(self, json):
        """Fail unless the validator rejects the given text."""
        verdict = json_validator.is_valid(json)
        self.assertFalse(verdict)

    def testJsonValid(self):
        # Valid expressions.
        accepted = (
            '["\\u1abc"]',
            '[""]',
            '["\\""]',
            '[123]',
            '[]',
            '[0]',
            '[0e0]',
            '[0, -1, 1.2, -3.4, 5e+6, 7.8E-90]',
            '[true, false, null]',
            '{}',
            '{"foo":"bar"}',
            '{"1":"one", "2":["deux", "zwei"], "3":null}',
        )
        for text in accepted:
            self.assertValid(text)

    def testJsonInvalid(self):
        # Invalid expressions.
        rejected = (
            '',
            ' ',
            '1',
            '1.2',
            '"Hi"',
            'true',
            '[,,,]',
            '{[]}',
            '{"1", "2"}',
            '{"zero"}',
            '{1:"one"}',
            '{null:[]}',
            '{true:"true"}',
            '{[false]:"false"}',
            '{{}:"object"}',
            '["]',
            '["\\x"]',
            '["\\u1ab"]',
            '[{]}',
            '[1:2]',
            '[1, 2',
            '1, 2',
            '[007]',
            '[.1]',
            '[document.cookies]',
            '[alert()]',
            '[1+1]',
            '[1;2]',
        )
        for text in rejected:
            self.assertInvalid(text)

    def testJsonMultiLine(self):
        # The JSON example in Wikipedia.
        wikipedia_example = """
{
 "firstName": "John",
 "lastName": "Smith",
 "address": {
 "streetAddress": "21 2nd Street",
 "city": "New York",
 "state": "NY",
 "postalCode": 10021
 },
 "phoneNumbers": [
 { "type": "home", "number": "212 555-1234" },
 { "type": "fax", "number": "646 555-4567" }
 ],
 "newSubscription": false,
 "companyName": null
 }
"""
        self.assertValid(wikipedia_example)


# Run the suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
+

0 comments on commit 3d3aab5

Please sign in to comment.