Permalink
Browse files

Tokenizer changes:

* split up the tokenizer regex for readability
* ignore inter-token whitespace
* allow backslash-escapes in strings
* disallow control characters in strings
* clean up the number regex and don't allow a bare "." (or the empty string) as a number
* the fall-through token-matching case is "." (any single character), leaving the parser to validate it

Parser changes:
* disallow non-string keys in objects
* detect unrecognized tokens, e.g. @ A $ etc.
* consistent quoting on case patterns
* use ${token:-EOF} in error messages

fixed wrong tests (and removed the wrong-test-generator)
added new tests.
fix test/valid-test.sh to count failures like test/invalid-test.sh
removed unnecessary outlog/errlog files (and added them to .gitignore)
  • Loading branch information...
1 parent 65b1ddd commit f6e5cd2d1ef9e4cd226011072e2677575770c073 @medgar123 medgar123 committed Oct 25, 2011
View
@@ -1,3 +1,2 @@
-node_modules
-node_modules/*
-npm_debug.log
+test/errlog
+test/outlog
View
@@ -27,6 +27,6 @@ done
if [ $fail -eq 0 ]; then
echo -n 'SUCCESS '
else
- echo -n 'FAILOUR '
+ echo -n 'FAILURE '
fi
echo $passed / $tests
View
1 errlog
@@ -1 +0,0 @@
-/home/dominic/dev/JSON.sh/test/invalid-test.sh: line 10: ../bin/json_parse: No such file or directory
View
105 parse.sh
@@ -1,93 +1,90 @@
throw () {
- echo $* >&2
+ echo "$*" >&2
exit 1
}
tokenize () {
- egrep -ao '[]|[{}]|:|,|("((\\")|[^"])*")|:|(\-?[0-9]*\.?([0-9]*)?(e?\-?([0-9]*))?)|null|true|false' --color=never
+ local ESCAPE='(\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})'
+ local CHAR='[^[:cntrl:]"\\]'
+ local STRING="\"$CHAR*($ESCAPE$CHAR*)*\""
+ local NUMBER='-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?'
+ local KEYWORD='null|false|true'
+ local SPACE='[[:space:]]+'
+ egrep -ao "$STRING|$NUMBER|$KEYWORD|$SPACE|." --color=never |
+ egrep -v "^$SPACE$" # eat whitespace
}
parse_array () {
local index=0
local ary=''
- read token
+ read -r token
while true;
do
- key=$index
case "$token" in
']') break ;;
- *)
- parse_value "$1" "$index"
- let index=$index+1
- ary="$ary""$value"
- read token
- case "$token" in
- ']') break ;;
- ',') ary="$ary", ;;
- *)
- if [ "_$token" = _ ]; then token=EOF; fi
- throw "EXPECTED ] or , GOT $token"
- ;;
- esac
- read token
- ;;
esac
+ parse_value "$1" "$index"
+ let index=$index+1
+ ary="$ary""$value"
+ read -r token
+ case "$token" in
+ ']') break ;;
+ ',') ary="$ary", ;;
+ *) throw "EXPECTED ] or , GOT ${token:-EOF}" ;;
+ esac
+ read -r token
done
value=`printf '[%s]' $ary`
}
parse_object () {
- local go=true
+ local key
local obj=''
- local EXPECT_COMMA=0
- local EXPECT_COLON=0
- read token
- while [ "$go" = true ];
+ read -r token
+ while :
do
case "$token" in
'}') break ;;
- *)
-
- key=$token
- read colon
- if [ "$colon" != ':' ]; then throw "EXPECTED COLON, GOT $colon"; fi
- if [ "_$key" = _ ]; then throw "NULL KEY"; fi
- read token
- parse_value "$1" "$key"
- obj="$obj$key:$value"
-
- read token
- case "$token" in
- '}') break;;
- ,) obj="$obj,"; read token ;;
- *)
- if [ "_$token" = _ ]; then token=EOF; fi
- throw "EXPECTED , or }, but got $token"
- ;;
- esac
- ;;
+ '"'*'"') key=$token ;;
+ *) throw "EXPECTED STRING, GOT ${token:-EOF}" ;;
+ esac
+ read -r token
+ case "$token" in
+ ':') ;;
+ *) throw "EXPECTED COLON, GOT ${token:-EOF}" ;;
+ esac
+ read -r token
+ parse_value "$1" "$key"
+ obj="$obj$key:$value"
+ read -r token
+ case "$token" in
+ '}') break;;
+ ',') obj="$obj,"; read -r token ;;
+ *) throw "EXPECTED , or }, but got ${token:-EOF}" ;;
esac
done
value=`printf '{%s}' "$obj"`
}
parse_value () {
- local jpath
-
- if [ "x$1" = "x" ]; then jpath="$2"; else jpath="$1,$2"; fi
-
+ local jpath="${1:+$1,}$2"
case "$token" in
- {) parse_object "$jpath" ;;
- [) parse_array "$jpath" ;;
- ','|'}'|']') throw "EXPECTED value, GOT $token" ;;
- *) value=$token
- ;;
+ '{') parse_object "$jpath" ;;
+ '[') parse_array "$jpath" ;;
+ # At this point, the only valid single-character tokens are digits.
+ ''|[^0-9]) throw "EXPECTED value, GOT ${token:-EOF}" ;;
+ *) value=$token ;;
esac
printf "[%s]\t%s\n" "$jpath" "$value"
}
parse () {
- read token
+ read -r token
parse_value
+ read -r token
+ case "$token" in
+ '') ;;
+ *) throw "EXPECTED EOF, GOT $token" ;;
+ esac
}
View
@@ -1,10 +0,0 @@
-echo "set -e"
-for input in valid/*.json
-do
-expected=${input%.json}.parsed
-cat $input | ../bin/json_parse > $expected
-echo "# $input"
-echo "diff <(cat $input | ../bin/json_parse) ${input%.json}.parsed"
-echo "echo OK $input"
-
-done
View
@@ -1 +0,0 @@
-EXPECTED , or }, but got EOF
View
@@ -13,21 +13,19 @@ echo PWD=$PWD
fails=0
for input in invalid/*
do
- cat $input | ../bin/json_parse > outlog 2> errlog
- ret=$?
- if [ $ret -eq 0 ]; then
+ if ../bin/json_parse < "$input" > outlog 2> errlog
+ then
echo "NOT OK: cat $input | ../bin/json_parse SHOULD FAIL"
echo "OUTPUT WAS >>>"
cat outlog
echo "<<<"
let fails=$fails+1
- else
- echo "OK: cat $input | ../bin/json_parse failed correctly"
- echo "stderr was >>>"
- cat errlog
- echo "<<<"
+# else
+# echo "OK: cat $input | ../bin/json_parse failed correctly"
+# echo "stderr was >>>"
+# cat errlog
+# echo "<<<"
fi
-
done
echo "$fails test(s) failed"
-exit $fails
+exit $fails
@@ -0,0 +1 @@
+"hello\u20world"
@@ -0,0 +1 @@
+bareword
@@ -0,0 +1 @@
+{[: "bad"}
View
@@ -0,0 +1 @@
+:
@@ -0,0 +1,3 @@
+{
+ "hello": :
+}
@@ -0,0 +1 @@
+"ab"
@@ -0,0 +1 @@
+.
File renamed without changes.
@@ -0,0 +1 @@
+{false: "bad"}
@@ -0,0 +1 @@
+{null: "bad"}
@@ -0,0 +1 @@
+{5: "bad"}
@@ -0,0 +1 @@
+[1,2,3]'
@@ -0,0 +1 @@
+{true: "bad"}
@@ -0,0 +1 @@
+"Hello world
View
@@ -0,0 +1 @@
+@
@@ -0,0 +1 @@
+{@: "bad"}
View
No changes.
View
@@ -1 +0,0 @@
-["hELLO"] "goodeoeu"
View
@@ -4,7 +4,7 @@ __filename=`readlink -f $0`
__dirname=`dirname $__filename`
cd $__dirname
-. $__dirname/../parse.sh
+. ../parse.sh
set -e
diff <( echo '"dah"' | tokenize ) <( echo '"dah"' )
@@ -37,4 +37,4 @@ diff <( echo '[ null , -110e10, "null" ]' \
diff <( echo '{"e": false}' | tokenize ) <( printf '{\n"e"\n:\nfalse\n}\n' )
diff <( echo '{"e": "string"}' | tokenize ) <( printf '{\n"e"\n:\n"string"\n}\n' )
-cat ../package.json | tokenize
+cat ../package.json | tokenize
View
@@ -5,30 +5,14 @@ __dirname=`dirname $__filename`
cd $__dirname
set -e
-# valid/array.json
-diff <(cat valid/array.json | ../bin/json_parse) valid/array.parsed
-echo OK valid/array.json
-# valid/empty_array.json
-diff <(cat valid/empty_array.json | ../bin/json_parse) valid/empty_array.parsed
-echo OK valid/empty_array.json
-# valid/empty_object.json
-diff <(cat valid/empty_object.json | ../bin/json_parse) valid/empty_object.parsed
-echo OK valid/empty_object.json
-# valid/many_object.json
-diff <(cat valid/many_object.json | ../bin/json_parse) valid/many_object.parsed
-echo OK valid/many_object.json
-# valid/nested_array.json
-diff <(cat valid/nested_array.json | ../bin/json_parse) valid/nested_array.parsed
-echo OK valid/nested_array.json
-# valid/nested_object.json
-diff <(cat valid/nested_object.json | ../bin/json_parse) valid/nested_object.parsed
-echo OK valid/nested_object.json
-# valid/number.json
-diff <(cat valid/number.json | ../bin/json_parse) valid/number.parsed
-echo OK valid/number.json
-# valid/object.json
-diff <(cat valid/object.json | ../bin/json_parse) valid/object.parsed
-echo OK valid/object.json
-# valid/string.json
-diff <(cat valid/string.json | ../bin/json_parse) valid/string.parsed
-echo OK valid/string.json
+fails=0
+for input in valid/*.json
+do
+ expected="${input%.json}.parsed"
+ if ! ../bin/json_parse < "$input" | diff -u - "$expected"
+ then
+ let fails=$fails+1
+ fi
+done
+echo "$fails test(s) failed"
+exit $fails
@@ -5,5 +5,5 @@
[2,2] {}
[2] [4,"hello",{}]
[3,"array"] []
-[3] {0:[]}
-[] [1,[],[4,"hello",{}],{0:[]}]
+[3] {"array":[]}
+[] [1,[],[4,"hello",{}],{"array":[]}]
@@ -2,4 +2,4 @@
["object","empty"] {}
["object"] {"key":"value","empty":{}}
["number"] 5
-[] {"empty":{"key":"value","empty":{}},"number":5}
+[] {"object":{"key":"value","empty":{}},"number":5}
@@ -0,0 +1 @@
+"hello\tworld"
@@ -0,0 +1 @@
+[] "hello\tworld"

0 comments on commit f6e5cd2

Please sign in to comment.