Skip to content

Commit

Permalink
Tokenizer changes:
Browse files Browse the repository at this point in the history
* split up the tokenizer regex for readability
* ignore inter-token whitespace
* allow backslash-escapes in strings
* disallow control characters in strings
* clean up the number regex so it no longer matches a bare `.` (or the empty string)
* the fall-through token-matching case is `.` (any single character), leaving the parser to validate it.

Parser changes:
* disallow non-string keys in objects
* detect unrecognized tokens, e.g. @ A $ etc.
* consistent quoting on case patterns
* use ${token:-EOF} in error messages

Fixed wrong tests (and removed the wrong-test generator).
Added new tests.
Fixed test/valid-test.sh to count failures like test/invalid-test.sh.
Removed unnecessary outlog/errlog files (and added them to .gitignore).
  • Loading branch information
medgar123 committed Oct 25, 2011
1 parent 65b1ddd commit f6e5cd2
Show file tree
Hide file tree
Showing 31 changed files with 97 additions and 113 deletions.
5 changes: 2 additions & 3 deletions .gitignore
Original file line number Original file line Diff line number Diff line change
@@ -1,3 +1,2 @@
node_modules test/errlog
node_modules/* test/outlog
npm_debug.log
2 changes: 1 addition & 1 deletion all-tests.sh
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@ done
if [ $fail -eq 0 ]; then if [ $fail -eq 0 ]; then
echo -n 'SUCCESS ' echo -n 'SUCCESS '
else else
echo -n 'FAILOUR ' echo -n 'FAILURE '
fi fi
echo $passed / $tests echo $passed / $tests
1 change: 0 additions & 1 deletion errlog

This file was deleted.

105 changes: 51 additions & 54 deletions parse.sh
Original file line number Original file line Diff line number Diff line change
@@ -1,93 +1,90 @@


throw () { throw () {
echo $* >&2 echo "$*" >&2
exit 1 exit 1
} }


tokenize () { tokenize () {
egrep -ao '[]|[{}]|:|,|("((\\")|[^"])*")|:|(\-?[0-9]*\.?([0-9]*)?(e?\-?([0-9]*))?)|null|true|false' --color=never local ESCAPE='(\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})'
local CHAR='[^[:cntrl:]"\\]'
local STRING="\"$CHAR*($ESCAPE$CHAR*)*\""
local NUMBER='-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?'
local KEYWORD='null|false|true'
local SPACE='[[:space:]]+'
egrep -ao "$STRING|$NUMBER|$KEYWORD|$SPACE|." --color=never |
egrep -v "^$SPACE$" # eat whitespace
} }


parse_array () { parse_array () {
local index=0 local index=0
local ary='' local ary=''
read token read -r token
while true; while true;
do do
key=$index
case "$token" in case "$token" in
']') break ;; ']') break ;;
*)
parse_value "$1" "$index"
let index=$index+1
ary="$ary""$value"
read token
case "$token" in
']') break ;;
',') ary="$ary", ;;
*)
if [ "_$token" = _ ]; then token=EOF; fi
throw "EXPECTED ] or , GOT $token"
;;
esac
read token
;;
esac esac
parse_value "$1" "$index"
let index=$index+1
ary="$ary""$value"
read -r token
case "$token" in
']') break ;;
',') ary="$ary", ;;
*) throw "EXPECTED ] or , GOT ${token:-EOF}" ;;
esac
read -r token
done done
value=`printf '[%s]' $ary` value=`printf '[%s]' $ary`
} }


parse_object () { parse_object () {
local go=true local key
local obj='' local obj=''
local EXPECT_COMMA=0 read -r token
local EXPECT_COLON=0 while :
read token
while [ "$go" = true ];
do do
case "$token" in case "$token" in
'}') break ;; '}') break ;;
*) '"'*'"') key=$token ;;

*) throw "EXPECTED STRING, GOT ${token:-EOF}" ;;
key=$token esac
read colon read -r token
if [ "$colon" != ':' ]; then throw "EXPECTED COLON, GOT $colon"; fi case "$token" in
if [ "_$key" = _ ]; then throw "NULL KEY"; fi ':') ;;
read token *) throw "EXPECTED COLON, GOT ${token:-EOF}" ;;
parse_value "$1" "$key" esac
obj="$obj$key:$value" read -r token

parse_value "$1" "$key"
read token obj="$obj$key:$value"
case "$token" in read -r token
'}') break;; case "$token" in
,) obj="$obj,"; read token ;; '}') break;;
*) ',') obj="$obj,"; read -r token ;;
if [ "_$token" = _ ]; then token=EOF; fi *) throw "EXPECTED , or }, but got ${token:-EOF}" ;;
throw "EXPECTED , or }, but got $token"
;;
esac
;;
esac esac
done done
value=`printf '{%s}' "$obj"` value=`printf '{%s}' "$obj"`
} }


parse_value () { parse_value () {
local jpath local jpath="${1:+$1,}$2"

if [ "x$1" = "x" ]; then jpath="$2"; else jpath="$1,$2"; fi

case "$token" in case "$token" in
{) parse_object "$jpath" ;; '{') parse_object "$jpath" ;;
[) parse_array "$jpath" ;; '[') parse_array "$jpath" ;;
','|'}'|']') throw "EXPECTED value, GOT $token" ;; # At this point, the only valid single-character tokens are digits.
*) value=$token ''|[^0-9]) throw "EXPECTED value, GOT ${token:-EOF}" ;;
;; *) value=$token ;;
esac esac
printf "[%s]\t%s\n" "$jpath" "$value" printf "[%s]\t%s\n" "$jpath" "$value"
} }


parse () { parse () {
read token read -r token
parse_value parse_value
read -r token
case "$token" in
'') ;;
*) throw "EXPECTED EOF, GOT $token" ;;
esac
} }
10 changes: 0 additions & 10 deletions test/.generate-valid

This file was deleted.

1 change: 0 additions & 1 deletion test/errlog

This file was deleted.

18 changes: 8 additions & 10 deletions test/invalid-test.sh
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -13,21 +13,19 @@ echo PWD=$PWD
fails=0 fails=0
for input in invalid/* for input in invalid/*
do do
cat $input | ../bin/json_parse > outlog 2> errlog if ../bin/json_parse < "$input" > outlog 2> errlog
ret=$? then
if [ $ret -eq 0 ]; then
echo "NOT OK: cat $input | ../bin/json_parse SHOULD FAIL" echo "NOT OK: cat $input | ../bin/json_parse SHOULD FAIL"
echo "OUTPUT WAS >>>" echo "OUTPUT WAS >>>"
cat outlog cat outlog
echo "<<<" echo "<<<"
let fails=$fails+1 let fails=$fails+1
else # else
echo "OK: cat $input | ../bin/json_parse failed correctly" # echo "OK: cat $input | ../bin/json_parse failed correctly"
echo "stderr was >>>" # echo "stderr was >>>"
cat errlog # cat errlog
echo "<<<" # echo "<<<"
fi fi

done done
echo "$fails test(s) failed" echo "$fails test(s) failed"
exit $fails exit $fails
1 change: 1 addition & 0 deletions test/invalid/bad_unicode_sequence.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
"hello\u20world"
1 change: 1 addition & 0 deletions test/invalid/bareword.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
bareword
1 change: 1 addition & 0 deletions test/invalid/bracket_key.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
{[: "bad"}
1 change: 1 addition & 0 deletions test/invalid/colon.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
:
3 changes: 3 additions & 0 deletions test/invalid/colon_obj.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"hello": :
}
1 change: 1 addition & 0 deletions test/invalid/control_char_in_string.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
"ab"
1 change: 1 addition & 0 deletions test/invalid/decimal_point.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
.
File renamed without changes.
1 change: 1 addition & 0 deletions test/invalid/false_key.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
{false: "bad"}
1 change: 1 addition & 0 deletions test/invalid/null_key.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
{null: "bad"}
1 change: 1 addition & 0 deletions test/invalid/number_key.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
{5: "bad"}
1 change: 1 addition & 0 deletions test/invalid/trailing_garbage.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
[1,2,3]'
1 change: 1 addition & 0 deletions test/invalid/true_key.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
{true: "bad"}
1 change: 1 addition & 0 deletions test/invalid/unclosed_string.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
"Hello world
1 change: 1 addition & 0 deletions test/invalid/weird.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
@
1 change: 1 addition & 0 deletions test/invalid/weird_key.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
{@: "bad"}
Empty file removed test/out
Empty file.
1 change: 0 additions & 1 deletion test/outlog

This file was deleted.

4 changes: 2 additions & 2 deletions test/tokenizer-test.sh
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ __filename=`readlink -f $0`
__dirname=`dirname $__filename` __dirname=`dirname $__filename`
cd $__dirname cd $__dirname


. $__dirname/../parse.sh . ../parse.sh
set -e set -e


diff <( echo '"dah"' | tokenize ) <( echo '"dah"' ) diff <( echo '"dah"' | tokenize ) <( echo '"dah"' )
Expand Down Expand Up @@ -37,4 +37,4 @@ diff <( echo '[ null , -110e10, "null" ]' \
diff <( echo '{"e": false}' | tokenize ) <( printf '{\n"e"\n:\nfalse\n}\n' ) diff <( echo '{"e": false}' | tokenize ) <( printf '{\n"e"\n:\nfalse\n}\n' )
diff <( echo '{"e": "string"}' | tokenize ) <( printf '{\n"e"\n:\n"string"\n}\n' ) diff <( echo '{"e": "string"}' | tokenize ) <( printf '{\n"e"\n:\n"string"\n}\n' )


cat ../package.json | tokenize cat ../package.json | tokenize
38 changes: 11 additions & 27 deletions test/valid-test.sh
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -5,30 +5,14 @@ __dirname=`dirname $__filename`
cd $__dirname cd $__dirname


set -e set -e
# valid/array.json fails=0
diff <(cat valid/array.json | ../bin/json_parse) valid/array.parsed for input in valid/*.json
echo OK valid/array.json do
# valid/empty_array.json expected="${input%.json}.parsed"
diff <(cat valid/empty_array.json | ../bin/json_parse) valid/empty_array.parsed if ! ../bin/json_parse < "$input" | diff -u - "$expected"
echo OK valid/empty_array.json then
# valid/empty_object.json let fails=$fails+1
diff <(cat valid/empty_object.json | ../bin/json_parse) valid/empty_object.parsed fi
echo OK valid/empty_object.json done
# valid/many_object.json echo "$fails test(s) failed"
diff <(cat valid/many_object.json | ../bin/json_parse) valid/many_object.parsed exit $fails
echo OK valid/many_object.json
# valid/nested_array.json
diff <(cat valid/nested_array.json | ../bin/json_parse) valid/nested_array.parsed
echo OK valid/nested_array.json
# valid/nested_object.json
diff <(cat valid/nested_object.json | ../bin/json_parse) valid/nested_object.parsed
echo OK valid/nested_object.json
# valid/number.json
diff <(cat valid/number.json | ../bin/json_parse) valid/number.parsed
echo OK valid/number.json
# valid/object.json
diff <(cat valid/object.json | ../bin/json_parse) valid/object.parsed
echo OK valid/object.json
# valid/string.json
diff <(cat valid/string.json | ../bin/json_parse) valid/string.parsed
echo OK valid/string.json
4 changes: 2 additions & 2 deletions test/valid/nested_array.parsed
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
[2,2] {} [2,2] {}
[2] [4,"hello",{}] [2] [4,"hello",{}]
[3,"array"] [] [3,"array"] []
[3] {0:[]} [3] {"array":[]}
[] [1,[],[4,"hello",{}],{0:[]}] [] [1,[],[4,"hello",{}],{"array":[]}]
2 changes: 1 addition & 1 deletion test/valid/nested_object.parsed
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
["object","empty"] {} ["object","empty"] {}
["object"] {"key":"value","empty":{}} ["object"] {"key":"value","empty":{}}
["number"] 5 ["number"] 5
[] {"empty":{"key":"value","empty":{}},"number":5} [] {"object":{"key":"value","empty":{}},"number":5}
1 change: 1 addition & 0 deletions test/valid/tab_escape.json
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
"hello\tworld"
1 change: 1 addition & 0 deletions test/valid/tab_escape.parsed
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1 @@
[] "hello\tworld"

0 comments on commit f6e5cd2

Please sign in to comment.