Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from pradyun/patch-1
Refactored minify_json.py, Uses unittest for testing.
- Loading branch information
Showing
1 changed file
with
109 additions
and
93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,112 +1,128 @@ | ||
''' | ||
Created on 20/01/2011 | ||
v0.1 (C) Gerald Storer | ||
v0.2 (C) Gerald Storer | ||
MIT License | ||
Based on JSON.minify.js: | ||
Based on JSON.minify.js: | ||
https://github.com/getify/JSON.minify | ||
Contributors: | ||
- Pradyun S. Gedam (conditions and variable names changed) | ||
''' | ||
|
||
|
||
import re | ||
|
||
def json_minify(string, strip_space=True):
    """Remove ``//`` and ``/* ... */`` comments from JSON-like text.

    The text is scanned token by token (quotes, comment delimiters and line
    breaks) while tracking whether the scanner is inside a string literal, a
    multi-line comment or a single-line comment. The three states are
    mutually exclusive.

    Args:
        string: The JSON text, possibly containing comments.
        strip_space: When true (the default), spaces, tabs, carriage returns
            and line feeds outside of string literals are removed as well.
            When false, whitespace outside of comments is kept; the line
            break that terminates a ``//`` comment is dropped with it.

    Returns:
        The text with comments (and optionally whitespace) removed.
        Comment delimiters inside string literals are left untouched. An
        unterminated comment at the end of the input is discarded.
    """
    tokenizer = re.compile(r'"|/\*|\*/|//|\n|\r')
    whitespace = re.compile(r'[ \t\n\r]+')
    line_breaks = ('\n', '\r')

    in_string = False
    in_multi = False
    in_single = False

    pieces = []
    index = 0  # start of the text not yet copied to the output

    for match in tokenizer.finditer(string):
        start = match.start()
        val = match.group()

        # Copy the text between the previous token and this one, unless it
        # is the inside of a comment.
        if not (in_multi or in_single):
            chunk = string[index:start]
            if strip_space and not in_string:
                # only the white space defined by the JSON standard
                chunk = whitespace.sub('', chunk)
            pieces.append(chunk)

        index = match.end()

        if in_multi:
            if val == '*/':
                in_multi = False
        elif in_single:
            if val in line_breaks:
                in_single = False
        elif val == '"':
            # A quote preceded by an odd number of backslashes is escaped
            # and does not end the string. Count backwards in place rather
            # than slicing the whole prefix for every quote.
            backslashes = 0
            pos = start - 1
            while pos >= 0 and string[pos] == '\\':
                backslashes += 1
                pos -= 1
            if not in_string or backslashes % 2 == 0:
                in_string = not in_string
            index = start  # include the " character in the next chunk
        elif in_string:
            # Comment delimiters are ordinary characters inside a string.
            # A raw line break inside a string is still dropped when
            # stripping whitespace, as before.
            if not (strip_space and val in line_breaks):
                pieces.append(val)
        elif val == '/*':
            in_multi = True
        elif val == '//':
            in_single = True
        elif not (strip_space and val in line_breaks):
            # Line breaks kept on request, or a stray "*/" outside of any
            # comment: pass it through instead of silently losing it.
            pieces.append(val)

    # Whatever follows the last token. If a comment is still open it is
    # comment text and must not leak into the output.
    if not (in_multi or in_single):
        tail = string[index:]
        if strip_space and not in_string:
            tail = whitespace.sub('', tail)
        pieces.append(tail)

    return ''.join(pieces)
|
||
|
||
if __name__ == '__main__':
    # Self-test: run this module directly to execute the suite below.
    # Python 2.6+ needed to run tests
    import json
    import textwrap
    import unittest

    class JsonMinifyTestCase(unittest.TestCase):
        """Tests for json_minify"""

        def template(self, in_string, expected):
            """Minify ``in_string`` and compare the parsed result with ``expected``.

            Both sides are parsed with ``json.loads`` so the comparison is
            on the decoded data, not on the exact minified text.
            """
            in_dict = json.loads(json_minify(in_string))
            expected_dict = json.loads(expected)
            self.assertEqual(in_dict, expected_dict)

        def test_1(self):
            # Single-line and multi-line comments around ordinary members.
            self.template(textwrap.dedent('''
                // this is a JSON file with comments
                {
                    "foo": "bar",    // this is cool
                    "bar": [
                        "baz", "bum"
                    ],
                /* the rest of this document is just fluff
                   in case you are interested. */
                    "something": 10,
                    "else": 20
                }
                /* NOTE: You can easily strip the whitespace and comments
                   from such a file with the JSON.minify() project hosted
                   here on github at http://github.com/getify/JSON.minify
                */'''),
                '{"foo":"bar","bar":["baz","bum"],"something":10,"else":20}'
            )

        def test_2(self):
            # Comment delimiters used as keys and values inside strings.
            self.template(textwrap.dedent('''
                {"/*":"*/","//":"",/*"//"*/"/*/"://
                "//"}'''),
                '{"/*":"*/","//":"","/*/":"//"}'
            )

        def test_3(self):
            # Comments between tokens, plus an escaped quote inside a key.
            self.template(textwrap.dedent(r'''
                /*
                this is a
                multi line comment */{
                "foo"
                :
                    "bar/*"// something
                    ,    "b\"az":/*
                something else */"blah"
                }
                '''),
                r'{"foo":"bar/*","b\"az":"blah"}'
            )

        def test_4(self):
            # Runs of backslashes before quotes: even runs end the string,
            # odd runs escape the quote.
            self.template(textwrap.dedent(r'''
                {"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
                    "baz\\\\": /* yay */ "fo\\\\\"*/o"
                }
                '''),
                r'{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'
            )

    unittest.main()