'''
Created on 20/01/2011

v0.2 (C) Gerald Storer
MIT License

Based on JSON.minify.js:
https://github.com/getify/JSON.minify

Contributors:
 - Pradyun S. Gedam (conditions and variable names changed)
'''

import re

# Tokens that can change the scanner state: a double quote, the comment
# delimiters and the two line-break characters.
_TOKEN_RE = re.compile(r'"|/\*|\*/|//|\n|\r')

# Insignificant whitespace as defined by the JSON standard.
_WHITESPACE_RE = re.compile(r'[ \t\n\r]+')


def _is_escaped(text, pos):
    '''Return True if text[pos] is preceded by an odd number of backslashes.'''
    backslashes = 0
    cursor = pos - 1
    while cursor >= 0 and text[cursor] == '\\':
        backslashes += 1
        cursor -= 1
    return backslashes % 2 == 1


def json_minify(string, strip_space=True):
    '''Remove comments and, optionally, whitespace from JSON-like text.

    ``//`` line comments and ``/* */`` block comments are removed wherever
    they appear outside a string literal.  Comment markers inside a string
    literal are kept verbatim.

    Parameters:
        string: the JSON text, possibly containing comments.
        strip_space: when true (the default), spaces, tabs and line breaks
            outside string literals are removed as well.  When false, they
            are kept, except for the line break that terminates a ``//``
            comment, which is consumed together with the comment.

    Returns:
        The minified text as a new string.

    Notes:
        * A comment that is still open at the end of the input (``//``
          without a final line break, or an unterminated ``/*``) is dropped
          in full.
        * Raw line breaks inside a string literal (not valid JSON) are
          removed when ``strip_space`` is true, as in earlier versions.
    '''
    in_string = False
    in_multi = False
    in_single = False

    pieces = []
    index = 0  # end of the previously handled token

    for match in _TOKEN_RE.finditer(string):
        start = match.start()
        token = match.group()

        # Text between two tokens is kept unless it is comment content.
        if not (in_multi or in_single):
            chunk = string[index:start]
            if strip_space and not in_string:
                chunk = _WHITESPACE_RE.sub('', chunk)
            pieces.append(chunk)
        index = match.end()

        if in_multi:
            # Only the closing delimiter matters inside a block comment.
            if token == '*/':
                in_multi = False
        elif in_single:
            # A line comment runs up to (and including) the line break.
            if token in ('\n', '\r'):
                in_single = False
        elif token == '"':
            # An opening quote always starts a string; inside a string only
            # an unescaped quote ends it.
            if not in_string or not _is_escaped(string, start):
                in_string = not in_string
            pieces.append(token)
        elif in_string:
            # Comment markers inside a string are ordinary characters.
            if not (strip_space and token in ('\n', '\r')):
                pieces.append(token)
        elif token == '/*':
            in_multi = True
        elif token == '//':
            in_single = True
        elif token == '*/' or not strip_space:
            # A stray '*/' is not ours to delete; line breaks outside
            # strings survive only when whitespace is being preserved.
            pieces.append(token)

    # Whatever follows the last token gets the same treatment as any other
    # chunk: dropped inside a comment, stripped outside a string.
    if not (in_multi or in_single):
        tail = string[index:]
        if strip_space and not in_string:
            tail = _WHITESPACE_RE.sub('', tail)
        pieces.append(tail)

    return ''.join(pieces)


if __name__ == '__main__':
    # Python 2.6+ needed to run tests
    import json
    import textwrap
    import unittest

    class JsonMinifyTestCase(unittest.TestCase):
        """Tests for json_minify"""

        def template(self, in_string, expected):
            # Compare parsed documents so key order and spacing do not matter.
            in_dict = json.loads(json_minify(in_string))
            expected_dict = json.loads(expected)
            self.assertEqual(in_dict, expected_dict)

        def test_1(self):
            self.template(textwrap.dedent('''
                // this is a JSON file with comments
                {
                    "foo": "bar",    // this is cool
                    "bar": [
                        "baz", "bum"
                    ],
                /* the rest of this document is just fluff
                   in case you are interested. */
                    "something": 10,
                    "else": 20
                }

                /* NOTE: You can easily strip the whitespace and comments
                   from such a file with the JSON.minify() project hosted
                   here on github at http://github.com/getify/JSON.minify
                */'''),
                '{"foo":"bar","bar":["baz","bum"],"something":10,"else":20}'
            )

        def test_2(self):
            self.template(textwrap.dedent('''
                {"/*":"*/","//":"",/*"//"*/"/*/"://
                "//"}'''),
                '{"/*":"*/","//":"","/*/":"//"}'
            )

        def test_3(self):
            self.template(textwrap.dedent(r'''
                /*
                this is a
                multi line comment */{

                "foo"
                :
                    "bar/*"// something
                    ,    "b\"az":/*
                something else */"blah"

                }
                '''),
                r'{"foo":"bar/*","b\"az":"blah"}'
            )

        def test_4(self):
            self.template(textwrap.dedent(r'''
                {"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
                    "baz\\\\": /* yay */ "fo\\\\\"*/o"
                }
                '''),
                r'{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'
            )

        def test_no_tokens(self):
            self.assertEqual(json_minify('[1, 2]   '), '[1,2]')

        def test_unterminated_comments(self):
            self.assertEqual(json_minify('{"a":1} // trailing'), '{"a":1}')
            self.assertEqual(json_minify('[1] /* open'), '[1]')

        def test_keep_whitespace(self):
            self.assertEqual(
                json_minify('{\n"a": 1\n}', strip_space=False),
                '{\n"a": 1\n}'
            )

    unittest.main()