Permalink
Browse files

Refactored minify_json.py; it now uses unittest for testing.

Changed the variable names.
Tests now use unittest. Conditions are shorter, more concise and, I believe, more readable.
  • Loading branch information...
1 parent 9024cfc commit b16562a55f01e83a7e702c0f8e4b260846d283fe @pradyunsg pradyunsg committed Sep 26, 2013
Showing with 109 additions and 93 deletions.
  1. +109 −93 minify_json.py
View
@@ -1,112 +1,128 @@
'''
Created on 20/01/2011
-v0.1 (C) Gerald Storer
+v0.2 (C) Gerald Storer
MIT License
-Based on JSON.minify.js:
+Based on JSON.minify.js:
https://github.com/getify/JSON.minify
+
+Contributors:
+ - Pradyun S. Gedam (conditions and variable names changed)
'''
+
import re
-def json_minify(json,strip_space=True):
- tokenizer=re.compile('"|(/\*)|(\*/)|(//)|\n|\r')
+def json_minify(string, strip_space=True):
+ tokenizer = re.compile('"|(/\*)|(\*/)|(//)|\n|\r')
in_string = False
- in_multiline_comment = False
- in_singleline_comment = False
-
+ in_multi = False
+ in_single = False
+
new_str = []
- from_index = 0 # from is a keyword in Python
-
- for match in re.finditer(tokenizer,json):
-
- if not in_multiline_comment and not in_singleline_comment:
- tmp2 = json[from_index:match.start()]
+ index = 0
+
+ for match in re.finditer(tokenizer, string):
+
+ if not (in_multi or in_single):
+ tmp = string[index:match.start()]
if not in_string and strip_space:
- tmp2 = re.sub('[ \t\n\r]*','',tmp2) # replace only white space defined in standard
- new_str.append(tmp2)
-
- from_index = match.end()
-
- if match.group() == '"' and not in_multiline_comment and not in_singleline_comment:
- escaped = re.search('(\\\\)*$',json[:match.start()])
- if not in_string or escaped is None or len(escaped.group()) % 2 == 0:
- # start of string with ", or unescaped " character found to end string
+ # replace white space as defined in standard
+ tmp = re.sub('[ \t\n\r]*', '', tmp)
+ new_str.append(tmp)
+
+ index = match.end()
+ val = match.group()
+
+ if val == '"' and not (in_multi or in_single):
+ escaped = re.search(r'(\\)*$', string[:match.start()])
+
+ # start of string or unescaped quote character to end string
+ if not in_string or (escaped is None or len(escaped.group()) % 2 == 0):
in_string = not in_string
- from_index -= 1 # include " character in next catch
-
- elif match.group() == '/*' and not in_string and not in_multiline_comment and not in_singleline_comment:
- in_multiline_comment = True
- elif match.group() == '*/' and not in_string and in_multiline_comment and not in_singleline_comment:
- in_multiline_comment = False
- elif match.group() == '//' and not in_string and not in_multiline_comment and not in_singleline_comment:
- in_singleline_comment = True
- elif (match.group() == '\n' or match.group() == '\r') and not in_string and not in_multiline_comment and in_singleline_comment:
- in_singleline_comment = False
- elif not in_multiline_comment and not in_singleline_comment and (
- match.group() not in ['\n','\r',' ','\t'] or not strip_space):
- new_str.append(match.group())
-
- new_str.append(json[from_index:])
+ index -= 1 # include " character in next catch
+ elif not (in_string or in_multi or in_single):
+ if val == '/*':
+ in_multi = True
+ elif val == '//':
+ in_single = True
+ elif val == '*/' and in_multi and not (in_string or in_single):
+ in_multi = False
+ elif val in '\r\n' and not (in_multi or in_string) and in_single:
+ in_single = False
+ elif not ((in_multi or in_single) or (val in ' \r\n\t' and strip_space)):
+ new_str.append(val)
+
+ new_str.append(string[index:])
return ''.join(new_str)
+
if __name__ == '__main__':
- import json # requires Python 2.6+ to run tests
-
- def test_json(s):
- return json.loads(json_minify(s))
-
- test1 = '''// this is a JSON file with comments
-{
- "foo": "bar", // this is cool
- "bar": [
- "baz", "bum", "zam"
- ],
-/* the rest of this document is just fluff
- in case you are interested. */
- "something": 10,
- "else": 20
-}
-
-/* NOTE: You can easily strip the whitespace and comments
- from such a file with the JSON.minify() project hosted
- here on github at http://github.com/getify/JSON.minify
-*/
-'''
+ # Python 2.6+ needed to run tests
+ import json
+ import textwrap
+ import unittest
- test1_res = '''{"foo":"bar","bar":["baz","bum","zam"],"something":10,"else":20}'''
-
- test2 = '''
-{"/*":"*/","//":"",/*"//"*/"/*/"://
-"//"}
+ class JsonMinifyTestCase(unittest.TestCase):
+ """Tests for json_minify"""
+ def template(self, in_string, expected):
+ in_dict = json.loads(json_minify(in_string))
+ expected_dict = json.loads(expected)
+ self.assertEqual(in_dict, expected_dict)
-'''
- test2_res = '''{"/*":"*/","//":"","/*/":"//"}'''
-
- test3 = r'''/*
-this is a
-multi line comment */{
-
-"foo"
-:
- "bar/*"// something
- , "b\"az":/*
-something else */"blah"
-
-}
-'''
- test3_res = r'''{"foo":"bar/*","b\"az":"blah"}'''
-
- test4 = r'''{"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
- "baz\\\\": /* yay */ "fo\\\\\"*/o"
-}
-'''
- test4_res = r'''{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'''
-
- assert test_json(test1) == json.loads(test1_res),'Failed test 1'
- assert test_json(test2) == json.loads(test2_res),'Failed test 2'
- assert test_json(test3) == json.loads(test3_res),'Failed test 3'
- assert test_json(test4) == json.loads(test4_res),'Failed test 4'
- if __debug__: # Don't print passed message if the asserts didn't run
- print 'Passed all tests'
+ def test_1(self):
+ self.template(textwrap.dedent('''
+ // this is a JSON file with comments
+ {
+ "foo": "bar", // this is cool
+ "bar": [
+ "baz", "bum"
+ ],
+ /* the rest of this document is just fluff
+ in case you are interested. */
+ "something": 10,
+ "else": 20
+ }
+
+ /* NOTE: You can easily strip the whitespace and comments
+ from such a file with the JSON.minify() project hosted
+ here on github at http://github.com/getify/JSON.minify
+ */'''),
+ '{"foo":"bar","bar":["baz","bum"],"something":10,"else":20}'
+ )
+
+ def test_2(self):
+ self.template(textwrap.dedent('''
+ {"/*":"*/","//":"",/*"//"*/"/*/"://
+ "//"}'''),
+ '{"/*":"*/","//":"","/*/":"//"}'
+ )
+
+ def test_3(self):
+ self.template(textwrap.dedent(r'''
+ /*
+ this is a
+ multi line comment */{
+
+ "foo"
+ :
+ "bar/*"// something
+ , "b\"az":/*
+ something else */"blah"
+
+ }
+ '''),
+ r'{"foo":"bar/*","b\"az":"blah"}'
+ )
+
+ def test_4(self):
+ self.template(textwrap.dedent(r'''
+ {"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
+ "baz\\\\": /* yay */ "fo\\\\\"*/o"
+ }
+ '''),
+ r'{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'
+ )
+
+ unittest.main()

0 comments on commit b16562a

Please sign in to comment.