Skip to content

Commit

Permalink
Merge pull request #11 from pradyun/patch-1
Browse files Browse the repository at this point in the history
Refactored minify_json.py, Uses unittest for testing.
  • Loading branch information
getify committed Sep 26, 2013
2 parents 9024cfc + b16562a commit 38e7efc
Showing 1 changed file with 109 additions and 93 deletions.
202 changes: 109 additions & 93 deletions minify_json.py
@@ -1,112 +1,128 @@
'''
Created on 20/01/2011
v0.1 (C) Gerald Storer
v0.2 (C) Gerald Storer
MIT License
Based on JSON.minify.js:
https://github.com/getify/JSON.minify
Contributors:
- Pradyun S. Gedam (conditions and variable names changed)
'''


import re

# Tokens that can change the scanner state: a double quote, the comment
# delimiters, and line breaks (which terminate a single-line comment).
_TOKENIZER = re.compile(r'"|(/\*)|(\*/)|(//)|\n|\r')

# Insignificant whitespace as defined by the JSON standard.
_WHITESPACE = re.compile(r'[ \t\n\r]+')


def _is_escaped(text, pos):
    """Return True if text[pos] is preceded by an odd number of backslashes."""
    count = 0
    while pos > count and text[pos - count - 1] == '\\':
        count += 1
    return count % 2 == 1


def json_minify(string, strip_space=True):
    """Remove ``//`` and ``/* */`` comments (and optionally whitespace) from JSON.

    Args:
        string: JSON text that may contain JavaScript-style comments.
        strip_space: When true (the default), whitespace outside of string
            literals is removed as well. When false, only comments are
            removed and the remaining text is kept as written.

    Returns:
        The minified text. Comment markers that appear inside string
        literals are left untouched.
    """
    in_string = False
    in_multi = False
    in_single = False

    new_str = []
    index = 0  # start of the text not yet copied to the output

    for match in _TOKENIZER.finditer(string):
        val = match.group()

        # Copy the text between the previous token and this one, unless it
        # belongs to a comment.
        if not (in_multi or in_single):
            tmp = string[index:match.start()]
            if strip_space and not in_string:
                tmp = _WHITESPACE.sub('', tmp)
            new_str.append(tmp)
        index = match.end()

        if in_multi:
            # Only the closing delimiter matters inside a block comment.
            if val == '*/':
                in_multi = False
        elif in_single:
            # A line comment runs up to (and consumes) the first line break.
            if val in '\r\n':
                in_single = False
        elif val == '"':
            # Opening quote, or an unescaped quote that closes the string.
            if not in_string or not _is_escaped(string, match.start()):
                in_string = not in_string
            index -= 1  # include the quote character in the next chunk
        elif in_string:
            # Comment markers inside a string are literal text. Raw line
            # breaks are dropped when stripping, as the original code did.
            if not (strip_space and val in '\r\n'):
                new_str.append(val)
        elif val == '/*':
            in_multi = True
        elif val == '//':
            in_single = True
        elif val == '*/' or not strip_space:
            # A stray '*/' is not a comment and is kept; line breaks outside
            # strings and comments are kept only when not stripping.
            new_str.append(val)

    # Text after the last token: discard it if it is the remainder of an
    # unterminated comment, otherwise treat it like any other chunk.
    if not (in_multi or in_single):
        tmp = string[index:]
        if strip_space and not in_string:
            tmp = _WHITESPACE.sub('', tmp)
        new_str.append(tmp)

    return ''.join(new_str)


if __name__ == '__main__':
    # Self-test: run this module directly to execute the unittest suite.
    # Python 2.6+ needed to run tests
    import json
    import textwrap
    import unittest

    class JsonMinifyTestCase(unittest.TestCase):
        """Tests for json_minify"""

        def template(self, in_string, expected):
            """Minify in_string and compare the parsed result with expected.

            Both sides are parsed with json.loads, so the comparison is on
            the decoded data rather than on the exact minified text.
            """
            in_dict = json.loads(json_minify(in_string))
            expected_dict = json.loads(expected)
            self.assertEqual(in_dict, expected_dict)

        def test_1(self):
            # Line and block comments around and between members.
            self.template(textwrap.dedent('''
                // this is a JSON file with comments
                {
                    "foo": "bar",    // this is cool
                    "bar": [
                        "baz", "bum"
                    ],
                /* the rest of this document is just fluff
                   in case you are interested. */
                    "something": 10,
                    "else": 20
                }
                /* NOTE: You can easily strip the whitespace and comments
                   from such a file with the JSON.minify() project hosted
                   here on github at http://github.com/getify/JSON.minify
                */'''),
                '{"foo":"bar","bar":["baz","bum"],"something":10,"else":20}'
            )

        def test_2(self):
            # Comment markers inside string literals must be preserved.
            self.template(textwrap.dedent('''
                {"/*":"*/","//":"",/*"//"*/"/*/"://
                "//"}'''),
                '{"/*":"*/","//":"","/*/":"//"}'
            )

        def test_3(self):
            # Comments between tokens and an escaped quote inside a key.
            self.template(textwrap.dedent(r'''
                /*
                this is a
                multi line comment */{
                "foo"
                :
                    "bar/*"// something
                    ,    "b\"az":/*
                something else */"blah"
                }
                '''),
                r'{"foo":"bar/*","b\"az":"blah"}'
            )

        def test_4(self):
            # Runs of backslashes before quotes (escaped and unescaped).
            self.template(textwrap.dedent(r'''
                {"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
                    "baz\\\\": /* yay */ "fo\\\\\"*/o"
                }
                '''),
                r'{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'
            )

    unittest.main()

0 comments on commit 38e7efc

Please sign in to comment.