Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored minify_json.py, Uses unittest for testing. #11

Merged
merged 1 commit into from
Sep 26, 2013
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 109 additions & 93 deletions minify_json.py
Original file line number Diff line number Diff line change
@@ -1,112 +1,128 @@
'''
Created on 20/01/2011

v0.1 (C) Gerald Storer
v0.2 (C) Gerald Storer
MIT License

Based on JSON.minify.js:
Based on JSON.minify.js:
https://github.com/getify/JSON.minify

Contributors:
- Pradyun S. Gedam (conditions and variable names changed)
'''


import re

def json_minify(json,strip_space=True):
tokenizer=re.compile('"|(/\*)|(\*/)|(//)|\n|\r')
def json_minify(string, strip_space=True):
tokenizer = re.compile('"|(/\*)|(\*/)|(//)|\n|\r')
in_string = False
in_multiline_comment = False
in_singleline_comment = False
in_multi = False
in_single = False

new_str = []
from_index = 0 # from is a keyword in Python
for match in re.finditer(tokenizer,json):
if not in_multiline_comment and not in_singleline_comment:
tmp2 = json[from_index:match.start()]
index = 0

for match in re.finditer(tokenizer, string):

if not (in_multi or in_single):
tmp = string[index:match.start()]
if not in_string and strip_space:
tmp2 = re.sub('[ \t\n\r]*','',tmp2) # replace only white space defined in standard
new_str.append(tmp2)

from_index = match.end()

if match.group() == '"' and not in_multiline_comment and not in_singleline_comment:
escaped = re.search('(\\\\)*$',json[:match.start()])
if not in_string or escaped is None or len(escaped.group()) % 2 == 0:
# start of string with ", or unescaped " character found to end string
# replace white space as defined in standard
tmp = re.sub('[ \t\n\r]*', '', tmp)
new_str.append(tmp)

index = match.end()
val = match.group()

if val == '"' and not (in_multi or in_single):
escaped = re.search(r'(\\)*$', string[:match.start()])

# start of string or unescaped quote character to end string
if not in_string or (escaped is None or len(escaped.group()) % 2 == 0):
in_string = not in_string
from_index -= 1 # include " character in next catch

elif match.group() == '/*' and not in_string and not in_multiline_comment and not in_singleline_comment:
in_multiline_comment = True
elif match.group() == '*/' and not in_string and in_multiline_comment and not in_singleline_comment:
in_multiline_comment = False
elif match.group() == '//' and not in_string and not in_multiline_comment and not in_singleline_comment:
in_singleline_comment = True
elif (match.group() == '\n' or match.group() == '\r') and not in_string and not in_multiline_comment and in_singleline_comment:
in_singleline_comment = False
elif not in_multiline_comment and not in_singleline_comment and (
match.group() not in ['\n','\r',' ','\t'] or not strip_space):
new_str.append(match.group())

new_str.append(json[from_index:])
index -= 1 # include " character in next catch
elif not (in_string or in_multi or in_single):
if val == '/*':
in_multi = True
elif val == '//':
in_single = True
elif val == '*/' and in_multi and not (in_string or in_single):
in_multi = False
elif val in '\r\n' and not (in_multi or in_string) and in_single:
in_single = False
elif not ((in_multi or in_single) or (val in ' \r\n\t' and strip_space)):
new_str.append(val)

new_str.append(string[index:])
return ''.join(new_str)


if __name__ == '__main__':
import json # requires Python 2.6+ to run tests

def test_json(s):
return json.loads(json_minify(s))

test1 = '''// this is a JSON file with comments
{
"foo": "bar", // this is cool
"bar": [
"baz", "bum", "zam"
],
/* the rest of this document is just fluff
in case you are interested. */
"something": 10,
"else": 20
}

/* NOTE: You can easily strip the whitespace and comments
from such a file with the JSON.minify() project hosted
here on github at http://github.com/getify/JSON.minify
*/
'''
# Python 2.6+ needed to run tests
import json
import textwrap
import unittest

test1_res = '''{"foo":"bar","bar":["baz","bum","zam"],"something":10,"else":20}'''

test2 = '''
{"/*":"*/","//":"",/*"//"*/"/*/"://
"//"}
class JsonMinifyTestCase(unittest.TestCase):
"""Tests for json_minify"""
def template(self, in_string, expected):
in_dict = json.loads(json_minify(in_string))
expected_dict = json.loads(expected)
self.assertEqual(in_dict, expected_dict)

'''
test2_res = '''{"/*":"*/","//":"","/*/":"//"}'''

test3 = r'''/*
this is a
multi line comment */{

"foo"
:
"bar/*"// something
, "b\"az":/*
something else */"blah"

}
'''
test3_res = r'''{"foo":"bar/*","b\"az":"blah"}'''

test4 = r'''{"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
"baz\\\\": /* yay */ "fo\\\\\"*/o"
}
'''
test4_res = r'''{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'''

assert test_json(test1) == json.loads(test1_res),'Failed test 1'
assert test_json(test2) == json.loads(test2_res),'Failed test 2'
assert test_json(test3) == json.loads(test3_res),'Failed test 3'
assert test_json(test4) == json.loads(test4_res),'Failed test 4'
if __debug__: # Don't print passed message if the asserts didn't run
print 'Passed all tests'
def test_1(self):
self.template(textwrap.dedent('''
// this is a JSON file with comments
{
"foo": "bar", // this is cool
"bar": [
"baz", "bum"
],
/* the rest of this document is just fluff
in case you are interested. */
"something": 10,
"else": 20
}

/* NOTE: You can easily strip the whitespace and comments
from such a file with the JSON.minify() project hosted
here on github at http://github.com/getify/JSON.minify
*/'''),
'{"foo":"bar","bar":["baz","bum"],"something":10,"else":20}'
)

def test_2(self):
self.template(textwrap.dedent('''
{"/*":"*/","//":"",/*"//"*/"/*/"://
"//"}'''),
'{"/*":"*/","//":"","/*/":"//"}'
)

def test_3(self):
self.template(textwrap.dedent(r'''
/*
this is a
multi line comment */{

"foo"
:
"bar/*"// something
, "b\"az":/*
something else */"blah"

}
'''),
r'{"foo":"bar/*","b\"az":"blah"}'
)

def test_4(self):
self.template(textwrap.dedent(r'''
{"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
"baz\\\\": /* yay */ "fo\\\\\"*/o"
}
'''),
r'{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'
)

unittest.main()