Skip to content

Commit

Permalink
Add support for encodings
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Melnikow committed Mar 26, 2014
1 parent 70a8add commit 95c5f7a
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 8 deletions.
20 changes: 14 additions & 6 deletions querystring_parser/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,20 @@
import urllib
import types

def build(item):
def build(item, encoding=None):
def recursion(item, base=None):
pairs = list()
if(hasattr(item, 'values')):
for key, value in item.items():
if encoding:
quoted_key = urllib.quote(unicode(key).encode(encoding))
else:
quoted_key = urllib.quote(unicode(key))
if(base):
new_base = "%s[%s]" % (base, urllib.quote(unicode(key)))
new_base = "%s[%s]" % (base, quoted_key)
pairs += recursion(value, new_base)
else:
new_base = urllib.quote(unicode(key))
new_base = quoted_key
pairs += recursion(value, new_base)
elif(isinstance(item, types.ListType)):
for (index, value) in enumerate(item):
Expand All @@ -29,9 +33,13 @@ def recursion(item, base=None):
else:
pairs += recursion(value)
else:
if encoding:
quoted_item = urllib.quote(unicode(item).encode(encoding))
else:
quoted_item = urllib.quote(unicode(item))
if(base):
pairs.append("%s=%s" % (base, urllib.quote(unicode(item))))
pairs.append("%s=%s" % (base, quoted_item))
else:
pairs.append(urllib.quote(unicode(item)))
pairs.append(quoted_item)
return pairs
return '&'.join(recursion(item))
return '&'.join(recursion(item))
8 changes: 7 additions & 1 deletion querystring_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,14 @@ def parser_helper(key, val):
return pdict


def parse(query_string, unquote=True):
def parse(query_string, unquote=True, encoding='utf-8'):
'''
Main parse function
@param query_string:
@param unquote: unquote html query string ?
@param encoding: An optional encoding used to decode the keys and values. Defaults to utf-8, which the W3C declares as the default in its algorithm for encoding.
@see http://www.w3.org/TR/html5/forms.html#application/x-www-form-urlencoded-encoding-algorithm
'''
mydict = {}
plist = []
Expand All @@ -115,6 +118,9 @@ def parse(query_string, unquote=True):
(var, val) = element.split("=")
except ValueError:
raise MalformedQueryStringError
if encoding:
var = var.decode(encoding)
val = val.decode(encoding)
plist.append(parser_helper(var, val))
for di in plist:
(k, v) = di.popitem()
Expand Down
26 changes: 25 additions & 1 deletion querystring_parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,21 @@ class KnownValues(unittest.TestCase):
({}),
)

knownValuesCleanWithUnicode = (
# f = some unicode
({u"f": u"\u9017"}),
)

knownValuesWithUnicode = (
# f = some unicode
({u"f": u"\u9017"}),
)

def test_parse_known_values_clean(self):
"""parse should give known result with known input"""
self.maxDiff = None
for dic in self.knownValuesClean:
result = parse(build(dic), True)
result = parse(build(dic), unquote=True)
self.assertEqual(dic, result)

def test_parse_known_values(self):
Expand All @@ -87,6 +97,20 @@ def test_parse_known_values(self):
result = parse(build(dic))
self.assertEqual(dic, result)

def test_parse_known_values_clean_with_unicode(self):
    """Round-trip the clean fixtures, plus the unicode one, via UTF-8.

    Each dict is serialised with build(encoding='utf-8') and read back
    with parse(unquote=True, encoding='utf-8'); the parsed result must
    equal the dict that went in.
    """
    self.maxDiff = None
    fixtures = self.knownValuesClean + self.knownValuesCleanWithUnicode
    for expected in fixtures:
        query_string = build(expected, encoding='utf-8')
        parsed = parse(query_string, unquote=True, encoding='utf-8')
        self.assertEqual(expected, parsed)

def test_parse_known_values_with_unicode(self):
    """Round-trip the quoted fixtures, plus the unicode one, via UTF-8.

    Each dict is serialised with build(encoding='utf-8') and read back
    with parse(encoding='utf-8'), leaving parse's other arguments at
    their defaults; the parsed result must equal the dict that went in.
    """
    self.maxDiff = None
    fixtures = self.knownValues + self.knownValuesWithUnicode
    for expected in fixtures:
        query_string = build(expected, encoding='utf-8')
        parsed = parse(query_string, encoding='utf-8')
        self.assertEqual(expected, parsed)


class ParseBadInput(unittest.TestCase):
'''
Expand Down

0 comments on commit 95c5f7a

Please sign in to comment.