Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mimetype tests #14

Merged
merged 8 commits into from
Aug 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions sniffpy/mimetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def is_image(self) -> bool:
def is_video_audio(self) -> bool:
    """Return True when this MIME type counts as audio or video.

    That is the case when ``self.type`` is ``"audio"`` or ``"video"``, or
    when ``self.essence()`` equals ``"application/ogg"``.
    """
    if self.type in ("audio", "video"):
        return True
    # Only consult the essence when the top-level type did not match.
    return self.essence() == "application/ogg"

def __eq__(self, obj):
    """Return whether ``obj`` has the same type, subtype and parameters.

    Returns ``NotImplemented`` when ``obj`` lacks any of those attributes
    (for example ``None``), so Python falls back to its default comparison
    instead of raising ``AttributeError``.
    """
    try:
        other = (obj.type, obj.subtype, obj.parameters)
    except AttributeError:
        # Not something that looks like a MIME type; let Python decide.
        return NotImplemented
    return (self.type, self.subtype, self.parameters) == other

def parse_mime_type(str_input: str) -> MIMEType:
str_input = str_input.strip() #might have to specify HTTP whitespace characters
Expand All @@ -52,13 +54,11 @@ def parse_mime_type(str_input: str) -> MIMEType:
return None

_parameters = dict()

while pos < len(str_input):
pos += 1
_, pos = ref.collect_code_points(str_input, ['\u000A', '\u000D', '\u0009', '\u0020'], pos)
_, pos = ref.collect_code_points(str_input, ['\u000A', '\u000D', '\u0009', '\u0020'], pos, exclusion=False)
_parameter_name, pos = ref.collect_code_points(str_input, [';', '='], pos)
_parameter_name = _parameter_name.lower()

if len(str_input) <= pos:
break
if str_input[pos] == ';':
Expand All @@ -74,7 +74,7 @@ def parse_mime_type(str_input: str) -> MIMEType:
if _parameter_value == '':
continue

if (_parameter_name == '' and terminology.check_http_token_code_points(_parameter_name)
if (_parameter_name != '' and terminology.check_http_token_code_points(_parameter_name)
and terminology.check_http_quoted_string_token_code_points(_parameter_value)
and _parameter_name not in _parameters):
_parameters[_parameter_name] = _parameter_value
Expand Down
34 changes: 31 additions & 3 deletions sniffpy/ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,40 @@ def check_condition(code_point: str, condition: List[str]) -> bool:
return True
return False

def collect_code_points(str_input: str, condition: List[str], pos: int, exclusion: bool = True) -> (str, int): #Implements: https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
    """Collect a run of code points from ``str_input`` starting at ``pos``.

    Args:
        str_input: The string being scanned.
        condition: Code points passed to ``check_condition`` for each
            candidate code point.
        pos: Index at which scanning starts.
        exclusion: When True (the default), collection continues while
            ``check_condition`` is false for the current code point; when
            False, it continues while ``check_condition`` is true.

    Returns:
        A ``(collected, new_pos)`` tuple: the collected substring and the
        index of the first code point that was not collected (or the end
        of the string).
    """
    end = len(str_input)
    start = pos
    # ``<`` rather than ``!=`` so a position already past the end yields an
    # empty result instead of indexing out of range.
    while pos < end and check_condition(str_input[pos], condition) != exclusion:
        pos += 1
    return str_input[start:pos], pos

def collect_http_quoted_string(str_input: str, pos: int, exact_value: bool = False) -> (str, int):
    """Collect an HTTP quoted string that starts at ``str_input[pos]``.

    Args:
        str_input: The string being scanned.
        pos: Index of the opening ``"``.
        exact_value: When True, return the unescaped contents of the quoted
            string (surrounding quotes removed, backslash escapes resolved).
            When False, return the raw slice of ``str_input`` that was
            consumed, quotes and backslashes included.

    Returns:
        A ``(text, new_pos)`` tuple, where ``new_pos`` is the index just
        past the consumed portion of ``str_input``.

    Raises:
        AssertionError: If ``str_input[pos]`` is not a double quote.
    """
    position_start = pos
    assert str_input[pos] == '"'
    pos += 1

    pieces = []
    while True:
        # Take everything up to the next quote or backslash.
        chunk, pos = collect_code_points(str_input, ['"', '\\'], pos)
        pieces.append(chunk)

        if len(str_input) <= pos:
            # Unterminated quoted string: stop at end of input.
            break

        quote_or_backslash = str_input[pos]
        pos += 1

        if quote_or_backslash == '\\':
            if len(str_input) <= pos:
                # A lone trailing backslash is kept literally.
                pieces.append("\\")
                break
            # The escaped code point is taken verbatim.
            pieces.append(str_input[pos])
            pos += 1
        else:
            assert quote_or_backslash == '"'
            break

    if exact_value:
        return ''.join(pieces), pos

    # ``pos`` already points one past the last consumed code point, so the
    # slice must end at ``pos`` (not ``pos + 1``) to avoid swallowing the
    # code point that follows the closing quote.
    return str_input[position_start:pos], pos
9 changes: 8 additions & 1 deletion sniffpy/terminology.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import re

def check_http_quoted_string_token_code_points(str_input: str) -> bool:
    """Return True if every code point is an HTTP quoted-string token code point.

    A code point qualifies when it is U+0009 TAB, lies in U+0020..U+007E,
    or lies in U+0080..U+00FF. The empty string is accepted.
    """
    return all(
        char == '\u0009'
        or '\u0020' <= char <= '\u007e'
        or '\u0080' <= char <= '\u00ff'
        for char in str_input
    )

def check_http_token_code_points(str_input: str) -> bool:
reg = re.compile(r'^[a-zA-Z0-9\!#\$%&\'\*\+-\.\^_`\|~]+$')
Expand Down
25 changes: 25 additions & 0 deletions tests/test_mimetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,31 @@ def test_parse_mime_type_basic():

test_string = "text/html"
mimetype_obj = mimetype.parse_mime_type(test_string)

assert mimetype_obj.type == "text"
assert mimetype_obj.subtype == "html"

def test_parse_mime_type_single_unquoted_parameter():
    """Parsing keeps a lone unquoted parameter next to the type and subtype."""

    parsed = mimetype.parse_mime_type("text/html;charset=ISO-8859-1")
    params = parsed.parameters

    assert parsed.type == "text"
    assert parsed.subtype == "html"
    assert len(params) == 1
    assert "charset" in params
    assert params['charset'] == "ISO-8859-1"

def test_parse_mime_type_single_quoted_parameter():
    """Parsing takes the quoted parameter value and drops the text after the closing quote."""

    parsed = mimetype.parse_mime_type('text/html;charset="shift_jis"iso-2022-jpi')
    params = parsed.parameters

    assert parsed.type == "text"
    assert parsed.subtype == "html"
    assert len(params) == 1
    assert "charset" in params
    assert params['charset'] == "shift_jis"