codeprentice-org · asifmallik · Aug 3, 2020 · Aug 3, 2020 · Aug 3, 2020 · Aug 3, 2020
diff --git a/sniffpy/mimetype.py b/sniffpy/mimetype.py
@@ -33,6 +33,8 @@ def is_image(self) -> bool:
     def is_video_audio(self) -> bool:
         return self.type == "audio" or self.type == "video" or self.essence() == "application/ogg"
 
+    def __eq__(self, obj):  # field-wise equality; defers (NotImplemented) for operands of another class, e.g. None
+        return (self.type, self.subtype, self.parameters) == (obj.type, obj.subtype, obj.parameters) if isinstance(obj, type(self)) else NotImplemented
 
 def parse_mime_type(str_input: str) -> MIMEType:
     str_input = str_input.strip() #might have to specify HTTP whitespace characters
@@ -52,13 +54,11 @@ def parse_mime_type(str_input: str) -> MIMEType:
         return None
 
     _parameters = dict()
-
     while pos < len(str_input):
         pos += 1
-        _, pos = ref.collect_code_points(str_input, ['\u000A', '\u000D', '\u0009', '\u0020'], pos)
+        _, pos = ref.collect_code_points(str_input, ['\u000A', '\u000D', '\u0009', '\u0020'], pos, exclusion=False)
         _parameter_name, pos = ref.collect_code_points(str_input, [';', '='], pos)
         _parameter_name = _parameter_name.lower()
-
         if len(str_input) <= pos:
             break
         if  str_input[pos] == ';':
@@ -74,7 +74,7 @@ def parse_mime_type(str_input: str) -> MIMEType:
             if _parameter_value == '':
                 continue
 
-        if (_parameter_name == '' and terminology.check_http_token_code_points(_parameter_name)
+        if (_parameter_name != '' and terminology.check_http_token_code_points(_parameter_name)
                 and terminology.check_http_quoted_string_token_code_points(_parameter_value)
                 and _parameter_name not in _parameters):
             _parameters[_parameter_name] = _parameter_value

diff --git a/sniffpy/ref.py b/sniffpy/ref.py
@@ -6,12 +6,60 @@ def check_condition(code_point: str, condition: List[str]) -> bool:
             return True
     return False
 
-def collect_code_points(str_input: str, condition: List[str], pos: int) -> (str, int): #Implements: https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
+def collect_code_points(str_input: str, condition: List[str], pos: int, exclusion: bool = True) -> (str, int): #Implements: https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
+    """Collect consecutive code points of ``str_input`` starting at ``pos``.
+
+    With ``exclusion=True`` (the default) collection stops at the first code
+    point for which ``check_condition(code_point, condition)`` is true; with
+    ``exclusion=False`` it stops at the first one for which it is false.
+    Returns the collected string and the position where collection stopped.
+    """
     result = []
-    while pos != len(str_input) and not check_condition(str_input[pos], condition):
+    while pos != len(str_input) and check_condition(str_input[pos], condition) ^ exclusion:
         result.append(str_input[pos])
         pos += 1
     return ''.join(result), pos
 
 def collect_http_quoted_string(str_input: str, pos: int, exact_value: bool = False) -> (str, int):
-    raise NotImplementedError
+    """Collect an HTTP quoted string from ``str_input`` starting at ``pos``.
+
+    Implements: https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
+
+    ``str_input[pos]`` must be the opening '"'. Backslash escapes are
+    resolved; a trailing lone backslash is kept literally. Returns
+    ``(value, pos)`` with the unescaped contents when ``exact_value`` is
+    true, otherwise ``(raw, pos)`` where ``raw`` is the original text from
+    the opening quote up to ``pos``. ``pos`` ends just past the closing
+    quote, or at the end of input if the string is unterminated.
+    """
+    position_start = pos
+    value = ""
+    assert str_input[pos] == '"'
+    pos += 1
+
+    while True:
+        append_value, pos = collect_code_points(str_input, ['"', '\\'], pos)
+        value += append_value
+
+        if len(str_input) <= pos:
+            break
+
+        quote_or_backslash = str_input[pos]
+        pos += 1
+
+        if quote_or_backslash == '\\':
+            if len(str_input) <= pos:
+                # Input ended right after a backslash: keep it literally.
+                value += "\\"
+                break
+            value += str_input[pos]
+            pos += 1
+        else:
+            assert quote_or_backslash == '"'
+            break
+
+    if exact_value:
+        return value, pos
+
+    # pos is already one past the last consumed code point, so the slice end is exclusive.
+    return str_input[position_start:pos], pos
diff --git a/sniffpy/terminology.py b/sniffpy/terminology.py
@@ -1,7 +1,20 @@
 import re
 
 def check_http_quoted_string_token_code_points(str_input: str) -> bool:
-    raise NotImplementedError
+    """Return True if ``str_input`` contains only HTTP quoted-string token code points.
+
+    Allowed: U+0009 TAB, U+0020 through U+007E, and U+0080 through U+00FF
+    (https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point).
+    An empty string is accepted.
+    """
+    for char in str_input:
+        is_tab = char == '\u0009'
+        in_printable_ascii = '\u0020' <= char <= '\u007e'
+        in_latin1_supplement = '\u0080' <= char <= '\u00ff'
+        if not (is_tab or in_printable_ascii or in_latin1_supplement):
+            return False
+
+    return True
 
 def check_http_token_code_points(str_input: str) -> bool:
     reg = re.compile(r'^[a-zA-Z0-9\!#\$%&\'\*\+-\.\^_`\|~]+$')

diff --git a/tests/test_mimetype.py b/tests/test_mimetype.py
@@ -7,6 +7,31 @@ def test_parse_mime_type_basic():
 
     test_string = "text/html"
     mimetype_obj = mimetype.parse_mime_type(test_string)
+
+    assert mimetype_obj.type == "text"
+    assert mimetype_obj.subtype == "html"
+
+def test_parse_mime_type_single_unquoted_parameter():
+    """Check that one unquoted parameter is parsed alongside the type and subtype."""
 
+    test_string = "text/html;charset=ISO-8859-1"  # parameter value given without quotes
+    mimetype_obj = mimetype.parse_mime_type(test_string)
+
     assert mimetype_obj.type == "text"
     assert mimetype_obj.subtype == "html"
+    assert len(mimetype_obj.parameters) == 1  # exactly one parameter is expected
+    assert "charset" in mimetype_obj.parameters
+    assert mimetype_obj.parameters['charset'] == "ISO-8859-1"  # value compared with its original casing
+
+def test_parse_mime_type_single_quoted_parameter():
+    """Check that one quoted parameter is parsed and its surrounding quotes are removed."""
+
+    test_string = 'text/html;charset="shift_jis"iso-2022-jpi'  # extra text follows the closing quote
+    mimetype_obj = mimetype.parse_mime_type(test_string)
+
+    assert mimetype_obj.type == "text"
+    assert mimetype_obj.subtype == "html"
+    assert len(mimetype_obj.parameters) == 1  # exactly one parameter is expected
+    assert "charset" in mimetype_obj.parameters
+    assert mimetype_obj.parameters['charset'] == "shift_jis"  # only the quoted text is expected as the value
+