Do not strip leading slash from path (benoitc#1511)

Fixes benoitc#1512
mjjbell · Dec 28, 2017 · 69efb73 · 69efb73
1 parent d7cc56e
commit 69efb73
Show file tree

Hide file tree

Showing 7 changed files with 46 additions and 24 deletions.
diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py
@@ -15,7 +15,7 @@
     LimitRequestLine, LimitRequestHeaders)
 from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
 from gunicorn.six import BytesIO
-from gunicorn._compat import urlsplit
+from gunicorn.util import split_request_uri
 
 MAX_REQUEST_LINE = 8190
 MAX_HEADERS = 32768
@@ -312,18 +312,10 @@ def parse_request_line(self, line_bytes):
         self.method = bits[0].upper()
 
         # URI
-        # When the path starts with //, urlsplit considers it as a
-        # relative uri while the RDF says it shouldnt
-        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
-        # considers it as an absolute url.
-        # fix issue #297
-        if bits[1].startswith("//"):
-            self.uri = bits[1][1:]
-        else:
-            self.uri = bits[1]
+        self.uri = bits[1]
 
         try:
-            parts = urlsplit(self.uri)
+            parts = split_request_uri(self.uri)
         except ValueError:
             raise InvalidRequestLine(bytes_to_str(line_bytes))
         self.path = parts.path or ""

diff --git a/gunicorn/util.py b/gunicorn/util.py
@@ -543,3 +543,15 @@ def app(environ, start_response):
         return [msg]
 
     return app
+
+
+def split_request_uri(uri):
+    if uri.startswith("//"):
+        # When the path starts with //, urlsplit parses what follows as a
+        # network location, while the RFC says to treat it as an abs_path
+        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
+        # We use a temporary dot prefix to work around this behaviour
+        parts = _compat.urlsplit("." + uri)
+        return parts._replace(path=parts.path[1:])
+
+    return _compat.urlsplit(uri)
diff --git a/tests/requests/invalid/016.http b/tests/requests/invalid/016.http
@@ -1,2 +1,2 @@
-PUT ///]ufd/: HTTP/1.1\r\n
-\r\n
+PUT s://]ufd/: HTTP/1.1\r\n
+\r\n
diff --git a/tests/requests/valid/100.http b/tests/requests/valid/100.http
@@ -0,0 +1,2 @@
+GET ///keeping_slashes HTTP/1.1\r\n
+\r\n
diff --git a/tests/requests/valid/100.py b/tests/requests/valid/100.py
@@ -0,0 +1,7 @@
+request = {
+    "method": "GET",
+    "uri": uri("///keeping_slashes"),
+    "version": (1, 1),
+    "headers": [],
+    "body": b""
+}
diff --git a/tests/test_util.py b/tests/test_util.py
@@ -7,6 +7,7 @@
 
 from gunicorn import util
 from gunicorn.errors import AppImportError
+from gunicorn.six.moves.urllib.parse import SplitResult
 
 
 @pytest.mark.parametrize('test_input, expected', [
@@ -69,3 +70,19 @@ def test_to_bytestring():
         util.to_bytestring(100)
     msg = '100 is not a string'
     assert msg in str(err)
+
+
+@pytest.mark.parametrize('test_input, expected', [
+    ('https://example.org/a/b?c=1#d',
+     SplitResult(scheme='https', netloc='example.org', path='/a/b', query='c=1', fragment='d')),
+    ('a/b?c=1#d',
+     SplitResult(scheme='', netloc='', path='a/b', query='c=1', fragment='d')),
+    ('/a/b?c=1#d',
+     SplitResult(scheme='', netloc='', path='/a/b', query='c=1', fragment='d')),
+    ('//a/b?c=1#d',
+     SplitResult(scheme='', netloc='', path='//a/b', query='c=1', fragment='d')),
+    ('///a/b?c=1#d',
+     SplitResult(scheme='', netloc='', path='///a/b', query='c=1', fragment='d')),
+])
+def test_split_request_uri(test_input, expected):
+    assert util.split_request_uri(test_input) == expected
diff --git a/tests/treq.py b/tests/treq.py
@@ -10,7 +10,7 @@
 from gunicorn._compat import execfile_
 from gunicorn.config import Config
 from gunicorn.http.parser import RequestParser
-from gunicorn.six.moves.urllib.parse import urlparse
+from gunicorn.util import split_request_uri
 from gunicorn import six
 
 dirname = os.path.dirname(__file__)
@@ -19,19 +19,11 @@
 
 def uri(data):
     ret = {"raw": data}
-    parts = urlparse(data)
+    parts = split_request_uri(data)
     ret["scheme"] = parts.scheme or ''
     ret["host"] = parts.netloc.rsplit(":", 1)[0] or None
     ret["port"] = parts.port or 80
-    if parts.path and parts.params:
-        ret["path"] = ";".join([parts.path, parts.params])
-    elif parts.path:
-        ret["path"] = parts.path
-    elif parts.params:
-        # Don't think this can happen
-        ret["path"] = ";" + parts.path
-    else:
-        ret["path"] = ''
+    ret["path"] = parts.path or ''
     ret["query"] = parts.query or ''
     ret["fragment"] = parts.fragment or ''
     return ret