-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve joining of url paths in some situations
* Use url.JoinPath to join URLs from a few places.
* Add util/joinpath.go from Go 1.19 and add the license in a REUSE 3.0 compatible manner.

Resolves #223.

Co-authored-by: Bernhard Reiter <bernhard@intevation.de>
- Loading branch information
1 parent
324de3a
commit 9cba4ee
Showing
4 changed files
with
388 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
Copyright (c) 2009 The Go Authors. All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are | ||
met: | ||
|
||
* Redistributions of source code must retain the above copyright | ||
notice, this list of conditions and the following disclaimer. | ||
* Redistributions in binary form must reproduce the above | ||
copyright notice, this list of conditions and the following disclaimer | ||
in the documentation and/or other materials provided with the | ||
distribution. | ||
* Neither the name of Google Inc. nor the names of its | ||
contributors may be used to endorse or promote products derived from | ||
this software without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
|
||
Additional IP Rights Grant (Patents) | ||
|
||
"This implementation" means the copyrightable works distributed by | ||
Google as part of the Go project. | ||
|
||
Google hereby grants to You a perpetual, worldwide, non-exclusive, | ||
no-charge, royalty-free, irrevocable (except as stated in this section) | ||
patent license to make, have made, use, offer to sell, sell, import, | ||
transfer and otherwise run, modify and propagate the contents of this | ||
implementation of Go, where such license applies only to those patent | ||
claims, both currently owned or controlled by Google and acquired in | ||
the future, licensable by Google that are necessarily infringed by this | ||
implementation of Go. This grant does not include claims that would be | ||
infringed only as a consequence of further modification of this | ||
implementation. If you or your agent or exclusive licensee institute or | ||
order or agree to the institution of patent litigation against any | ||
entity (including a cross-claim or counterclaim in a lawsuit) alleging | ||
that this implementation of Go or any code incorporated within this | ||
implementation of Go constitutes direct or contributory patent | ||
infringement, or inducement of patent infringement, then any patent | ||
rights granted to you under this License for this implementation of Go | ||
shall terminate as of the date such litigation is filed. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,301 @@ | ||
// SPDX-License-Identifier: LicenseRef-Go119-BSD-Patentgrant | ||
// SPDX-FileCopyrightText: 2009 The Go Authors, Google Inc. | ||
|
||
// The code of this file was extracted and adjusted from | ||
// https://cs.opensource.google/go/go/+/refs/tags/go1.19rc2:src/net/url/url.go | ||
// by Intevation 2022 | ||
|
||
//go:build !go1.19 | ||
|
||
package util | ||
|
||
import ( | ||
"net/url" | ||
"path" | ||
"strings" | ||
) | ||
|
||
// encoding identifies the component of a URL that a string belongs to,
// so that escaping and unescaping can apply that component's rules.
type encoding int

// URL components distinguished by shouldEscape, escape and unescape.
// Numbering starts at 1, so the zero value of encoding names no component.
const (
	encodePath encoding = iota + 1
	encodePathSegment
	encodeHost
	encodeZone
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
)

// upperhex maps a 4-bit value to its uppercase hexadecimal digit.
const upperhex = "0123456789ABCDEF"
|
||
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	return ('0' <= c && c <= '9') ||
		('a' <= c && c <= 'f') ||
		('A' <= c && c <= 'F')
}
|
||
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Any byte that is not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}
|
||
// Return true if the specified character should be escaped when | ||
// appearing in a URL string, according to RFC 3986. | ||
// | ||
// Please be informed that for now shouldEscape does not check all | ||
// reserved characters correctly. See golang.org/issue/5684. | ||
func shouldEscape(c byte, mode encoding) bool { | ||
// §2.3 Unreserved characters (alphanum) | ||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { | ||
return false | ||
} | ||
|
||
if mode == encodeHost || mode == encodeZone { | ||
// §3.2.2 Host allows | ||
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" | ||
// as part of reg-name. | ||
// We add : because we include :port as part of host. | ||
// We add [ ] because we include [ipv6]:port as part of host. | ||
// We add < > because they're the only characters left that | ||
// we could possibly allow, and Parse will reject them if we | ||
// escape them (because hosts can't use %-encoding for | ||
// ASCII bytes). | ||
switch c { | ||
case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': | ||
return false | ||
} | ||
} | ||
|
||
switch c { | ||
case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) | ||
return false | ||
|
||
case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) | ||
// Different sections of the URL allow a few of | ||
// the reserved characters to appear unescaped. | ||
switch mode { | ||
case encodePath: // §3.3 | ||
// The RFC allows : @ & = + $ but saves / ; , for assigning | ||
// meaning to individual path segments. This package | ||
// only manipulates the path as a whole, so we allow those | ||
// last three as well. That leaves only ? to escape. | ||
return c == '?' | ||
|
||
case encodePathSegment: // §3.3 | ||
// The RFC allows : @ & = + $ but saves / ; , for assigning | ||
// meaning to individual path segments. | ||
return c == '/' || c == ';' || c == ',' || c == '?' | ||
|
||
case encodeUserPassword: // §3.2.1 | ||
// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in | ||
// userinfo, so we must escape only '@', '/', and '?'. | ||
// The parsing of userinfo treats ':' as special so we must escape | ||
// that too. | ||
return c == '@' || c == '/' || c == '?' || c == ':' | ||
|
||
case encodeQueryComponent: // §3.4 | ||
// The RFC reserves (so we must escape) everything. | ||
return true | ||
|
||
case encodeFragment: // §4.1 | ||
// The RFC text is silent but the grammar allows | ||
// everything, so escape nothing. | ||
return false | ||
} | ||
} | ||
|
||
if mode == encodeFragment { | ||
// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are | ||
// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not | ||
// need to be escaped. To minimize potential breakage, we apply two restrictions: | ||
// (1) we always escape sub-delims outside of the fragment, and (2) we always | ||
// escape single quote to avoid breaking callers that had previously assumed that | ||
// single quotes would be escaped. See issue #19917. | ||
switch c { | ||
case '!', '(', ')', '*': | ||
return false | ||
} | ||
} | ||
|
||
// Everything else must be escaped. | ||
return true | ||
} | ||
|
||
// unescape unescapes a string; the mode specifies | ||
// which section of the URL string is being unescaped. | ||
func unescape(s string, mode encoding) (string, error) { | ||
// Count %, check that they're well-formed. | ||
n := 0 | ||
hasPlus := false | ||
for i := 0; i < len(s); { | ||
switch s[i] { | ||
case '%': | ||
n++ | ||
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { | ||
s = s[i:] | ||
if len(s) > 3 { | ||
s = s[:3] | ||
} | ||
return "", url.EscapeError(s) | ||
} | ||
// Per https://tools.ietf.org/html/rfc3986#page-21 | ||
// in the host component %-encoding can only be used | ||
// for non-ASCII bytes. | ||
// But https://tools.ietf.org/html/rfc6874#section-2 | ||
// introduces %25 being allowed to escape a percent sign | ||
// in IPv6 scoped-address literals. Yay. | ||
if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" { | ||
return "", url.EscapeError(s[i : i+3]) | ||
} | ||
if mode == encodeZone { | ||
// RFC 6874 says basically "anything goes" for zone identifiers | ||
// and that even non-ASCII can be redundantly escaped, | ||
// but it seems prudent to restrict %-escaped bytes here to those | ||
// that are valid host name bytes in their unescaped form. | ||
// That is, you can use escaping in the zone identifier but not | ||
// to introduce bytes you couldn't just write directly. | ||
// But Windows puts spaces here! Yay. | ||
v := unhex(s[i+1])<<4 | unhex(s[i+2]) | ||
if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) { | ||
return "", url.EscapeError(s[i : i+3]) | ||
} | ||
} | ||
i += 3 | ||
case '+': | ||
hasPlus = mode == encodeQueryComponent | ||
i++ | ||
default: | ||
if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) { | ||
return "", url.InvalidHostError(s[i : i+1]) | ||
} | ||
i++ | ||
} | ||
} | ||
|
||
if n == 0 && !hasPlus { | ||
return s, nil | ||
} | ||
|
||
var t strings.Builder | ||
t.Grow(len(s) - 2*n) | ||
for i := 0; i < len(s); i++ { | ||
switch s[i] { | ||
case '%': | ||
t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2])) | ||
i += 2 | ||
case '+': | ||
if mode == encodeQueryComponent { | ||
t.WriteByte(' ') | ||
} else { | ||
t.WriteByte('+') | ||
} | ||
default: | ||
t.WriteByte(s[i]) | ||
} | ||
} | ||
return t.String(), nil | ||
} | ||
|
||
func escape(s string, mode encoding) string { | ||
spaceCount, hexCount := 0, 0 | ||
for i := 0; i < len(s); i++ { | ||
c := s[i] | ||
if shouldEscape(c, mode) { | ||
if c == ' ' && mode == encodeQueryComponent { | ||
spaceCount++ | ||
} else { | ||
hexCount++ | ||
} | ||
} | ||
} | ||
|
||
if spaceCount == 0 && hexCount == 0 { | ||
return s | ||
} | ||
|
||
var buf [64]byte | ||
var t []byte | ||
|
||
required := len(s) + 2*hexCount | ||
if required <= len(buf) { | ||
t = buf[:required] | ||
} else { | ||
t = make([]byte, required) | ||
} | ||
|
||
if hexCount == 0 { | ||
copy(t, s) | ||
for i := 0; i < len(s); i++ { | ||
if s[i] == ' ' { | ||
t[i] = '+' | ||
} | ||
} | ||
return string(t) | ||
} | ||
|
||
j := 0 | ||
for i := 0; i < len(s); i++ { | ||
switch c := s[i]; { | ||
case c == ' ' && mode == encodeQueryComponent: | ||
t[j] = '+' | ||
j++ | ||
case shouldEscape(c, mode): | ||
t[j] = '%' | ||
t[j+1] = upperhex[c>>4] | ||
t[j+2] = upperhex[c&15] | ||
j += 3 | ||
default: | ||
t[j] = s[i] | ||
j++ | ||
} | ||
} | ||
return string(t) | ||
} | ||
|
||
func setPath(u *url.URL, p string) error { | ||
path, err := unescape(p, encodePath) | ||
if err != nil { | ||
return err | ||
} | ||
u.Path = path | ||
if escp := escape(path, encodePath); p == escp { | ||
// Default encoding is fine. | ||
u.RawPath = "" | ||
} else { | ||
u.RawPath = p | ||
} | ||
return nil | ||
} | ||
|
||
// JoinURLPath returns a new URL with the provided path elements joined to | ||
// any existing path and the resulting path cleaned of any ./ or ../ elements. | ||
// Any sequences of multiple / characters will be reduced to a single /. | ||
func JoinURLPath(u *url.URL, elem ...string) *url.URL { | ||
|
||
url := *u | ||
if len(elem) > 0 { | ||
elem = append([]string{u.EscapedPath()}, elem...) | ||
p := path.Join(elem...) | ||
// path.Join will remove any trailing slashes. | ||
// Preserve at least one. | ||
if strings.HasSuffix(elem[len(elem)-1], "/") && !strings.HasSuffix(p, "/") { | ||
p += "/" | ||
} | ||
setPath(&url, p) | ||
} | ||
return &url | ||
} |
Oops, something went wrong.