Skip to content

Commit

Permalink
Improve joining of url paths in some situations
Browse files Browse the repository at this point in the history
* Use url.JoinPath to join URLs from a few places.
* Add util/joinpath.go from go 1.19, add the license in REUSE 3.0 compatible manner.

resolve #223

Co-authored-by: Bernhard Reiter <bernhard@intevation.de>
  • Loading branch information
s-l-teichmann and bernhardreiter authored Jul 18, 2022
1 parent 324de3a commit 9cba4ee
Show file tree
Hide file tree
Showing 4 changed files with 388 additions and 3 deletions.
51 changes: 51 additions & 0 deletions LICENSES/LicenseRef-Go119-BSD-Patentgrant.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
Copyright (c) 2009 The Go Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


Additional IP Rights Grant (Patents)

"This implementation" means the copyrightable works distributed by
Google as part of the Go project.

Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.
19 changes: 16 additions & 3 deletions cmd/csaf_checker/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,13 @@ func (p *processor) processROLIEFeed(feed string) error {
func (p *processor) checkIndex(base string, mask whereType) error {
client := p.httpClient()

index := base + "/index.txt"
bu, err := url.Parse(base)
if err != nil {
return err
}

index := util.JoinURLPath(bu, "index.txt").String()

p.checkTLS(index)

p.badIndices.use()
Expand Down Expand Up @@ -680,9 +686,16 @@ func (p *processor) checkIndex(base string, mask whereType) error {
// of the fields' values and if they are sorted properly. Then it passes the files to the
// "integrity" functions. It returns error if some test fails, otherwise nil.
func (p *processor) checkChanges(base string, mask whereType) error {
client := p.httpClient()
changes := base + "/changes.csv"

bu, err := url.Parse(base)
if err != nil {
return err
}
changes := util.JoinURLPath(bu, "changes.csv").String()

p.checkTLS(changes)

client := p.httpClient()
res, err := client.Get(changes)

p.badChanges.use()
Expand Down
301 changes: 301 additions & 0 deletions util/joinpath.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
// SPDX-License-Identifier: LicenseRef-Go119-BSD-Patentgrant
// SPDX-FileCopyrightText: 2009 The Go Authors, Google Inc.

// The code of this file was extracted and adjusted from
// https://cs.opensource.google/go/go/+/refs/tags/go1.19rc2:src/net/url/url.go
// by Intevation 2022

//go:build !go1.19

package util

import (
"net/url"
"path"
"strings"
)

type encoding int

const (
encodePath encoding = 1 + iota
encodePathSegment
encodeHost
encodeZone
encodeUserPassword
encodeQueryComponent
encodeFragment
)

const upperhex = "0123456789ABCDEF"

func ishex(c byte) bool {
switch {
case '0' <= c && c <= '9':
return true
case 'a' <= c && c <= 'f':
return true
case 'A' <= c && c <= 'F':
return true
}
return false
}

func unhex(c byte) byte {
switch {
case '0' <= c && c <= '9':
return c - '0'
case 'a' <= c && c <= 'f':
return c - 'a' + 10
case 'A' <= c && c <= 'F':
return c - 'A' + 10
}
return 0
}

// Return true if the specified character should be escaped when
// appearing in a URL string, according to RFC 3986.
//
// Please be informed that for now shouldEscape does not check all
// reserved characters correctly. See golang.org/issue/5684.
func shouldEscape(c byte, mode encoding) bool {
// §2.3 Unreserved characters (alphanum)
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
return false
}

if mode == encodeHost || mode == encodeZone {
// §3.2.2 Host allows
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
// as part of reg-name.
// We add : because we include :port as part of host.
// We add [ ] because we include [ipv6]:port as part of host.
// We add < > because they're the only characters left that
// we could possibly allow, and Parse will reject them if we
// escape them (because hosts can't use %-encoding for
// ASCII bytes).
switch c {
case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
return false
}
}

switch c {
case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
return false

case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
// Different sections of the URL allow a few of
// the reserved characters to appear unescaped.
switch mode {
case encodePath: // §3.3
// The RFC allows : @ & = + $ but saves / ; , for assigning
// meaning to individual path segments. This package
// only manipulates the path as a whole, so we allow those
// last three as well. That leaves only ? to escape.
return c == '?'

case encodePathSegment: // §3.3
// The RFC allows : @ & = + $ but saves / ; , for assigning
// meaning to individual path segments.
return c == '/' || c == ';' || c == ',' || c == '?'

case encodeUserPassword: // §3.2.1
// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
// userinfo, so we must escape only '@', '/', and '?'.
// The parsing of userinfo treats ':' as special so we must escape
// that too.
return c == '@' || c == '/' || c == '?' || c == ':'

case encodeQueryComponent: // §3.4
// The RFC reserves (so we must escape) everything.
return true

case encodeFragment: // §4.1
// The RFC text is silent but the grammar allows
// everything, so escape nothing.
return false
}
}

if mode == encodeFragment {
// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
// need to be escaped. To minimize potential breakage, we apply two restrictions:
// (1) we always escape sub-delims outside of the fragment, and (2) we always
// escape single quote to avoid breaking callers that had previously assumed that
// single quotes would be escaped. See issue #19917.
switch c {
case '!', '(', ')', '*':
return false
}
}

// Everything else must be escaped.
return true
}

// unescape unescapes a string; the mode specifies
// which section of the URL string is being unescaped.
func unescape(s string, mode encoding) (string, error) {
// Count %, check that they're well-formed.
n := 0
hasPlus := false
for i := 0; i < len(s); {
switch s[i] {
case '%':
n++
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
s = s[i:]
if len(s) > 3 {
s = s[:3]
}
return "", url.EscapeError(s)
}
// Per https://tools.ietf.org/html/rfc3986#page-21
// in the host component %-encoding can only be used
// for non-ASCII bytes.
// But https://tools.ietf.org/html/rfc6874#section-2
// introduces %25 being allowed to escape a percent sign
// in IPv6 scoped-address literals. Yay.
if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
return "", url.EscapeError(s[i : i+3])
}
if mode == encodeZone {
// RFC 6874 says basically "anything goes" for zone identifiers
// and that even non-ASCII can be redundantly escaped,
// but it seems prudent to restrict %-escaped bytes here to those
// that are valid host name bytes in their unescaped form.
// That is, you can use escaping in the zone identifier but not
// to introduce bytes you couldn't just write directly.
// But Windows puts spaces here! Yay.
v := unhex(s[i+1])<<4 | unhex(s[i+2])
if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) {
return "", url.EscapeError(s[i : i+3])
}
}
i += 3
case '+':
hasPlus = mode == encodeQueryComponent
i++
default:
if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
return "", url.InvalidHostError(s[i : i+1])
}
i++
}
}

if n == 0 && !hasPlus {
return s, nil
}

var t strings.Builder
t.Grow(len(s) - 2*n)
for i := 0; i < len(s); i++ {
switch s[i] {
case '%':
t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
i += 2
case '+':
if mode == encodeQueryComponent {
t.WriteByte(' ')
} else {
t.WriteByte('+')
}
default:
t.WriteByte(s[i])
}
}
return t.String(), nil
}

func escape(s string, mode encoding) string {
spaceCount, hexCount := 0, 0
for i := 0; i < len(s); i++ {
c := s[i]
if shouldEscape(c, mode) {
if c == ' ' && mode == encodeQueryComponent {
spaceCount++
} else {
hexCount++
}
}
}

if spaceCount == 0 && hexCount == 0 {
return s
}

var buf [64]byte
var t []byte

required := len(s) + 2*hexCount
if required <= len(buf) {
t = buf[:required]
} else {
t = make([]byte, required)
}

if hexCount == 0 {
copy(t, s)
for i := 0; i < len(s); i++ {
if s[i] == ' ' {
t[i] = '+'
}
}
return string(t)
}

j := 0
for i := 0; i < len(s); i++ {
switch c := s[i]; {
case c == ' ' && mode == encodeQueryComponent:
t[j] = '+'
j++
case shouldEscape(c, mode):
t[j] = '%'
t[j+1] = upperhex[c>>4]
t[j+2] = upperhex[c&15]
j += 3
default:
t[j] = s[i]
j++
}
}
return string(t)
}

func setPath(u *url.URL, p string) error {
path, err := unescape(p, encodePath)
if err != nil {
return err
}
u.Path = path
if escp := escape(path, encodePath); p == escp {
// Default encoding is fine.
u.RawPath = ""
} else {
u.RawPath = p
}
return nil
}

// JoinURLPath returns a new URL with the provided path elements joined to
// any existing path and the resulting path cleaned of any ./ or ../ elements.
// Any sequences of multiple / characters will be reduced to a single /.
func JoinURLPath(u *url.URL, elem ...string) *url.URL {

url := *u
if len(elem) > 0 {
elem = append([]string{u.EscapedPath()}, elem...)
p := path.Join(elem...)
// path.Join will remove any trailing slashes.
// Preserve at least one.
if strings.HasSuffix(elem[len(elem)-1], "/") && !strings.HasSuffix(p, "/") {
p += "/"
}
setPath(&url, p)
}
return &url
}
Loading

0 comments on commit 9cba4ee

Please sign in to comment.