Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a more general (and faster) method to sanitize URLs with credentials #19239

Merged
merged 16 commits into from
Mar 31, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion models/migrations/v180.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func removeCredentials(payload string) (string, error) {

opts.AuthPassword = ""
opts.AuthToken = ""
opts.CloneAddr = util.NewStringURLSanitizer(opts.CloneAddr, true).Replace(opts.CloneAddr)
opts.CloneAddr = util.SanitizeCredentialURLs(opts.CloneAddr)

confBytes, err := json.Marshal(opts)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion models/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func FinishMigrateTask(task *Task) error {
}
conf.AuthPassword = ""
conf.AuthToken = ""
conf.CloneAddr = util.NewStringURLSanitizer(conf.CloneAddr, true).Replace(conf.CloneAddr)
conf.CloneAddr = util.SanitizeCredentialURLs(conf.CloneAddr)
conf.AuthPasswordEncrypted = ""
conf.AuthTokenEncrypted = ""
conf.CloneAddrEncrypted = ""
Expand Down
2 changes: 1 addition & 1 deletion modules/git/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func (c *Command) RunWithContext(rc *RunContext) error {
args = make([]string, len(c.args))
copy(args, c.args)
for _, urlArgIndex := range argSensitiveURLIndexes {
args[urlArgIndex] = util.NewStringURLSanitizer(args[urlArgIndex], true).Replace(args[urlArgIndex])
args[urlArgIndex] = util.SanitizeCredentialURLs(args[urlArgIndex])
}
}
desc = fmt.Sprintf("%s %s [repo_path: %s]", c.name, strings.Join(args, " "), rc.Dir)
Expand Down
4 changes: 2 additions & 2 deletions modules/git/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func CloneWithArgs(ctx context.Context, from, to string, args []string, opts Clo
cmd.AddArguments("--", from, to)

if strings.Contains(from, "://") && strings.Contains(from, "@") {
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, util.NewStringURLSanitizer(from, true).Replace(from), to, opts.Shared, opts.Mirror, opts.Depth))
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, util.SanitizeCredentialURLs(from), to, opts.Shared, opts.Mirror, opts.Depth))
} else {
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, from, to, opts.Shared, opts.Mirror, opts.Depth))
}
Expand Down Expand Up @@ -209,7 +209,7 @@ func Push(ctx context.Context, repoPath string, opts PushOptions) error {
cmd.AddArguments(opts.Branch)
}
if strings.Contains(opts.Remote, "://") && strings.Contains(opts.Remote, "@") {
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, util.NewStringURLSanitizer(opts.Remote, true).Replace(opts.Remote), opts.Force, opts.Mirror))
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, util.SanitizeCredentialURLs(opts.Remote), opts.Force, opts.Mirror))
} else {
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, opts.Remote, opts.Force, opts.Mirror))
}
Expand Down
86 changes: 48 additions & 38 deletions modules/util/sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,69 @@
package util

import (
"net/url"
"strings"
)
"bytes"

const (
userPlaceholder = "sanitized-credential"
unparsableURL = "(unparsable url)"
"github.com/yuin/goldmark/util"
)

type sanitizedError struct {
err error
replacer *strings.Replacer
err error
}

func (err sanitizedError) Error() string {
return err.replacer.Replace(err.err.Error())
return SanitizeCredentialURLs(err.err.Error())
}

// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text.
func NewSanitizedError(err error, oldnew ...string) error {
return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)}
func (err sanitizedError) Unwrap() error {
return err.err
}

// NewURLSanitizedError wraps an error and replaces the url credential or removes them.
func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error {
return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)}
// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
func SanitizeErrorCredentialURLs(err error) error {
return sanitizedError{err: err}
}

// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them.
// If the url can't get parsed it gets replaced with a placeholder string.
func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error {
return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)}
}
const userPlaceholder = "sanitized-credential"

// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed.
func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer {
old := u.String()
var schemeSep = []byte("://")

if u.User != nil && usePlaceholder {
u.User = url.User(userPlaceholder)
} else {
u.User = nil
// SanitizeCredentialURLs remove all credentials in URLs for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code only removes the credential from the first URL it finds in the string

If we want to strip out credentials from all urls in the string we're gonna need to do something else. (Possibly iterate across all "://" in the string however see below...)

One other issue is that there are URLs which do not have the ":", for example //username:password@www.google.com which says use the current protocol. I think /\username:password@www.google.com also works.

IMO I don't think we need to necessarily sanitize these - we should probably just make it clear that we're not going to do this.

Copy link
Contributor Author

@wxiaoguang wxiaoguang Mar 30, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I will add it to comment. IMO "//xxxx" couldn't be treated as a valid URL in strings. It's only valid in a context with scheme already (say, inside a browser)

And /\ is not standard, it's just a browser's dirty hack. Golang's url.Parse doesn't support /\ either.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// SanitizeCredentialURLs remove all credentials in URLs for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"This code only removes the credential from the first URL it finds in the string"

No, it removes all. See the test case

		{
			"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
			"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
		},

func SanitizeCredentialURLs(s string) string {
bs := util.StringToReadOnlyBytes(s)
schemeSepPos := bytes.Index(bs, schemeSep)
if schemeSepPos == -1 || bytes.IndexByte(bs, '@') == -1 {
wxiaoguang marked this conversation as resolved.
Show resolved Hide resolved
return s // fast return if there is no URL scheme or no userinfo
}
return strings.NewReplacer(old, u.String())
}

// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed.
// If the url can't get parsed it gets replaced with a placeholder string
func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer {
u, err := url.Parse(unsanitizedURL)
if err != nil {
// don't log the error, since it might contain unsanitized URL.
return strings.NewReplacer(unsanitizedURL, unparsableURL)
out := make([]byte, 0, len(bs)+len(userPlaceholder))
for schemeSepPos != -1 {
schemeSepPos += 3 // skip the "://"
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
sepLoop:
for ; sepEndPos < len(bs); sepEndPos++ {
c := bs[sepEndPos]
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
continue
}
switch c {
case '@':
sepAtPos = sepEndPos
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
default:
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
}
}
if sepAtPos != -1 {
out = append(out, bs[:schemeSepPos]...)
out = append(out, userPlaceholder...)
out = append(out, bs[sepAtPos:sepEndPos]...)
} else {
out = append(out, bs[:sepEndPos]...)
}
bs = bs[sepEndPos:]
schemeSepPos = bytes.Index(bs, schemeSep)
}
return NewURLSanitizer(u, usePlaceholder)
out = append(out, bs...)
return util.BytesToReadOnlyString(out)
}
139 changes: 19 additions & 120 deletions modules/util/sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,154 +11,53 @@ import (
"github.com/stretchr/testify/assert"
)

func TestNewSanitizedError(t *testing.T) {
err := errors.New("error while secret on test")
err2 := NewSanitizedError(err)
assert.Equal(t, err.Error(), err2.Error())

cases := []struct {
input error
oldnew []string
expected string
}{
// case 0
{
errors.New("error while secret on test"),
[]string{"secret", "replaced"},
"error while replaced on test",
},
// case 1
{
errors.New("error while sec-ret on test"),
[]string{"secret", "replaced"},
"error while sec-ret on test",
},
}

for n, c := range cases {
err := NewSanitizedError(c.input, c.oldnew...)

assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n)
}
func TestSanitizeErrorCredentialURLs(t *testing.T) {
err := errors.New("error with https://a@b.com")
se := SanitizeErrorCredentialURLs(err)
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
}

func TestNewStringURLSanitizer(t *testing.T) {
func TestSanitizeCredentialURLs(t *testing.T) {
cases := []struct {
input string
placeholder bool
expected string
input string
expected string
}{
// case 0
{
"https://github.com/go-gitea/test_repo.git",
true,
"https://github.com/go-gitea/test_repo.git",
},
// case 1
{
"https://github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
},
// case 2
{
"https://mytoken@github.com/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
// case 3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need this normal cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which case? I think I have covered most.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean why we delete these original test cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All code are rewritten, all new cases cover old ones. If you feel which is missing, please just point out and add it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And many cases are indeed the old cases, for example, these github.com cases.

{
"https://mytoken@github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
},
// case 4
{
"https://user:password@github.com/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
// case 5
{
"https://user:password@github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"ftp://x@",
"ftp://" + userPlaceholder + "@",
},
// case 6
{
"https://gi\nthub.com/go-gitea/test_repo.git",
false,
unparsableURL,
"ftp://x/@",
"ftp://x/@",
},
}

for n, c := range cases {
// uses NewURLSanitizer internally
result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input)

assert.Equal(t, c.expected, result, "case %d: error should match", n)
}
}

func TestNewStringURLSanitizedError(t *testing.T) {
cases := []struct {
input string
placeholder bool
expected string
}{
// case 0
{
"https://github.com/go-gitea/test_repo.git",
true,
"https://github.com/go-gitea/test_repo.git",
"ftp://@x/@", // test multiple @ chars
"ftp://" + userPlaceholder + "@x/@",
},
// case 1
{
"https://github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"😊ftp://@x😊", // test unicode
"😊ftp://" + userPlaceholder + "@x😊",
},
// case 2
{
"https://mytoken@github.com/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
},
// case 3
{
"https://mytoken@github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
},
// case 4
{
"https://user:password@github.com/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
// case 5
{
"https://user:password@github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
},
// case 6
{
"https://gi\nthub.com/go-gitea/test_repo.git",
false,
unparsableURL,
},
}

encloseText := func(input string) string {
return "test " + input + " test"
}

for n, c := range cases {
err := errors.New(encloseText(c.input))

result := NewStringURLSanitizedError(err, c.input, c.placeholder)

assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n)
result := SanitizeCredentialURLs(c.input)
assert.Equal(t, c.expected, result, "case %d: error should match", n)
}
}
2 changes: 1 addition & 1 deletion routers/api/v1/repo/migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func handleMigrateError(ctx *context.APIContext, repoOwner *user_model.User, rem
case base.IsErrNotSupported(err):
ctx.Error(http.StatusUnprocessableEntity, "", err)
default:
err = util.NewStringURLSanitizedError(err, remoteAddr, true)
err = util.SanitizeErrorCredentialURLs(err)
if strings.Contains(err.Error(), "Authentication failed") ||
strings.Contains(err.Error(), "Bad credentials") ||
strings.Contains(err.Error(), "could not read Username") {
Expand Down
3 changes: 1 addition & 2 deletions routers/web/repo/migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ func handleMigrateError(ctx *context.Context, owner *user_model.User, err error,
ctx.Data["Err_RepoName"] = true
ctx.RenderWithErr(ctx.Tr("repo.form.name_pattern_not_allowed", err.(db.ErrNamePatternNotAllowed).Pattern), tpl, form)
default:
remoteAddr, _ := forms.ParseRemoteAddr(form.CloneAddr, form.AuthUsername, form.AuthPassword)
err = util.NewStringURLSanitizedError(err, remoteAddr, true)
err = util.SanitizeErrorCredentialURLs(err)
if strings.Contains(err.Error(), "Authentication failed") ||
strings.Contains(err.Error(), "Bad credentials") ||
strings.Contains(err.Error(), "could not read Username") {
Expand Down
Loading