From bbe9ca366dd8fdadfa06714ac207de07e64d411c Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 30 Apr 2026 14:15:00 +0100 Subject: [PATCH] Redact credentials from git remote URLs in reports git remote get-url returns whatever is in .git/config, which can include personal access tokens embedded as userinfo. These were being printed verbatim in the JSON, text and markdown reports, and were also being passed to the scorecard API during enrichment. Remote URLs now go through redactURL before they reach the report. Any password component is stripped, and bare usernames that look like tokens (known github_pat_/ghp_/glpat- prefixes, or 24+ char alphanumeric blobs) are replaced with REDACTED. Ordinary ssh git@ remotes and short real usernames are left untouched. Also stop echoing the raw clone URL in remote.cloneURL errors; git's own stderr already prints a credential-safe failure message. --- detect/detect.go | 75 +++++++++++++++++++++++++++++- detect/redact_test.go | 104 ++++++++++++++++++++++++++++++++++++++++++ remote/remote.go | 4 +- 3 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 detect/redact_test.go diff --git a/detect/detect.go b/detect/detect.go index ffa2a5c..f7bf3d3 100644 --- a/detect/detect.go +++ b/detect/detect.go @@ -6,10 +6,12 @@ import ( "errors" "fmt" "io" + "net/url" "os" "os/exec" "path" "path/filepath" + "regexp" "sort" "strings" "sync" @@ -1260,7 +1262,7 @@ func (e *Engine) detectGit(absPath string) *brief.GitInfo { for _, name := range strings.Fields(string(out)) { if url, err := e.git(absPath, "remote", "get-url", name); err == nil { mu.Lock() - info.Remotes[name] = strings.TrimSpace(string(url)) + info.Remotes[name] = redactURL(strings.TrimSpace(string(url))) mu.Unlock() } } @@ -1289,6 +1291,77 @@ func (e *Engine) detectGit(absPath string) *brief.GitInfo { return info } +const redactedPlaceholder = "REDACTED" + +var scpURLUserinfo = regexp.MustCompile(`^[^@/]+(:[^@/]*)?@`) + +// redactURL strips embedded credentials 
from a git remote URL so they don't +// end up in reports or terminal scrollback. Tokens can appear as either the +// password or the username (e.g. https://<token>@github.com/...), so the whole +// userinfo section is replaced rather than relying on url.Redacted. +func redactURL(raw string) string { + if !strings.Contains(raw, "@") { + return raw + } + + if u, err := url.Parse(raw); err == nil && u.User != nil { + if redactUserinfo(u.User) { + u.User = url.User(redactedPlaceholder) + return u.String() + } + return raw + } + + // scp-like syntax (user@host:path) that url.Parse can't handle. + if loc := scpURLUserinfo.FindStringIndex(raw); loc != nil { + userinfo := raw[:loc[1]-1] + if strings.Contains(userinfo, ":") || looksLikeToken(userinfo) { + return redactedPlaceholder + "@" + raw[loc[1]:] + } + } + + return raw +} + +func redactUserinfo(u *url.Userinfo) bool { + if _, hasPassword := u.Password(); hasPassword { + return true + } + return looksLikeToken(u.Username()) +} + +var tokenPrefixes = []string{ + "github_pat_", "ghp_", "gho_", "ghu_", "ghs_", "ghr_", "github_", + "glpat-", "gldt-", "glrt-", "glsoat-", "glcbt-", + "ATCTT", "BBDC-", +} + +func looksLikeToken(s string) bool { + if s == "" { + return false + } + for _, p := range tokenPrefixes { + if strings.HasPrefix(s, p) { + return true + } + } + // Heuristic: long alphanumeric blobs used as bare usernames are almost + // certainly access tokens rather than real account names. + const suspiciousLen = 24 + if len(s) < suspiciousLen { + return false + } + for _, r := range s { + if r == '-' || r == '_' || r == '.' { + continue + } + if (r < '0' || r > '9') && (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') { + return false + } + } + return true +} + // git runs a git command in the given directory and returns its output. func (e *Engine) git(dir string, args ...string) ([]byte, error) { cmd := exec.Command("git", args...)
diff --git a/detect/redact_test.go b/detect/redact_test.go new file mode 100644 index 0000000..8b4c9c9 --- /dev/null +++ b/detect/redact_test.go @@ -0,0 +1,104 @@ +package detect + +import ( + "fmt" + "strings" + "testing" +) + +func TestRedactURL(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + { + "https token as username", + "https://github_pat_11ABCDEF0abcdefghijklmnop@github.com/owner/repo.git", + "https://REDACTED@github.com/owner/repo.git", + }, + { + "https user and password", + "https://x-access-token:ghp_abcdefghijklmnopqrstuvwxyz012345@github.com/owner/repo.git", + "https://REDACTED@github.com/owner/repo.git", + }, + { + "https basic auth", + "https://deploy:hunter2@gitlab.example.com/group/proj.git", + "https://REDACTED@gitlab.example.com/group/proj.git", + }, + { + "http long opaque token", + "http://0123456789abcdef0123456789abcdef01234567@bitbucket.org/team/repo.git", + "http://REDACTED@bitbucket.org/team/repo.git", + }, + { + "gitlab pat prefix", + "https://glpat-xxx@gitlab.com/group/proj.git", + "https://REDACTED@gitlab.com/group/proj.git", + }, + { + "ssh url with git user left alone", + "ssh://git@github.com/owner/repo.git", + "ssh://git@github.com/owner/repo.git", + }, + { + "scp syntax left alone", + "git@github.com:owner/repo.git", + "git@github.com:owner/repo.git", + }, + { + "scp syntax with password", + "deploy:secret@host.example.com:path/repo.git", + "REDACTED@host.example.com:path/repo.git", + }, + { + "plain https no userinfo", + "https://github.com/owner/repo.git", + "https://github.com/owner/repo.git", + }, + { + "short real username preserved", + "https://andrew@git.example.com/repo.git", + "https://andrew@git.example.com/repo.git", + }, + { + "local path", + "/srv/git/repo.git", + "/srv/git/repo.git", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := redactURL(tc.in) + if got != tc.want { + t.Errorf("redactURL(%q)\n got %q\n want %q", tc.in, got, tc.want) + } + 
}) + } +} + +func TestRedactURLNeverContainsToken(t *testing.T) { + // Fixtures deliberately don't match real provider token regexes so + // secret scanners don't block pushes of this file. + tokens := []string{ + "github_pat_" + strings.Repeat("X", 80), + "ghp_" + strings.Repeat("X", 36), + "glpat-" + strings.Repeat("X", 20), + } + wrappers := []string{ + "https://%s@github.com/o/r.git", + "https://x:%s@github.com/o/r.git", + "https://%s:x-oauth-basic@github.com/o/r.git", + } + for _, tok := range tokens { + for _, w := range wrappers { + in := fmt.Sprintf(w, tok) + out := redactURL(in) + if strings.Contains(out, tok) { + t.Fatalf("token leaked: in=%q out=%q", in, out) + } + } + } +} diff --git a/remote/remote.go b/remote/remote.go index b11e08e..d5cc939 100644 --- a/remote/remote.go +++ b/remote/remote.go @@ -147,7 +147,9 @@ func cloneURL(ctx context.Context, url, name string, opts Options) (*Source, err if managed { _ = os.RemoveAll(dir) } - return nil, fmt.Errorf("cloning %s: %w", url, err) + // git already wrote a (credential-redacted) failure message to + // stderr, so don't repeat the raw URL here. + return nil, fmt.Errorf("git clone failed: %w", err) } cleanup := func() {