diff --git a/detect/detect.go b/detect/detect.go
index ffa2a5c..f7bf3d3 100644
--- a/detect/detect.go
+++ b/detect/detect.go
@@ -6,10 +6,12 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"net/url"
 	"os"
 	"os/exec"
 	"path"
 	"path/filepath"
+	"regexp"
 	"sort"
 	"strings"
 	"sync"
@@ -1260,7 +1262,7 @@ func (e *Engine) detectGit(absPath string) *brief.GitInfo {
 			for _, name := range strings.Fields(string(out)) {
 				if url, err := e.git(absPath, "remote", "get-url", name); err == nil {
 					mu.Lock()
-					info.Remotes[name] = strings.TrimSpace(string(url))
+					info.Remotes[name] = redactURL(strings.TrimSpace(string(url)))
 					mu.Unlock()
 				}
 			}
@@ -1289,6 +1291,109 @@ func (e *Engine) detectGit(absPath string) *brief.GitInfo {
 	return info
 }
 
+const redactedPlaceholder = "REDACTED"
+
+var (
+	// scpURLUserinfo matches the userinfo prefix of an scp-like remote
+	// (user@host:path), including the trailing "@".
+	scpURLUserinfo = regexp.MustCompile(`^[^@/]+(:[^@/]*)?@`)
+
+	// schemeURLUserinfo captures the raw userinfo of scheme://userinfo@host
+	// without decoding it, for URLs that url.Parse rejects.
+	schemeURLUserinfo = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9+.-]*://([^/?#]*)@`)
+)
+
+// redactURL strips embedded credentials from a git remote URL so they don't
+// end up in reports or terminal scrollback. Tokens can appear as either the
+// password or the username (e.g. https://<token>@github.com/...), so the whole
+// userinfo section is replaced rather than relying on url.Redacted.
+//
+// URLs without userinfo, or whose userinfo is a plain account name such as
+// "git", are returned unchanged.
+func redactURL(raw string) string {
+	if !strings.Contains(raw, "@") {
+		return raw
+	}
+
+	if u, err := url.Parse(raw); err == nil && u.User != nil {
+		if redactUserinfo(u.User) {
+			u.User = url.User(redactedPlaceholder)
+			return u.String()
+		}
+		return raw
+	}
+
+	// url.Parse rejects userinfo it considers malformed (a bare "%", "^",
+	// ...), but the secret is still sitting in the string. Match the
+	// scheme://userinfo@ prefix textually so it is not returned verbatim.
+	if loc := schemeURLUserinfo.FindStringSubmatchIndex(raw); loc != nil {
+		if rawUserinfoIsSecret(raw[loc[2]:loc[3]]) {
+			return raw[:loc[2]] + redactedPlaceholder + raw[loc[3]:]
+		}
+		return raw
+	}
+
+	// scp-like syntax (user@host:path) that url.Parse can't handle.
+	if loc := scpURLUserinfo.FindStringIndex(raw); loc != nil {
+		if rawUserinfoIsSecret(raw[:loc[1]-1]) {
+			return redactedPlaceholder + "@" + raw[loc[1]:]
+		}
+	}
+
+	return raw
+}
+
+// redactUserinfo reports whether parsed userinfo carries a secret: either an
+// explicit password or a username that looks like an access token.
+func redactUserinfo(u *url.Userinfo) bool {
+	if _, hasPassword := u.Password(); hasPassword {
+		return true
+	}
+	return looksLikeToken(u.Username())
+}
+
+// rawUserinfoIsSecret is the counterpart of redactUserinfo for userinfo that
+// was cut out of the URL textually: a ":" means a password is present,
+// otherwise the bare username is checked for token shape.
+func rawUserinfoIsSecret(userinfo string) bool {
+	return strings.Contains(userinfo, ":") || looksLikeToken(userinfo)
+}
+
+// tokenPrefixes lists prefixes that mark a username as an access token.
+var tokenPrefixes = []string{
+	"github_pat_", "ghp_", "gho_", "ghu_", "ghs_", "ghr_", "github_",
+	"glpat-", "gldt-", "glrt-", "glsoat-", "glcbt-",
+	"ATCTT", "BBDC-",
+}
+
+// looksLikeToken reports whether s starts with a known token prefix or is a
+// long (24+ byte) run of ASCII letters, digits, '-', '_' and '.'.
+func looksLikeToken(s string) bool {
+	if s == "" {
+		return false
+	}
+	for _, p := range tokenPrefixes {
+		if strings.HasPrefix(s, p) {
+			return true
+		}
+	}
+	// Heuristic: long alphanumeric blobs used as bare usernames are almost
+	// certainly access tokens rather than real account names.
+	const suspiciousLen = 24
+	if len(s) < suspiciousLen {
+		return false
+	}
+	for _, r := range s {
+		if r == '-' || r == '_' || r == '.' {
+			continue
+		}
+		if (r < '0' || r > '9') && (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') {
+			return false
+		}
+	}
+	return true
+}
+
 // git runs a git command in the given directory and returns its output.
 func (e *Engine) git(dir string, args ...string) ([]byte, error) {
 	cmd := exec.Command("git", args...)
diff --git a/detect/redact_test.go b/detect/redact_test.go
new file mode 100644
index 0000000..8b4c9c9
--- /dev/null
+++ b/detect/redact_test.go
@@ -0,0 +1,114 @@
+package detect
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+)
+
+func TestRedactURL(t *testing.T) {
+	cases := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{
+			"https token as username",
+			"https://github_pat_11ABCDEF0abcdefghijklmnop@github.com/owner/repo.git",
+			"https://REDACTED@github.com/owner/repo.git",
+		},
+		{
+			"https user and password",
+			"https://x-access-token:ghp_abcdefghijklmnopqrstuvwxyz012345@github.com/owner/repo.git",
+			"https://REDACTED@github.com/owner/repo.git",
+		},
+		{
+			"https basic auth",
+			"https://deploy:hunter2@gitlab.example.com/group/proj.git",
+			"https://REDACTED@gitlab.example.com/group/proj.git",
+		},
+		{
+			"http long opaque token",
+			"http://0123456789abcdef0123456789abcdef01234567@bitbucket.org/team/repo.git",
+			"http://REDACTED@bitbucket.org/team/repo.git",
+		},
+		{
+			"gitlab pat prefix",
+			"https://glpat-xxx@gitlab.com/group/proj.git",
+			"https://REDACTED@gitlab.com/group/proj.git",
+		},
+		{
+			"ssh url with git user left alone",
+			"ssh://git@github.com/owner/repo.git",
+			"ssh://git@github.com/owner/repo.git",
+		},
+		{
+			"scp syntax left alone",
+			"git@github.com:owner/repo.git",
+			"git@github.com:owner/repo.git",
+		},
+		{
+			"scp syntax with password",
+			"deploy:secret@host.example.com:path/repo.git",
+			"REDACTED@host.example.com:path/repo.git",
+		},
+		{
+			"plain https no userinfo",
+			"https://github.com/owner/repo.git",
+			"https://github.com/owner/repo.git",
+		},
+		{
+			"short real username preserved",
+			"https://andrew@git.example.com/repo.git",
+			"https://andrew@git.example.com/repo.git",
+		},
+		{
+			"local path",
+			"/srv/git/repo.git",
+			"/srv/git/repo.git",
+		},
+		{
+			"https password that url.Parse rejects",
+			"https://deploy:100%sure@gitlab.example.com/group/proj.git",
+			"https://REDACTED@gitlab.example.com/group/proj.git",
+		},
+		{
+			"unparseable ssh url with git user left alone",
+			"ssh://git@github.com:owner/repo.git",
+			"ssh://git@github.com:owner/repo.git",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := redactURL(tc.in)
+			if got != tc.want {
+				t.Errorf("redactURL(%q)\n got %q\n want %q", tc.in, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestRedactURLNeverContainsToken(t *testing.T) {
+	// Fixtures deliberately don't match real provider token regexes so
+	// secret scanners don't block pushes of this file.
+	tokens := []string{
+		"github_pat_" + strings.Repeat("X", 80),
+		"ghp_" + strings.Repeat("X", 36),
+		"glpat-" + strings.Repeat("X", 20),
+	}
+	wrappers := []string{
+		"https://%s@github.com/o/r.git",
+		"https://x:%s@github.com/o/r.git",
+		"https://%s:x-oauth-basic@github.com/o/r.git",
+	}
+	for _, tok := range tokens {
+		for _, w := range wrappers {
+			in := fmt.Sprintf(w, tok)
+			out := redactURL(in)
+			if strings.Contains(out, tok) {
+				t.Fatalf("token leaked: in=%q out=%q", in, out)
+			}
+		}
+	}
+}
diff --git a/remote/remote.go b/remote/remote.go
index b11e08e..d5cc939 100644
--- a/remote/remote.go
+++ b/remote/remote.go
@@ -147,7 +147,9 @@ func cloneURL(ctx context.Context, url, name string, opts Options) (*Source, err
 		if managed {
 			_ = os.RemoveAll(dir)
 		}
-		return nil, fmt.Errorf("cloning %s: %w", url, err)
+		// git already wrote a (credential-redacted) failure message to
+		// stderr, so don't repeat the raw URL here.
+		return nil, fmt.Errorf("git clone failed: %w", err)
 	}
 
 	cleanup := func() {