diff --git a/go.mod b/go.mod index 3c4eb85..68d0ea3 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,8 @@ go 1.25.6 require ( github.com/cenk/backoff v2.2.1+incompatible - github.com/git-pkgs/purl v0.1.10 + github.com/git-pkgs/pom v0.1.2 + github.com/git-pkgs/purl v0.1.11 github.com/git-pkgs/spdx v0.1.2 github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 github.com/rubyist/circuitbreaker v2.2.1+incompatible @@ -12,11 +13,9 @@ require ( require ( github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect - github.com/git-pkgs/packageurl-go v0.3.1 // indirect github.com/git-pkgs/vers v0.2.4 // indirect github.com/github/go-spdx/v2 v2.4.0 // indirect + github.com/package-url/packageurl-go v0.1.6 // indirect github.com/peterbourgon/g2s v0.0.0-20170223122336-d4e7ad98afea // indirect golang.org/x/sync v0.20.0 // indirect ) - -replace github.com/package-url/packageurl-go => github.com/git-pkgs/packageurl-go v0.0.0-20260115093137-a0c26f7ee19e diff --git a/go.sum b/go.sum index 414cdf6..515904a 100644 --- a/go.sum +++ b/go.sum @@ -4,16 +4,18 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a h1:yDWHCSQ40h88yih2JAcL6Ls/kVkSE8GFACTGVnMPruw= github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a/go.mod h1:7Ga40egUymuWXxAe151lTNnCv97MddSOVsjpPPkityA= -github.com/git-pkgs/packageurl-go v0.3.1 h1:WM3RBABQZLaRBxgKyYughc3cVBE8KyQxbSC6Jt5ak7M= -github.com/git-pkgs/packageurl-go v0.3.1/go.mod h1:rcIxiG37BlQLB6FZfgdj9Fm7yjhRQd3l+5o7J0QPAk4= -github.com/git-pkgs/purl v0.1.10 h1:NMjeF10nzFn3tdQlz6rbmHB+i+YkyrFQxho3e33ePTQ= -github.com/git-pkgs/purl v0.1.10/go.mod h1:C5Vp/kyZ/wGckCLexx4wPVfUxEiToRkdsOPh5Z7ig/I= +github.com/git-pkgs/pom v0.1.2 h1:Y2hn8G9p88odySbELABvJtcBXogjwSnRgNJARDRigz8= +github.com/git-pkgs/pom v0.1.2/go.mod h1:ufdMBe1lKzqOeP9IUb9NPZ458xKV8E8NvuyBMxOfwIk= 
+github.com/git-pkgs/purl v0.1.11 h1:VS3tux2BHgOZEg/uhrbOUyoX0NSEPEfhEuF7EbzOAOM= +github.com/git-pkgs/purl v0.1.11/go.mod h1:jCXDpAOwFQZR27ALTaOLH1ByhEgkcC+06Hp9uzh02ZE= github.com/git-pkgs/spdx v0.1.2 h1:wHSK+CqFsO5N7yDTPvxDmer5LgNEa7vAsiZhi5Aci0A= github.com/git-pkgs/spdx v0.1.2/go.mod h1:V98MgZapNgYw54/pdGR82d7RU93qzJoybahbpZqTfw8= github.com/git-pkgs/vers v0.2.4 h1:Zr3jR/Xf1i/6cvBaJKPxhCwjzqz7uvYHE0Fhid/GPBk= github.com/git-pkgs/vers v0.2.4/go.mod h1:biTbSQK1qdbrsxDEKnqe3Jzclxz8vW6uDcwKjfUGcOo= github.com/github/go-spdx/v2 v2.4.0 h1:+4IwVwJJbm3rzvrQ6P1nI9BDMcy3la4RchRy5uehV/M= github.com/github/go-spdx/v2 v2.4.0/go.mod h1:/5rwgS0txhGtRdUZwc02bTglzg6HK3FfuEbECKlK2Sg= +github.com/package-url/packageurl-go v0.1.6 h1:YO3p6u1XmCUliivUg/qWphaY8vI6hxSnnPv7Bfg3m5M= +github.com/package-url/packageurl-go v0.1.6/go.mod h1:nKAWB8E6uk1MHqiS/lQb9pYBGH2+mdJ2PJc2s50dQY0= github.com/peterbourgon/g2s v0.0.0-20170223122336-d4e7ad98afea h1:sKwxy1H95npauwu8vtF95vG/syrL0p8fSZo/XlDg5gk= github.com/peterbourgon/g2s v0.0.0-20170223122336-d4e7ad98afea/go.mod h1:1VcHEd3ro4QMoHfiNl/j7Jkln9+KQuorp0PItHMJYNg= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/internal/maven/maven.go b/internal/maven/maven.go index f817edd..872a57f 100644 --- a/internal/maven/maven.go +++ b/internal/maven/maven.go @@ -4,20 +4,21 @@ package maven import ( "context" "encoding/xml" + "errors" "fmt" "net/url" "strings" "time" + "github.com/git-pkgs/pom" "github.com/git-pkgs/registries/internal/core" "github.com/git-pkgs/registries/internal/urlparser" ) const ( - DefaultURL = "https://repo1.maven.org/maven2" - SearchURL = "https://search.maven.org" - ecosystem = "maven" - maxParentDepth = 5 + DefaultURL = "https://repo1.maven.org/maven2" + SearchURL = "https://search.maven.org" + ecosystem = "maven" // minCoordParts is the minimum number of parts in a Maven coordinate (group:artifact) minCoordParts = 2 // coordPartsWithVersion is the number of parts when version is 
included (group:artifact:version) @@ -34,6 +35,7 @@ type Registry struct { baseURL string searchURL string client *core.Client + resolver *pom.Resolver urls *URLs } @@ -47,9 +49,30 @@ func New(baseURL string, client *core.Client) *Registry { client: client, } r.urls = &URLs{baseURL: r.baseURL} + r.resolver = pom.NewResolver(pom.NewCachingFetcher(&clientFetcher{client: client, baseURL: r.baseURL})) return r } +// clientFetcher adapts core.Client to pom.Fetcher so the resolver's HTTP +// goes through the same rate limiting, retries and circuit breaker as +// every other registry call. +type clientFetcher struct { + client *core.Client + baseURL string +} + +func (f *clientFetcher) Fetch(ctx context.Context, gav pom.GAV) (*pom.POM, error) { + body, err := f.client.GetBody(ctx, pom.POMURL(f.baseURL, gav)) + if err != nil { + return nil, err + } + return pom.ParsePOM(body) +} + +func (r *Registry) effectivePOM(ctx context.Context, groupID, artifactID, version string) (*pom.EffectivePOM, error) { + return r.resolver.Resolve(ctx, pom.GAV{GroupID: groupID, ArtifactID: artifactID, Version: version}, pom.Options{}) +} + func (r *Registry) Ecosystem() string { return ecosystem } @@ -69,58 +92,18 @@ type searchResponseBody struct { } type searchDoc struct { - ID string `json:"id"` - GroupID string `json:"g"` - ArtifactID string `json:"a"` - Version string `json:"latestVersion"` - Timestamp int64 `json:"timestamp"` - VersionCount int `json:"versionCount"` + ID string `json:"id"` + GroupID string `json:"g"` + ArtifactID string `json:"a"` + Version string `json:"latestVersion"` + Timestamp int64 `json:"timestamp"` + VersionCount int `json:"versionCount"` } -// POM XML structures -type pomXML struct { - XMLName xml.Name `xml:"project"` - GroupID string `xml:"groupId"` - ArtifactID string `xml:"artifactId"` - Version string `xml:"version"` - Name string `xml:"name"` - Description string `xml:"description"` - URL string `xml:"url"` - Licenses []pomLicense `xml:"licenses>license"` - 
SCM pomSCM `xml:"scm"` - Parent *pomParent `xml:"parent"` - Dependencies []pomDep `xml:"dependencies>dependency"` - DependencyManagement struct { - Dependencies []pomDep `xml:"dependencies>dependency"` - } `xml:"dependencyManagement"` +// pomDevelopers is the minimal XML shape needed for FetchMaintainers; the +// pom library doesn't carry the developers element. +type pomDevelopers struct { Developers []pomDeveloper `xml:"developers>developer"` - Properties map[string]string -} - -type pomParent struct { - GroupID string `xml:"groupId"` - ArtifactID string `xml:"artifactId"` - Version string `xml:"version"` -} - -type pomLicense struct { - Name string `xml:"name"` - URL string `xml:"url"` -} - -type pomSCM struct { - URL string `xml:"url"` - Connection string `xml:"connection"` - DevConnection string `xml:"developerConnection"` -} - -type pomDep struct { - GroupID string `xml:"groupId"` - ArtifactID string `xml:"artifactId"` - Version string `xml:"version"` - Scope string `xml:"scope"` - Optional string `xml:"optional"` - Type string `xml:"type"` } type pomDeveloper struct { @@ -171,8 +154,8 @@ func (r *Registry) FetchPackage(ctx context.Context, name string) (*core.Package if err := r.client.GetJSON(ctx, searchURL, &searchResp); err == nil && searchResp.Response.NumFound > 0 { doc := searchResp.Response.Docs[0] // Fetch the POM for more details - pom, _ := r.fetchPOM(ctx, groupID, artifactID, doc.Version, 0) - return r.packageFromSearchAndPOM(doc, pom), nil + ep, _ := r.effectivePOM(ctx, groupID, artifactID, doc.Version) + return r.packageFromSearchAndPOM(doc, ep), nil } // Fallback: try to get maven-metadata.xml @@ -198,8 +181,8 @@ func (r *Registry) FetchPackage(ctx context.Context, name string) (*core.Package latestVersion = metadata.Versioning.Versions[len(metadata.Versioning.Versions)-1] } - pom, _ := r.fetchPOM(ctx, groupID, artifactID, latestVersion, 0) - return r.packageFromMetadataAndPOM(metadata, pom), nil + ep, _ := r.effectivePOM(ctx, groupID, artifactID, latestVersion) + 
return r.packageFromMetadataAndPOM(metadata, ep), nil } type mavenMetadata struct { @@ -214,62 +197,7 @@ type versioning struct { Versions []string `xml:"versions>version"` } -func (r *Registry) fetchPOM(ctx context.Context, groupID, artifactID, version string, depth int) (*pomXML, error) { - if depth > maxParentDepth { - return nil, fmt.Errorf("max parent depth exceeded") - } - - pomURL := fmt.Sprintf("%s/%s/%s/%s/%s-%s.pom", - r.baseURL, groupIDToPath(groupID), artifactID, version, artifactID, version) - - body, err := r.client.GetBody(ctx, pomURL) - if err != nil { - return nil, err - } - - var pom pomXML - if err := xml.Unmarshal(body, &pom); err != nil { - return nil, err - } - - // Resolve parent POM if present - if pom.Parent != nil && depth < maxParentDepth { - parentPOM, err := r.fetchPOM(ctx, pom.Parent.GroupID, pom.Parent.ArtifactID, pom.Parent.Version, depth+1) - if err == nil { - mergePOMs(&pom, parentPOM) - } - } - - // Fill in groupID/version from parent if not set - if pom.GroupID == "" && pom.Parent != nil { - pom.GroupID = pom.Parent.GroupID - } - if pom.Version == "" && pom.Parent != nil { - pom.Version = pom.Parent.Version - } - - return &pom, nil -} - -func mergePOMs(child, parent *pomXML) { - if child.Description == "" { - child.Description = parent.Description - } - if child.URL == "" { - child.URL = parent.URL - } - if len(child.Licenses) == 0 { - child.Licenses = parent.Licenses - } - if child.SCM.URL == "" { - child.SCM = parent.SCM - } - if len(child.Developers) == 0 { - child.Developers = parent.Developers - } -} - -func (r *Registry) packageFromSearchAndPOM(doc searchDoc, pom *pomXML) *core.Package { +func (r *Registry) packageFromSearchAndPOM(doc searchDoc, ep *pom.EffectivePOM) *core.Package { pkg := &core.Package{ Name: fmt.Sprintf("%s:%s", doc.GroupID, doc.ArtifactID), Namespace: doc.GroupID, @@ -280,17 +208,11 @@ func (r *Registry) packageFromSearchAndPOM(doc searchDoc, pom *pomXML) *core.Pac }, } - if pom != nil { - 
pkg.Description = pom.Description - pkg.Homepage = pom.URL - pkg.Repository = extractRepository(pom) - pkg.Licenses = formatLicenses(pom.Licenses) - } - + applyPOMMetadata(pkg, ep) return pkg } -func (r *Registry) packageFromMetadataAndPOM(metadata mavenMetadata, pom *pomXML) *core.Package { +func (r *Registry) packageFromMetadataAndPOM(metadata mavenMetadata, ep *pom.EffectivePOM) *core.Package { pkg := &core.Package{ Name: fmt.Sprintf("%s:%s", metadata.GroupID, metadata.ArtifactID), Namespace: metadata.GroupID, @@ -300,26 +222,22 @@ func (r *Registry) packageFromMetadataAndPOM(metadata mavenMetadata, pom *pomXML }, } - if pom != nil { - pkg.Description = pom.Description - pkg.Homepage = pom.URL - pkg.Repository = extractRepository(pom) - pkg.Licenses = formatLicenses(pom.Licenses) - } - + applyPOMMetadata(pkg, ep) return pkg } -func extractRepository(pom *pomXML) string { - return urlparser.FirstRepoURL(pom.SCM.URL, pom.SCM.Connection) -} - -func formatLicenses(licenses []pomLicense) string { - names := make([]string, len(licenses)) - for i, l := range licenses { +func applyPOMMetadata(pkg *core.Package, ep *pom.EffectivePOM) { + if ep == nil { + return + } + pkg.Description = ep.Description + pkg.Homepage = ep.URL + pkg.Repository = urlparser.FirstRepoURL(ep.SCM.URL, ep.SCM.Connection) + names := make([]string, len(ep.Licenses)) + for i, l := range ep.Licenses { names[i] = l.Name } - return strings.Join(names, ",") + pkg.Licenses = strings.Join(names, ",") } func (r *Registry) FetchVersions(ctx context.Context, name string) ([]core.Version, error) { @@ -381,28 +299,26 @@ func (r *Registry) FetchDependencies(ctx context.Context, name, version string) return nil, fmt.Errorf("invalid Maven coordinate: %s (expected groupId:artifactId)", name) } - pom, err := r.fetchPOM(ctx, groupID, artifactID, version, 0) + ep, err := r.effectivePOM(ctx, groupID, artifactID, version) if err != nil { - if httpErr, ok := err.(*core.HTTPError); ok && httpErr.IsNotFound() { + var 
httpErr *core.HTTPError + if errors.As(err, &httpErr) && httpErr.IsNotFound() { return nil, &core.NotFoundError{Ecosystem: ecosystem, Name: name, Version: version} } return nil, err } - var deps []core.Dependency - for _, d := range pom.Dependencies { + deps := make([]core.Dependency, 0, len(ep.Dependencies)) + for _, d := range ep.Dependencies { scope := mapMavenScope(d.Scope) - optional := d.Optional == "true" - - if optional { + if d.Optional { scope = core.Optional } - deps = append(deps, core.Dependency{ Name: fmt.Sprintf("%s:%s", d.GroupID, d.ArtifactID), Requirements: d.Version, Scope: scope, - Optional: optional, + Optional: d.Optional, }) } @@ -440,13 +356,17 @@ func (r *Registry) FetchMaintainers(ctx context.Context, name string) ([]core.Ma } latestVersion := versions[0].Number - pom, err := r.fetchPOM(ctx, groupID, artifactID, latestVersion, 0) + body, err := r.client.GetBody(ctx, pom.POMURL(r.baseURL, pom.GAV{GroupID: groupID, ArtifactID: artifactID, Version: latestVersion})) if err != nil { return nil, err } + var devs pomDevelopers + if err := xml.Unmarshal(body, &devs); err != nil { + return nil, err + } - maintainers := make([]core.Maintainer, len(pom.Developers)) - for i, dev := range pom.Developers { + maintainers := make([]core.Maintainer, len(devs.Developers)) + for i, dev := range devs.Developers { maintainers[i] = core.Maintainer{ UUID: dev.ID, Login: dev.ID, diff --git a/internal/maven/maven_test.go b/internal/maven/maven_test.go index 3d2a19c..b4eef91 100644 --- a/internal/maven/maven_test.go +++ b/internal/maven/maven_test.go @@ -12,10 +12,10 @@ import ( func TestParseCoordinates(t *testing.T) { tests := []struct { - input string - groupID string + input string + groupID string artifactID string - version string + version string }{ {"com.google.guava:guava", "com.google.guava", "guava", ""}, {"com.google.guava:guava:32.1.0", "com.google.guava", "guava", "32.1.0"}, @@ -42,10 +42,10 @@ func TestFetchPackage(t *testing.T) { NumFound: 1, Docs: 
[]searchDoc{ { - ID: "com.google.guava:guava", - GroupID: "com.google.guava", - ArtifactID: "guava", - Version: "32.1.0-jre", + ID: "com.google.guava:guava", + GroupID: "com.google.guava", + ArtifactID: "guava", + Version: "32.1.0-jre", VersionCount: 150, }, }, @@ -325,6 +325,17 @@ func TestParentPOMResolution(t *testing.T) { child Child Project + + + com.example + sibling + ${project.version} + + + org.lib + lib + + ` _, _ = w.Write([]byte(pom)) }) @@ -346,6 +357,18 @@ func TestParentPOMResolution(t *testing.T) { https://github.com/example/parent + + 2.5 + + + + + org.lib + lib + ${lib.version} + + + ` _, _ = w.Write([]byte(pom)) }) @@ -355,20 +378,35 @@ func TestParentPOMResolution(t *testing.T) { reg := New(server.URL, core.DefaultClient()) - pom, err := reg.fetchPOM(context.Background(), "com.example", "child", "1.0.0", 0) + ep, err := reg.effectivePOM(context.Background(), "com.example", "child", "1.0.0") if err != nil { - t.Fatalf("fetchPOM failed: %v", err) + t.Fatalf("effectivePOM failed: %v", err) } // Should inherit from parent - if pom.Description != "Parent project description" { - t.Errorf("expected inherited description, got %q", pom.Description) + if ep.Description != "Parent project description" { + t.Errorf("expected inherited description, got %q", ep.Description) + } + if ep.GAV.GroupID != "com.example" { + t.Errorf("expected groupId from parent, got %q", ep.GAV.GroupID) + } + if len(ep.Licenses) != 1 || ep.Licenses[0].Name != "MIT" { + t.Errorf("expected inherited license, got %v", ep.Licenses) + } + + deps, err := reg.FetchDependencies(context.Background(), "com.example:child", "1.0.0") + if err != nil { + t.Fatalf("FetchDependencies failed: %v", err) + } + got := map[string]string{} + for _, d := range deps { + got[d.Name] = d.Requirements } - if pom.GroupID != "com.example" { - t.Errorf("expected groupId from parent, got %q", pom.GroupID) + if got["com.example:sibling"] != "1.0.0" { + t.Errorf("expected ${project.version} interpolated to 
1.0.0, got %q", got["com.example:sibling"]) } - if len(pom.Licenses) != 1 || pom.Licenses[0].Name != "MIT" { - t.Errorf("expected inherited license, got %v", pom.Licenses) + if got["org.lib:lib"] != "2.5" { + t.Errorf("expected version from parent depMgmt+property, got %q", got["org.lib:lib"]) } }