From c45861cfde7981e2f0953cbf05cbe14e3eedd4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Furkan=20T=C3=BCrkal?= Date: Wed, 20 Sep 2023 23:07:37 +0300 Subject: [PATCH] convert/python: parse requirements.txt and skip if exist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Furkan Türkal Co-authored-by: Batuhan --- docs/md/melange_build.md | 2 +- docs/md/melange_convert_python.md | 1 + pkg/cli/python.go | 82 ++++++++- pkg/convert/python/python.go | 215 +++++++++++++++++------- pkg/convert/python/python_test.go | 23 +++ pkg/convert/python/requirements.go | 129 ++++++++++++++ pkg/convert/python/requirements_test.go | 112 ++++++++++++ pkg/index/index.go | 45 +++-- pkg/index/index_test.go | 17 ++ pkg/manifest/manifest.go | 1 + 10 files changed, 547 insertions(+), 80 deletions(-) create mode 100644 pkg/convert/python/requirements.go create mode 100644 pkg/convert/python/requirements_test.go create mode 100644 pkg/index/index_test.go diff --git a/docs/md/melange_build.md b/docs/md/melange_build.md index c2bddde8e..1d9cdf0f9 100644 --- a/docs/md/melange_build.md +++ b/docs/md/melange_build.md @@ -52,7 +52,7 @@ melange build [flags] --overlay-binsh string use specified file as /bin/sh overlay in build environment --pipeline-dir string directory used to extend defined built-in pipelines -r, --repository-append strings path to extra repositories to include in the build environment - --runner string which runner to use to enable running commands, default is based on your platform. Options are ["bubblewrap" "docker" "lima" "kubernetes"] (default "bubblewrap") + --runner string which runner to use to enable running commands, default is based on your platform. 
Options are ["bubblewrap" "docker" "lima" "kubernetes"] (default "docker") --signing-key string key to use for signing --source-dir string directory used for included sources --strip-origin-name whether origin names should be stripped (for bootstrap) diff --git a/docs/md/melange_convert_python.md b/docs/md/melange_convert_python.md index 787fadad2..02915194d 100644 --- a/docs/md/melange_convert_python.md +++ b/docs/md/melange_convert_python.md @@ -34,6 +34,7 @@ convert python botocore -h, --help help for python --package-version string version of the python package to convert --python-version string version of the python to build the package (default "3") + --use-existing **experimental** if true, use the existing packages in the Wolfi APK repo ``` ### Options inherited from parent commands diff --git a/pkg/cli/python.go b/pkg/cli/python.go index 11b71fb98..2b46908eb 100644 --- a/pkg/cli/python.go +++ b/pkg/cli/python.go @@ -16,6 +16,12 @@ package cli import ( "context" + "io" + "net/http" + "os" + "strings" + + "chainguard.dev/melange/pkg/index" "chainguard.dev/melange/pkg/convert/python" "chainguard.dev/melange/pkg/convert/relmon" @@ -34,6 +40,7 @@ type pythonOptions struct { packageVersion string ghClient *github.Client mf *relmon.MonitorFinder + useExistingPackages bool } // PythonBuild is the top-level `convert python` cobra command @@ -60,7 +67,7 @@ convert python botocore`, if err != nil { return err } - o.ghClient, err = getGithubClient(context.TODO(), cmd) + o.ghClient, err = getGithubClient(cmd.Context(), cmd) if err != nil { return err } @@ -76,12 +83,49 @@ convert python botocore`, cmd.Flags().StringVar(&o.baseURIFormat, "base-uri-format", "https://pypi.org", "URI to use for querying gems for provided package name") cmd.Flags().StringVar(&o.pythonVersion, "python-version", "3", "version of the python to build the package") + + // Experimental flag to use the already existing packages in the Wolfi APK repo + cmd.Flags().BoolVar(&o.useExistingPackages, 
"use-existing", false, "**experimental** if true, use the existing packages in the Wolfi APK repo") + return cmd } -// pythonBuild is the main cli function. It just sets up the PythonBuild context and +func (o pythonOptions) pythonBuild(ctx context.Context, arg string) error { + var ( + r io.ReadCloser + err error + ) + + switch { + case strings.HasPrefix(arg, "http://"), strings.HasPrefix(arg, "https://"): + resp, err := http.Get(arg) + if err != nil { + return errors.Wrapf(err, "getting %s", arg) + } + r = resp.Body + case strings.Contains(arg, "/"), strings.Contains(arg, "requirements"): + r, err = os.Open(arg) + default: + // If we neither have a HTTP(s) URL, nor a file path, we assume it's a + // package name, and try to convert it as-is way. + return o.pythonPackageBuild(ctx, arg, nil) + } + + if err != nil { + return errors.Wrap(err, "read") + } + + pkgs, err := python.ParseRequirementsTxt(r) + if err != nil { + return errors.Wrap(err, "parse requirements") + } + + return o.pythonPackageBuild(ctx, arg, pkgs) +} + +// pythonPackageBuild is the main cli function. It just sets up the PythonBuild context and // then executes the manifest generation. -func (o pythonOptions) pythonBuild(ctx context.Context, packageName string) error { +func (o pythonOptions) pythonPackageBuild(ctx context.Context, packageName string, initialDeps []string) error { pythonContext, err := python.New(packageName) if err != nil { return errors.Wrap(err, "initialising python command") @@ -94,12 +138,42 @@ func (o pythonOptions) pythonBuild(ctx context.Context, packageName string) erro pythonContext.PackageVersion = o.packageVersion pythonContext.PythonVersion = o.pythonVersion pythonContext.PackageName = packageName + pythonContext.ToCheck = initialDeps // These two are conditionally set above, and if nil, they are unused. 
pythonContext.GithubClient = o.ghClient pythonContext.MonitoringClient = o.mf + if o.useExistingPackages { + ep, err := getExistedPythonPackagesFromIndex() + if err != nil { + return errors.Wrap(err, "existing packages from index") + } + pythonContext.ExistingPackages = ep + } + pythonContext.Logger.Printf("generating convert config files for python package %s version: %s on python version: %s", pythonContext.PackageName, pythonContext.PythonVersion, pythonContext.PackageVersion) - return pythonContext.Generate(ctx) + if len(pythonContext.ToCheck) > 0 { + return pythonContext.GenerateFromRequirements(ctx) + } + + return pythonContext.GenerateFromIndex(ctx) +} + +func getExistedPythonPackagesFromIndex() ([]string, error) { + ic, err := index.New(index.WithExpectedArch("x86_64")) + if err != nil { + return nil, err + } + if err := ic.LoadIndex("https://packages.wolfi.dev/os"); err != nil { + return nil, err + } + var existedPackages []string + for _, pkg := range ic.Index.Packages { + if strings.HasPrefix(pkg.Name, "py3") { + existedPackages = append(existedPackages, pkg.Name) + } + } + return existedPackages, nil } diff --git a/pkg/convert/python/python.go b/pkg/convert/python/python.go index fef97a41d..3d949c711 100644 --- a/pkg/convert/python/python.go +++ b/pkg/convert/python/python.go @@ -18,7 +18,7 @@ import ( "context" "fmt" "log" - "regexp" + "os" "strings" apkotypes "chainguard.dev/apko/pkg/build/types" @@ -81,6 +81,10 @@ type PythonContext struct { // If non-nil, this is the Release Monitoring client to use for fetching // metadata to get the monitoring data for the package. MonitoringClient *relmon.MonitorFinder + + // If non-nil, this is the existing packages in Wolfi index repository + // to skip generating melange files for existing packages. + ExistingPackages []string } // New initialises a new PythonContext. 
@@ -93,11 +97,16 @@ func New(packageName string) (PythonContext, error) { return context, nil } -// Generate is the entrypoint to generate a ruby gem melange file. It handles -// recursively finding all dependencies for a pypi package and generating a melange file -// for each. -func (c *PythonContext) Generate(ctx context.Context) error { - c.Logger.Printf("[%s] Generating manifests", c.PackageName) +func (c *PythonContext) GenerateFromRequirements(ctx context.Context) error { + c.Logger.Printf("[%s] Generating manifests from given requirements.txt", c.PackageName) + + c.PackageIndex = NewPackageIndex(c.BaseURIFormat) + + return c.generate(ctx) +} + +func (c *PythonContext) GenerateFromIndex(ctx context.Context) error { + c.Logger.Printf("[%s] Generating manifests from remote upstream index", c.PackageName) c.PackageIndex = NewPackageIndex(c.BaseURIFormat) @@ -112,85 +121,112 @@ func (c *PythonContext) Generate(ctx context.Context) error { // add self to check to start the find dep tree c.ToCheck = append(c.ToCheck, p.Info.Name) + return c.generate(ctx) +} + +func (c *PythonContext) generate(ctx context.Context) error { + // If EXPERIMENTAL flag is set, merge the locally-generated manifests with + // the existing ones to avoid re-fetching and re-generating the same ones. + if len(c.ExistingPackages) > 0 { + c.ExistingPackages = append(c.ExistingPackages, c.gatherGeneratedPackages()...) + } + // download the package json metadata and find all it's deps - err = c.findDep(ctx) - if err != nil { + if err := c.findDep(ctx); err != nil { return err } c.Logger.Printf("[%s] Generating %v files", c.PackageName, len(c.ToGenerate)) + return c.generatePackages(ctx) +} + +// generatePackages writes a melange file for every package in ToGenerate. +// A failure for one package does not stop the others; every failure is logged +// and a single summary error is returned at the end. 
+func (c *PythonContext) generatePackages(ctx context.Context) error { + errs := make(map[string]error) + // generate melange files for all dependencies for m, pack := range c.ToGenerate { c.Logger.Printf("[%s] Index %v Package %v ", pack.Info.Name, m, pack.Info.Name) c.Logger.Printf("[%s] Create manifest", pack.Info.Name) - version := pack.Info.Version - // if were generating the package asked for , check the version wasn't specified - if c.PackageName == pack.Info.Name && c.PackageVersion != "" { - version = c.PackageVersion - } - ghVersions := []githubpkg.TagData{} - var relmon *relmon.Item - if c.GithubClient != nil { - c.Logger.Printf("Trying to get commit data for %s", pack.Info.Name) - // If we have a github client, then try to get the commit data. - githubURL := pack.Info.GetSourceURL() - if githubURL != "" { - c.Logger.Printf("[%s] Using github URL %s for %s", pack.Info.Name, githubURL, pack.Info.Name) - owner, repo, err := githubpkg.ParseGithubURL(githubURL) - if err != nil { - c.Logger.Printf("error parsing github url %s - %s ", githubURL, err) - } else { - client := githubpkg.NewGithubRepoClient(c.GithubClient, owner, repo) - client.Logger = c.Logger - versions, err := client.GetVersions(ctx, version) - if err != nil { - c.Logger.Printf("error getting versions for %s - %s ", pack.Info.Name, err) - } - // This is fine in error case, since it's nothing. - for _, version := range versions { - c.Logger.Printf("[%s] got github version: %+v\n", pack.Info.Name, version) - } - ghVersions = versions - } - } + if err := c.finalizePackage(ctx, pack); err != nil { + errs[pack.Info.Name] = err } + } + + for p, e := range errs { + c.Logger.Printf("[%s] FAILED TO CREATE PACKAGE %v", p, e) + } - // If the release monitoring client has been configured, see if we can - // fetch the data for this package. 
- if c.MonitoringClient != nil { - monitoring, err := c.MonitoringClient.FindMonitor(ctx, pack.Info.Name) + if len(errs) > 0 { + return fmt.Errorf("failed to generate %d packages", len(errs)) + } + + return nil +} + +func (c *PythonContext) finalizePackage(ctx context.Context, pack Package) error { + version := pack.Info.Version + // if were generating the package asked for , check the version wasn't specified + if c.PackageName == pack.Info.Name && c.PackageVersion != "" { + version = c.PackageVersion + } + + ghVersions := []githubpkg.TagData{} + var relmon *relmon.Item + if c.GithubClient != nil { + c.Logger.Printf("Trying to get commit data for %s", pack.Info.Name) + // If we have a github client, then try to get the commit data. + githubURL := pack.Info.GetSourceURL() + if githubURL != "" { + c.Logger.Printf("[%s] Using github URL %s for %s", pack.Info.Name, githubURL, pack.Info.Name) + owner, repo, err := githubpkg.ParseGithubURL(githubURL) if err != nil { - fmt.Printf("Failed to find monitoring: %v\n", err) + c.Logger.Printf("error parsing github url %s - %s ", githubURL, err) } else { - fmt.Printf("Found monitoring: %+v\n", monitoring) - relmon = monitoring + client := githubpkg.NewGithubRepoClient(c.GithubClient, owner, repo) + client.Logger = c.Logger + versions, err := client.GetVersions(ctx, version) + if err != nil { + c.Logger.Printf("error getting versions for %s - %s ", pack.Info.Name, err) + } + // This is fine in error case, since it's nothing. + for _, version := range versions { + c.Logger.Printf("[%s] got github version: %+v\n", pack.Info.Name, version) + } + ghVersions = versions } } + } - generated, err := c.generateManifest(ctx, pack, version, ghVersions, relmon) + // If the release monitoring client has been configured, see if we can + // fetch the data for this package. 
+ if c.MonitoringClient != nil { + monitoring, err := c.MonitoringClient.FindMonitor(ctx, pack.Info.Name) if err != nil { - c.Logger.Printf("[%s] FAILED TO CREATE MANIFEST %v", pack.Info.Name, err) - return err + fmt.Printf("Failed to find monitoring: %v\n", err) + } else { + fmt.Printf("Found monitoring: %+v\n", monitoring) + relmon = monitoring } + } - err = generated.Write(c.OutDir) - if err != nil { - c.Logger.Printf("[%s] FAILED TO WRITE MANIFEST %v", pack.Info.Name, err) - return err - } + generated, err := c.generateManifest(ctx, pack, version, ghVersions, relmon) + if err != nil { + c.Logger.Printf("[%s] FAILED TO CREATE MANIFEST %v", pack.Info.Name, err) + return err } - return nil -} + err = generated.Write(c.OutDir) + if err != nil { + c.Logger.Printf("[%s] FAILED TO WRITE MANIFEST %v", pack.Info.Name, err) + return err + } -func stripDep(dep string) (string, error) { - // removing all the special chars from the requirements like "importlib-metadata (>=3.6.0) ; python_version < \"3.10\"" - re := regexp.MustCompile(`[;()\[\]!~=<>]`) - dep = re.ReplaceAllString(dep, " ") - depStrip := strings.Split(dep, " ") - return depStrip[0], nil + return nil } // FindDep - given a python package retrieve all its dependencies @@ -199,15 +235,23 @@ func (c *PythonContext) findDep(ctx context.Context) error { return nil } + current := c.ToCheck[0] + + if c.checkIfPackageExist(current) { + c.Logger.Printf("[%s] Package already exists in Wolfi index, skipping", current) + c.ToCheck = c.ToCheck[1:] + return c.findDep(ctx) + } + c.Logger.Printf("[%s] Check Dependency list: %v", c.PackageName, c.ToCheck) - c.Logger.Printf("[%s] Fetch Package Data", c.ToCheck[0]) + c.Logger.Printf("[%s] Fetch Package Data", current) - p, err := c.PackageIndex.GetLatest(ctx, c.ToCheck[0]) + p, err := c.PackageIndex.GetLatest(ctx, current) if err != nil { return err } - c.Logger.Printf("[%s] %s Add to generate list", c.ToCheck[0], p.Info.Name) + c.Logger.Printf("[%s] %s Add to generate list", 
current, p.Info.Name) c.ToCheck = c.ToCheck[1:] c.Logger.Printf("[%s] Check for dependencies", p.Info.Name) @@ -243,6 +287,49 @@ func (c *PythonContext) findDep(ctx context.Context) error { return c.findDep(ctx) } +// checkIfPackageExist checks if the package already exists in the ExistingPackages. +func (c *PythonContext) checkIfPackageExist(pkg string) bool { + for _, p := range c.ExistingPackages { + if c.trimPackageName(p) == pkg || p == pkg { + return true + } + } + return false +} + +// gatherGeneratedPackages returns the package names derived from the files +// already present in OutDir, so that we don't have to fetch and generate the +// same manifests again. +func (c *PythonContext) gatherGeneratedPackages() []string { + files, err := os.ReadDir(c.OutDir) + if err != nil { + return []string{} + } + var packages []string + for _, f := range files { + if f.IsDir() { + continue + } + packages = append(packages, c.trimPackageName(f.Name())) + } + return packages +} + +// trimPackageName trims the `py<PythonVersion>-` prefix and `.yaml` suffix +// from the given name to get the actual package name. +func (c *PythonContext) trimPackageName(name string) string { + pkg := strings.TrimPrefix(name, "py"+c.PythonVersion+"-") + pkg = strings.TrimSuffix(pkg, ".yaml") + // To handle the case where the package name starts with py but + // version is different. 
+ if strings.HasPrefix(pkg, "py") { + if idx := strings.Index(pkg, "-"); idx != -1 { + pkg = pkg[idx+1:] + } + } + return pkg +} + func (c *PythonContext) generateManifest(ctx context.Context, pack Package, version string, ghVersions []githubpkg.TagData, monitorInfo *relmon.Item) (manifest.GeneratedMelangeConfig, error) { // The actual generated manifest struct generated := manifest.GeneratedMelangeConfig{Logger: c.Logger} diff --git a/pkg/convert/python/python_test.go b/pkg/convert/python/python_test.go index 9308397c9..561b9d783 100644 --- a/pkg/convert/python/python_test.go +++ b/pkg/convert/python/python_test.go @@ -266,6 +266,7 @@ func SetupContext(version string) ([]*PythonContext, error) { jsonschemapythonctx.PackageName = "jsonschema" jsonschemapythonctx.PackageVersion = "4.17.3" jsonschemapythonctx.PythonVersion = version + jsonschemapythonctx.OutDir = "./testdata/generated" // Read the gem meta into data, err = os.ReadFile(filepath.Join(jsonschemaMeta, "json")) @@ -376,3 +377,25 @@ func removeVersionsFromURL(inputURL string) (string, error) { parsedURL.Path = strings.Join(segments, "/") return parsedURL.String(), nil } + +func TestPythonContext_gatherGeneratedPackages(t *testing.T) { + c, err := SetupContext("3.10") + assert.NoError(t, err) + p := c[1] + + pkgs := p.gatherGeneratedPackages() + assert.Equal(t, []string{"foo", "bar", "baz"}, pkgs) +} + +func TestPythonContext_checkIfPackageExist(t *testing.T) { + c, err := SetupContext("3.10") + assert.NoError(t, err) + p := c[1] + + p.ExistingPackages = []string{"foo", "py3-bar", "py3.10-baz", "py3.11-qux"} + assert.True(t, p.checkIfPackageExist("foo")) + assert.True(t, p.checkIfPackageExist("bar")) + assert.True(t, p.checkIfPackageExist("baz")) + assert.True(t, p.checkIfPackageExist("qux")) + assert.False(t, p.checkIfPackageExist("quux")) +} diff --git a/pkg/convert/python/requirements.go b/pkg/convert/python/requirements.go new file mode 100644 index 000000000..1ec1029d3 --- /dev/null +++ 
b/pkg/convert/python/requirements.go @@ -0,0 +1,129 @@ +package python + +import ( + "bufio" + "fmt" + "io" + "regexp" + "strings" +) + +// ParseRequirementsTxt parses a requirements.txt file and returns a list of +// package names. It ignores comments, extras, and version constraints. +// This is from a modified version of battle-tested code: +// https://github.com/google/osv-scanner/blob/main/pkg/lockfile/parse-requirements-txt.go +func ParseRequirementsTxt(r io.Reader) ([]string, error) { + packages := []string{} + + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := scanner.Text() + + for isLineContinuation(line) { + line = strings.TrimSuffix(line, "\\") + + if scanner.Scan() { + line += scanner.Text() + } + } + + line = removeComments(line) + + if isNotRequirementLine(line) { + continue + } + + detail := parseLine(line) + + packages = append(packages, detail) + } + + if err := scanner.Err(); err != nil { + return []string{}, fmt.Errorf("error while scanning: %w", err) + } + + return packages, nil +} + +func isLineContinuation(line string) bool { + // checks that the line ends with an odd number of back slashes, + // meaning the last one isn't escaped + re := regexp.MustCompile(`([^\\]|^)(\\{2})*\\$`) + + return re.MatchString(line) +} + +func isNotRequirementLine(line string) bool { + return line == "" || + // flags are not supported + strings.HasPrefix(line, "-") || + // file urls + strings.HasPrefix(line, "https://") || + strings.HasPrefix(line, "http://") || + // file paths are not supported (relative or absolute) + strings.HasPrefix(line, ".") || + strings.HasPrefix(line, "/") +} + +// https://pip.pypa.io/en/stable/reference/requirements-file-format/#example +func parseLine(line string) string { + var constraint string + name := line + + if strings.Contains(line, "==") { + constraint = "==" + } + + if strings.Contains(line, ">=") { + constraint = ">=" + } + + if strings.Contains(line, "~=") { + constraint = "~=" + } + + if 
strings.Contains(line, "!=") { + constraint = "!=" + } + + if constraint != "" { + unprocessedName, _, _ := strings.Cut(line, constraint) + name = strings.TrimSpace(unprocessedName) + } + + return normalizedRequirementName(name) +} + +// normalizedRequirementName normalizes the package name per PEP 503 and +// then removes the "added support" (extras) syntax, e.g. "[x]", if present. +// +// This is done to ensure we don't miss any advisories, as while the OSV +// specification says that the normalized name should be used for advisories, +// that's not the case currently in our databases, _and_ Pip itself supports +// non-normalized names in the requirements.txt, so we need to normalize +// on both sides to ensure we don't have false negatives. +// +// It's possible that this will cause some false positives, but that is better +// than false negatives, and can be dealt with when/if it actually happens. +func normalizedRequirementName(name string) string { + // per https://www.python.org/dev/peps/pep-0503/#normalized-names + name = regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-") + name = strings.ToLower(name) + name, _, _ = strings.Cut(name, "[") + + return name +} + +func removeComments(line string) string { + var re = regexp.MustCompile(`(^|\s+)#.*$`) + + return strings.TrimSpace(re.ReplaceAllString(line, "")) +} + +func stripDep(dep string) (string, error) { + // removing all the special chars from the requirements like "importlib-metadata (>=3.6.0) ; python_version < \"3.10\"" + re := regexp.MustCompile(`[;()\[\]!~=<>]`) + dep = re.ReplaceAllString(dep, " ") + depStrip := strings.Split(dep, " ") + return depStrip[0], nil +} diff --git a/pkg/convert/python/requirements_test.go b/pkg/convert/python/requirements_test.go new file mode 100644 index 000000000..e5f2cddbf --- /dev/null +++ b/pkg/convert/python/requirements_test.go @@ -0,0 +1,112 @@ +package python + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseRequirementsTxt(t 
*testing.T) { + tests := []struct { + name string + data string + want []string + }{ + { + "empty", + ``, + []string{}, + }, + { + "comment", + `# comment`, + []string{}, + }, + { + "comment with single package", + `# comment +foo`, + []string{"foo"}, + }, + { + "single package", + `foo==0.12.0 # via foo`, + []string{"foo"}, + }, + { + "multiple packages with extra", + `bar[x]>=2.34.0 # via foo, bar +baz[x,z]=<1.31.5 # via -r -`, + []string{"bar", "baz"}, + }, + { + "full", + `# + # This file is autogenerated by pip-compile + # To update, run: + # + # pip-compile --output-file=- - + # + foo==0.12.0 # via foo + bar[x]>=2.34.0 # via foo, bar + baz[x,z]=<1.31.5 # via -r - + qux # via -r - + # The following packages are considered to be unsafe in a requirements file: + # setuptools + `, + []string{"foo", "bar", "baz", "qux"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := strings.NewReader(tt.data) + got, err := ParseRequirementsTxt(r) + assert.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_stripDep(t *testing.T) { + tests := []struct { + dep string + want string + }{ + { + "", + "", + }, + { + "foo", + "foo", + }, + { + "foo==1.2.3", + "foo", + }, + { + "foo>=1.2.3", + "foo", + }, + { + "foo[x,y]==1.2.3", + "foo", + }, + { + "foo[x,y]>=1.2.3", + "foo", + }, + { + "foo[x,y]<1.2.3", + "foo", + }, + } + for _, tt := range tests { + t.Run(tt.dep, func(t *testing.T) { + got, err := stripDep(tt.dep) + assert.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/pkg/index/index.go b/pkg/index/index.go index 01500dd55..5df21c4a9 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -17,9 +17,9 @@ package index import ( "context" "encoding/json" - "errors" "fmt" "io" + "net/http" "os" "path/filepath" "strings" @@ -130,18 +130,15 @@ func New(opts ...Option) (*Index, error) { return &idx, nil } -func (idx *Index) LoadIndex(sourceFile string) error { - f, err := os.Open(sourceFile) +// 
LoadIndex loads an APKINDEX file from a URL or local file. +func (idx *Index) LoadIndex(source string) error { + rc, err := idx.read(source) if err != nil { - if errors.Is(err, os.ErrNotExist) { - return nil - } - - return err + return fmt.Errorf("failed to read index file from source: %w", err) } - defer f.Close() + defer rc.Close() - index, err := apkrepo.IndexFromArchive(f) + index, err := apkrepo.IndexFromArchive(rc) if err != nil { return fmt.Errorf("failed to read apkindex from archive file: %w", err) } @@ -149,11 +146,37 @@ func (idx *Index) LoadIndex(sourceFile string) error { idx.Index.Description = index.Description idx.Index.Packages = append(idx.Index.Packages, index.Packages...) - idx.Logger.Printf("loaded %d/%d packages from index %s", len(idx.Index.Packages), len(index.Packages), sourceFile) + idx.Logger.Printf("loaded %d/%d packages from index %s", len(idx.Index.Packages), len(index.Packages), source) return nil } +func (idx *Index) read(source string) (io.ReadCloser, error) { + var rc io.ReadCloser + if strings.HasPrefix(source, "http://") || strings.HasPrefix(source, "https://") { + url := fmt.Sprintf("%s/%s/APKINDEX.tar.gz", source, idx.ExpectedArch) + resp, err := http.Get(url) //nolint:gosec + if err != nil { + return nil, err + } + if resp.StatusCode != http.StatusOK { + b, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + return nil, fmt.Errorf("GET %s (%d): %s", url, resp.StatusCode, b) + } + rc = resp.Body + } else { + f, err := os.Open(source) + if err != nil { + return nil, fmt.Errorf("opening %q: %w", source, err) + } + rc = f + } + return rc, nil +} + func (idx *Index) UpdateIndex() error { packages := make([]*apkrepo.Package, len(idx.PackageFiles)) var mtx sync.Mutex diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go new file mode 100644 index 000000000..ec29e4561 --- /dev/null +++ b/pkg/index/index_test.go @@ -0,0 +1,17 @@ +package index + +import ( + "testing" + + "github.com/stretchr/testify/assert" 
+) + +func TestIndex_LoadIndex(t *testing.T) { + idx, err := New(WithExpectedArch("x86_64")) + assert.NoError(t, err) + + err = idx.LoadIndex("https://packages.wolfi.dev/os") + assert.NoError(t, err) + + assert.NotEmpty(t, idx.Index.Packages) +} diff --git a/pkg/manifest/manifest.go b/pkg/manifest/manifest.go index 742666e89..d6e75f65b 100644 --- a/pkg/manifest/manifest.go +++ b/pkg/manifest/manifest.go @@ -60,6 +60,7 @@ func (m *GeneratedMelangeConfig) Write(dir string) error { } ye := yaml.NewEncoder(f) + ye.SetIndent(2) defer ye.Close() if err := ye.Encode(m); err != nil {