-
Notifications
You must be signed in to change notification settings - Fork 82
/
python.go
454 lines (386 loc) · 14.3 KB
/
python.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
// Copyright 2022 Chainguard, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package python
import (
"context"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
apkotypes "chainguard.dev/apko/pkg/build/types"
"chainguard.dev/melange/pkg/config"
githubpkg "chainguard.dev/melange/pkg/convert/github"
"chainguard.dev/melange/pkg/convert/relmon"
"chainguard.dev/melange/pkg/manifest"
"github.com/chainguard-dev/clog"
"github.com/google/go-github/v54/github"
"github.com/pkg/errors"
)
// PythonContext is the execution context for the python subcommand. It holds
// the caller-supplied options, the clients used to look up package metadata,
// and the working state (ToCheck, ToGenerate) that is built up while the
// dependency tree is resolved.
type PythonContext struct {
// PackageName is the name of the python package to build and install.
PackageName string
// PackageVersion is the version of the python package to build and install.
// When non-empty it overrides the index-reported version in the manifest
// generated for the requested package.
PackageVersion string
// PythonVersion is the version of python to build the package against. It
// is used to form the "py<PythonVersion>-" package name prefix and the
// "python-<PythonVersion>" runtime dependency.
PythonVersion string
// PackageIndex is the client for talking to pypi. Generate replaces it with
// a client built from BaseURIFormat.
PackageIndex *PackageIndex
// OutDir is the output directory for the generated melange files. It is
// also where findDep looks for manifests that already exist.
OutDir string
// BaseURIFormat is the base URI which should contain a %s for the
// package name.
BaseURIFormat string
// AdditionalRepositories contains any additional apk repos to add
// to the manifest.
// NOTE(review): not referenced by the generation code visible in this
// file - confirm where it is applied.
AdditionalRepositories []string
// AdditionalKeyrings contains any additional apk keys to add
// to the manifest.
// NOTE(review): not referenced by the generation code visible in this
// file - confirm where it is applied.
AdditionalKeyrings []string
// ToGenerate maps the index-reported package name to the package metadata
// for every package that needs a manifest. findDep fills it in while the
// transitive dependency list is being calculated; packages whose manifest
// already exists in OutDir are left out.
ToGenerate map[string]Package
// Package is the pypi metadata of the requested package, set by Generate.
Package Package
// ToCheck is the queue of dependencies that have yet to be checked for
// transitive dependencies. findDep consumes it from the front.
ToCheck []string
// If non-nil, this is the github client to use for fetching metadata
// to get the commit data for the package.
GithubClient *github.Client
// If non-nil, this is the Release Monitoring client to use for fetching
// metadata to get the monitoring data for the package.
MonitoringClient *relmon.MonitorFinder
}
// New returns a PythonContext for packageName with an empty, ready-to-use
// ToGenerate map. All other fields are left at their zero value for the caller
// to fill in. The returned error is always nil.
func New(packageName string) (PythonContext, error) {
	return PythonContext{
		ToGenerate:  make(map[string]Package),
		PackageName: packageName,
	}, nil
}
// Generate is the entrypoint to generate a python package melange file. It
// fetches the requested package from the package index, resolves its
// transitive dependencies with findDep, and then builds and writes one melange
// manifest into c.OutDir for every entry in c.ToGenerate.
//
// When c.GithubClient is set, tag and commit data is looked up from the
// package's source URL; failures there are logged and generation continues
// without GitHub data. When c.MonitoringClient is set, the release monitoring
// entry for the package is looked up and a lookup failure aborts generation.
//
// It returns the first error encountered while fetching metadata, resolving
// dependencies, building a manifest, or writing it.
func (c *PythonContext) Generate(ctx context.Context) error {
	log := clog.FromContext(ctx)
	log.Infof("[%s] Generating manifests", c.PackageName)

	c.PackageIndex = NewPackageIndex(c.BaseURIFormat)
	log.Infof("[%s] Retrieving Package information from %s", c.PackageName, c.PackageIndex.url)

	p, err := c.PackageIndex.Get(ctx, c.PackageName, c.PackageVersion)
	if err != nil {
		log.Infof("error getting latest for package %s - %s ", c.PackageName, err)
		return err
	}
	c.Package = *p

	// Seed the work queue with the requested package so the dependency walk
	// starts from it.
	c.ToCheck = append(c.ToCheck, p.Info.Name)

	// Download the package metadata and find all of its dependencies.
	if err := c.findDep(ctx); err != nil {
		return err
	}

	log.Infof("[%s] Generating %v files", c.PackageName, len(c.ToGenerate))

	// Generate melange files for all dependencies.
	for name, pack := range c.ToGenerate {
		log.Infof("[%s] Index %v Package %v ", pack.Info.Name, name, pack.Info.Name)
		log.Infof("[%s] Create manifest", pack.Info.Name)

		// Dependencies use the version the index reported; only the package
		// that was explicitly asked for honours a user-specified version.
		version := pack.Info.Version
		if c.PackageName == pack.Info.Name && c.PackageVersion != "" {
			version = c.PackageVersion
		}

		ghVersions := []githubpkg.TagData{}
		if c.GithubClient != nil {
			log.Infof("Trying to get commit data for %s", pack.Info.Name)
			// If we have a github client, then try to get the commit data.
			if githubURL := pack.Info.GetSourceURL(); githubURL != "" {
				log.Infof("[%s] Using github URL %s for %s", pack.Info.Name, githubURL, pack.Info.Name)
				owner, repo, err := githubpkg.ParseGithubURL(githubURL)
				if err != nil {
					log.Infof("error parsing github url %s - %s ", githubURL, err)
				} else {
					client := githubpkg.NewGithubRepoClient(c.GithubClient, owner, repo)
					versions, err := client.GetVersions(ctx, version)
					if err != nil {
						log.Infof("error getting versions for %s - %s ", pack.Info.Name, err)
					}
					// Best effort: on error there is nothing to range over
					// and generation falls back to the non-GitHub fetch step.
					for _, v := range versions {
						log.Infof("[%s] got github version: %+v\n", pack.Info.Name, v)
					}
					ghVersions = versions
				}
			}
		}

		// If the release monitoring client has been configured, see if we can
		// fetch the data for this package. The variable is deliberately not
		// called relmon so that it does not shadow the imported package.
		var monitorItem *relmon.Item
		if c.MonitoringClient != nil {
			monitoring, err := c.MonitoringClient.FindMonitor(ctx, pack.Info.Name)
			if err != nil {
				log.Errorf("Failed to find monitoring: %v\n", err)
				return err
			}
			// A successful lookup is informational, not an error.
			log.Infof("Found monitoring: %+v\n", monitoring)
			monitorItem = monitoring
		}

		generated, err := c.generateManifest(ctx, pack, version, ghVersions, monitorItem)
		if err != nil {
			log.Infof("[%s] FAILED TO CREATE MANIFEST %v", pack.Info.Name, err)
			return err
		}

		if err := generated.Write(ctx, c.OutDir); err != nil {
			log.Infof("[%s] FAILED TO WRITE MANIFEST %v", pack.Info.Name, err)
			return err
		}
	}
	return nil
}
// depSpecifierChars matches the punctuation that separates a requirement's
// project name from its extras, version specifiers and environment markers.
// It is compiled once instead of on every stripDep call.
var depSpecifierChars = regexp.MustCompile(`[;()\[\]!~=<>]`)

// stripDep extracts the bare project name from a requirement string such as
// "importlib-metadata (>=3.6.0) ; python_version < \"3.10\"".
//
// All specifier punctuation is replaced by spaces and the first
// whitespace-separated word is returned, so leading whitespace or tabs around
// the name do not end up in the result. A requirement that contains no name at
// all yields an empty string. The returned error is always nil; it is kept for
// interface compatibility with existing callers.
func stripDep(dep string) (string, error) {
	words := strings.Fields(depSpecifierChars.ReplaceAllString(dep, " "))
	if len(words) == 0 {
		return "", nil
	}
	return words[0], nil
}
// findDep resolves the transitive dependencies of every package queued in
// c.ToCheck.
//
// Packages are taken from the front of c.ToCheck one at a time. For each one
// the latest metadata is fetched from the package index, its requirements are
// turned into "py<PythonVersion>-<name>" runtime dependencies, and any
// requirement that has not been scheduled before is appended to c.ToCheck.
// The package is then recorded in c.ToGenerate unless a manifest for it
// already exists in c.OutDir.
//
// Requirements that only apply to an optional extra (an environment marker
// mentioning "extra") are ignored.
//
// Every name is scheduled at most once, so dependency cycles terminate even
// when the packages involved are never added to c.ToGenerate (because their
// manifest already exists) or are referenced under a different spelling than
// the name the index reports.
//
// It returns the first error from the package index or from stripDep. On
// success c.ToCheck is empty.
func (c *PythonContext) findDep(ctx context.Context) error {
	log := clog.FromContext(ctx)

	// queued holds every name that has been scheduled or processed so far:
	// requested spellings as well as the names reported by the index.
	queued := make(map[string]struct{}, len(c.ToCheck)+len(c.ToGenerate))
	for name := range c.ToGenerate {
		queued[name] = struct{}{}
	}
	for _, name := range c.ToCheck {
		queued[name] = struct{}{}
	}

	for len(c.ToCheck) > 0 {
		next := c.ToCheck[0]
		log.Infof("[%s] Check Dependency list: %v", c.PackageName, c.ToCheck)
		log.Infof("[%s] Fetch Package Data", next)

		p, err := c.PackageIndex.GetLatest(ctx, next)
		if err != nil {
			return err
		}
		// Only drop the entry once its metadata has been fetched, so a failed
		// lookup leaves it at the front of the queue.
		c.ToCheck = c.ToCheck[1:]

		// The index may report a different spelling than the one requested
		// (e.g. different case). If that package was already scheduled under
		// its reported name there is nothing left to do for this alias.
		if p.Info.Name != next {
			if _, seen := queued[p.Info.Name]; seen {
				log.Infof("[%s] %s already scheduled, skipping", next, p.Info.Name)
				continue
			}
			queued[p.Info.Name] = struct{}{}
		}
		log.Infof("[%s] %s Add to generate list", next, p.Info.Name)

		log.Infof("[%s] Check for dependencies", p.Info.Name)
		if len(p.Info.RequiresDist) == 0 {
			log.Infof("[%s] Searching source for dependencies", p.Info.Name)
			if err := c.PackageIndex.CheckSourceDeps(p.Info.Name); err != nil {
				return err
			}
		}

		for _, req := range p.Info.RequiresDist {
			// Skip requirements that belong to an optional extra. Only the
			// marker part after ';' is inspected so that packages whose own
			// name contains "extra" are still picked up.
			if i := strings.Index(req, ";"); i >= 0 && strings.Contains(req[i+1:], "extra") {
				continue
			}

			dep, err := stripDep(req)
			if err != nil {
				return err
			}
			p.Dependencies = append(p.Dependencies, "py"+c.PythonVersion+"-"+dep)

			// Schedule the dependency unless that has already happened.
			if _, seen := queued[dep]; !seen {
				queued[dep] = struct{}{}
				c.ToCheck = append(c.ToCheck, dep)
			}
		}

		// NOTE(review): the existence check uses a fixed "py3-" prefix while
		// generated packages are named "py<PythonVersion>-"; confirm whether
		// that is intentional when PythonVersion is not "3".
		if _, err := os.Stat(filepath.Join(c.OutDir, "py3-"+p.Info.Name+".yaml")); err == nil {
			// Package already exists, so skip it.
			// Its dependencies have still been queued above.
			log.Infof("[%s] Package already exists, skipping", p.Info.Name)
		} else {
			c.ToGenerate[p.Info.Name] = *p
		}

		log.Infof("[%s] %v Number of deps", p.Info.Name, len(p.Dependencies))
	}
	return nil
}
// generateManifest assembles the complete melange configuration for pack at
// the given version: the generated-from comment, the Package, Environment and
// Pipeline sections, and - when data is available - an Update section.
//
// The Update section is taken from monitorInfo when it is non-nil. Otherwise,
// if ghVersions is non-empty, a GitHub monitor is configured from the first
// entry; when none of the entries is marked as the latest release, the tag
// prefix is stripped and, if that prefix is non-empty, tags are used instead
// of releases.
//
// It returns an empty config and an error when the pipeline cannot be
// generated or the repository URL of the first GitHub version cannot be
// parsed.
func (c *PythonContext) generateManifest(ctx context.Context, pack Package, version string, ghVersions []githubpkg.TagData, monitorInfo *relmon.Item) (manifest.GeneratedMelangeConfig, error) {
	var out manifest.GeneratedMelangeConfig

	// Fill in the individual sections of the manifest.
	out.GeneratedFromComment = pack.Info.ProjectURL
	out.Package = c.generatePackage(ctx, pack, version)
	out.Environment = c.generateEnvironment(ctx, pack)

	steps, err := c.generatePipeline(ctx, pack, version, ghVersions)
	if err != nil {
		return manifest.GeneratedMelangeConfig{}, err
	}
	out.Pipeline = steps

	switch {
	case monitorInfo != nil:
		// Release monitoring data wins over anything GitHub told us.
		out.Update = config.Update{
			Enabled:        true,
			ReleaseMonitor: &config.ReleaseMonitor{Identifier: monitorInfo.ID},
		}

	case len(ghVersions) > 0:
		// The first version is enough to extract the repository details.
		first := ghVersions[0]

		// This URL was already parsed earlier, so this should not fail.
		owner, repo, err := githubpkg.ParseGithubURL(first.Repo)
		if err != nil {
			return manifest.GeneratedMelangeConfig{}, fmt.Errorf("failed to parse github URL %s : %w", first.Repo, err)
		}
		monitor := &config.GitHubMonitor{Identifier: owner + "/" + repo}

		// If a latest release was found, releases are used as-is; otherwise
		// fall back to tags (when there is a prefix to strip).
		sawLatest := false
		for i := range ghVersions {
			if ghVersions[i].IsLatest {
				sawLatest = true
				break
			}
		}
		if !sawLatest {
			monitor.StripPrefix = first.TagPrefix
			monitor.UseTags = first.TagPrefix != ""
		}

		out.Update = config.Update{
			Enabled:       true,
			GitHubMonitor: monitor,
		}
	}

	return out, nil
}
// generatePackage builds the Package section of the melange manifest for pack.
//
// The package is named "py<PythonVersion>-<name>" and uses the given version
// with epoch 0. The description and the single copyright entry come from
// pack.Info (Summary and License). The runtime dependencies are
// pack.Dependencies followed by "python-<PythonVersion>".
func (c *PythonContext) generatePackage(ctx context.Context, pack Package, version string) config.Package {
	logger := clog.FromContext(ctx)
	logger.Infof("[%s] Generate Package", pack.Info.Name)
	logger.Infof("[%s] Run time Deps %v", pack.Info.Name, pack.Dependencies)

	// The interpreter itself is always needed at run time.
	runtimeDeps := append(pack.Dependencies, "python-"+c.PythonVersion)

	return config.Package{
		Name:        fmt.Sprintf("py%s-%s", c.PythonVersion, pack.Info.Name),
		Version:     version,
		Epoch:       0,
		Description: pack.Info.Summary,
		Copyright: []config.Copyright{
			{License: pack.Info.License},
		},
		Dependencies: config.Dependencies{
			Runtime: runtimeDeps,
		},
	}
}
// generateEnvironment builds the Environment section of the melange manifest.
//
// Every package gets the same fixed list of build environment packages; pack
// is only used for the log message. This function does not add
// c.AdditionalRepositories or c.AdditionalKeyrings to the result.
func (c *PythonContext) generateEnvironment(ctx context.Context, pack Package) apkotypes.ImageConfiguration {
	clog.FromContext(ctx).Infof("[%s] Generate Environment", pack.Info.Name)

	var env apkotypes.ImageConfiguration
	env.Contents.Packages = []string{
		"build-base",
		"busybox",
		"ca-certificates-bundle",
		"wolfi-base",
	}
	return env
}
// generatePipeline builds the Pipeline section of the melange manifest.
//
// The pipeline consists of:
//  1. the source step(s): when ghVersions is empty, a single "fetch" of the
//     sdist listed by the package index; otherwise one "git-checkout" per
//     GitHub version. NOTE: there can be several checkouts when multiple
//     versions were found - it seems safest to let the human decide which one
//     to use.
//  2. "python/build-wheel" - runs the actual build and install.
//  3. "strip".
//
// For the fetch step the sha256 of the artifact is computed by downloading it.
// If that computation fails, the failure is logged, a placeholder string is
// put into the manifest in place of the checksum, and generation moves on. If
// the computation succeeds but disagrees with the digest published by the
// index, an error is returned.
//
// An error is also returned when version is not among the package's releases,
// when the release has no sdist, or when no download URL can be derived from
// the package name.
func (c *PythonContext) generatePipeline(ctx context.Context, pack Package, version string, ghVersions []githubpkg.TagData) ([]config.Pipeline, error) {
	log := clog.FromContext(ctx)
	var pipeline []config.Pipeline
	log.Infof("[%s] Generate Pipeline for version %s", pack.Info.Name, version)

	// Download the sdist from the index, unless we were configured for and
	// able to fetch GitHub versions, in which case those are used instead.
	if len(ghVersions) == 0 {
		releases, ok := pack.Releases[version]
		if !ok {
			return pipeline, fmt.Errorf("package version %s was not in releases for %s", version, pack.Info.Name)
		}

		// Only source distributions can be built; the last one listed wins.
		var release Release
		for _, r := range releases {
			if r.PackageType == "sdist" {
				release = r
			}
		}
		if release.URL == "" {
			return pipeline, errors.New("could not find any sdist package in available releases")
		}

		releaseURL := release.URL
		if strings.Contains(release.URL, "https://files.pythonhosted.org") {
			// Rewrite hash-based URLs to the predictable
			// /packages/source/<first letter>/<name>/ layout so that the URI
			// can be templated on the package version.
			packageName := strings.TrimPrefix(pack.Info.Name, fmt.Sprintf("py%s", release.PythonVersion))
			if packageName == "" {
				// Guard the packageName[0] access below.
				return pipeline, fmt.Errorf("cannot derive a source URL for package %q: empty package name", pack.Info.Name)
			}
			releaseURL = fmt.Sprintf("https://files.pythonhosted.org/packages/source/%c/%s/%s-%s.tar.gz", packageName[0], packageName, packageName, version)
		}
		uri := strings.ReplaceAll(releaseURL, version, "${{package.version}}")

		artifact256SHA, err := c.PackageIndex.Client.GetArtifactSHA256(ctx, releaseURL)
		switch {
		case err != nil:
			// Best effort: leave an obvious placeholder for a human to fix
			// rather than failing the whole generation. The placeholder is
			// deliberately not compared against the index digest.
			log.Infof("[%s] SHA256 Generation FAILED. %v", pack.Info.Name, err)
			log.Infof("[%s] Or try 'curl %s' to check out the API", pack.Info.Name, pack.Info.DownloadURL)
			artifact256SHA = fmt.Sprintf("FAILED GENERATION. Investigate by going to %s", pack.Info.ProjectURL)
		case artifact256SHA != release.Digest.Sha256:
			return pipeline, fmt.Errorf("artifact 256SHA %s did not match Package data SHA256 %s",
				artifact256SHA, release.Digest.Sha256)
		}

		pipeline = append(pipeline, config.Pipeline{
			Uses: "fetch",
			With: map[string]string{
				"uri":             uri,
				"expected-sha256": artifact256SHA,
			},
		})
	}

	// Add one checkout step per GitHub version.
	for _, ghVersion := range ghVersions {
		pipeline = append(pipeline, config.Pipeline{
			Uses: "git-checkout",
			With: map[string]string{
				"repository":      ghVersion.Repo,
				"tag":             ghVersion.TagPrefix + "${{package.version}}",
				"expected-commit": ghVersion.SHA,
			},
		})
	}

	pipeline = append(pipeline,
		config.Pipeline{
			Name: "Python Build",
			Uses: "python/build-wheel",
		},
		config.Pipeline{
			Uses: "strip",
		},
	)
	return pipeline, nil
}