From 2614c5ea21a513df2a188293b89f37a69a0a18d3 Mon Sep 17 00:00:00 2001 From: fuxiaohei Date: Fri, 2 Jun 2023 18:31:06 +0800 Subject: [PATCH 1/5] artifacts support multiple files uploading --- models/actions/artifact.go | 37 ++- routers/api/actions/artifacts.go | 355 ++++++------------------ routers/api/actions/artifacts_chunks.go | 184 ++++++++++++ routers/api/actions/artifacts_utils.go | 82 ++++++ 4 files changed, 371 insertions(+), 287 deletions(-) create mode 100644 routers/api/actions/artifacts_chunks.go create mode 100644 routers/api/actions/artifacts_utils.go diff --git a/models/actions/artifact.go b/models/actions/artifact.go index 1b45fce0673bc..e7f55926b7840 100644 --- a/models/actions/artifact.go +++ b/models/actions/artifact.go @@ -31,7 +31,7 @@ func init() { // ActionArtifact is a file that is stored in the artifact storage. type ActionArtifact struct { ID int64 `xorm:"pk autoincr"` - RunID int64 `xorm:"index UNIQUE(runid_name)"` // The run id of the artifact + RunID int64 `xorm:"index index(runid_name_path)"` // The run id of the artifact RunnerID int64 RepoID int64 `xorm:"index"` OwnerID int64 @@ -40,27 +40,28 @@ type ActionArtifact struct { FileSize int64 // The size of the artifact in bytes FileCompressedSize int64 // The size of the artifact in bytes after gzip compression ContentEncoding string // The content encoding of the artifact - ArtifactPath string // The path to the artifact when runner uploads it - ArtifactName string `xorm:"UNIQUE(runid_name)"` // The name of the artifact when runner uploads it - Status int64 `xorm:"index"` // The status of the artifact, uploading, expired or need-delete + ArtifactPath string `xorm:"index index(runid_name_path)"` // The path to the artifact when runner uploads it + ArtifactName string `xorm:"index index(runid_name_path)"` // The name of the artifact when runner uploads it + Status int64 `xorm:"index"` // The status of the artifact, uploading, expired or need-delete CreatedUnix timeutil.TimeStamp `xorm:"created"` UpdatedUnix timeutil.TimeStamp `xorm:"updated index"` } -// CreateArtifact create a new artifact with task info or get same named artifact in the same run -func CreateArtifact(ctx context.Context, t *ActionTask, artifactName string) (*ActionArtifact, error) { +func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPath string) (*ActionArtifact, error) { if err := t.LoadJob(ctx); err != nil { return nil, err } - artifact, err := getArtifactByArtifactName(ctx, t.Job.RunID, artifactName) + artifact, err := getArtifactByNameAndPath(ctx, t.Job.RunID, artifactName, artifactPath) if errors.Is(err, util.ErrNotExist) { artifact := &ActionArtifact{ - RunID: t.Job.RunID, - RunnerID: t.RunnerID, - RepoID: t.RepoID, - OwnerID: t.OwnerID, - CommitSHA: t.CommitSHA, - Status: ArtifactStatusUploadPending, + ArtifactName: artifactName, + ArtifactPath: artifactPath, + RunID: t.Job.RunID, + RunnerID: t.RunnerID, + RepoID: t.RepoID, + OwnerID: t.OwnerID, + CommitSHA: t.CommitSHA, + Status: ArtifactStatusUploadPending, } if _, err := db.GetEngine(ctx).Insert(artifact); err != nil { return nil, err @@ -72,9 +73,9 @@ func CreateArtifact(ctx context.Context, t *ActionTask, artifactName string) (*A return artifact, nil } -func getArtifactByArtifactName(ctx context.Context, runID int64, name string) (*ActionArtifact, error) { +func getArtifactByNameAndPath(ctx context.Context, runID int64, name, fpath string) (*ActionArtifact, error) { var art ActionArtifact - has, err := db.GetEngine(ctx).Where("run_id = ? AND artifact_name = ?", runID, name).Get(&art) + has, err := db.GetEngine(ctx).Where("run_id = ? AND artifact_name = ? AND artifact_path = ?", runID, name, fpath).Get(&art) if err != nil { return nil, err } else if !has { @@ -120,3 +121,9 @@ func ListArtifactsByRepoID(ctx context.Context, repoID int64) ([]*ActionArtifact arts := make([]*ActionArtifact, 0, 10) return arts, db.GetEngine(ctx).Where("repo_id=?", repoID).Find(&arts) } + +// ListArtifactsByRunIDAndName returns artifacts by name of a run +func ListArtifactsByRunIDAndName(ctx context.Context, runID int64, name string) ([]*ActionArtifact, error) { + arts := make([]*ActionArtifact, 0, 10) + return arts, db.GetEngine(ctx).Where("run_id=? AND artifact_name=?", runID, name).Find(&arts) +} diff --git a/routers/api/actions/artifacts.go b/routers/api/actions/artifacts.go index 4b10cd7ad1108..c570e0556e239 100644 --- a/routers/api/actions/artifacts.go +++ b/routers/api/actions/artifacts.go @@ -62,17 +62,12 @@ package actions // import ( - "compress/gzip" "crypto/md5" - "encoding/base64" "errors" "fmt" - "io" "net/http" - "sort" "strconv" "strings" - "time" "code.gitea.io/gitea/models/actions" "code.gitea.io/gitea/modules/context" @@ -84,11 +79,6 @@ import ( "code.gitea.io/gitea/modules/web" ) -const ( - artifactXTfsFileLengthHeader = "x-tfs-filelength" - artifactXActionsResultsMD5Header = "x-actions-results-md5" -) - const artifactRouteBase = "/_apis/pipelines/workflows/{run_id}/artifacts" type artifactContextKeyType struct{} @@ -120,11 +110,10 @@ func ArtifactsRoutes(prefix string) *web.Route { // retrieve, list and confirm artifacts m.Combo("").Get(r.listArtifacts).Post(r.getUploadArtifactURL).Patch(r.comfirmUploadArtifact) // handle container artifacts list and download - m.Group("/{artifact_id}", func() { - m.Put("/upload", r.uploadArtifact) - m.Get("/path", r.getDownloadArtifactURL) - m.Get("/download", r.downloadArtifact) - }) + m.Put("/{artifact_hash}/upload", r.uploadArtifact) + // handle artifacts download + m.Get("/download/path", r.getDownloadArtifactURL) + m.Get("/{artifact_id}/download", r.downloadArtifact) }) return m @@ -172,10 +161,10 @@ type artifactRoutes struct { fs storage.ObjectStorage } -func (ar artifactRoutes) buildArtifactURL(runID, artifactID int64, suffix string) string { +func (ar artifactRoutes) buildArtifactURL(runID int64, artifactHash, suffix string) string { uploadURL := strings.TrimSuffix(setting.AppURL, "/") + strings.TrimSuffix(ar.prefix, "/") + strings.ReplaceAll(artifactRouteBase, "{run_id}", strconv.FormatInt(runID, 10)) + - "/" + strconv.FormatInt(artifactID, 10) + "/" + suffix + "/" + artifactHash + "/" + suffix return uploadURL } @@ -188,20 +177,9 @@ type getUploadArtifactResponse struct { FileContainerResourceURL string `json:"fileContainerResourceUrl"` } -func (ar artifactRoutes) validateRunID(ctx *ArtifactContext) (*actions.ActionTask, int64, bool) { - task := ctx.ActionTask - runID := ctx.ParamsInt64("run_id") - if task.Job.RunID != runID { - log.Error("Error runID not match") - ctx.Error(http.StatusBadRequest, "run-id does not match") - return nil, 0, false - } - return task, runID, true -} - // getUploadArtifactURL generates a URL for uploading an artifact func (ar artifactRoutes) getUploadArtifactURL(ctx *ArtifactContext) { - task, runID, ok := ar.validateRunID(ctx) + _, runID, ok := validateRunID(ctx) if !ok { return } @@ -213,131 +191,59 @@ func (ar artifactRoutes) getUploadArtifactURL(ctx *ArtifactContext) { return } - artifact, err := actions.CreateArtifact(ctx, task, req.Name) - if err != nil { - log.Error("Error creating artifact: %v", err) - ctx.Error(http.StatusInternalServerError, err.Error()) - return - } + // use md5(artifact_name) to create upload url + artifactHash := fmt.Sprintf("%x", md5.Sum([]byte(req.Name))) resp := getUploadArtifactResponse{ - FileContainerResourceURL: ar.buildArtifactURL(runID, artifact.ID, "upload"), + FileContainerResourceURL: ar.buildArtifactURL(runID, artifactHash, "upload"), } - log.Debug("[artifact] get upload url: %s, artifact id: %d", resp.FileContainerResourceURL, artifact.ID) + log.Debug("[artifact] get upload url: %s", resp.FileContainerResourceURL) ctx.JSON(http.StatusOK, resp) } -// getUploadFileSize returns the size of the file to be uploaded. -// The raw size is the size of the file as reported by the header X-TFS-FileLength. -func (ar artifactRoutes) getUploadFileSize(ctx *ArtifactContext) (int64, int64, error) { - contentLength := ctx.Req.ContentLength - xTfsLength, _ := strconv.ParseInt(ctx.Req.Header.Get(artifactXTfsFileLengthHeader), 10, 64) - if xTfsLength > 0 { - return xTfsLength, contentLength, nil - } - return contentLength, contentLength, nil -} - -func (ar artifactRoutes) saveUploadChunk(ctx *ArtifactContext, - artifact *actions.ActionArtifact, - contentSize, runID int64, -) (int64, error) { - contentRange := ctx.Req.Header.Get("Content-Range") - start, end, length := int64(0), int64(0), int64(0) - if _, err := fmt.Sscanf(contentRange, "bytes %d-%d/%d", &start, &end, &length); err != nil { - return -1, fmt.Errorf("parse content range error: %v", err) - } - - storagePath := fmt.Sprintf("tmp%d/%d-%d-%d.chunk", runID, artifact.ID, start, end) - - // use io.TeeReader to avoid reading all body to md5 sum. - // it writes data to hasher after reading end - // if hash is not matched, delete the read-end result - hasher := md5.New() - r := io.TeeReader(ctx.Req.Body, hasher) - - // save chunk to storage - writtenSize, err := ar.fs.Save(storagePath, r, -1) - if err != nil { - return -1, fmt.Errorf("save chunk to storage error: %v", err) - } - - // check md5 - reqMd5String := ctx.Req.Header.Get(artifactXActionsResultsMD5Header) - chunkMd5String := base64.StdEncoding.EncodeToString(hasher.Sum(nil)) - log.Debug("[artifact] check chunk md5, sum: %s, header: %s", chunkMd5String, reqMd5String) - if reqMd5String != chunkMd5String || writtenSize != contentSize { - if err := ar.fs.Delete(storagePath); err != nil { - log.Error("Error deleting chunk: %s, %v", storagePath, err) - } - return -1, fmt.Errorf("md5 not match") - } - - log.Debug("[artifact] save chunk %s, size: %d, artifact id: %d, start: %d, end: %d", - storagePath, contentSize, artifact.ID, start, end) - - return length, nil -} - -// The rules are from https://github.com/actions/toolkit/blob/main/packages/artifact/src/internal/path-and-artifact-name-validation.ts#L32 -var invalidArtifactNameChars = strings.Join([]string{"\\", "/", "\"", ":", "<", ">", "|", "*", "?", "\r", "\n"}, "") - func (ar artifactRoutes) uploadArtifact(ctx *ArtifactContext) { - _, runID, ok := ar.validateRunID(ctx) + task, runID, ok := validateRunID(ctx) if !ok { return } - artifactID := ctx.ParamsInt64("artifact_id") - - artifact, err := actions.GetArtifactByID(ctx, artifactID) - if errors.Is(err, util.ErrNotExist) { - log.Error("Error getting artifact: %v", err) - ctx.Error(http.StatusNotFound, err.Error()) - return - } else if err != nil { - log.Error("Error getting artifact: %v", err) - ctx.Error(http.StatusInternalServerError, err.Error()) + artifactName, artifactPath, ok := parseArtifactItemPath(ctx) + if !ok { return } - // itemPath is generated from upload-artifact action - // it's formatted as {artifact_name}/{artfict_path_in_runner} - itemPath := util.PathJoinRel(ctx.Req.URL.Query().Get("itemPath")) - artifactName := strings.Split(itemPath, "/")[0] - - // checkArtifactName checks if the artifact name contains invalid characters. - // If the name contains invalid characters, an error is returned. - if strings.ContainsAny(artifactName, invalidArtifactNameChars) { - log.Error("Error checking artifact name contains invalid character") - ctx.Error(http.StatusBadRequest, err.Error()) + // get upload file size + fileRealTotalSize, contentLength, err := getUploadFileSize(ctx) + if err != nil { + log.Error("Error get upload file size: %v", err) + ctx.Error(http.StatusInternalServerError, "Error get upload file size") return } - // get upload file size - fileSize, contentLength, err := ar.getUploadFileSize(ctx) + // create or get artifact with name and path + artifact, err := actions.CreateArtifact(ctx, task, artifactName, artifactPath) if err != nil { - log.Error("Error getting upload file size: %v", err) - ctx.Error(http.StatusInternalServerError, err.Error()) + log.Error("Error create or get artifact: %v", err) + ctx.Error(http.StatusInternalServerError, "Error create or get artifact") return } - // save chunk - chunkAllLength, err := ar.saveUploadChunk(ctx, artifact, contentLength, runID) + // save chunk to storage, if success, return chunk stotal size + // if artifact is not gzip when uploading, chunksTotalSize == fileRealTotalSize + // if artifact is gzip when uploading, chunksTotalSize < fileRealTotalSize + chunksTotalSize, err := saveUploadChunk(ar.fs, ctx, artifact, contentLength, runID) if err != nil { - log.Error("Error saving upload chunk: %v", err) - ctx.Error(http.StatusInternalServerError, err.Error()) + log.Error("Error save upload chunk: %v", err) + ctx.Error(http.StatusInternalServerError, "Error save upload chunk") return } - // if artifact name is not set, update it - if artifact.ArtifactName == "" { - artifact.ArtifactName = artifactName - artifact.ArtifactPath = itemPath // path in container - artifact.FileSize = fileSize // this is total size of all chunks - artifact.FileCompressedSize = chunkAllLength + // update artifact size if zero + if artifact.FileSize == 0 || artifact.FileCompressedSize == 0 { + artifact.FileSize = fileRealTotalSize + artifact.FileCompressedSize = chunksTotalSize artifact.ContentEncoding = ctx.Req.Header.Get("Content-Encoding") if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil { - log.Error("Error updating artifact: %v", err) - ctx.Error(http.StatusInternalServerError, err.Error()) + log.Error("Error update artifact: %v", err) + ctx.Error(http.StatusInternalServerError, "Error update artifact") return } } @@ -350,135 +256,21 @@ func (ar artifactRoutes) uploadArtifact(ctx *ArtifactContext) { // comfirmUploadArtifact comfirm upload artifact. // if all chunks are uploaded, merge them to one file. func (ar artifactRoutes) comfirmUploadArtifact(ctx *ArtifactContext) { - _, runID, ok := ar.validateRunID(ctx) + _, runID, ok := validateRunID(ctx) if !ok { return } - if err := ar.mergeArtifactChunks(ctx, runID); err != nil { - log.Error("Error merging chunks: %v", err) - ctx.Error(http.StatusInternalServerError, err.Error()) + artifactName := ctx.Req.URL.Query().Get("artifactName") + if err := mergeChunksForRun(ctx, ar.fs, runID, artifactName); err != nil { + log.Error("Error merge chunks: %v", err) + ctx.Error(http.StatusInternalServerError, "Error merge chunks") return } - ctx.JSON(http.StatusOK, map[string]string{ "message": "success", }) } -type chunkItem struct { - ArtifactID int64 - Start int64 - End int64 - Path string -} - -func (ar artifactRoutes) mergeArtifactChunks(ctx *ArtifactContext, runID int64) error { - storageDir := fmt.Sprintf("tmp%d", runID) - var chunks []*chunkItem - if err := ar.fs.IterateObjects(storageDir, func(path string, obj storage.Object) error { - item := chunkItem{Path: path} - if _, err := fmt.Sscanf(path, storageDir+"/%d-%d-%d.chunk", &item.ArtifactID, &item.Start, &item.End); err != nil { - return fmt.Errorf("parse content range error: %v", err) - } - chunks = append(chunks, &item) - return nil - }); err != nil { - return err - } - // group chunks by artifact id - chunksMap := make(map[int64][]*chunkItem) - for _, c := range chunks { - chunksMap[c.ArtifactID] = append(chunksMap[c.ArtifactID], c) - } - - for artifactID, cs := range chunksMap { - // get artifact to handle merged chunks - artifact, err := actions.GetArtifactByID(ctx, cs[0].ArtifactID) - if err != nil { - return fmt.Errorf("get artifact error: %v", err) - } - - sort.Slice(cs, func(i, j int) bool { - return cs[i].Start < cs[j].Start - }) - - allChunks := make([]*chunkItem, 0) - startAt := int64(-1) - // check if all chunks are uploaded and in order and clean repeated chunks - for _, c := range cs { - // startAt is -1 means this is the first chunk - // previous c.ChunkEnd + 1 == c.ChunkStart means this chunk is in order - // StartAt is not -1 and c.ChunkStart is not startAt + 1 means there is a chunk missing - if c.Start == (startAt + 1) { - allChunks = append(allChunks, c) - startAt = c.End - } - } - - // if the last chunk.End + 1 is not equal to chunk.ChunkLength, means chunks are not uploaded completely - if startAt+1 != artifact.FileCompressedSize { - log.Debug("[artifact] chunks are not uploaded completely, artifact_id: %d", artifactID) - break - } - - // use multiReader - readers := make([]io.Reader, 0, len(allChunks)) - closeReaders := func() { - for _, r := range readers { - _ = r.(io.Closer).Close() // it guarantees to be io.Closer by the following loop's Open function - } - readers = nil - } - defer closeReaders() - - for _, c := range allChunks { - var readCloser io.ReadCloser - if readCloser, err = ar.fs.Open(c.Path); err != nil { - return fmt.Errorf("open chunk error: %v, %s", err, c.Path) - } - readers = append(readers, readCloser) - } - mergedReader := io.MultiReader(readers...) - - // if chunk is gzip, decompress it - if artifact.ContentEncoding == "gzip" { - var err error - mergedReader, err = gzip.NewReader(mergedReader) - if err != nil { - return fmt.Errorf("gzip reader error: %v", err) - } - } - - // save merged file - storagePath := fmt.Sprintf("%d/%d/%d.chunk", runID%255, artifactID%255, time.Now().UnixNano()) - written, err := ar.fs.Save(storagePath, mergedReader, -1) - if err != nil { - return fmt.Errorf("save merged file error: %v", err) - } - if written != artifact.FileSize { - return fmt.Errorf("merged file size is not equal to chunk length") - } - - // save storage path to artifact - log.Debug("[artifact] merge chunks to artifact: %d, %s", artifact.ID, storagePath) - artifact.StoragePath = storagePath - artifact.Status = actions.ArtifactStatusUploadConfirmed - if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil { - return fmt.Errorf("update artifact error: %v", err) - } - - closeReaders() // close before delete - - // drop chunks - for _, c := range cs { - if err := ar.fs.Delete(c.Path); err != nil { - return fmt.Errorf("delete chunk file error: %v", err) - } - } - } - return nil -} - type ( listArtifactsResponse struct { Count int64 `json:"count"` @@ -491,7 +283,7 @@ type ( ) func (ar artifactRoutes) listArtifacts(ctx *ArtifactContext) { - _, runID, ok := ar.validateRunID(ctx) + _, runID, ok := validateRunID(ctx) if !ok { return } @@ -502,17 +294,19 @@ func (ar artifactRoutes) listArtifacts(ctx *ArtifactContext) { ctx.Error(http.StatusInternalServerError, err.Error()) return } + if len(artifacts) == 0 { + log.Debug("[artifact] handleListArtifacts, no artifacts") + ctx.Error(http.StatusNotFound) + return + } - artficatsData := make([]listArtifactsResponseItem, 0, len(artifacts)) - for _, a := range artifacts { - artficatsData = append(artficatsData, listArtifactsResponseItem{ - Name: a.ArtifactName, - FileContainerResourceURL: ar.buildArtifactURL(runID, a.ID, "path"), - }) + item := listArtifactsResponseItem{ + Name: artifacts[0].ArtifactName, + FileContainerResourceURL: ar.buildArtifactURL(runID, "download", "path"), } respData := listArtifactsResponse{ - Count: int64(len(artficatsData)), - Value: artficatsData, + Count: 1, + Value: []listArtifactsResponseItem{item}, } ctx.JSON(http.StatusOK, respData) } @@ -528,37 +322,52 @@ type ( } ) +// getDownloadArtifactURL generates download url for each artifact func (ar artifactRoutes) getDownloadArtifactURL(ctx *ArtifactContext) { - _, runID, ok := ar.validateRunID(ctx) + _, runID, ok := validateRunID(ctx) if !ok { return } - artifactID := ctx.ParamsInt64("artifact_id") - artifact, err := actions.GetArtifactByID(ctx, artifactID) - if errors.Is(err, util.ErrNotExist) { - log.Error("Error getting artifact: %v", err) - ctx.Error(http.StatusNotFound, err.Error()) - return - } else if err != nil { - log.Error("Error getting artifact: %v", err) + artifacts, err := actions.ListArtifactsByRunID(ctx, runID) + if err != nil { + log.Error("Error getting artifacts: %v", err) ctx.Error(http.StatusInternalServerError, err.Error()) return } - downloadURL := ar.buildArtifactURL(runID, artifact.ID, "download") + if len(artifacts) == 0 { + log.Debug("[artifact] getDownloadArtifactURL, no artifacts") + ctx.Error(http.StatusNotFound) + return + } + itemPath := util.PathJoinRel(ctx.Req.URL.Query().Get("itemPath")) - respData := downloadArtifactResponse{ - Value: []downloadArtifactResponseItem{{ + if itemPath != artifacts[0].ArtifactName { + log.Error("Error dismatch artifact name, itemPath: %v, artifact: %v", itemPath, artifacts[0].ArtifactName) + ctx.Error(http.StatusBadRequest, "Error dismatch artifact name") + return + } + + var items []downloadArtifactResponseItem + for _, artifact := range artifacts { + downloadURL := ar.buildArtifactURL(runID, strconv.FormatInt(artifact.ID, 10), "download") + item := downloadArtifactResponseItem{ Path: util.PathJoinRel(itemPath, artifact.ArtifactPath), ItemType: "file", ContentLocation: downloadURL, - }}, + } + log.Debug("[artifact] getDownloadArtifactURL, path: %s, url: %s", item.Path, item.ContentLocation) + items = append(items, item) + } + respData := downloadArtifactResponse{ + Value: items, } ctx.JSON(http.StatusOK, respData) } +// downloadArtifact downloads artifact content func (ar artifactRoutes) downloadArtifact(ctx *ArtifactContext) { - _, runID, ok := ar.validateRunID(ctx) + _, runID, ok := validateRunID(ctx) if !ok { return } @@ -588,9 +397,11 @@ func (ar artifactRoutes) downloadArtifact(ctx *ArtifactContext) { } defer fd.Close() - if strings.HasSuffix(artifact.ArtifactPath, ".gz") { + // if artifact is compressed, set content-encoding header to gzip + if artifact.ContentEncoding == "gzip" { ctx.Resp.Header().Set("Content-Encoding", "gzip") } + log.Debug("[artifact] downloadArtifact, name: %s, path: %s, storage: %s, size: %d", artifact.ArtifactName, artifact.ArtifactPath, artifact.StoragePath, artifact.FileSize) ctx.ServeContent(fd, &context.ServeHeaderOptions{ Filename: artifact.ArtifactName, LastModified: artifact.CreatedUnix.AsLocalTime(), diff --git a/routers/api/actions/artifacts_chunks.go b/routers/api/actions/artifacts_chunks.go new file mode 100644 index 0000000000000..1a03fd99f63b9 --- /dev/null +++ b/routers/api/actions/artifacts_chunks.go @@ -0,0 +1,184 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package actions + +import ( + "crypto/md5" + "encoding/base64" + "fmt" + "io" + "sort" + "time" + + "code.gitea.io/gitea/models/actions" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/storage" +) + +func saveUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext, + artifact *actions.ActionArtifact, + contentSize, runID int64, +) (int64, error) { + // parse content-range header, format: bytes 0-1023/146515 + contentRange := ctx.Req.Header.Get("Content-Range") + start, end, length := int64(0), int64(0), int64(0) + if _, err := fmt.Sscanf(contentRange, "bytes %d-%d/%d", &start, &end, &length); err != nil { + return -1, fmt.Errorf("parse content range error: %v", err) + } + // build chunk store path + storagePath := fmt.Sprintf("tmp%d/%d-%d-%d.chunk", runID, artifact.ID, start, end) + // use io.TeeReader to avoid reading all body to md5 sum. + // it writes data to hasher after reading end + // if hash is not matched, delete the read-end result + hasher := md5.New() + r := io.TeeReader(ctx.Req.Body, hasher) + // save chunk to storage + writtenSize, err := st.Save(storagePath, r, -1) + if err != nil { + return -1, fmt.Errorf("save chunk to storage error: %v", err) + } + // check md5 + reqMd5String := ctx.Req.Header.Get(artifactXActionsResultsMD5Header) + chunkMd5String := base64.StdEncoding.EncodeToString(hasher.Sum(nil)) + log.Info("[artifact] check chunk md5, sum: %s, header: %s", chunkMd5String, reqMd5String) + // if md5 not match, delete the chunk + if reqMd5String != chunkMd5String || writtenSize != contentSize { + if err := st.Delete(storagePath); err != nil { + log.Error("Error deleting chunk: %s, %v", storagePath, err) + } + return -1, fmt.Errorf("md5 not match") + } + log.Info("[artifact] save chunk %s, size: %d, artifact id: %d, start: %d, end: %d", + storagePath, contentSize, artifact.ID, start, end) + // return chunk total size + return length, nil +} + +type chunkFileItem struct { + ArtifactID int64 + Start int64 + End int64 + Path string +} + +func listChunksByRunID(st storage.ObjectStorage, runID int64) (map[int64][]*chunkFileItem, error) { + storageDir := fmt.Sprintf("tmp%d", runID) + var chunks []*chunkFileItem + if err := st.IterateObjects(storageDir, func(path string, obj storage.Object) error { + item := chunkFileItem{Path: path} + if _, err := fmt.Sscanf(path, storageDir+"/%d-%d-%d.chunk", &item.ArtifactID, &item.Start, &item.End); err != nil { + return fmt.Errorf("parse content range error: %v", err) + } + chunks = append(chunks, &item) + return nil + }); err != nil { + return nil, err + } + // chunks group by artifact id + chunksMap := make(map[int64][]*chunkFileItem) + for _, c := range chunks { + chunksMap[c.ArtifactID] = append(chunksMap[c.ArtifactID], c) + } + return chunksMap, nil +} + +func mergeChunksForRun(ctx *ArtifactContext, st storage.ObjectStorage, runID int64, artifactName string) error { + // read all db artifacts by name + artifacts, err := actions.ListArtifactsByRunIDAndName(ctx, runID, artifactName) + if err != nil { + return err + } + // read all uploading chunks from storage + chunksMap, err := listChunksByRunID(st, runID) + if err != nil { + return err + } + // range db artifacts to merge chunks + for _, art := range artifacts { + chunks, ok := chunksMap[art.ID] + if !ok { + return fmt.Errorf("artifact %d chunks not found", art.ID) + } + if err := mergeChunksForArtifact(ctx, chunks, st, art); err != nil { + return err + } + } + return nil +} + +func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st storage.ObjectStorage, artifact *actions.ActionArtifact) error { + sort.Slice(chunks, func(i, j int) bool { + return chunks[i].Start < chunks[j].Start + }) + allChunks := make([]*chunkFileItem, 0) + startAt := int64(-1) + // check if all chunks are uploaded and in order and clean repeated chunks + for _, c := range chunks { + // startAt is -1 means this is the first chunk + // previous c.ChunkEnd + 1 == c.ChunkStart means this chunk is in order + // StartAt is not -1 and c.ChunkStart is not startAt + 1 means there is a chunk missing + if c.Start == (startAt + 1) { + allChunks = append(allChunks, c) + startAt = c.End + } + } + // if the last chunk.End + 1 is not equal to chunk.ChunkLength, means chunks are not uploaded completely + if startAt+1 != artifact.FileCompressedSize { + log.Debug("[artifact] chunks are not uploaded completely, artifact_id: %d", artifact.ID) + return nil + } + // use multiReader + readers := make([]io.Reader, 0, len(allChunks)) + closeReaders := func() { + for _, r := range readers { + _ = r.(io.Closer).Close() // it guarantees to be io.Closer by the following loop's Open function + } + readers = nil + } + defer closeReaders() + for _, c := range allChunks { + var readCloser io.ReadCloser + var err error + if readCloser, err = st.Open(c.Path); err != nil { + return fmt.Errorf("open chunk error: %v, %s", err, c.Path) + } + readers = append(readers, readCloser) + } + mergedReader := io.MultiReader(readers...) + + // if chunk is gzip, use gz as extension + // download-artifact action will use content-encoding header to decide if it should decompress the file + extension := "chunk" + if artifact.ContentEncoding == "gzip" { + extension = "chunk.gz" + } + + // save merged file + storagePath := fmt.Sprintf("%d/%d/%d.%s", artifact.RunID%255, artifact.ID%255, time.Now().UnixNano(), extension) + written, err := st.Save(storagePath, mergedReader, -1) + if err != nil { + return fmt.Errorf("save merged file error: %v", err) + } + if written != artifact.FileCompressedSize { + return fmt.Errorf("merged file size is not equal to chunk length") + } + + // save storage path to artifact + log.Debug("[artifact] merge chunks to artifact: %d, %s", artifact.ID, storagePath) + artifact.StoragePath = storagePath + artifact.Status = actions.ArtifactStatusUploadConfirmed + if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil { + return fmt.Errorf("update artifact error: %v", err) + } + + closeReaders() // close before delete + + // drop chunks + for _, c := range chunks { + if err := st.Delete(c.Path); err != nil { + return fmt.Errorf("delete chunk file error: %v", err) + } + } + return nil +} diff --git a/routers/api/actions/artifacts_utils.go b/routers/api/actions/artifacts_utils.go new file mode 100644 index 0000000000000..4c939348622b7 --- /dev/null +++ b/routers/api/actions/artifacts_utils.go @@ -0,0 +1,82 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package actions + +import ( + "crypto/md5" + "fmt" + "net/http" + "strconv" + "strings" + + "code.gitea.io/gitea/models/actions" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" +) + +const ( + artifactXTfsFileLengthHeader = "x-tfs-filelength" + artifactXActionsResultsMD5Header = "x-actions-results-md5" +) + +// The rules are from https://github.com/actions/toolkit/blob/main/packages/artifact/src/internal/path-and-artifact-name-validation.ts#L32 +var invalidArtifactNameChars = strings.Join([]string{"\\", "/", "\"", ":", "<", ">", "|", "*", "?", "\r", "\n"}, "") + +func validateArtifactName(ctx *ArtifactContext, artifactName string) bool { + if strings.ContainsAny(artifactName, invalidArtifactNameChars) { + log.Error("Error checking artifact name contains invalid character") + ctx.Error(http.StatusBadRequest, "Error checking artifact name contains invalid character") + return false + } + return true +} + +func validateRunID(ctx *ArtifactContext) (*actions.ActionTask, int64, bool) { + task := ctx.ActionTask + runID := ctx.ParamsInt64("run_id") + if task.Job.RunID != runID { + log.Error("Error runID not match") + ctx.Error(http.StatusBadRequest, "run-id does not match") + return nil, 0, false + } + return task, runID, true +} + +func validateArtifactHash(ctx *ArtifactContext, artifactName string) bool { + paramHash := ctx.Params("artifact_hash") + // use artifact name to create upload url + artifactHash := fmt.Sprintf("%x", md5.Sum([]byte(artifactName))) + if paramHash == artifactHash { + return true + } + log.Error("Invalid artifact hash: %s", paramHash) + ctx.Error(http.StatusBadRequest, "Invalid artifact hash") + return false +} + +func parseArtifactItemPath(ctx *ArtifactContext) (string, string, bool) { + // itemPath is generated from upload-artifact action + // it's formatted as {artifact_name}/{artfict_path_in_runner} + itemPath := util.PathJoinRel(ctx.Req.URL.Query().Get("itemPath")) + artifactName := strings.Split(itemPath, "/")[0] + artifactPath := strings.TrimPrefix(itemPath, artifactName+"/") + if !validateArtifactHash(ctx, artifactName) { + return "", "", false + } + if !validateArtifactName(ctx, artifactName) { + return "", "", false + } + return artifactName, artifactPath, true +} + +// getUploadFileSize returns the size of the file to be uploaded. +// The raw size is the size of the file as reported by the header X-TFS-FileLength. +func getUploadFileSize(ctx *ArtifactContext) (int64, int64, error) { + contentLength := ctx.Req.ContentLength + xTfsLength, _ := strconv.ParseInt(ctx.Req.Header.Get(artifactXTfsFileLengthHeader), 10, 64) + if xTfsLength > 0 { + return xTfsLength, contentLength, nil + } + return contentLength, contentLength, nil +} From 33868f9f0f1a346664e9b9ba5ed30bb31c8a0491 Mon Sep 17 00:00:00 2001 From: fuxiaohei Date: Fri, 2 Jun 2023 22:11:03 +0800 Subject: [PATCH 2/5] update artifacts list in run action view --- models/actions/artifact.go | 16 ++++++ routers/web/repo/actions/view.go | 65 ++++++++++++++++-------- routers/web/web.go | 2 +- web_src/js/components/RepoActionView.vue | 4 +- 4 files changed, 63 insertions(+), 24 deletions(-) diff --git a/models/actions/artifact.go b/models/actions/artifact.go index e7f55926b7840..e59c7b9425a2f 100644 --- a/models/actions/artifact.go +++ b/models/actions/artifact.go @@ -116,6 +116,22 @@ func ListUploadedArtifactsByRunID(ctx context.Context, runID int64) ([]*ActionAr return arts, db.GetEngine(ctx).Where("run_id=? AND status=?", runID, ArtifactStatusUploadConfirmed).Find(&arts) } +// ActionArtifactMeta is the meta data of an artifact +type ActionArtifactMeta struct { + ArtifactName string + FileSize int64 +} + +// ListUploadedArtifactsMeta returns all uploaded artifacts meta of a run +func ListUploadedArtifactsMeta(ctx context.Context, runID int64) ([]*ActionArtifactMeta, error) { + arts := make([]*ActionArtifactMeta, 0, 10) + return arts, db.GetEngine(ctx).Table("action_artifact"). + Where("run_id=? AND status=?", runID, ArtifactStatusUploadConfirmed). + GroupBy("artifact_name"). + Select("artifact_name, sum(file_size) as file_size"). + Find(&arts) +} + // ListArtifactsByRepoID returns all artifacts of a repo func ListArtifactsByRepoID(ctx context.Context, repoID int64) ([]*ActionArtifact, error) { arts := make([]*ActionArtifact, 0, 10) diff --git a/routers/web/repo/actions/view.go b/routers/web/repo/actions/view.go index 7c2e9d63d6d32..b7045cc666314 100644 --- a/routers/web/repo/actions/view.go +++ b/routers/web/repo/actions/view.go @@ -4,9 +4,12 @@ package actions import ( + "archive/zip" + "compress/gzip" "context" "errors" "fmt" + "io" "net/http" "time" @@ -429,7 +432,6 @@ type ArtifactsViewResponse struct { type ArtifactsViewItem struct { Name string `json:"name"` Size int64 `json:"size"` - ID int64 `json:"id"` } func ArtifactsView(ctx *context_module.Context) { @@ -443,7 +445,7 @@ func ArtifactsView(ctx *context_module.Context) { ctx.Error(http.StatusInternalServerError, err.Error()) return } - artifacts, err := actions_model.ListUploadedArtifactsByRunID(ctx, run.ID) + artifacts, err := actions_model.ListUploadedArtifactsMeta(ctx, run.ID) if err != nil { ctx.Error(http.StatusInternalServerError, err.Error()) return @@ -455,7 +457,6 @@ func ArtifactsView(ctx *context_module.Context) { artifactsResponse.Artifacts = append(artifactsResponse.Artifacts, &ArtifactsViewItem{ Name: art.ArtifactName, Size: art.FileSize, - ID: art.ID, }) } ctx.JSON(http.StatusOK, artifactsResponse) @@ -463,15 +464,8 @@ func ArtifactsView(ctx *context_module.Context) { func ArtifactsDownloadView(ctx *context_module.Context) { runIndex := ctx.ParamsInt64("run") - artifactID := ctx.ParamsInt64("id") + artifactName := ctx.Params("artifact_name") - artifact, err := actions_model.GetArtifactByID(ctx, artifactID) - if errors.Is(err, util.ErrNotExist) { - ctx.Error(http.StatusNotFound, err.Error()) - } else if err != nil { - ctx.Error(http.StatusInternalServerError, err.Error()) - return - } run, err := actions_model.GetRunByIndex(ctx, ctx.Repo.Repository.ID, runIndex) if err != nil { if errors.Is(err, util.ErrNotExist) { @@ -481,20 +475,49 @@ func ArtifactsDownloadView(ctx *context_module.Context) { ctx.Error(http.StatusInternalServerError, err.Error()) return } - if artifact.RunID != run.ID { - ctx.Error(http.StatusNotFound, "artifact not found") - return - } - f, err := storage.ActionsArtifacts.Open(artifact.StoragePath) + artifacts, err := actions_model.ListArtifactsByRunIDAndName(ctx, run.ID, artifactName) if err != nil { ctx.Error(http.StatusInternalServerError, err.Error()) return } - defer f.Close() + if len(artifacts) == 0 { + ctx.Error(http.StatusNotFound, "artifact not found") + return + } - ctx.ServeContent(f, &context_module.ServeHeaderOptions{ - Filename: artifact.ArtifactName, - LastModified: artifact.CreatedUnix.AsLocalTime(), - }) + ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%s.zip; filename*=UTF-8''%s.zip", artifactName, artifactName)) + + writer := zip.NewWriter(ctx.Resp) + defer writer.Close() + for _, art := range artifacts { + + f, err := storage.ActionsArtifacts.Open(art.StoragePath) + if err != nil { + ctx.Error(http.StatusInternalServerError, err.Error()) + return + } + + var r io.ReadCloser + if art.ContentEncoding == "gzip" { + r, err = gzip.NewReader(f) + if err != nil { + ctx.Error(http.StatusInternalServerError, err.Error()) + return + } + } else { + r = f + } + defer r.Close() + + w, err := writer.Create(art.ArtifactPath) + if err != nil { + ctx.Error(http.StatusInternalServerError, err.Error()) + return + } + if _, err := io.Copy(w, r); err != nil { + ctx.Error(http.StatusInternalServerError, err.Error()) + return + } + } } diff --git a/routers/web/web.go b/routers/web/web.go index da6064257bd20..6b800e81f66b9 100644 --- a/routers/web/web.go +++ b/routers/web/web.go @@ -1197,7 +1197,7 @@ func registerRoutes(m *web.Route) { m.Post("/cancel", reqRepoActionsWriter, actions.Cancel) m.Post("/approve", reqRepoActionsWriter, actions.Approve) m.Post("/artifacts", actions.ArtifactsView) - m.Get("/artifacts/{id}", actions.ArtifactsDownloadView) + m.Get("/artifacts/{artifact_name}", actions.ArtifactsDownloadView) m.Post("/rerun", reqRepoActionsWriter, actions.RerunAll) }) }, reqRepoActionsReader, actions.MustEnableActions) diff --git a/web_src/js/components/RepoActionView.vue b/web_src/js/components/RepoActionView.vue index 704ffa0706a00..5cd3f88662c8b 100644 --- a/web_src/js/components/RepoActionView.vue +++ b/web_src/js/components/RepoActionView.vue @@ -49,8 +49,8 @@ {{ locale.artifactsTitle }}