Skip to content

Commit

Permalink
Fail cache downloads when required, retry on memory alloc fail
Browse files Browse the repository at this point in the history
  • Loading branch information
shawnburke committed Feb 4, 2022
1 parent 4d7f93f commit 7d857b4
Show file tree
Hide file tree
Showing 10 changed files with 159 additions and 8 deletions.
1 change: 1 addition & 0 deletions common/network.go
Expand Up @@ -273,6 +273,7 @@ type Artifacts []Artifact
type Cache struct {
Key string `json:"key"`
Untracked bool `json:"untracked"`
Required bool `json:"required"`
Policy CachePolicy `json:"policy"`
Paths ArtifactPaths `json:"paths"`
When CacheWhen `json:"when"`
Expand Down
57 changes: 56 additions & 1 deletion helpers/archives/zip_extract.go
Expand Up @@ -6,6 +6,9 @@ import (
"io/ioutil"
"os"
"path/filepath"
"runtime"
"strings"
"time"

"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -83,6 +86,12 @@ func extractZipFile(file *zip.File) (err error) {
return
}

// this string is consistently used across platforms
// see https://github.com/golang/go/blob/go1.17.6/src/syscall/zerrors_linux_386.go#L1393 and other zerrors*.go
const errCannotAllocateMemory = "cannot allocate memory"
const memoryErrorRetries = 3
const memoryErrorWaitTime = time.Second

func ExtractZipArchive(archive *zip.Reader) error {
tracker := newPathErrorTracker()

Expand All @@ -91,8 +100,23 @@ func ExtractZipArchive(archive *zip.Reader) error {
printGitArchiveWarning("extract")
}

if err := extractZipFile(file); tracker.actionable(err) {
// we sometimes get memory errors unzipping, do a retry
retries := memoryErrorRetries

for {

err := extractZipFile(file)
if err == nil || !tracker.actionable(err) {
break
}

logrus.Warningf("%s: %s (suppressing repeats)", file.Name, err)

err, retries = checkMemoryAllocRetry(err, retries)

if err != nil || retries <= 0 {
return err
}
}
}

Expand All @@ -111,6 +135,37 @@ func ExtractZipArchive(archive *zip.Reader) error {
return nil
}

var disableWait = false // for testing

func checkMemoryAllocRetry(err error, retriesRemaining int) (error, int) {

// When running in containers, we will occasionally get errors allocating
// memory under pressure. So we retry a few times, then fail the extraction
if err == nil || !strings.Contains(err.Error(), errCannotAllocateMemory) {
return err, 0
}

if retriesRemaining <= 0 {
return err, 0
}

mem := runtime.MemStats{}
runtime.ReadMemStats(&mem)

logrus.Warningf(
"Can't allocate memory, waiting %s, %d retries left (total used MB=%d)",
memoryErrorWaitTime.String(),
retriesRemaining,
mem.Sys/1024/1024,
)

if !disableWait {
time.Sleep(memoryErrorWaitTime)
}
retriesRemaining--
return nil, retriesRemaining
}

func ExtractZipFile(fileName string) error {
archive, err := zip.OpenReader(fileName)
if err != nil {
Expand Down
54 changes: 54 additions & 0 deletions helpers/archives/zip_extract_test.go
@@ -1,10 +1,13 @@
//go:build !integration
// +build !integration

package archives

import (
"archive/zip"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
Expand Down Expand Up @@ -96,3 +99,54 @@ func TestExtractZipFileNotFound(t *testing.T) {
err := ExtractZipFile("non_existing_zip_file.zip")
assert.Error(t, err)
}

func TestMemoryAllocRetry(t *testing.T) {

memErr := errors.New(errCannotAllocateMemory)
otherErr := errors.New("foo")

cases := []struct {
err error
retries int
res error
remaining int
}{
{
err: memErr,
res: nil,
retries: 1,
remaining: 0,
},
{
err: memErr,
res: memErr,
retries: 0,
remaining: 0,
},
{
err: otherErr,
res: otherErr,
retries: 2,
remaining: 0,
},
{
err: nil,
res: nil,
retries: 0,
remaining: 0,
},
}

disableWait = true

for i, c := range cases {

t.Run(fmt.Sprintf("Case %d", i), func(t *testing.T) {
err, retries := checkMemoryAllocRetry(c.err , c.retries)

assert.Equal(t, c.res, err)
assert.Equal(t, c.remaining, retries)
})
}

}
14 changes: 10 additions & 4 deletions shells/abstract.go
Expand Up @@ -107,7 +107,7 @@ func (b *AbstractShell) cacheExtractor(w ShellWriter, info common.ShellScriptInf
continue
}

b.extractCacheOrFallbackCacheWrapper(w, info, cacheFile, cacheKey)
b.extractCacheOrFallbackCacheWrapper(w, info, cacheFile, cacheKey, cacheOptions.Required)
}

if skipRestoreCache {
Expand All @@ -122,12 +122,13 @@ func (b *AbstractShell) extractCacheOrFallbackCacheWrapper(
info common.ShellScriptInfo,
cacheFile string,
cacheKey string,
required bool,
) {
cacheFallbackKey := info.Build.GetAllVariables().Get("CACHE_FALLBACK_KEY")

// Execute cache-extractor command. Failure is not fatal.
// Execute cache-extractor command. Failure is not fatal unless required is true.
b.guardRunnerCommand(w, info.RunnerCommand, "Extracting cache", func() {
b.addExtractCacheCommand(w, info, cacheFile, cacheKey, cacheFallbackKey)
b.addExtractCacheCommand(w, info, cacheFile, cacheKey, cacheFallbackKey, required)
})
}

Expand All @@ -137,6 +138,7 @@ func (b *AbstractShell) addExtractCacheCommand(
cacheFile string,
cacheKey string,
cacheFallbackKey string,
required bool,
) {
args := []string{
"cache-extractor",
Expand All @@ -154,7 +156,11 @@ func (b *AbstractShell) addExtractCacheCommand(
w.Else()
w.Warningf("Failed to extract cache")
if cacheFallbackKey != "" {
b.addExtractCacheCommand(w, info, cacheFile, cacheFallbackKey, "")
b.addExtractCacheCommand(w, info, cacheFile, cacheFallbackKey, "", required)
} else if required {
// if no cache key, then set the exit code to fail the build
// since most jobs will not be able to run without a cache in any case
w.Exit(1)
}
w.EndIf()
}
Expand Down
20 changes: 17 additions & 3 deletions shells/abstract_test.go
@@ -1,3 +1,4 @@
//go:build !integration
// +build !integration

package shells
Expand Down Expand Up @@ -755,6 +756,14 @@ func TestAbstractShell_writeSubmoduleUpdateCmd(t *testing.T) {
}

func TestAbstractShell_extractCacheWithFallbackKey(t *testing.T) {
testCacheCommon(t, false)
}

func TestAbstractShell_extractCacheWithFallbackKeyRequired(t *testing.T) {
testCacheCommon(t, true)
}

func testCacheCommon(t *testing.T, cacheRequired bool) {
testCacheKey := "test-cache-key"
testFallbackCacheKey := "test-fallback-cache-key"

Expand All @@ -778,9 +787,10 @@ func TestAbstractShell_extractCacheWithFallbackKey(t *testing.T) {
},
Cache: common.Caches{
{
Key: testCacheKey,
Policy: common.CachePolicyPullPush,
Paths: []string{"path1", "path2"},
Key: testCacheKey,
Policy: common.CachePolicyPullPush,
Paths: []string{"path1", "path2"},
Required: cacheRequired,
},
},
Variables: common.JobVariables{
Expand Down Expand Up @@ -834,6 +844,10 @@ func TestAbstractShell_extractCacheWithFallbackKey(t *testing.T) {
mockWriter.On("EndIf").Once()
mockWriter.On("Else").Once()
mockWriter.On("Warningf", "Missing %s. %s is disabled.", "runner-command", "Extracting cache").Once()

if cacheRequired {
mockWriter.On("Exit", 1).Once()
}
mockWriter.On("EndIf").Once()

err := shell.cacheExtractor(mockWriter, info)
Expand Down
5 changes: 5 additions & 0 deletions shells/bash.go
Expand Up @@ -71,6 +71,11 @@ func (b *BashWriter) CheckForErrors() {
b.Line("_runner_exit_code=$?; if [[ $_runner_exit_code -ne 0 ]]; then exit $_runner_exit_code; fi")
}

func (b *BashWriter) Exit(code int) {
b.Line(fmt.Sprintf("exit %d", code))
b.Line("")
}

func (b *BashWriter) Indent() {
b.indent++
}
Expand Down
5 changes: 5 additions & 0 deletions shells/cmd.go
Expand Up @@ -77,6 +77,11 @@ func (b *CmdWriter) CheckForErrors() {
b.checkErrorLevel()
}

func (b *CmdWriter) Exit(code int) {
b.Line(fmt.Sprintf("exit /b %d", code))
b.Line("")
}

func (b *CmdWriter) Indent() {
b.indent++
}
Expand Down
5 changes: 5 additions & 0 deletions shells/mock_ShellWriter.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions shells/powershell.go
Expand Up @@ -133,6 +133,11 @@ func (p *PsWriter) CheckForErrors() {
p.checkErrorLevel()
}

func (p *PsWriter) Exit(code int) {
p.Line(fmt.Sprintf("Exit %d", code))
p.Line("")
}

func (p *PsWriter) Indent() {
p.indent++
}
Expand Down
1 change: 1 addition & 0 deletions shells/shell_writer.go
Expand Up @@ -33,6 +33,7 @@ type ShellWriter interface {
Warningf(fmt string, arguments ...interface{})
Errorf(fmt string, arguments ...interface{})
EmptyLine()
Exit(code int)

Finish(trace bool) string
}

0 comments on commit 7d857b4

Please sign in to comment.