Skip to content

Commit

Permalink
Handle empty blob files more appropriately
Browse files Browse the repository at this point in the history
Several API tests were added to ensure correct acceptance of zero-size and
empty tar files. This led to several changes in the storage backend around the
guarantees of remote file reading, which backs the layer and layer upload type.

In support of these changes, zero-length and empty checks have been added to
the digest package. These provide a sanity check against upstream tarsum
changes. The fileReader has been modified to be more robust when reading and
seeking on zero-length or non-existent files. The file no longer needs to exist
for the reader to be created. Seeks can now move beyond the end of the file,
causing reads to issue an io.EOF. This eliminates errors during certain race
conditions for reading files which should be detected by stat calls. As a part
of this, a few error types were factored out and the read buffer size was
increased to something more reasonable.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
  • Loading branch information
stevvooe committed Feb 2, 2015
1 parent 097fce3 commit 0270bec
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 89 deletions.
6 changes: 3 additions & 3 deletions api/v2/descriptors.go
Original file line number Diff line number Diff line change
Expand Up @@ -1332,9 +1332,9 @@ var errorDescriptors = []ErrorDescriptor{
{
Code: ErrorCodeNameInvalid,
Value: "NAME_INVALID",
Message: "manifest name did not match URI",
Description: `During a manifest upload, if the name in the manifest
does not match the uri name, this error will be returned.`,
Message: "invalid repository name",
Description: `Invalid repository name encountered either during
manifest validation or any API operation.`,
HTTPStatusCodes: []int{http.StatusBadRequest, http.StatusNotFound},
},
{
Expand Down
5 changes: 5 additions & 0 deletions digest/digest.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ import (
"github.com/docker/docker/pkg/tarsum"
)

const (
// DigestTarSumV1EmptyTar is the digest for the empty tar file.
DigestTarSumV1EmptyTar = "tarsum.v1+sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)

// Digest allows simple protection of hex formatted digest strings, prefixed
// by their algorithm. Strings of type Digest have some guarantee of being in
// the correct format and it provides quick access to the components of a
Expand Down
28 changes: 27 additions & 1 deletion digest/digest_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package digest

import "testing"
import (
"bytes"
"io"
"testing"
)

func TestParseDigest(t *testing.T) {
for _, testcase := range []struct {
Expand Down Expand Up @@ -78,3 +82,25 @@ func TestParseDigest(t *testing.T) {
}
}
}

// A few test cases used to fix behavior we expect in storage backend.

func TestFromTarArchiveZeroLength(t *testing.T) {
checkTarsumDigest(t, "zero-length archive", bytes.NewReader([]byte{}), DigestTarSumV1EmptyTar)
}

func TestFromTarArchiveEmptyTar(t *testing.T) {
// String of 1024 zeros is a valid, empty tar file.
checkTarsumDigest(t, "1024 zero bytes", bytes.NewReader(bytes.Repeat([]byte("\x00"), 1024)), DigestTarSumV1EmptyTar)
}

func checkTarsumDigest(t *testing.T, msg string, rd io.Reader, expected Digest) {
dgst, err := FromTarArchive(rd)
if err != nil {
t.Fatalf("unexpected error digesting %s: %v", msg, err)
}

if dgst != expected {
t.Fatalf("unexpected digest for %s: %q != %q", msg, dgst, expected)
}
}
31 changes: 20 additions & 11 deletions registry/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,29 +144,38 @@ func TestLayerAPI(t *testing.T) {
checkResponse(t, "status of deleted upload", resp, http.StatusNotFound)

// -----------------------------------------
// Do layer push with an empty body
// Do layer push with an empty body and different digest
uploadURLBase = startPushLayer(t, builder, imageName)
resp, err = doPushLayer(t, builder, imageName, layerDigest, uploadURLBase, bytes.NewReader([]byte{}))
if err != nil {
t.Fatalf("unexpected error doing bad layer push: %v", err)
}

checkResponse(t, "bad layer push", resp, http.StatusBadRequest)
checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeBlobUploadInvalid)
checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeDigestInvalid)

// -----------------------------------------
// Do layer push with an invalid body
// Do layer push with an empty body and correct digest
zeroDigest, err := digest.FromTarArchive(bytes.NewReader([]byte{}))
if err != nil {
t.Fatalf("unexpected error digesting empty buffer: %v", err)
}

// This is a valid but empty tarfile!
badTar := bytes.Repeat([]byte("\x00"), 1024)
uploadURLBase = startPushLayer(t, builder, imageName)
resp, err = doPushLayer(t, builder, imageName, layerDigest, uploadURLBase, bytes.NewReader(badTar))
pushLayer(t, builder, imageName, zeroDigest, uploadURLBase, bytes.NewReader([]byte{}))

// -----------------------------------------
// Do layer push with an empty body and correct digest

// This is a valid but empty tarfile!
emptyTar := bytes.Repeat([]byte("\x00"), 1024)
emptyDigest, err := digest.FromTarArchive(bytes.NewReader(emptyTar))
if err != nil {
t.Fatalf("unexpected error doing bad layer push: %v", err)
t.Fatalf("unexpected error digesting empty tar: %v", err)
}

checkResponse(t, "bad layer push", resp, http.StatusBadRequest)
checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeDigestInvalid)
uploadURLBase = startPushLayer(t, builder, imageName)
pushLayer(t, builder, imageName, emptyDigest, uploadURLBase, bytes.NewReader(emptyTar))

// ------------------------------------------
// Now, actually do successful upload.
Expand Down Expand Up @@ -517,15 +526,15 @@ func checkBodyHasErrorCodes(t *testing.T, msg string, resp *http.Response, error

for _, err := range errs.Errors {
if _, ok := expected[err.Code]; !ok {
t.Fatalf("unexpected error code %v encountered: %s ", err.Code, string(p))
t.Fatalf("unexpected error code %v encountered during %s: %s ", err.Code, msg, string(p))
}
counts[err.Code]++
}

// Ensure that counts of expected errors were all non-zero
for code := range expected {
if counts[code] == 0 {
t.Fatalf("expected error code %v not encounterd: %s", code, string(p))
t.Fatalf("expected error code %v not encounterd during %s: %s", code, msg, string(p))
}
}

Expand Down
7 changes: 0 additions & 7 deletions registry/layerupload.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,6 @@ func (luh *layerUploadHandler) PutLayerUploadComplete(w http.ResponseWriter, r *
layer, err := luh.Upload.Finish(dgst)
if err != nil {
switch err := err.(type) {
case storage.ErrLayerUploadUnavailable:
w.WriteHeader(http.StatusBadRequest)
// TODO(stevvooe): Arguably, we may want to add an error code to
// cover this condition. It is not always a client error but it
// may be. For now, we effectively throw out the upload and have
// them start over.
luh.Errors.Push(v2.ErrorCodeBlobUploadInvalid, err.Err)
case storage.ErrLayerInvalidDigest:
w.WriteHeader(http.StatusBadRequest)
luh.Errors.Push(v2.ErrorCodeDigestInvalid, err)
Expand Down
67 changes: 49 additions & 18 deletions storage/filereader.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,23 @@ package storage

import (
"bufio"
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
"time"

"github.com/docker/distribution/storagedriver"
)

// TODO(stevvooe): Set an optimal buffer size here. We'll have to
// understand the latency characteristics of the underlying network to
// set this correctly, so we may want to leave it to the driver. For
// out of process drivers, we'll have to optimize this buffer size for
// local communication.
const fileReaderBufferSize = 4 << 20

// remoteFileReader provides a read seeker interface to files stored in
// storagedriver. Used to implement part of layer interface and will be used
// to implement read side of LayerUpload.
Expand All @@ -28,24 +37,40 @@ type fileReader struct {
err error // terminal error, if set, reader is closed
}

// newFileReader initializes a file reader for the remote file. The read takes
// on the offset and size at the time the reader is created. If the underlying
// file changes, one must create a new fileReader.
func newFileReader(driver storagedriver.StorageDriver, path string) (*fileReader, error) {
// Grab the size of the layer file, ensuring existence.
fi, err := driver.Stat(path)

if err != nil {
return nil, err
rd := &fileReader{
driver: driver,
path: path,
}

if fi.IsDir() {
return nil, fmt.Errorf("cannot read a directory")
// Grab the size of the layer file, ensuring existence.
if fi, err := driver.Stat(path); err != nil {
switch err := err.(type) {
case storagedriver.PathNotFoundError:
// NOTE(stevvooe): We really don't care if the file is not
// actually present for the reader. If the caller needs to know
// whether or not the file exists, they should issue a stat call
// on the path. There is still no guarantee, since the file may be
// gone by the time the reader is created. The only correct
// behavior is to return a reader that immediately returns EOF.
default:
// Any other error we want propagated up the stack.
return nil, err
}
} else {
if fi.IsDir() {
return nil, fmt.Errorf("cannot read a directory")
}

// Fill in file information
rd.size = fi.Size()
rd.modtime = fi.ModTime()
}

return &fileReader{
driver: driver,
path: path,
size: fi.Size(),
modtime: fi.ModTime(),
}, nil
return rd, nil
}

func (fr *fileReader) Read(p []byte) (n int, err error) {
Expand Down Expand Up @@ -88,8 +113,6 @@ func (fr *fileReader) Seek(offset int64, whence int) (int64, error) {

if newOffset < 0 {
err = fmt.Errorf("cannot seek to negative position")
} else if newOffset > fr.size {
err = fmt.Errorf("cannot seek passed end of file")
} else {
if fr.offset != newOffset {
fr.reset()
Expand Down Expand Up @@ -134,9 +157,17 @@ func (fr *fileReader) reader() (io.Reader, error) {

// If we don't have a reader, open one up.
rc, err := fr.driver.ReadStream(fr.path, fr.offset)

if err != nil {
return nil, err
switch err := err.(type) {
case storagedriver.PathNotFoundError:
// NOTE(stevvooe): If the path is not found, we simply return a
// reader that returns io.EOF. However, we do not set fr.rc,
// allowing future attempts at getting a reader to possibly
// succeed if the file turns up later.
return ioutil.NopCloser(bytes.NewReader([]byte{})), nil
default:
return nil, err
}
}

fr.rc = rc
Expand All @@ -147,7 +178,7 @@ func (fr *fileReader) reader() (io.Reader, error) {
// set this correctly, so we may want to leave it to the driver. For
// out of process drivers, we'll have to optimize this buffer size for
// local communication.
fr.brd = bufio.NewReader(fr.rc)
fr.brd = bufio.NewReaderSize(fr.rc, fileReaderBufferSize)
} else {
fr.brd.Reset(fr.rc)
}
Expand Down
43 changes: 39 additions & 4 deletions storage/filereader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,52 @@ func TestFileReaderSeek(t *testing.T) {
t.Fatalf("expected to seek to end: %v != %v", end, len(content))
}

// 4. Seek past end and before start, ensure error.
// 4. Seek before start, ensure error.

// seek before start
before, err := fr.Seek(-1, os.SEEK_SET)
if err == nil {
t.Fatalf("error expected, returned offset=%v", before)
}

after, err := fr.Seek(int64(len(content)+1), os.SEEK_END)
if err == nil {
t.Fatalf("error expected, returned offset=%v", after)
// 5. Seek after end,
after, err := fr.Seek(1, os.SEEK_END)
if err != nil {
t.Fatalf("unexpected error expected, returned offset=%v", after)
}

p := make([]byte, 16)
n, err := fr.Read(p)

if n != 0 {
t.Fatalf("bytes reads %d != %d", n, 0)
}

if err != io.EOF {
t.Fatalf("expected io.EOF, got %v", err)
}
}

// TestFileReaderNonExistentFile ensures the reader behaves as expected with a
// missing or zero-length remote file. While the file may not exist, the
// reader should not error out on creation and should return 0-bytes from the
// read method, with an io.EOF error.
func TestFileReaderNonExistentFile(t *testing.T) {
driver := inmemory.New()
fr, err := newFileReader(driver, "/doesnotexist")
if err != nil {
t.Fatalf("unexpected error initializing reader: %v", err)
}

var buf [1024]byte

n, err := fr.Read(buf[:])
if n != 0 {
t.Fatalf("non-zero byte read reported: %d != 0", n)
}

if err != io.EOF {
t.Fatalf("read on missing file should return io.EOF, got %v", err)
}
}

Expand Down
3 changes: 0 additions & 3 deletions storage/filewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,6 @@ func (fw *fileWriter) Seek(offset int64, whence int) (int64, error) {

if newOffset < 0 {
err = fmt.Errorf("cannot seek to negative position")
} else if newOffset > fw.size {
fw.offset = newOffset
fw.size = newOffset
} else {
// No problems, set the offset.
fw.offset = newOffset
Expand Down
24 changes: 3 additions & 21 deletions storage/layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,28 +80,10 @@ func (err ErrUnknownLayer) Error() string {
// ErrLayerInvalidDigest returned when tarsum check fails.
type ErrLayerInvalidDigest struct {
Digest digest.Digest
Reason error
}

func (err ErrLayerInvalidDigest) Error() string {
return fmt.Sprintf("invalid digest for referenced layer: %v", err.Digest)
}

// ErrLayerInvalidSize returned when length check fails.
type ErrLayerInvalidSize struct {
Size int64
}

func (err ErrLayerInvalidSize) Error() string {
return fmt.Sprintf("invalid layer size: %d", err.Size)
}

// ErrLayerUploadUnavailable signals missing upload data, either when no data
// has been received or when the backend reports the data as missing. This is
// different from ErrLayerUploadUnknown.
type ErrLayerUploadUnavailable struct {
Err error
}

func (err ErrLayerUploadUnavailable) Error() string {
return fmt.Sprintf("layer upload unavialable: %v", err)
return fmt.Sprintf("invalid digest for referenced layer: %v, %v",
err.Digest, err.Reason)
}
34 changes: 34 additions & 0 deletions storage/layer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,40 @@ func TestSimpleLayerRead(t *testing.T) {
}
}

// TestLayerUploadZeroLength uploads zero-length
func TestLayerUploadZeroLength(t *testing.T) {
imageName := "foo/bar"
driver := inmemory.New()
registry := NewRegistryWithDriver(driver)
ls := registry.Repository(imageName).Layers()

upload, err := ls.Upload()
if err != nil {
t.Fatalf("unexpected error starting upload: %v", err)
}

io.Copy(upload, bytes.NewReader([]byte{}))

dgst, err := digest.FromTarArchive(bytes.NewReader([]byte{}))
if err != nil {
t.Fatalf("error getting zero digest: %v", err)
}

if dgst != digest.DigestTarSumV1EmptyTar {
// sanity check on zero digest
t.Fatalf("digest not as expected: %v != %v", dgst, digest.DigestTarSumV1EmptyTar)
}

layer, err := upload.Finish(dgst)
if err != nil {
t.Fatalf("unexpected error finishing upload: %v", err)
}

if layer.Digest() != dgst {
t.Fatalf("unexpected digest: %v != %v", layer.Digest(), dgst)
}
}

// writeRandomLayer creates a random layer under name and tarSum using driver
// and pathMapper. An io.ReadSeeker with the data is returned, along with the
// sha256 hex digest.
Expand Down
Loading

0 comments on commit 0270bec

Please sign in to comment.