From c080c40030ea16cc6c5220de41117f498a4dd028 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Thu, 29 Jan 2015 20:42:53 -0800 Subject: [PATCH 1/4] Remove erroneous error code on layer upload delete Signed-off-by: Stephen J Day --- api/v2/descriptors.go | 1 - doc/SPEC.md | 1 - 2 files changed, 2 deletions(-) diff --git a/api/v2/descriptors.go b/api/v2/descriptors.go index e98ed4adfb3..d7d3eed1563 100644 --- a/api/v2/descriptors.go +++ b/api/v2/descriptors.go @@ -1263,7 +1263,6 @@ var routeDescriptors = []RouteDescriptor{ Description: "An error was encountered processing the delete. The client may ignore this error.", StatusCode: http.StatusBadRequest, ErrorCodes: []ErrorCode{ - ErrorCodeDigestInvalid, ErrorCodeNameInvalid, ErrorCodeBlobUploadInvalid, }, diff --git a/doc/SPEC.md b/doc/SPEC.md index b82d9d6b540..3fc3e372a25 100644 --- a/doc/SPEC.md +++ b/doc/SPEC.md @@ -2539,7 +2539,6 @@ The error codes that may be included in the response body are enumerated below: |Code|Message|Description| -------|----|------|------------ -| `DIGEST_INVALID` | provided digest did not match uploaded content | When a blob is uploaded, the registry will check that the content matches the digest provided by the client. The error may include a detail structure with the key "digest", including the invalid digest string. This error may also be returned when a manifest includes an invalid layer digest. | | `NAME_INVALID` | manifest name did not match URI | During a manifest upload, if the name in the manifest does not match the uri name, this error will be returned. | | `BLOB_UPLOAD_INVALID` | blob upload invalid | The blob upload encountered an error and can no longer proceed. | From f926a93778c2b213b05e7649c52017a8b466c239 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Thu, 29 Jan 2015 20:45:19 -0800 Subject: [PATCH 2/4] Report layer upload as unavialable when data missing Signed-off-by: Stephen J Day --- storage/layer.go | 11 +++++++++++ storage/layerupload.go | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/storage/layer.go b/storage/layer.go index 24736c70170..b7d84a9848c 100644 --- a/storage/layer.go +++ b/storage/layer.go @@ -94,3 +94,14 @@ type ErrLayerInvalidSize struct { func (err ErrLayerInvalidSize) Error() string { return fmt.Sprintf("invalid layer size: %d", err.Size) } + +// ErrLayerUploadUnavailable signals missing upload data, either when no data +// has been received or when the backend reports the data as missing. This is +// different from ErrLayerUploadUnknown. +type ErrLayerUploadUnavailable struct { + Err error +} + +func (err ErrLayerUploadUnavailable) Error() string { + return fmt.Sprintf("layer upload unavialable: %v", err) +} diff --git a/storage/layerupload.go b/storage/layerupload.go index 690e99ec989..dbd9140e230 100644 --- a/storage/layerupload.go +++ b/storage/layerupload.go @@ -102,7 +102,22 @@ func (luc *layerUploadController) validateLayer(dgst digest.Digest) (digest.Dige // Read the file from the backend driver and validate it. fr, err := newFileReader(luc.fileWriter.driver, luc.path) if err != nil { - return "", err + switch err := err.(type) { + case storagedriver.PathNotFoundError: + // NOTE(stevvooe): Path not found can mean several things by we + // should report the upload is not available. This can happen if + // the following happens: + // + // 1. If not data was received for the upload instance. + // 2. Backend storage driver has not convereged after receiving latest data. + // + // This *does not* mean that the upload does not exist, since we + // can't even get a LayerUpload object without having the + // directory exist. + return "", ErrLayerUploadUnavailable{Err: err} + default: + return "", err + } } tr := io.TeeReader(fr, digestVerifier) From 097fce3bb24d73a0c82225ebf43bb5f644453341 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Thu, 29 Jan 2015 21:26:35 -0800 Subject: [PATCH 3/4] Address server errors received during layer upload This changeset addresses intermittent internal server errors encountered during pushes. The root cause has been isolated to layers that result in identical, empty filesystems but may have some path declarations (imaginge "./"), resulting in different tarsums. The main error message reported during these upload problems was a 500 error, which was not correct. Further investigation showed the errors to be rooted in digest verification when finishing uploads. Inspection of the surrounding code also identified a few issues. PutLayerChunk was slightly refactered into PutLayerUploadComplete. Helper methods were avoided to make handler less confusing. This simplification leveraged an earlier change in the spec that moved non-complete chunk uploads to the PATCH method. Simple logging was also added in the unknown error case that should help to avoid mysterious 500 errors in the future. At the same time, the glaring omission of a proper layer upload cancel method was rectified. This has been added in this change so it is not missed in the future. In the future, we may want to refactor the handler code to be more straightforward, hopefully letting us avoid these problems in the future. Added test cases that reproduce these errors and drove these changes include the following: 1. Push a layer with an empty body results in invalid blob upload. 2. Push a layer with a different tarsum (in this case, empty tar) 3. Deleting a layer upload works. 4. Getting status on a deleted layer upload returns 404. Common functionality was grouped into shared functions to remove repitition. The API tests will still require future love. Signed-off-by: Stephen J Day --- registry/api_test.go | 208 ++++++++++++++++++++++------------------ registry/layerupload.go | 151 ++++++++++++++--------------- 2 files changed, 189 insertions(+), 170 deletions(-) diff --git a/registry/api_test.go b/registry/api_test.go index b0f3bb2b50b..682549205ce 100644 --- a/registry/api_test.go +++ b/registry/api_test.go @@ -11,6 +11,7 @@ import ( "net/http/httputil" "net/url" "os" + "reflect" "testing" "github.com/docker/distribution/api/v2" @@ -120,29 +121,59 @@ func TestLayerAPI(t *testing.T) { checkResponse(t, "checking head on non-existent layer", resp, http.StatusNotFound) // ------------------------------------------ - // Upload a layer - layerUploadURL, err := builder.BuildBlobUploadURL(imageName) + // Start an upload and cancel + uploadURLBase := startPushLayer(t, builder, imageName) + + req, err := http.NewRequest("DELETE", uploadURLBase, nil) if err != nil { - t.Fatalf("error building upload url: %v", err) + t.Fatalf("unexpected error creating delete request: %v", err) } - resp, err = http.Post(layerUploadURL, "", nil) + resp, err = http.DefaultClient.Do(req) if err != nil { - t.Fatalf("error starting layer upload: %v", err) + t.Fatalf("unexpected error sending delete request: %v", err) } - checkResponse(t, "starting layer upload", resp, http.StatusAccepted) - checkHeaders(t, resp, http.Header{ - "Location": []string{"*"}, - "Content-Length": []string{"0"}, - }) + checkResponse(t, "deleting upload", resp, http.StatusNoContent) + + // A status check should result in 404 + resp, err = http.Get(uploadURLBase) + if err != nil { + t.Fatalf("unexpected error getting upload status: %v", err) + } + checkResponse(t, "status of deleted upload", resp, http.StatusNotFound) + + // ----------------------------------------- + // Do layer push with an empty body + uploadURLBase = startPushLayer(t, builder, imageName) + resp, err = doPushLayer(t, builder, imageName, layerDigest, uploadURLBase, bytes.NewReader([]byte{})) + if err != nil { + t.Fatalf("unexpected error doing bad layer push: %v", err) + } + checkResponse(t, "bad layer push", resp, http.StatusBadRequest) + checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeBlobUploadInvalid) + + // ----------------------------------------- + // Do layer push with an invalid body + + // This is a valid but empty tarfile! + badTar := bytes.Repeat([]byte("\x00"), 1024) + uploadURLBase = startPushLayer(t, builder, imageName) + resp, err = doPushLayer(t, builder, imageName, layerDigest, uploadURLBase, bytes.NewReader(badTar)) + if err != nil { + t.Fatalf("unexpected error doing bad layer push: %v", err) + } + + checkResponse(t, "bad layer push", resp, http.StatusBadRequest) + checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeDigestInvalid) + + // ------------------------------------------ + // Now, actually do successful upload. layerLength, _ := layerFile.Seek(0, os.SEEK_END) layerFile.Seek(0, os.SEEK_SET) - // TODO(sday): Cancel the layer upload here and restart. - - uploadURLBase := startPushLayer(t, builder, imageName) + uploadURLBase = startPushLayer(t, builder, imageName) pushLayer(t, builder, imageName, layerDigest, uploadURLBase, layerFile) // ------------------------ @@ -218,27 +249,7 @@ func TestManifestAPI(t *testing.T) { defer resp.Body.Close() checkResponse(t, "getting non-existent manifest", resp, http.StatusNotFound) - - // TODO(stevvooe): Shoot. The error setup is not working out. The content- - // type headers are being set after writing the status code. - // if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" { - // t.Fatalf("unexpected content type: %v != 'application/json'", - // resp.Header.Get("Content-Type")) - // } - dec := json.NewDecoder(resp.Body) - - var respErrs v2.Errors - if err := dec.Decode(&respErrs); err != nil { - t.Fatalf("unexpected error decoding error response: %v", err) - } - - if len(respErrs.Errors) == 0 { - t.Fatalf("expected errors in response") - } - - if respErrs.Errors[0].Code != v2.ErrorCodeManifestUnknown { - t.Fatalf("expected manifest unknown error: got %v", respErrs) - } + checkBodyHasErrorCodes(t, "getting non-existent manifest", resp, v2.ErrorCodeManifestUnknown) tagsURL, err := builder.BuildTagsURL(imageName) if err != nil { @@ -253,18 +264,7 @@ func TestManifestAPI(t *testing.T) { // Check that we get an unknown repository error when asking for tags checkResponse(t, "getting unknown manifest tags", resp, http.StatusNotFound) - dec = json.NewDecoder(resp.Body) - if err := dec.Decode(&respErrs); err != nil { - t.Fatalf("unexpected error decoding error response: %v", err) - } - - if len(respErrs.Errors) == 0 { - t.Fatalf("expected errors in response") - } - - if respErrs.Errors[0].Code != v2.ErrorCodeNameUnknown { - t.Fatalf("expected respository unknown error: got %v", respErrs) - } + checkBodyHasErrorCodes(t, "getting unknown manifest tags", resp, v2.ErrorCodeNameUnknown) // -------------------------------- // Attempt to push unsigned manifest with missing layers @@ -284,41 +284,17 @@ func TestManifestAPI(t *testing.T) { resp = putManifest(t, "putting unsigned manifest", manifestURL, unsignedManifest) defer resp.Body.Close() checkResponse(t, "posting unsigned manifest", resp, http.StatusBadRequest) + _, p, counts := checkBodyHasErrorCodes(t, "getting unknown manifest tags", resp, + v2.ErrorCodeManifestUnverified, v2.ErrorCodeBlobUnknown, v2.ErrorCodeDigestInvalid) - dec = json.NewDecoder(resp.Body) - if err := dec.Decode(&respErrs); err != nil { - t.Fatalf("unexpected error decoding error response: %v", err) - } - - var unverified int - var missingLayers int - var invalidDigests int - - for _, err := range respErrs.Errors { - switch err.Code { - case v2.ErrorCodeManifestUnverified: - unverified++ - case v2.ErrorCodeBlobUnknown: - missingLayers++ - case v2.ErrorCodeDigestInvalid: - // TODO(stevvooe): This error isn't quite descriptive enough -- - // the layer with an invalid digest isn't identified. - invalidDigests++ - default: - t.Fatalf("unexpected error: %v", err) - } - } - - if unverified != 1 { - t.Fatalf("should have received one unverified manifest error: %v", respErrs) + expectedCounts := map[v2.ErrorCode]int{ + v2.ErrorCodeManifestUnverified: 1, + v2.ErrorCodeBlobUnknown: 2, + v2.ErrorCodeDigestInvalid: 2, } - if missingLayers != 2 { - t.Fatalf("should have received two missing layer errors: %v", respErrs) - } - - if invalidDigests != 2 { - t.Fatalf("should have received two invalid digest errors: %v", respErrs) + if !reflect.DeepEqual(counts, expectedCounts) { + t.Fatalf("unexpected number of error codes encountered: %v\n!=\n%v\n---\n%s", counts, expectedCounts, string(p)) } // TODO(stevvooe): Add a test case where we take a mostly valid registry, @@ -363,7 +339,7 @@ func TestManifestAPI(t *testing.T) { checkResponse(t, "fetching uploaded manifest", resp, http.StatusOK) var fetchedManifest manifest.SignedManifest - dec = json.NewDecoder(resp.Body) + dec := json.NewDecoder(resp.Body) if err := dec.Decode(&fetchedManifest); err != nil { t.Fatalf("error decoding fetched manifest: %v", err) } @@ -448,11 +424,9 @@ func startPushLayer(t *testing.T, ub *v2.URLBuilder, name string) string { return resp.Header.Get("Location") } -// pushLayer pushes the layer content returning the url on success. -func pushLayer(t *testing.T, ub *v2.URLBuilder, name string, dgst digest.Digest, uploadURLBase string, rs io.ReadSeeker) string { - rsLength, _ := rs.Seek(0, os.SEEK_END) - rs.Seek(0, os.SEEK_SET) - +// doPushLayer pushes the layer content returning the url on success returning +// the response. If you're only expecting a successful response, use pushLayer. +func doPushLayer(t *testing.T, ub *v2.URLBuilder, name string, dgst digest.Digest, uploadURLBase string, body io.Reader) (*http.Response, error) { u, err := url.Parse(uploadURLBase) if err != nil { t.Fatalf("unexpected error parsing pushLayer url: %v", err) @@ -462,23 +436,24 @@ func pushLayer(t *testing.T, ub *v2.URLBuilder, name string, dgst digest.Digest, "_state": u.Query()["_state"], "digest": []string{dgst.String()}, - - // TODO(stevvooe): Layer upload can be completed with and without size - // argument. We'll need to add a test that checks the latter path. - "size": []string{fmt.Sprint(rsLength)}, }.Encode() uploadURL := u.String() // Just do a monolithic upload - req, err := http.NewRequest("PUT", uploadURL, rs) + req, err := http.NewRequest("PUT", uploadURL, body) if err != nil { t.Fatalf("unexpected error creating new request: %v", err) } - resp, err := http.DefaultClient.Do(req) + return http.DefaultClient.Do(req) +} + +// pushLayer pushes the layer content returning the url on success. +func pushLayer(t *testing.T, ub *v2.URLBuilder, name string, dgst digest.Digest, uploadURLBase string, body io.Reader) string { + resp, err := doPushLayer(t, ub, name, dgst, uploadURLBase, body) if err != nil { - t.Fatalf("unexpected error doing put: %v", err) + t.Fatalf("unexpected error doing push layer request: %v", err) } defer resp.Body.Close() @@ -506,6 +481,57 @@ func checkResponse(t *testing.T, msg string, resp *http.Response, expectedStatus } } +// checkBodyHasErrorCodes ensures the body is an error body and has the +// expected error codes, returning the error structure, the json slice and a +// count of the errors by code. +func checkBodyHasErrorCodes(t *testing.T, msg string, resp *http.Response, errorCodes ...v2.ErrorCode) (v2.Errors, []byte, map[v2.ErrorCode]int) { + p, err := ioutil.ReadAll(resp.Body) + if err != nil { + t.Fatalf("unexpected error reading body %s: %v", msg, err) + } + + var errs v2.Errors + if err := json.Unmarshal(p, &errs); err != nil { + t.Fatalf("unexpected error decoding error response: %v", err) + } + + if len(errs.Errors) == 0 { + t.Fatalf("expected errors in response") + } + + // TODO(stevvooe): Shoot. The error setup is not working out. The content- + // type headers are being set after writing the status code. + // if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" { + // t.Fatalf("unexpected content type: %v != 'application/json'", + // resp.Header.Get("Content-Type")) + // } + + expected := map[v2.ErrorCode]struct{}{} + counts := map[v2.ErrorCode]int{} + + // Initialize map with zeros for expected + for _, code := range errorCodes { + expected[code] = struct{}{} + counts[code] = 0 + } + + for _, err := range errs.Errors { + if _, ok := expected[err.Code]; !ok { + t.Fatalf("unexpected error code %v encountered: %s ", err.Code, string(p)) + } + counts[err.Code]++ + } + + // Ensure that counts of expected errors were all non-zero + for code := range expected { + if counts[code] == 0 { + t.Fatalf("expected error code %v not encounterd: %s", code, string(p)) + } + } + + return errs, p, counts +} + func maybeDumpResponse(t *testing.T, resp *http.Response) { if d, err := httputil.DumpResponse(resp, true); err != nil { t.Logf("error dumping response: %v", err) diff --git a/registry/layerupload.go b/registry/layerupload.go index 5cd445a59a3..e9585b0ecc5 100644 --- a/registry/layerupload.go +++ b/registry/layerupload.go @@ -23,10 +23,12 @@ func layerUploadDispatcher(ctx *Context, r *http.Request) http.Handler { } handler := http.Handler(handlers.MethodHandler{ - "POST": http.HandlerFunc(luh.StartLayerUpload), - "GET": http.HandlerFunc(luh.GetUploadStatus), - "HEAD": http.HandlerFunc(luh.GetUploadStatus), - "PUT": http.HandlerFunc(luh.PutLayerChunk), + "POST": http.HandlerFunc(luh.StartLayerUpload), + "GET": http.HandlerFunc(luh.GetUploadStatus), + "HEAD": http.HandlerFunc(luh.GetUploadStatus), + // TODO(stevvooe): Must implement patch support. + // "PATCH": http.HandlerFunc(luh.PutLayerChunk), + "PUT": http.HandlerFunc(luh.PutLayerUploadComplete), "DELETE": http.HandlerFunc(luh.CancelLayerUpload), }) @@ -158,55 +160,80 @@ func (luh *layerUploadHandler) GetUploadStatus(w http.ResponseWriter, r *http.Re w.WriteHeader(http.StatusNoContent) } -// PutLayerChunk receives a layer chunk during the layer upload process, -// possible completing the upload with a checksum and length. -func (luh *layerUploadHandler) PutLayerChunk(w http.ResponseWriter, r *http.Request) { +// PutLayerUploadComplete takes the final request of a layer upload. The final +// chunk may include all the layer data, the final chunk of layer data or no +// layer data. Any data provided is received and verified. If successful, the +// layer is linked into the blob store and 201 Created is returned with the +// canonical url of the layer. +func (luh *layerUploadHandler) PutLayerUploadComplete(w http.ResponseWriter, r *http.Request) { if luh.Upload == nil { w.WriteHeader(http.StatusNotFound) luh.Errors.Push(v2.ErrorCodeBlobUploadUnknown) + return } - var finished bool + dgstStr := r.FormValue("digest") // TODO(stevvooe): Support multiple digest parameters! + + if dgstStr == "" { + // no digest? return error, but allow retry. + w.WriteHeader(http.StatusNotFound) + luh.Errors.Push(v2.ErrorCodeDigestInvalid, "digest missing") + return + } + + dgst, err := digest.ParseDigest(dgstStr) + if err != nil { + // no digest? return error, but allow retry. + w.WriteHeader(http.StatusNotFound) + luh.Errors.Push(v2.ErrorCodeDigestInvalid, "digest parsing failed") + return + } - // TODO(stevvooe): This is woefully incomplete. Missing stuff: - // - // 1. Extract information from range header, if present. - // 2. Check offset of current layer. - // 3. Emit correct error responses. + // TODO(stevvooe): Check the incoming range header here, per the + // specification. LayerUpload should be seeked (sought?) to that position. - // Read in the chunk + // Read in the final chunk, if any. io.Copy(luh.Upload, r.Body) - if err := luh.maybeCompleteUpload(w, r); err != nil { - if err != errNotReadyToComplete { - switch err := err.(type) { - case storage.ErrLayerInvalidSize: - w.WriteHeader(http.StatusBadRequest) - luh.Errors.Push(v2.ErrorCodeSizeInvalid, err) - return - case storage.ErrLayerInvalidDigest: - w.WriteHeader(http.StatusBadRequest) - luh.Errors.Push(v2.ErrorCodeDigestInvalid, err) - return - default: - w.WriteHeader(http.StatusInternalServerError) - luh.Errors.Push(v2.ErrorCodeUnknown, err) - return - } + layer, err := luh.Upload.Finish(dgst) + if err != nil { + switch err := err.(type) { + case storage.ErrLayerUploadUnavailable: + w.WriteHeader(http.StatusBadRequest) + // TODO(stevvooe): Arguably, we may want to add an error code to + // cover this condition. It is not always a client error but it + // may be. For now, we effectively throw out the upload and have + // them start over. + luh.Errors.Push(v2.ErrorCodeBlobUploadInvalid, err.Err) + case storage.ErrLayerInvalidDigest: + w.WriteHeader(http.StatusBadRequest) + luh.Errors.Push(v2.ErrorCodeDigestInvalid, err) + default: + luh.log.Errorf("unknown error completing upload: %#v", err) + w.WriteHeader(http.StatusInternalServerError) + luh.Errors.Push(v2.ErrorCodeUnknown, err) } + + // Clean up the backend layer data if there was an error. + if err := luh.Upload.Cancel(); err != nil { + // If the cleanup fails, all we can do is observe and report. + luh.log.Errorf("error canceling upload after error: %v", err) + } + + return } - if err := luh.layerUploadResponse(w, r); err != nil { - w.WriteHeader(http.StatusInternalServerError) // Error conditions here? + // Build our canonical layer url + layerURL, err := luh.urlBuilder.BuildBlobURL(layer.Name(), layer.Digest()) + if err != nil { luh.Errors.Push(v2.ErrorCodeUnknown, err) + w.WriteHeader(http.StatusInternalServerError) return } - if finished { - w.WriteHeader(http.StatusCreated) - } else { - w.WriteHeader(http.StatusAccepted) - } + w.Header().Set("Location", layerURL) + w.Header().Set("Content-Length", "0") + w.WriteHeader(http.StatusCreated) } // CancelLayerUpload cancels an in-progress upload of a layer. @@ -214,8 +241,16 @@ func (luh *layerUploadHandler) CancelLayerUpload(w http.ResponseWriter, r *http. if luh.Upload == nil { w.WriteHeader(http.StatusNotFound) luh.Errors.Push(v2.ErrorCodeBlobUploadUnknown) + return } + if err := luh.Upload.Cancel(); err != nil { + luh.log.Errorf("error encountered canceling upload: %v", err) + w.WriteHeader(http.StatusInternalServerError) + luh.Errors.PushErr(err) + } + + w.WriteHeader(http.StatusNoContent) } // layerUploadResponse provides a standard request for uploading layers and @@ -257,45 +292,3 @@ func (luh *layerUploadHandler) layerUploadResponse(w http.ResponseWriter, r *htt return nil } - -var errNotReadyToComplete = fmt.Errorf("not ready to complete upload") - -// maybeCompleteUpload tries to complete the upload if the correct parameters -// are available. Returns errNotReadyToComplete if not ready to complete. -func (luh *layerUploadHandler) maybeCompleteUpload(w http.ResponseWriter, r *http.Request) error { - // If we get a digest and length, we can finish the upload. - dgstStr := r.FormValue("digest") // TODO(stevvooe): Support multiple digest parameters! - - if dgstStr == "" { - return errNotReadyToComplete - } - - dgst, err := digest.ParseDigest(dgstStr) - if err != nil { - return err - } - - luh.completeUpload(w, r, dgst) - return nil -} - -// completeUpload finishes out the upload with the correct response. -func (luh *layerUploadHandler) completeUpload(w http.ResponseWriter, r *http.Request, dgst digest.Digest) { - layer, err := luh.Upload.Finish(dgst) - if err != nil { - luh.Errors.Push(v2.ErrorCodeUnknown, err) - w.WriteHeader(http.StatusInternalServerError) - return - } - - layerURL, err := luh.urlBuilder.BuildBlobURL(layer.Name(), layer.Digest()) - if err != nil { - luh.Errors.Push(v2.ErrorCodeUnknown, err) - w.WriteHeader(http.StatusInternalServerError) - return - } - - w.Header().Set("Location", layerURL) - w.Header().Set("Content-Length", "0") - w.WriteHeader(http.StatusCreated) -} From 0270bec91642bd3bf61afd05e510526b98b57706 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Mon, 2 Feb 2015 13:01:49 -0800 Subject: [PATCH 4/4] Handle empty blob files more appropriately Several API tests were added to ensure correct acceptance of zero-size and empty tar files. This led to several changes in the storage backend around the guarantees of remote file reading, which backs the layer and layer upload type. In support of these changes, zero-length and empty checks have been added to the digest package. These provide a sanity check against upstream tarsum changes. The fileReader has been modified to be more robust when reading and seeking on zero-length or non-existent files. The file no longer needs to exist for the reader to be created. Seeks can now move beyond the end of the file, causing reads to issue an io.EOF. This eliminates errors during certain race conditions for reading files which should be detected by stat calls. As a part of this, a few error types were factored out and the read buffer size was increased to something more reasonable. Signed-off-by: Stephen J Day --- api/v2/descriptors.go | 6 ++-- digest/digest.go | 5 +++ digest/digest_test.go | 28 +++++++++++++++- registry/api_test.go | 31 +++++++++++------- registry/layerupload.go | 7 ---- storage/filereader.go | 67 ++++++++++++++++++++++++++++---------- storage/filereader_test.go | 43 +++++++++++++++++++++--- storage/filewriter.go | 3 -- storage/layer.go | 24 ++------------ storage/layer_test.go | 34 +++++++++++++++++++ storage/layerupload.go | 55 ++++++++++++++++++++----------- storage/revisionstore.go | 2 -- 12 files changed, 216 insertions(+), 89 deletions(-) diff --git a/api/v2/descriptors.go b/api/v2/descriptors.go index d7d3eed1563..2c6fafd02b1 100644 --- a/api/v2/descriptors.go +++ b/api/v2/descriptors.go @@ -1332,9 +1332,9 @@ var errorDescriptors = []ErrorDescriptor{ { Code: ErrorCodeNameInvalid, Value: "NAME_INVALID", - Message: "manifest name did not match URI", - Description: `During a manifest upload, if the name in the manifest - does not match the uri name, this error will be returned.`, + Message: "invalid repository name", + Description: `Invalid repository name encountered either during + manifest validation or any API operation.`, HTTPStatusCodes: []int{http.StatusBadRequest, http.StatusNotFound}, }, { diff --git a/digest/digest.go b/digest/digest.go index 40f1db1591d..3c5ae403e08 100644 --- a/digest/digest.go +++ b/digest/digest.go @@ -13,6 +13,11 @@ import ( "github.com/docker/docker/pkg/tarsum" ) +const ( + // DigestTarSumV1EmptyTar is the digest for the empty tar file. + DigestTarSumV1EmptyTar = "tarsum.v1+sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" +) + // Digest allows simple protection of hex formatted digest strings, prefixed // by their algorithm. Strings of type Digest have some guarantee of being in // the correct format and it provides quick access to the components of a diff --git a/digest/digest_test.go b/digest/digest_test.go index 127f7873de8..9c1e70e5a6c 100644 --- a/digest/digest_test.go +++ b/digest/digest_test.go @@ -1,6 +1,10 @@ package digest -import "testing" +import ( + "bytes" + "io" + "testing" +) func TestParseDigest(t *testing.T) { for _, testcase := range []struct { @@ -78,3 +82,25 @@ func TestParseDigest(t *testing.T) { } } } + +// A few test cases used to fix behavior we expect in storage backend. + +func TestFromTarArchiveZeroLength(t *testing.T) { + checkTarsumDigest(t, "zero-length archive", bytes.NewReader([]byte{}), DigestTarSumV1EmptyTar) +} + +func TestFromTarArchiveEmptyTar(t *testing.T) { + // String of 1024 zeros is a valid, empty tar file. + checkTarsumDigest(t, "1024 zero bytes", bytes.NewReader(bytes.Repeat([]byte("\x00"), 1024)), DigestTarSumV1EmptyTar) +} + +func checkTarsumDigest(t *testing.T, msg string, rd io.Reader, expected Digest) { + dgst, err := FromTarArchive(rd) + if err != nil { + t.Fatalf("unexpected error digesting %s: %v", msg, err) + } + + if dgst != expected { + t.Fatalf("unexpected digest for %s: %q != %q", msg, dgst, expected) + } +} diff --git a/registry/api_test.go b/registry/api_test.go index 682549205ce..5f9e6c386b5 100644 --- a/registry/api_test.go +++ b/registry/api_test.go @@ -144,7 +144,7 @@ func TestLayerAPI(t *testing.T) { checkResponse(t, "status of deleted upload", resp, http.StatusNotFound) // ----------------------------------------- - // Do layer push with an empty body + // Do layer push with an empty body and different digest uploadURLBase = startPushLayer(t, builder, imageName) resp, err = doPushLayer(t, builder, imageName, layerDigest, uploadURLBase, bytes.NewReader([]byte{})) if err != nil { @@ -152,21 +152,30 @@ func TestLayerAPI(t *testing.T) { } checkResponse(t, "bad layer push", resp, http.StatusBadRequest) - checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeBlobUploadInvalid) + checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeDigestInvalid) // ----------------------------------------- - // Do layer push with an invalid body + // Do layer push with an empty body and correct digest + zeroDigest, err := digest.FromTarArchive(bytes.NewReader([]byte{})) + if err != nil { + t.Fatalf("unexpected error digesting empty buffer: %v", err) + } - // This is a valid but empty tarfile! - badTar := bytes.Repeat([]byte("\x00"), 1024) uploadURLBase = startPushLayer(t, builder, imageName) - resp, err = doPushLayer(t, builder, imageName, layerDigest, uploadURLBase, bytes.NewReader(badTar)) + pushLayer(t, builder, imageName, zeroDigest, uploadURLBase, bytes.NewReader([]byte{})) + + // ----------------------------------------- + // Do layer push with an empty body and correct digest + + // This is a valid but empty tarfile! + emptyTar := bytes.Repeat([]byte("\x00"), 1024) + emptyDigest, err := digest.FromTarArchive(bytes.NewReader(emptyTar)) if err != nil { - t.Fatalf("unexpected error doing bad layer push: %v", err) + t.Fatalf("unexpected error digesting empty tar: %v", err) } - checkResponse(t, "bad layer push", resp, http.StatusBadRequest) - checkBodyHasErrorCodes(t, "bad layer push", resp, v2.ErrorCodeDigestInvalid) + uploadURLBase = startPushLayer(t, builder, imageName) + pushLayer(t, builder, imageName, emptyDigest, uploadURLBase, bytes.NewReader(emptyTar)) // ------------------------------------------ // Now, actually do successful upload. @@ -517,7 +526,7 @@ func checkBodyHasErrorCodes(t *testing.T, msg string, resp *http.Response, error for _, err := range errs.Errors { if _, ok := expected[err.Code]; !ok { - t.Fatalf("unexpected error code %v encountered: %s ", err.Code, string(p)) + t.Fatalf("unexpected error code %v encountered during %s: %s ", err.Code, msg, string(p)) } counts[err.Code]++ } @@ -525,7 +534,7 @@ func checkBodyHasErrorCodes(t *testing.T, msg string, resp *http.Response, error // Ensure that counts of expected errors were all non-zero for code := range expected { if counts[code] == 0 { - t.Fatalf("expected error code %v not encounterd: %s", code, string(p)) + t.Fatalf("expected error code %v not encounterd during %s: %s", code, msg, string(p)) } } diff --git a/registry/layerupload.go b/registry/layerupload.go index e9585b0ecc5..cfce98f3a00 100644 --- a/registry/layerupload.go +++ b/registry/layerupload.go @@ -198,13 +198,6 @@ func (luh *layerUploadHandler) PutLayerUploadComplete(w http.ResponseWriter, r * layer, err := luh.Upload.Finish(dgst) if err != nil { switch err := err.(type) { - case storage.ErrLayerUploadUnavailable: - w.WriteHeader(http.StatusBadRequest) - // TODO(stevvooe): Arguably, we may want to add an error code to - // cover this condition. It is not always a client error but it - // may be. For now, we effectively throw out the upload and have - // them start over. - luh.Errors.Push(v2.ErrorCodeBlobUploadInvalid, err.Err) case storage.ErrLayerInvalidDigest: w.WriteHeader(http.StatusBadRequest) luh.Errors.Push(v2.ErrorCodeDigestInvalid, err) diff --git a/storage/filereader.go b/storage/filereader.go index 9a6eb2b374b..9bc09afefa4 100644 --- a/storage/filereader.go +++ b/storage/filereader.go @@ -2,14 +2,23 @@ package storage import ( "bufio" + "bytes" "fmt" "io" + "io/ioutil" "os" "time" "github.com/docker/distribution/storagedriver" ) +// TODO(stevvooe): Set an optimal buffer size here. We'll have to +// understand the latency characteristics of the underlying network to +// set this correctly, so we may want to leave it to the driver. For +// out of process drivers, we'll have to optimize this buffer size for +// local communication. +const fileReaderBufferSize = 4 << 20 + // remoteFileReader provides a read seeker interface to files stored in // storagedriver. Used to implement part of layer interface and will be used // to implement read side of LayerUpload. @@ -28,24 +37,40 @@ type fileReader struct { err error // terminal error, if set, reader is closed } +// newFileReader initializes a file reader for the remote file. The read takes +// on the offset and size at the time the reader is created. If the underlying +// file changes, one must create a new fileReader. func newFileReader(driver storagedriver.StorageDriver, path string) (*fileReader, error) { - // Grab the size of the layer file, ensuring existence. - fi, err := driver.Stat(path) - - if err != nil { - return nil, err + rd := &fileReader{ + driver: driver, + path: path, } - if fi.IsDir() { - return nil, fmt.Errorf("cannot read a directory") + // Grab the size of the layer file, ensuring existence. + if fi, err := driver.Stat(path); err != nil { + switch err := err.(type) { + case storagedriver.PathNotFoundError: + // NOTE(stevvooe): We really don't care if the file is not + // actually present for the reader. If the caller needs to know + // whether or not the file exists, they should issue a stat call + // on the path. There is still no guarantee, since the file may be + // gone by the time the reader is created. The only correct + // behavior is to return a reader that immediately returns EOF. + default: + // Any other error we want propagated up the stack. + return nil, err + } + } else { + if fi.IsDir() { + return nil, fmt.Errorf("cannot read a directory") + } + + // Fill in file information + rd.size = fi.Size() + rd.modtime = fi.ModTime() } - return &fileReader{ - driver: driver, - path: path, - size: fi.Size(), - modtime: fi.ModTime(), - }, nil + return rd, nil } func (fr *fileReader) Read(p []byte) (n int, err error) { @@ -88,8 +113,6 @@ func (fr *fileReader) Seek(offset int64, whence int) (int64, error) { if newOffset < 0 { err = fmt.Errorf("cannot seek to negative position") - } else if newOffset > fr.size { - err = fmt.Errorf("cannot seek passed end of file") } else { if fr.offset != newOffset { fr.reset() @@ -134,9 +157,17 @@ func (fr *fileReader) reader() (io.Reader, error) { // If we don't have a reader, open one up. rc, err := fr.driver.ReadStream(fr.path, fr.offset) - if err != nil { - return nil, err + switch err := err.(type) { + case storagedriver.PathNotFoundError: + // NOTE(stevvooe): If the path is not found, we simply return a + // reader that returns io.EOF. However, we do not set fr.rc, + // allowing future attempts at getting a reader to possibly + // succeed if the file turns up later. + return ioutil.NopCloser(bytes.NewReader([]byte{})), nil + default: + return nil, err + } } fr.rc = rc @@ -147,7 +178,7 @@ func (fr *fileReader) reader() (io.Reader, error) { // set this correctly, so we may want to leave it to the driver. For // out of process drivers, we'll have to optimize this buffer size for // local communication. - fr.brd = bufio.NewReader(fr.rc) + fr.brd = bufio.NewReaderSize(fr.rc, fileReaderBufferSize) } else { fr.brd.Reset(fr.rc) } diff --git a/storage/filereader_test.go b/storage/filereader_test.go index 7cf5633d363..53dd6c9a5f7 100644 --- a/storage/filereader_test.go +++ b/storage/filereader_test.go @@ -124,7 +124,7 @@ func TestFileReaderSeek(t *testing.T) { t.Fatalf("expected to seek to end: %v != %v", end, len(content)) } - // 4. Seek past end and before start, ensure error. + // 4. Seek before start, ensure error. // seek before start before, err := fr.Seek(-1, os.SEEK_SET) @@ -132,9 +132,44 @@ func TestFileReaderSeek(t *testing.T) { t.Fatalf("error expected, returned offset=%v", before) } - after, err := fr.Seek(int64(len(content)+1), os.SEEK_END) - if err == nil { - t.Fatalf("error expected, returned offset=%v", after) + // 5. Seek after end, + after, err := fr.Seek(1, os.SEEK_END) + if err != nil { + t.Fatalf("unexpected error expected, returned offset=%v", after) + } + + p := make([]byte, 16) + n, err := fr.Read(p) + + if n != 0 { + t.Fatalf("bytes reads %d != %d", n, 0) + } + + if err != io.EOF { + t.Fatalf("expected io.EOF, got %v", err) + } +} + +// TestFileReaderNonExistentFile ensures the reader behaves as expected with a +// missing or zero-length remote file. While the file may not exist, the +// reader should not error out on creation and should return 0-bytes from the +// read method, with an io.EOF error. +func TestFileReaderNonExistentFile(t *testing.T) { + driver := inmemory.New() + fr, err := newFileReader(driver, "/doesnotexist") + if err != nil { + t.Fatalf("unexpected error initializing reader: %v", err) + } + + var buf [1024]byte + + n, err := fr.Read(buf[:]) + if n != 0 { + t.Fatalf("non-zero byte read reported: %d != 0", n) + } + + if err != io.EOF { + t.Fatalf("read on missing file should return io.EOF, got %v", err) } } diff --git a/storage/filewriter.go b/storage/filewriter.go index cfa7c93de63..5037f1608ac 100644 --- a/storage/filewriter.go +++ b/storage/filewriter.go @@ -99,9 +99,6 @@ func (fw *fileWriter) Seek(offset int64, whence int) (int64, error) { if newOffset < 0 { err = fmt.Errorf("cannot seek to negative position") - } else if newOffset > fw.size { - fw.offset = newOffset - fw.size = newOffset } else { // No problems, set the offset. fw.offset = newOffset diff --git a/storage/layer.go b/storage/layer.go index b7d84a9848c..627a3b5f717 100644 --- a/storage/layer.go +++ b/storage/layer.go @@ -80,28 +80,10 @@ func (err ErrUnknownLayer) Error() string { // ErrLayerInvalidDigest returned when tarsum check fails. type ErrLayerInvalidDigest struct { Digest digest.Digest + Reason error } func (err ErrLayerInvalidDigest) Error() string { - return fmt.Sprintf("invalid digest for referenced layer: %v", err.Digest) -} - -// ErrLayerInvalidSize returned when length check fails. -type ErrLayerInvalidSize struct { - Size int64 -} - -func (err ErrLayerInvalidSize) Error() string { - return fmt.Sprintf("invalid layer size: %d", err.Size) -} - -// ErrLayerUploadUnavailable signals missing upload data, either when no data -// has been received or when the backend reports the data as missing. This is -// different from ErrLayerUploadUnknown. -type ErrLayerUploadUnavailable struct { - Err error -} - -func (err ErrLayerUploadUnavailable) Error() string { - return fmt.Sprintf("layer upload unavialable: %v", err) + return fmt.Sprintf("invalid digest for referenced layer: %v, %v", + err.Digest, err.Reason) } diff --git a/storage/layer_test.go b/storage/layer_test.go index 7da64190a53..2a551694e67 100644 --- a/storage/layer_test.go +++ b/storage/layer_test.go @@ -235,6 +235,40 @@ func TestSimpleLayerRead(t *testing.T) { } } +// TestLayerUploadZeroLength uploads zero-length +func TestLayerUploadZeroLength(t *testing.T) { + imageName := "foo/bar" + driver := inmemory.New() + registry := NewRegistryWithDriver(driver) + ls := registry.Repository(imageName).Layers() + + upload, err := ls.Upload() + if err != nil { + t.Fatalf("unexpected error starting upload: %v", err) + } + + io.Copy(upload, bytes.NewReader([]byte{})) + + dgst, err := digest.FromTarArchive(bytes.NewReader([]byte{})) + if err != nil { + t.Fatalf("error getting zero digest: %v", err) + } + + if dgst != digest.DigestTarSumV1EmptyTar { + // sanity check on zero digest + t.Fatalf("digest not as expected: %v != %v", dgst, digest.DigestTarSumV1EmptyTar) + } + + layer, err := upload.Finish(dgst) + if err != nil { + t.Fatalf("unexpected error finishing upload: %v", err) + } + + if layer.Digest() != dgst { + t.Fatalf("unexpected digest: %v != %v", layer.Digest(), dgst) + } +} + // writeRandomLayer creates a random layer under name and tarSum using driver // and pathMapper. An io.ReadSeeker with the data is returned, along with the // sha256 hex digest. diff --git a/storage/layerupload.go b/storage/layerupload.go index dbd9140e230..0c69e30da50 100644 --- a/storage/layerupload.go +++ b/storage/layerupload.go @@ -1,6 +1,7 @@ package storage import ( + "fmt" "io" "path" "time" @@ -89,7 +90,10 @@ func (luc *layerUploadController) validateLayer(dgst digest.Digest) (digest.Dige case tarsum.Version1: default: // version 0 and dev, for now. - return "", ErrLayerTarSumVersionUnsupported + return "", ErrLayerInvalidDigest{ + Digest: dgst, + Reason: ErrLayerTarSumVersionUnsupported, + } } digestVerifier := digest.NewDigestVerifier(dgst) @@ -102,22 +106,7 @@ func (luc *layerUploadController) validateLayer(dgst digest.Digest) (digest.Dige // Read the file from the backend driver and validate it. fr, err := newFileReader(luc.fileWriter.driver, luc.path) if err != nil { - switch err := err.(type) { - case storagedriver.PathNotFoundError: - // NOTE(stevvooe): Path not found can mean several things by we - // should report the upload is not available. This can happen if - // the following happens: - // - // 1. If not data was received for the upload instance. - // 2. Backend storage driver has not convereged after receiving latest data. - // - // This *does not* mean that the upload does not exist, since we - // can't even get a LayerUpload object without having the - // directory exist. - return "", ErrLayerUploadUnavailable{Err: err} - default: - return "", err - } + return "", err } tr := io.TeeReader(fr, digestVerifier) @@ -132,7 +121,10 @@ func (luc *layerUploadController) validateLayer(dgst digest.Digest) (digest.Dige } if !digestVerifier.Verified() { - return "", ErrLayerInvalidDigest{Digest: dgst} + return "", ErrLayerInvalidDigest{ + Digest: dgst, + Reason: fmt.Errorf("content does not match digest"), + } } return canonical, nil @@ -151,7 +143,7 @@ func (luc *layerUploadController) moveLayer(dgst digest.Digest) error { } // Check for existence - if _, err := luc.layerStore.repository.registry.driver.Stat(blobPath); err != nil { + if _, err := luc.driver.Stat(blobPath); err != nil { switch err := err.(type) { case storagedriver.PathNotFoundError: break // ensure that it doesn't exist. @@ -166,6 +158,31 @@ func (luc *layerUploadController) moveLayer(dgst digest.Digest) error { return nil } + // If no data was received, we may not actually have a file on disk. Check + // the size here and write a zero-length file to blobPath if this is the + // case. For the most part, this should only ever happen with zero-length + // tars. + if _, err := luc.driver.Stat(luc.path); err != nil { + switch err := err.(type) { + case storagedriver.PathNotFoundError: + // HACK(stevvooe): This is slightly dangerous: if we verify above, + // get a hash, then the underlying file is deleted, we risk moving + // a zero-length blob into a nonzero-length blob location. To + // prevent this horrid thing, we employ the hack of only allowing + // to this happen for the zero tarsum. + if dgst == digest.DigestTarSumV1EmptyTar { + return luc.driver.PutContent(blobPath, []byte{}) + } + + // We let this fail during the move below. + logrus. + WithField("upload.uuid", luc.UUID()). + WithField("digest", dgst).Warnf("attempted to move zero-length content with non-zero digest") + default: + return err // unrelated error + } + } + return luc.driver.Move(luc.path, blobPath) } diff --git a/storage/revisionstore.go b/storage/revisionstore.go index 97518df19f6..a88ca8c77ff 100644 --- a/storage/revisionstore.go +++ b/storage/revisionstore.go @@ -57,8 +57,6 @@ func (rs *revisionStore) get(revision digest.Digest) (*manifest.SignedManifest, return nil, err } - logrus.Infof("retrieved signatures: %v", string(signatures[0])) - jsig, err := libtrust.NewJSONSignature(content, signatures...) if err != nil { return nil, err