From 02fe71843e0e45ddc986a6c8182370b042349a27 Mon Sep 17 00:00:00 2001 From: John Howard Date: Thu, 23 Aug 2018 15:56:27 -0700 Subject: [PATCH] Windows: DetachVhd attempt in cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: John Howard This is a fix for a few related scenarios where it's impossible to remove layers or containers until the host is rebooted. Generally (or at least easiest to repro) through a forced daemon kill while a container is running. Possibly slightly worse than that, as following a host reboot, the scratch layer would possibly be leaked and left on disk under the dataroot\windowsfilter directory after the container is removed. One such example of a failure: 1. run a long running container with the --rm flag docker run --rm -d --name test microsoft/windowsservercore powershell sleep 30 2. Force kill the daemon not allowing it to cleanup. Simulates a crash or a host power-cycle. 3. (re-)Start daemon 4. docker ps -a PS C:\control> docker ps -a CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 7aff773d782b malloc "powershell start-sl…" 11 seconds ago Removal In Progress malloc 5. Try to remove PS C:\control> docker rm 7aff Error response from daemon: container 7aff773d782bbf35d95095369ffcb170b7b8f0e6f8f65d5aff42abf61234855d: driver "windowsfilter" failed to remove root filesystem: rename C:\control\windowsfilter\7aff773d782bbf35d95095369ffcb170b7b8f0e6f8f65d5aff42abf61234855d C:\control\windowsfilter\7aff773d782bbf35d95095369ffcb170b7b8f0e6f8f65d5aff42abf61234855d-removing: Access is denied. PS C:\control> Step 5 fails. (cherry picked from commit efdad5374465a2a889d7572834a2dcca147af4fb) Signed-off-by: Sebastiaan van Stijn --- daemon/graphdriver/windows/windows.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/daemon/graphdriver/windows/windows.go b/daemon/graphdriver/windows/windows.go index 13bfac2d2ecbf..52b0c6d84509a 100644 --- a/daemon/graphdriver/windows/windows.go +++ b/daemon/graphdriver/windows/windows.go @@ -6,7 +6,6 @@ import ( "bufio" "bytes" "encoding/json" - "errors" "fmt" "io" "io/ioutil" @@ -23,6 +22,7 @@ import ( "github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio/archive/tar" "github.com/Microsoft/go-winio/backuptar" + "github.com/Microsoft/go-winio/vhd" "github.com/Microsoft/hcsshim" "github.com/docker/docker/daemon/graphdriver" "github.com/docker/docker/pkg/archive" @@ -33,6 +33,7 @@ import ( "github.com/docker/docker/pkg/reexec" "github.com/docker/docker/pkg/system" units "github.com/docker/go-units" + "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/windows" ) @@ -331,7 +332,18 @@ func (d *Driver) Remove(id string) error { tmpID := fmt.Sprintf("%s-removing", rID) tmpLayerPath := filepath.Join(d.info.HomeDir, tmpID) if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) { - return err + if !os.IsPermission(err) { + return err + } + // If permission denied, it's possible that the scratch is still mounted, an + // artifact after a hard daemon crash for example. Worth a shot to try detaching it + // before retrying the rename. + if detachErr := vhd.DetachVhd(filepath.Join(layerPath, "sandbox.vhdx")); detachErr != nil { + return errors.Wrapf(err, "failed to detach VHD: %s", detachErr) + } + if renameErr := os.Rename(layerPath, tmpLayerPath); renameErr != nil && !os.IsNotExist(renameErr) { + return errors.Wrapf(err, "second rename attempt following detach failed: %s", renameErr) + } } if err := hcsshim.DestroyLayer(d.info, tmpID); err != nil { logrus.Errorf("Failed to DestroyLayer %s: %s", id, err)