Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better error recovery in device mapper #3470

Merged
merged 3 commits into from Aug 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions snapshots/devmapper/device_info.go
Expand Up @@ -56,6 +56,8 @@ const (
Removing
// Removed means that device successfully removed but not yet deleted from meta store
Removed
// Faulty means that the device is errored and the snapshotter failed to roll it back
Faulty
)

func (s DeviceState) String() string {
Expand Down Expand Up @@ -84,6 +86,8 @@ func (s DeviceState) String() string {
return "Removing"
case Removed:
return "Removed"
case Faulty:
return "Faulty"
default:
return fmt.Sprintf("unknown %d", s)
}
Expand Down
43 changes: 39 additions & 4 deletions snapshots/devmapper/metadata.go
Expand Up @@ -38,6 +38,7 @@ type deviceIDState byte
const (
deviceFree deviceIDState = iota
deviceTaken
deviceFaulty
)

// Bucket names
Expand Down Expand Up @@ -92,11 +93,14 @@ func (m *PoolMetadata) ensureDatabaseInitialized() error {

// AddDevice saves device info to database.
func (m *PoolMetadata) AddDevice(ctx context.Context, info *DeviceInfo) error {
return m.db.Update(func(tx *bolt.Tx) error {
err := m.db.Update(func(tx *bolt.Tx) error {
devicesBucket := tx.Bucket(devicesBucketName)

// Make sure device name is unique
if err := getObject(devicesBucket, info.Name, nil); err == nil {
// Make sure device name is unique. If there is already a device with the same name,
// but in Faulty state, give it a try with another devmapper device ID.
// See https://github.com/containerd/containerd/pull/3436 for more context.
var existing DeviceInfo
if err := getObject(devicesBucket, info.Name, &existing); err == nil && existing.State != Faulty {
return ErrAlreadyExists
}

Expand All @@ -108,7 +112,38 @@ func (m *PoolMetadata) AddDevice(ctx context.Context, info *DeviceInfo) error {

info.DeviceID = deviceID

return putObject(devicesBucket, info.Name, info, false)
return putObject(devicesBucket, info.Name, info, true)
mxpv marked this conversation as resolved.
Show resolved Hide resolved
})

if err != nil {
return errors.Wrapf(err, "failed to save metadata for device %q (parent: %q)", info.Name, info.ParentName)
}

return nil
}

// MarkFaulty marks the given device and its devmapper device ID as faulty.
// The snapshotter may retry a device left in 'Faulty' state under another devmapper ID
// on a subsequent call, and on success its status becomes 'Created' or 'Activated'.
// The devmapper device ID itself remains in 'deviceFaulty' state until handled manually by a user.
func (m *PoolMetadata) MarkFaulty(ctx context.Context, name string) error {
	return m.db.Update(func(tx *bolt.Tx) error {
		bucket := tx.Bucket(devicesBucketName)

		// Load the stored record for this device name.
		var info DeviceInfo
		if err := getObject(bucket, name, &info); err != nil {
			return err
		}

		// Flip the device state and persist it back (overwrite allowed).
		info.State = Faulty
		if err := putObject(bucket, name, &info, true); err != nil {
			return err
		}

		// Flag the allocated devmapper device ID as well so it won't be reused.
		return markDeviceID(tx, info.DeviceID, deviceFaulty)
	})
}

Expand Down
32 changes: 31 additions & 1 deletion snapshots/devmapper/metadata_test.go
Expand Up @@ -23,8 +23,11 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/pkg/errors"
"go.etcd.io/bbolt"
"gotest.tools/assert"
is "gotest.tools/assert/cmp"
)
Expand Down Expand Up @@ -77,7 +80,7 @@ func TestPoolMetadata_AddDeviceDuplicate(t *testing.T) {
assert.NilError(t, err)

err = store.AddDevice(testCtx, &DeviceInfo{Name: "test"})
assert.Equal(t, ErrAlreadyExists, err)
assert.Equal(t, ErrAlreadyExists, errors.Cause(err))
}

func TestPoolMetadata_ReuseDeviceID(t *testing.T) {
Expand Down Expand Up @@ -151,6 +154,33 @@ func TestPoolMetadata_UpdateDevice(t *testing.T) {
assert.Equal(t, Created, newInfo.State)
}

// TestPoolMetadata_MarkFaulty verifies that MarkFaulty updates both the device
// record and the corresponding devmapper device ID entry in the metadata store.
func TestPoolMetadata_MarkFaulty(t *testing.T) {
	tempDir, store := createStore(t)
	defer cleanupStore(t, tempDir, store)

	device := &DeviceInfo{Name: "test"}
	err := store.AddDevice(testCtx, device)
	assert.NilError(t, err)

	err = store.MarkFaulty(testCtx, "test")
	assert.NilError(t, err)

	// The device record itself must now be in the Faulty state.
	stored, err := store.GetDevice(testCtx, device.Name)
	assert.NilError(t, err)
	assert.Equal(t, stored.State, Faulty)
	assert.Assert(t, stored.DeviceID > 0)

	// Make sure the allocated device ID is flagged as faulty as well
	err = store.db.View(func(tx *bbolt.Tx) error {
		idKey := []byte(strconv.FormatUint(uint64(stored.DeviceID), 10))
		raw := tx.Bucket(deviceIDBucketName).Get(idKey)
		assert.Equal(t, raw[0], byte(deviceFaulty))
		return nil
	})
	assert.NilError(t, err)
}

func TestPoolMetadata_GetDeviceNames(t *testing.T) {
tempDir, store := createStore(t)
defer cleanupStore(t, tempDir, store)
Expand Down
123 changes: 85 additions & 38 deletions snapshots/devmapper/pool_device.go
Expand Up @@ -118,35 +118,59 @@ func (p *PoolDevice) CreateThinDevice(ctx context.Context, deviceName string, vi
State: Unknown,
}

// Save initial device metadata and allocate new device ID from store
if err := p.metadata.AddDevice(ctx, info); err != nil {
return errors.Wrapf(err, "failed to save initial metadata for new thin device %q", deviceName)
}
var (
metaErr error
devErr error
activeErr error
)

defer func() {
if retErr == nil {
// We've created a devmapper device, but failed to activate it, try rollback everything
if activeErr != nil {
// Delete the device first.
delErr := p.deleteDevice(ctx, info)
if delErr != nil {
// Failed to rollback, mark the device as faulty and keep metadata in order to
// preserve the faulty device ID
retErr = multierror.Append(retErr, delErr, p.metadata.MarkFaulty(ctx, info.Name))
return
}

// The devmapper device has been successfully deleted, deallocate device ID
if err := p.RemoveDevice(ctx, info.Name); err != nil {
retErr = multierror.Append(retErr, err)
return
}

return
}

// Rollback metadata
retErr = multierror.Append(retErr, p.metadata.RemoveDevice(ctx, info.Name))
// We're unable to create the devmapper device, most likely something wrong with the deviceID
if devErr != nil {
retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, info.Name))
return
}
}()

// Create thin device
if err := p.createDevice(ctx, info); err != nil {
return err
// Save initial device metadata and allocate new device ID from store
metaErr = p.metadata.AddDevice(ctx, info)
if metaErr != nil {
return metaErr
}

defer func() {
if retErr == nil {
return
}
// Create thin device
devErr = p.createDevice(ctx, info)
if devErr != nil {
return devErr
}

// Rollback creation
retErr = multierror.Append(retErr, p.deleteDevice(ctx, info))
}()
// Activate thin device
activeErr = p.activateDevice(ctx, info)
if activeErr != nil {
return activeErr
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,

What if crash happens in between these steps and rollback code doesn't get chance to run?

This possibility also applies to other places to change device or metadata, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if crash happens in between these steps and rollback code doesn't get chance to run?

Each device mapper operation is wrapped with a state transition function (have a look at 1 and 2). If we fail to activate a device and a crash happens before any defer was run, you'll end up with something like this in your metadata database (meaning that the device didn't finish the transition):

DevName=X ID=9 State=Activating Error="failed to activate"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Essentially each operation with devmapper device is recorded (https://github.com/containerd/containerd/blob/master/snapshots/devmapper/device_info.go#L34), so you can get a clear picture about device state and last operation at any time. For example, for CreateThinDevice the following states will be used:

Unknown <-- preallocated metadata info and devID
Creating <-- createDevice
Created
Activating <-- activateDevice
Activated

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,

thanks for your detailed explanation ;-) Yes, I know, in the crash case, the metadata DB can reflect correctly the device state before crashing, such as creating, activating etc.

Actually, my question was, in that case, there is no chance for the rollback code to mark the device as faulty, right?

If so, after the machine comes up and containerd gets started again, we still have "object already exists" error from the AddDevice() - 878a320#diff-4132e94f99d36d21b09c446fb20687faL100.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you crash right in between create and rollback, then yes, a device won't be marked as Faulty one. In this case, what behavior would you expect to see?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a crash happens, containerd must restart again, so devmapper will be reloaded. Could we take the chance to scan the device list and mark as faulty any device whose state is not in a final state - Deactivated and Activated. Or I feel it's safe to clean up that device from the metadata and thin pool completely.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you crash right in between create and rollback, then yes, a device won't be marked as Faulty one. In this case, what behavior would you expect to see?

Hi,

I just came up with this idea. I'd like to know what you think? Thanks.

diff --git a/snapshots/devmapper/pool_device.go b/snapshots/devmapper/pool_device.go
index 22e78c1..4eaab6e 100644
--- a/snapshots/devmapper/pool_device.go
+++ b/snapshots/devmapper/pool_device.go
@@ -150,6 +150,16 @@ func (p *PoolDevice) CreateThinDevice(ctx context.Context, deviceName string, vi
                        retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, info.Name))
                        return
                }
+
+               // If ErrAlreadyExists returns, it indicates the device name is already in metadata,
+               // but not aware by upper layer. This may occur when crash happens and there is no
+               // chance to mark the device faulty. So, mark it here and upper layer will retry with
+               // the same device name.
+               if metaErr == ErrAlreadyExists {
+                       log.G(ctx).Warnf("conflicting thin device %s, mark it faulty", info.Name)
+                       retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, info.Name))
+                       return
+               }
        }()
 
        // Save initial device metadata and allocate new device ID from store
@@ -241,6 +251,12 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string
                        retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, snapInfo.Name))
                        return
                }
+
+               if metaErr == ErrAlreadyExists {
+                       log.G(ctx).Warnf("conflicting snapshot %s, mark it faulty", snapInfo.Name)
+                       retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, snapInfo.Name))
+                       return
+               }
        }()
 

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've addressed this use case in #3489. In case of crash, the snapshotter will mark devices as faulty after restart. Can you check if it works for you?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've addressed this use case in #3489. In case of crash, the snapshotter will mark devices as faulty after restart. Can you check if it works for you?

Great, thanks!

return p.activateDevice(ctx, info)
return nil
}

// createDevice creates thin device
Expand Down Expand Up @@ -185,36 +209,59 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string
State: Unknown,
}

// Save snapshot metadata and allocate new device ID
if err := p.metadata.AddDevice(ctx, snapInfo); err != nil {
return errors.Wrapf(err, "failed to save initial metadata for snapshot %q", snapshotName)
}
var (
metaErr error
devErr error
activeErr error
)

defer func() {
if retErr == nil {
// We've created a devmapper device, but failed to activate it, try rollback everything
if activeErr != nil {
// Delete the device first.
delErr := p.deleteDevice(ctx, snapInfo)
if delErr != nil {
// Failed to rollback, mark the device as faulty and keep metadata in order to
// preserve the faulty device ID
retErr = multierror.Append(retErr, delErr, p.metadata.MarkFaulty(ctx, snapInfo.Name))
return
}

// The devmapper device has been successfully deleted, deallocate device ID
if err := p.RemoveDevice(ctx, snapInfo.Name); err != nil {
retErr = multierror.Append(retErr, err)
return
}

return
}

// Rollback metadata
retErr = multierror.Append(retErr, p.metadata.RemoveDevice(ctx, snapInfo.Name))
// We're unable to create the devmapper device, most likely something wrong with the deviceID
if devErr != nil {
retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, snapInfo.Name))
return
}
}()

// Create thin device snapshot
if err := p.createSnapshot(ctx, baseInfo, snapInfo); err != nil {
return err
// Save snapshot metadata and allocate new device ID
metaErr = p.metadata.AddDevice(ctx, snapInfo)
if metaErr != nil {
return metaErr
}

defer func() {
if retErr == nil {
return
}
// Create thin device snapshot
devErr = p.createSnapshot(ctx, baseInfo, snapInfo)
if devErr != nil {
return devErr
}

// Rollback snapshot creation
retErr = multierror.Append(retErr, p.deleteDevice(ctx, snapInfo))
}()
// Activate the snapshot device
activeErr = p.activateDevice(ctx, snapInfo)
if activeErr != nil {
return activeErr
}

// Activate snapshot device
return p.activateDevice(ctx, snapInfo)
return nil
}

func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error {
Expand Down Expand Up @@ -317,7 +364,7 @@ func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error
return errors.Wrapf(err, "can't query metadata for device %q", deviceName)
}

if err := p.DeactivateDevice(ctx, deviceName, true, true); err != nil {
if err := p.DeactivateDevice(ctx, deviceName, false, true); err != nil {
return err
}

Expand Down
2 changes: 1 addition & 1 deletion snapshots/testsuite/testsuite.go
Expand Up @@ -501,7 +501,6 @@ func checkRemoveIntermediateSnapshot(ctx context.Context, t *testing.T, snapshot
if err != nil {
t.Fatal(err)
}
defer testutil.Unmount(t, base)

committedBase := filepath.Join(work, "committed-base")
if err = snapshotter.Commit(ctx, committedBase, base, opt); err != nil {
Expand Down Expand Up @@ -540,6 +539,7 @@ func checkRemoveIntermediateSnapshot(ctx context.Context, t *testing.T, snapshot
if err != nil {
t.Fatal(err)
}
testutil.Unmount(t, base)
err = snapshotter.Remove(ctx, committedBase)
if err != nil {
t.Fatal(err)
Expand Down