Skip to content

Commit

Permalink
rbd: remove dummy image workaround
Browse files Browse the repository at this point in the history
To address the problem that snapshot
schedules are triggered for volumes
that are promoted, a dummy image was
disabled/enabled for replication.
This was done as a workaround, because the
promote operation was not triggering
the schedules for the image being promoted.

The bugs related to the same have been fixed in
RBD mirroring functionality and hence the
workaround #2656 can be removed from the code base.

ceph tracker https://tracker.ceph.com/issues/53914

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
  • Loading branch information
Madhu-1 authored and mergify[bot] committed Oct 10, 2022
1 parent 5c8564c commit 71e5b3f
Showing 1 changed file with 0 additions and 141 deletions.
141 changes: 0 additions & 141 deletions internal/rbd/replicationcontrollerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"time"

"github.com/ceph/ceph-csi/internal/util"
Expand Down Expand Up @@ -70,18 +69,6 @@ const (
schedulingStartTimeKey = "schedulingStartTime"
)

type operation string

var (
// pool+"/"+key to check dummy image is created.
dummyImageCreated operation = "dummyImageCreated"
// Read write lock to ensure that only one operation is happening at a time.
operationLock = sync.Map{}

// Lock to serialize operations on the dummy image to tickle RBD snapshot schedule.
dummyImageOpsLock sync.Mutex
)

// ReplicationServer struct of rbd CSI driver with supported methods of Replication
// controller server spec.
type ReplicationServer struct {
Expand Down Expand Up @@ -269,11 +256,6 @@ func (rs *ReplicationServer) EnableVolumeReplication(ctx context.Context,
return nil, status.Error(codes.Internal, err.Error())
}

err = createDummyImage(ctx, rbdVol)
if err != nil {
return nil, status.Errorf(codes.Internal, "failed to create dummy image %s", err.Error())
}

if mirroringInfo.State != librbd.MirrorImageEnabled {
err = rbdVol.enableImageMirroring(mirroringMode)
if err != nil {
Expand All @@ -286,117 +268,6 @@ func (rs *ReplicationServer) EnableVolumeReplication(ctx context.Context,
return &replication.EnableVolumeReplicationResponse{}, nil
}

// getDummyImageName returns the csi-vol-dummy+cluster FSID as the image name.
// each cluster should have a unique dummy image created. choosing the cluster
// FSID for the same reason.
func getDummyImageName(conn *util.ClusterConnection) (string, error) {
id, err := conn.GetFSID()
if err != nil {
return "", err
}

return fmt.Sprintf("csi-vol-dummy-%s", id), nil
}

// getOperationName returns the operation name for the given operation type
// combined with the pool name.
func getOperationName(poolName string, optName operation) string {
return fmt.Sprintf("%s/%s", poolName, optName)
}

// createDummyImage creates a dummy image as a workaround for the rbd
// scheduling problem.
func createDummyImage(ctx context.Context, rbdVol *rbdVolume) error {
var err error
var imgName string

dummyImageOpsLock.Lock()
defer dummyImageOpsLock.Unlock()
optName := getOperationName(rbdVol.Pool, dummyImageCreated)
if _, ok := operationLock.Load(optName); !ok {
// create a dummy image
imgName, err = getDummyImageName(rbdVol.conn)
if err != nil {
return err
}
dummyVol := *rbdVol
dummyVol.RbdImageName = imgName
// dummyVol holds rbdVol details, reset ImageID or else dummy image cannot be
// deleted from trash during repair operation.
dummyVol.ImageID = ""
f := []string{
librbd.FeatureNameLayering,
librbd.FeatureNameObjectMap,
librbd.FeatureNameExclusiveLock,
librbd.FeatureNameFastDiff,
}
features := librbd.FeatureSetFromNames(f)
dummyVol.ImageFeatureSet = features
// create 1MiB dummy image. 1MiB=1048576 bytes
dummyVol.VolSize = 1048576
err = createImage(ctx, &dummyVol, dummyVol.conn.Creds)
if err != nil {
if strings.Contains(err.Error(), "File exists") {
err = repairDummyImage(ctx, &dummyVol)
}
}
if err == nil {
operationLock.Store(optName, true)
}
}

return err
}

// repairDummyImage deletes and recreates the dummy image.
func repairDummyImage(ctx context.Context, dummyVol *rbdVolume) error {
// instead of checking the images features and than adding missing image
// features, updating the image size to 1Mib. We will delete the image
// and recreate it.

// deleting and recreating the dummy image will not impact anything as its
// a workaround to fix the scheduling problem.
err := dummyVol.deleteImage(ctx)
if err != nil {
return err
}

return createImage(ctx, dummyVol, dummyVol.conn.Creds)
}

// tickleMirroringOnDummyImage disables and reenables mirroring on the dummy image, and sets a
// schedule of a minute for the dummy image, to force a schedule refresh for other mirrored images
// within a minute.
func tickleMirroringOnDummyImage(rbdVol *rbdVolume, mirroringMode librbd.ImageMirrorMode) error {
imgName, err := getDummyImageName(rbdVol.conn)
if err != nil {
return err
}
dummyVol := *rbdVol
dummyVol.RbdImageName = imgName

dummyImageOpsLock.Lock()
defer dummyImageOpsLock.Unlock()
err = dummyVol.disableImageMirroring(false)
if err != nil {
return err
}

err = dummyVol.enableImageMirroring(mirroringMode)
if err != nil {
return err
}

if mirroringMode == librbd.ImageMirrorModeSnapshot {
err = dummyVol.addSnapshotScheduling(admin.Interval("1m"), admin.NoStartTime)
if err != nil {
return err
}
}

return nil
}

// DisableVolumeReplication extracts the RBD volume information from the
// volumeID, If the image is present and the mirroring is enabled on the RBD
// image it will disable the mirroring.
Expand Down Expand Up @@ -588,12 +459,6 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context,
}
}

var mode librbd.ImageMirrorMode
mode, err = getMirroringMode(ctx, req.GetParameters())
if err != nil {
return nil, status.Errorf(codes.Internal, "failed to get mirroring mode %s", err.Error())
}

interval, startTime := getSchedulingDetails(req.GetParameters())
if interval != admin.NoInterval {
err = rbdVol.addSnapshotScheduling(interval, startTime)
Expand All @@ -608,12 +473,6 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context,
rbdVol)
}

log.DebugLog(ctx, "attempting to tickle dummy image for restarting RBD schedules")
err = tickleMirroringOnDummyImage(rbdVol, mode)
if err != nil {
return nil, status.Errorf(codes.Internal, "failed to enable mirroring on dummy image %s", err.Error())
}

return &replication.PromoteVolumeResponse{}, nil
}

Expand Down

0 comments on commit 71e5b3f

Please sign in to comment.