From 08102ee75f88de5390e09908c1cb61d8a3010271 Mon Sep 17 00:00:00 2001 From: TzZtzt Date: Tue, 7 Mar 2023 21:41:48 +0800 Subject: [PATCH] [Enhancement]CSI plugin checks mount point liveness before binding mount points (#2703) * Clean up broken mount point when NodeStageVolume Signed-off-by: dongyun.xzh * Check mount point aliveness when NodePublishVolume Signed-off-by: dongyun.xzh * Clean up broken mount point when NodeStageVolume Signed-off-by: dongyun.xzh * Fix cleaning logic Signed-off-by: dongyun.xzh --------- Signed-off-by: dongyun.xzh --- csi/shell/check_mount.sh | 23 +++++++++++++++++++++-- pkg/csi/plugins/nodeserver.go | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/csi/shell/check_mount.sh b/csi/shell/check_mount.sh index cd364bc2549..0b01cfd7726 100644 --- a/csi/shell/check_mount.sh +++ b/csi/shell/check_mount.sh @@ -4,6 +4,8 @@ set -ex ConditionPathIsMountPoint="$1" MountType="$2" +SubPath="$3" + #[ -z ${ConditionPathIsMountPoint} ] && ConditionPathIsMountPoint=/alluxio-fuse count=0 @@ -14,9 +16,26 @@ do count=`expr $count + 1` if test $count -eq 10 then - echo "timed out!" + echo "timed out waiting for $ConditionPathIsMountPoint mounted" exit 1 fi done -echo "succeed in checking mount point $ConditionPathIsMountPoint" \ No newline at end of file +count=0 +while ! stat $ConditionPathIsMountPoint +do + sleep 3 + count=`expr $count + 1` + if test $count -eq 10 + then + echo "timed out stating $ConditionPathIsMountPoint returns ready" + exit 1 + fi +done + +if [ ! -e $ConditionPathIsMountPoint/$SubPath ] ; then + echo "sub path [$SubPath] not exist!" 
+ exit 2 +fi + +echo "succeed in checking mount point $ConditionPathIsMountPoint" diff --git a/pkg/csi/plugins/nodeserver.go b/pkg/csi/plugins/nodeserver.go index c8a3bf51ed4..57880e213fe 100644 --- a/pkg/csi/plugins/nodeserver.go +++ b/pkg/csi/plugins/nodeserver.go @@ -31,6 +31,7 @@ import ( "github.com/fluid-cloudnative/fluid/pkg/utils/kubeclient" "github.com/pkg/errors" v1 "k8s.io/api/core/v1" + "k8s.io/utils/mount" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/container-storage-interface/spec/lib/go/csi" @@ -39,7 +40,6 @@ import ( "golang.org/x/net/context" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - "k8s.io/utils/mount" ) const ( @@ -262,6 +262,12 @@ func (ns *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol defer ns.mutex.Unlock() glog.Infof("NodeStageVolume: Starting NodeStage with VolumeId: %s, and VolumeContext: %v", req.GetVolumeId(), req.VolumeContext) + // 1. clean up broken mount point + fluidPath := req.GetVolumeContext()[common.VolumeAttrFluidPath] + if ignoredErr := cleanUpBrokenMountPoint(fluidPath); ignoredErr != nil { + glog.Warningf("Ignoring error when cleaning up broken mount point %v: %v", fluidPath, ignoredErr) + } + // 1. get runtime namespace and name namespace, name, err := ns.getRuntimeNamespacedName(req.GetVolumeContext(), req.GetVolumeId()) if err != nil { @@ -381,3 +387,30 @@ func checkMountInUse(volumeName string) (bool, error) { return inUse, err } + +// cleanUpBrokenMountPoint stats the given mountPoint and unmounts it if it is a broken mount point (i.e. Stat fails with errno 107 [Transport endpoint is not connected]). 
+func cleanUpBrokenMountPoint(mountPoint string) error { + _, err := os.Stat(mountPoint) + if err != nil { + if os.IsNotExist(err) { + return nil + } + + if pathErr, ok := err.(*os.PathError); ok { + if errNo, ok := pathErr.Err.(syscall.Errno); ok { + if errNo == syscall.ENOTCONN { + mounter := mount.New(mountPoint) + if err := mounter.Unmount(mountPoint); err != nil { + return errors.Wrapf(mounter.Unmount(mountPoint), "failed to unmount %s", mountPoint) + } + glog.Infof("Found broken mount point %s, successfully umounted it", mountPoint) + return nil + } + } + } + + return errors.Wrapf(err, "failed to os.Stat(%s)", mountPoint) + } + + return nil +}