Skip to content

Commit

Permalink
Cherry pick for 0.8.5 (#2785)
Browse files Browse the repository at this point in the history
* [juicefs] fix worker cache when set option (#2563)

* fix worker cache when set option

Signed-off-by: zwwhdls <zww@hdls.me>

* update changelog in chart

Signed-off-by: zwwhdls <zww@hdls.me>

* fix unittest

Signed-off-by: zwwhdls <zww@hdls.me>

---------

Signed-off-by: zwwhdls <zww@hdls.me>
Signed-off-by: cheyang <cheyang@163.com>

* fix multi cache dir (#2639)

* fix multi cache dir

Signed-off-by: zwwhdls <zww@hdls.me>

* fix unit test

---------

Signed-off-by: zwwhdls <zww@hdls.me>
Signed-off-by: cheyang <cheyang@163.com>

* [Enhancement]CSI plugin checks mount point liveness before binding mount points (#2703)

* Clean up broken mount point when NodeStageVolume

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

* Check mount point aliveness when NodePublishVolume

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

* Clean up broken mount point when NodeStageVolume

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

* Fix cleaning logic

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

---------

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>
Signed-off-by: cheyang <cheyang@163.com>

* Prettify error messages for exec.Commands in Fluid (#2718)

* Prettify error log message when calling NodePublishVolume

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

* Prettify error logs when calling helm-related funcs

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

* Use instead `errors.As`

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

* Use instead `errors.As`

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

* Set higher log level for helm exec

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>

---------

Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>
Signed-off-by: cheyang <cheyang@163.com>

* update mount to check mountinfo, To #48327952

Signed-off-by: cheyang <cheyang@163.com>

* update mount to check mountinfo, To #48327952

Signed-off-by: cheyang <cheyang@163.com>

* Build docker images for v0.8.5, To #48327952

Signed-off-by: cheyang <cheyang@163.com>

---------

Signed-off-by: zwwhdls <zww@hdls.me>
Signed-off-by: cheyang <cheyang@163.com>
Signed-off-by: dongyun.xzh <dongyun.xzh@alibaba-inc.com>
Co-authored-by: Weiwei <zww@hdls.me>
Co-authored-by: TzZtzt <trafalgarz@outlook.com>
  • Loading branch information
3 people committed Mar 27, 2023
1 parent d8a8bba commit 5aae5a0
Show file tree
Hide file tree
Showing 25 changed files with 1,083 additions and 110 deletions.
2 changes: 1 addition & 1 deletion charts/fluid/fluid/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ version: 0.8.5

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: 0.8.4-885b5a7
appVersion: 0.8.5-00f609e
home: https://github.com/fluid-cloudnative/fluid
keywords:
- category:data
Expand Down
24 changes: 12 additions & 12 deletions charts/fluid/fluid/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

workdir: /tmp
crdUpgrade:
image: fluidcloudnative/fluid-crd-upgrader:v0.8.4-885b5a7
image: fluidcloudnative/fluid-crd-upgrader:v0.8.5-00f609e

image:
imagePullSecrets: []

dataset:
replicas: 1
controller:
image: fluidcloudnative/dataset-controller:v0.8.4-885b5a7
image: fluidcloudnative/dataset-controller:v0.8.5-00f609e

csi:
featureGates: "FuseRecovery=false"
Expand All @@ -21,7 +21,7 @@ csi:
registrar:
image: registry.aliyuncs.com/acs/csi-node-driver-registrar:v2.3.0-038aeb6-aliyun
plugins:
image: fluidcloudnative/fluid-csi:v0.8.4-885b5a7
image: fluidcloudnative/fluid-csi:v0.8.5-00f609e
kubelet:
rootDir: /var/lib/kubelet
pruneFs: fuse.alluxio-fuse,fuse.jindofs-fuse,fuse.juicefs,fuse.goosefs-fuse,ossfs
Expand All @@ -37,9 +37,9 @@ runtime:
portAllocatePolicy: random
enabled: false
init:
image: fluidcloudnative/init-users:v0.8.4-885b5a7
image: fluidcloudnative/init-users:v0.8.5-00f609e
controller:
image: fluidcloudnative/alluxioruntime-controller:v0.8.4-885b5a7
image: fluidcloudnative/alluxioruntime-controller:v0.8.5-00f609e
runtime:
# image: fluidcloudnative/alluxio:release-2.7.3-SNAPSHOT-a7154f1
image: fluidcloudnative/alluxio:release-2.8.1-SNAPSHOT-0433ade
Expand All @@ -59,21 +59,21 @@ runtime:
fuse:
image: registry.cn-shanghai.aliyuncs.com/jindofs/jindo-fuse:4.5.1
controller:
image: fluidcloudnative/jindoruntime-controller:v0.8.4-885b5a7
image: fluidcloudnative/jindoruntime-controller:v0.8.5-00f609e
init:
portCheck:
enabled: false
image: fluidcloudnative/init-users:v0.8.4-885b5a7
image: fluidcloudnative/init-users:v0.8.5-00f609e
goosefs:
replicas: 1
runtimeWorkers: 3
portRange: 26000-32000
portAllocatePolicy: random
enabled: false
init:
image: fluidcloudnative/init-users:v0.8.4-885b5a7
image: fluidcloudnative/init-users:v0.8.5-00f609e
controller:
image: fluidcloudnative/goosefsruntime-controller:v0.8.4-885b5a7
image: fluidcloudnative/goosefsruntime-controller:v0.8.5-00f609e
runtime:
image: ccr.ccs.tencentyun.com/qcloud/goosefs:v1.2.0
fuse:
Expand All @@ -82,17 +82,17 @@ runtime:
replicas: 1
enabled: false
controller:
image: fluidcloudnative/juicefsruntime-controller:v0.8.4-885b5a7
image: fluidcloudnative/juicefsruntime-controller:v0.8.5-00f609e
fuse:
image: juicedata/juicefs-fuse:v1.0.0-4.8.0

webhook:
enabled: true
image: fluidcloudnative/fluid-webhook:v0.8.4-885b5a7
image: fluidcloudnative/fluid-webhook:v0.8.5-00f609e
replicas: 1

fluidapp:
enabled: true
replicas: 1
controller:
image: fluidcloudnative/application-controller:v0.8.4-885b5a7
image: fluidcloudnative/application-controller:v0.8.5-00f609e
3 changes: 3 additions & 0 deletions charts/juicefs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,6 @@ Support configurable tieredstore's volume type

0.2.11
- Support credential key in secret

0.2.12
- Set cache dir in volumes & volumeMounts for worker & fuse
2 changes: 1 addition & 1 deletion charts/juicefs/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: juicefs
apiVersion: v1
description: FileSystem aimed for data analytics and machine learning in any cloud.
version: 0.2.11
version: 0.2.12
appVersion: v1.0.0
home: https://juicefs.com/
maintainers:
Expand Down
26 changes: 5 additions & 21 deletions charts/juicefs/templates/fuse/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,11 @@ spec:
exec:
command: ["sh", "-c", "umount {{ .Values.fuse.mountPath }}"]
volumeMounts:
- name: juicefs-fuse-mount
mountPath: {{ .Values.fuse.hostMountPath }}
mountPropagation: Bidirectional
- mountPath: /root/script
name: script
{{- range $name, $mount := .Values.cacheDirs }}
- name: cache-dir-{{ $name }}
mountPath: "{{ $mount.path }}"
{{- end }}
- name: juicefs-fuse-mount
mountPath: {{ .Values.fuse.hostMountPath }}
mountPropagation: Bidirectional
- mountPath: /root/script
name: script
{{- if .Values.fuse.volumeMounts }}
{{ toYaml .Values.fuse.volumeMounts | indent 12 }}
{{- end }}
Expand All @@ -155,18 +151,6 @@ spec:
hostPath:
path: {{ .Values.fuse.hostMountPath }}
type: DirectoryOrCreate
{{- range $name, $mount := .Values.cacheDirs }}
{{- if eq $mount.type "hostPath" }}
- hostPath:
path: "{{ $mount.path }}"
type: DirectoryOrCreate
name: cache-dir-{{ $name }}
{{- else if eq $mount.type "emptyDir" }}
- emptyDir: {}
name: cache-dir-{{ $name }}
{{- /* todo: support volume template */}}
{{- end }}
{{- end }}
- name: script
configMap:
name: {{ template "juicefs.fullname" . }}-fuse-script
Expand Down
16 changes: 0 additions & 16 deletions charts/juicefs/templates/worker/statefuleset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,27 +126,11 @@ spec:
volumeMounts:
- mountPath: /root/script
name: script
{{- range $name, $mount := .Values.cacheDirs }}
- name: cache-dir-{{ $name }}
mountPath: "{{ $mount.path }}"
{{- end }}
{{- if .Values.worker.volumeMounts }}
{{ toYaml .Values.worker.volumeMounts | indent 12 }}
{{- end }}
restartPolicy: Always
volumes:
{{- range $name, $mount := .Values.cacheDirs }}
{{- if eq $mount.type "hostPath" }}
- hostPath:
path: "{{ $mount.path }}"
type: DirectoryOrCreate
name: cache-dir-{{ $name }}
{{- else if eq $mount.type "emptyDir" }}
- emptyDir: {}
name: cache-dir-{{ $name }}
{{- /* todo: support volume template */}}
{{- end }}
{{- end }}
- name: script
configMap:
name: {{ template "juicefs.fullname" . }}-worker-script
Expand Down
27 changes: 23 additions & 4 deletions csi/shell/check_mount.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,38 @@ set -ex

ConditionPathIsMountPoint="$1"
MountType="$2"
SubPath="$3"

#[ -z ${ConditionPathIsMountPoint} ] && ConditionPathIsMountPoint=/alluxio-fuse

count=0
# while ! mount | grep alluxio | grep $ConditionPathIsMountPoint | grep -v grep
while ! mount | grep $ConditionPathIsMountPoint | grep $MountType
# while ! cat /proc/self/mountinfo | grep alluxio | grep $ConditionPathIsMountPoint | grep -v grep
while ! cat /proc/self/mountinfo | grep $ConditionPathIsMountPoint | grep $MountType
do
sleep 3
count=`expr $count + 1`
if test $count -eq 10
then
echo "timed out!"
echo "timed out waiting for $ConditionPathIsMountPoint mounted"
exit 1
fi
done

echo "succeed in checking mount point $ConditionPathIsMountPoint"
# Wait until the mount point can actually be stat'ed: an entry may already be
# listed in mountinfo while the filesystem behind it is not yet serving
# requests. Retry every 3 seconds and give up after 10 attempts.
# Expansions are quoted so that paths containing whitespace or glob
# characters are passed to stat/test as a single argument.
count=0
while ! stat "$ConditionPathIsMountPoint"
do
    sleep 3
    count=$((count + 1))
    if test "$count" -eq 10
    then
        echo "timed out stating $ConditionPathIsMountPoint returns ready"
        exit 1
    fi
done

# Verify that the requested sub path exists under the mount point. With an
# empty SubPath this checks the mount point itself.
if [ ! -e "$ConditionPathIsMountPoint/$SubPath" ] ; then
    echo "sub path [$SubPath] not exist!"
    exit 2
fi

echo "succeed in checking mount point $ConditionPathIsMountPoint"
57 changes: 57 additions & 0 deletions docs/zh/samples/juicefs/juicefs_cache_dir.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# JuiceFSRuntime 缓存配置

如何在 Fluid 中使用 JuiceFS,请参考文档[示例 - 如何在 Fluid 中使用 JuiceFS](juicefs_runtime.md)。本文讲述所有在 Fluid 中有关 JuiceFS 的缓存相关配置。

## 设置多个路径缓存

缓存路径在 JuiceFSRuntime 中的 tieredstore 设置,worker 和 fuse pod 共享相同的配置。

注意:JuiceFS 支持多路径缓存,不支持多级缓存。

```yaml
apiVersion: data.fluid.io/v1alpha1
kind: JuiceFSRuntime
metadata:
name: jfsdemo
spec:
replicas: 1
tieredstore:
levels:
- mediumtype: SSD
path: /mnt/cache1:/mnt/cache2
quota: 40Gi
low: "0.1"
```

其中:
- `spec.tieredstore.levels.path` 可设置为多个路径,以 `:` 分隔,缓存会被分配在这里设置的所有路径下;但不支持通配符;
- `spec.tieredstore.levels.quota` 为缓存对象的总大小,与路径多少无关;
- `spec.tieredstore.levels.low` 为缓存路径的最小剩余空间比例,无论缓存是否达到限额,都会保证缓存路径的剩余空间;
- `spec.tieredstore.levels.mediumtype` 为缓存路径的类型,目前支持 `SSD` 和 `MEM`。


## 单独设置 worker 的缓存路径

默认情况下,worker 和 fuse 的缓存路径都在 `spec.tieredstore.levels.path` 中设置,但是也可以单独设置 worker 的缓存路径。

```yaml
apiVersion: data.fluid.io/v1alpha1
kind: JuiceFSRuntime
metadata:
name: jfsdemo
spec:
worker:
options:
"cache-dir": "/mnt/cache1:/mnt/cache2"
tieredstore:
levels:
- mediumtype: MEM
path: /dev/shm
quota: 500Mi
low: "0.1"
```

其中:
- `spec.worker.options` 为 worker 的挂载参数,缓存路径以 `cache-dir` 为 key,以 `:` 分隔的多个路径;


Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Redis、MySQL、TiKV 等多种数据库中。

## 部署 JuiceFSRuntime 环境

具体部署方法参考文档 [如何在 Fluid 中使用 JuiceFS](./juicefs_runtime.md)
具体部署方法参考文档 [如何在 Fluid 中使用 JuiceFS](juicefs_runtime.md)

在 JuiceFSRuntime 和 Dataset 创建成功后,等待 worker pod 启动成功,再进行下面的步骤。

Expand Down Expand Up @@ -63,4 +63,4 @@ root@jfsdemo-worker-0:~#

可以看到 bucket 中的文件已经被同步到了 JuiceFS 中。

最后创建业务 Pod,其中 Pod 使用上面创建的 `Dataset` 的方式为指定同名的 PVC。该步骤与文档 [如何在 Fluid 中使用 JuiceFS](./juicefs_runtime.md) 中一致,这里不再赘述。
最后创建业务 Pod,其中 Pod 使用上面创建的 `Dataset` 的方式为指定同名的 PVC。该步骤与文档 [如何在 Fluid 中使用 JuiceFS](juicefs_runtime.md) 中一致,这里不再赘述。
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ JuiceFS 是一款面向云环境设计的开源高性能共享文件系统,提

## 安装


您可以从 [Fluid Releases](https://github.com/fluid-cloudnative/fluid/releases) 下载最新的 Fluid 安装包。

在 Fluid 的安装 chart values.yaml 中将 `runtime.juicefs.enabled` 设置为 `true`,再参考 [安装文档](../userguide/install.md) 完成安装。并检查 Fluid 各组件正常运行:
Expand Down
File renamed without changes.
37 changes: 35 additions & 2 deletions pkg/csi/plugins/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/fluid-cloudnative/fluid/pkg/utils/kubeclient"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
"k8s.io/utils/mount"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/container-storage-interface/spec/lib/go/csi"
Expand All @@ -39,7 +40,6 @@ import (
"golang.org/x/net/context"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"k8s.io/utils/mount"
)

const (
Expand Down Expand Up @@ -117,7 +117,7 @@ func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
// 1. Wait the runtime fuse ready
err = utils.CheckMountReady(fluidPath, mountType)
if err != nil {
return nil, status.Error(codes.InvalidArgument, err.Error())
return nil, status.Error(codes.Internal, err.Error())
}

args := []string{"--bind"}
Expand Down Expand Up @@ -262,6 +262,12 @@ func (ns *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
defer ns.mutex.Unlock()
glog.Infof("NodeStageVolume: Starting NodeStage with VolumeId: %s, and VolumeContext: %v", req.GetVolumeId(), req.VolumeContext)

// 1. clean up broken mount point
fluidPath := req.GetVolumeContext()[common.VolumeAttrFluidPath]
if ignoredErr := cleanUpBrokenMountPoint(fluidPath); ignoredErr != nil {
glog.Warningf("Ignoring error when cleaning up broken mount point %v: %v", fluidPath, ignoredErr)
}

// 1. get runtime namespace and name
namespace, name, err := ns.getRuntimeNamespacedName(req.GetVolumeContext(), req.GetVolumeId())
if err != nil {
Expand Down Expand Up @@ -381,3 +387,30 @@ func checkMountInUse(volumeName string) (bool, error) {

return inUse, err
}

// cleanUpBrokenMountPoint stats the given mountPoint and unmounts it if it is
// a broken mount point, i.e. os.Stat fails with ENOTCONN (errno 107,
// "Transport endpoint is not connected").
//
// It returns nil when mountPoint does not exist, when it can be stat'ed
// successfully, or when a broken mount point was found and unmounted. A
// failed unmount, or any other stat failure, is returned wrapped with context.
func cleanUpBrokenMountPoint(mountPoint string) error {
	_, err := os.Stat(mountPoint)
	if err == nil || os.IsNotExist(err) {
		// Healthy or absent: nothing to clean up.
		return nil
	}

	if pathErr, ok := err.(*os.PathError); ok {
		if errNo, ok := pathErr.Err.(syscall.Errno); ok && errNo == syscall.ENOTCONN {
			// mount.New takes the path of an alternative mount binary, not a
			// mount point; the empty string selects the default.
			mounter := mount.New("")
			// Unmount exactly once and wrap that very error. Retrying inside
			// Wrapf could yield nil (hiding the failure) or a different error.
			if umountErr := mounter.Unmount(mountPoint); umountErr != nil {
				return errors.Wrapf(umountErr, "failed to unmount %s", mountPoint)
			}
			glog.Infof("Found broken mount point %s, successfully umounted it", mountPoint)
			return nil
		}
	}

	return errors.Wrapf(err, "failed to os.Stat(%s)", mountPoint)
}
Loading

0 comments on commit 5aae5a0

Please sign in to comment.