charts/core/templates/ray-service/ray-service.yaml

{{- $modelRepository := .Values.persistence.persistentVolumeClaim.modelRepository -}}
{{- $rayConda := .Values.persistence.persistentVolumeClaim.rayConda -}}
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  name: {{ template "core.ray-service" . }}
  # annotations:
  #   ray.io/ft-enabled: "true"
spec:
  rayVersion: {{ .Values.rayService.image.version }}
  ## raycluster autoscaling config
  enableInTreeAutoscaling: true
  autoscalerOptions:
    upscalingMode: Default
    # idleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
    idleTimeoutSeconds: 60
    imagePullPolicy: Always
    securityContext: {}
    env: []
    envFrom: []
    {{- if .Values.rayService.spec.autoscalerOptions.resources }}
    resources:
      {{- toYaml .Values.rayService.spec.autoscalerOptions.resources | nindent 6 }}
    {{- end }}
  headGroupSpec:
    rayStartParams:
      num-cpus: "0"
      num-gpus: "0"
      disable-usage-stats: "true"
    template:
      spec:
        {{- with .Values.rayService.headGroupSpec.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.rayService.headGroupSpec.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        volumes:
          - name: ray-conda
          {{- if not $.Values.persistence.enabled }}
            emptyDir: {}
          {{- else if $rayConda.existingClaim }}
            persistentVolumeClaim:
              claimName: {{ $rayConda.existingClaim }}
          {{- else }}
            persistentVolumeClaim:
              claimName: {{ template "core.rayCondaDataVolume" . }}
          {{- end }}
          - name: cp-conda-env-configmap
            configMap:
              name: cp-conda-env
              defaultMode: 0777
              items:
                - key: cp_conda_env.sh
                  path: cp_conda_env.sh
          - name: podman-configmap
            configMap:
              name: podman
              defaultMode: 0666
              items:
                - key: registries.conf
                  path: registries.conf
                - key: policy.json
                  path: policy.json
                - key: storage.conf
                  path: storage.conf
        containers:
          - name: ray-head
            image: {{ .Values.rayService.image.repository }}:{{ .Values.rayService.image.tag }}
            securityContext:
              # for mounting /dev/fuse
              # TODO: maybe implement a fuse-device-plugin-daemonset
              privileged: true
            imagePullPolicy: Always
            {{- if .Values.rayService.headGroupSpec.resources }}
            resources:
              {{- toYaml .Values.rayService.headGroupSpec.resources | nindent 14 }}
            {{- end }}
            env:
              - name: RAY_GRAFANA_IFRAME_HOST
                value: http://127.0.0.1:3002
              - name: RAY_GRAFANA_HOST
                value: http://core-grafana:80
              - name: RAY_PROMETHEUS_HOST
                value: http://core-prometheus:9090
              - name: RAY_worker_register_timeout_seconds
                value: "360"
            volumeMounts:
              - mountPath: /ray-conda-pack
                name: ray-conda
              - mountPath: /home/ray/script
                name: cp-conda-env-configmap
              - mountPath: /etc/containers/
                name: podman-configmap
            ports:
              - containerPort: 6379
                name: gcs-server
              - containerPort: 8265
                name: dashboard
              - containerPort: 10001
                name: client
              - containerPort: 8000
                name: serve
              - containerPort: 9000
                name: serve-grpc
              - containerPort: 44217
                name: as-metrics # autoscaler
              - containerPort: 44227
                name: dash-metrics # dashboard
            lifecycle:
              postStart:
                exec:
                  command: ["/bin/sh","-c","/home/ray/script/cp_conda_env.sh"]
              preStop:
                exec:
                  command: ["/bin/sh","-c","ray stop"]
  workerGroupSpecs:
  {{- range $workerGroupSpecs := .Values.rayService.workerGroupSpecs }}
    - replicas: {{ $workerGroupSpecs.replicas }}
      minReplicas: {{ $workerGroupSpecs.minReplicas }}
      maxReplicas: {{ $workerGroupSpecs.maxReplicas }}
      groupName: {{ $workerGroupSpecs.groupName }}
      rayStartParams:
        disable-usage-stats: "true"
      #pod template
      template:
        spec:
          {{- with $workerGroupSpecs.nodeSelector }}
          nodeSelector:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with $workerGroupSpecs.affinity }}
          affinity:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumes:
            - name: model-repository
            {{- if not $.Values.persistence.enabled }}
              emptyDir: {}
            {{- else if $modelRepository.existingClaim }}
              persistentVolumeClaim:
                claimName: {{ $modelRepository.existingClaim }}
            {{- else }}
              persistentVolumeClaim:
                claimName: core-model-repository-data-volume
            {{- end }}
            - name: start-ray-serve-configmap
              configMap:
                name: start-ray-serve
                defaultMode: 0777
                items:
                  - key: start_ray_serve.sh
                    path: start_ray_serve.sh
            - name: podman-configmap
              configMap:
                name: podman
                defaultMode: 0666
                items:
                  - key: registries.conf
                    path: registries.conf
                  - key: policy.json
                    path: policy.json
          containers:
            - name: ray-worker
              image: {{ $.Values.rayService.image.repository }}:{{ $.Values.rayService.image.tag }}
              imagePullPolicy: Always
              lifecycle:
                postStart:
                  exec:
                    command: ["/bin/sh","-c","/home/ray/script/start_ray_serve.sh"]
                preStop:
                  exec:
                    command: ["/bin/sh","-c","ray stop"]
              # TODO: determine how big the head node should be
              # Optimal resource allocation will depend on our Kubernetes infrastructure and might
              # require some experimentation.
              # Setting requests=limits is recommended with Ray. K8s limits are used for Ray-internal
              # resource accounting. K8s requests are not used by Ray.
              # this also apply to the workerGroup
              resources:
              {{- if $workerGroupSpecs.gpuWorkerGroup.enabled }}
                {{- toYaml $workerGroupSpecs.gpuWorkerGroup.resources | nindent 16 }}
              {{- else }}
                {{- toYaml $workerGroupSpecs.resources | nindent 16 }}
              {{- end }}
              volumeMounts:
                - mountPath: /home/ray/script
                  name: start-ray-serve-configmap
                - mountPath: /model-repository
                  name: model-repository
                - mountPath: /etc/containers/
                  name: podman-configmap
  {{- end }}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: cp-conda-env
data:
  cp_conda_env.sh: |
    #!/bin/bash

    # wait for ray cluster to finish initialization
    while true; do
        ray health-check 2>/dev/null
        if [ "$?" = "0" ]; then
            break
        else
            echo "INFO: waiting for ray head to start"
            sleep 1
        fi
    done

    echo "INFO: copying Conda env to the shared folder /ray-conda-pack"

    sudo chown -R 1000:100 /ray-conda-pack
    cp -r /home/ray/anaconda3/* /ray-conda-pack

    echo "INFO: Conda env copying done"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: start-ray-serve
data:
  start_ray_serve.sh: |
    #!/bin/bash

    # wait for ray cluster to finish initialization
    while true; do
        ray health-check 2>/dev/null
        if [ "$?" = "0" ]; then
            break
        else
            echo "INFO: waiting for ray head to start"
            sleep 1
        fi
    done

    serve start --http-host=0.0.0.0 --grpc-port 9000 --grpc-servicer-functions ray_pb2_grpc.add_RayServiceServicer_to_server

    echo "INFO: Start ray serve"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: podman
data:
  registries.conf: |
    unqualified-search-registries = ["{{ template "core.registry" . }}:{{ template "core.registry.port" . }}", "docker.io", "quay.io"]

    [[registry]]
    location = "{{ template "core.registry" . }}:{{ template "core.registry.port" . }}"
    insecure = true
  policy.json: |
    {
      "default": [
        {
          "type": "insecureAcceptAnything"
        }
      ],
      "transports": {
        "docker-daemon": {
          "": [{ "type": "insecureAcceptAnything" }]
        }
      }
    }
  storage.conf: |
    [storage]
    driver = "overlay"