From 8f565827856c91ea5efca33562cd4445a4203c1e Mon Sep 17 00:00:00 2001 From: HR Wu <5631010+heiruwu@users.noreply.github.com> Date: Tue, 19 Mar 2024 16:20:22 +0800 Subject: [PATCH] feat(helm,docker-compose): add registry service (#435) Because - Instill core will be incorporating an image registry service This commit - add `docker registry` service in docker-compose deployment - add `docker registry` service in helm deployment Resolves INS-3818 Resolves INS-3819 Resolves INS-3934 --- .env | 6 + Makefile | 4 +- charts/core/templates/_helpers.tpl | 19 +++ .../templates/model-backend/configmap.yaml | 3 + charts/core/templates/pvc.yaml | 31 +++++ .../templates/ray-service/ray-service.yaml | 58 ++++++++ charts/core/templates/registry/configmap.yaml | 28 ++++ charts/core/templates/registry/cronjob.yaml | 70 ++++++++++ .../core/templates/registry/deployment.yaml | 131 ++++++++++++++++++ charts/core/templates/registry/hpa.yaml | 33 +++++ charts/core/templates/registry/service.yaml | 22 +++ charts/core/values.yaml | 103 +++++++++++++- configs/registry/config.yaml | 73 ++++++++++ configs/registry/registries.conf | 82 +++++++++++ docker-compose-latest.yml | 4 + docker-compose.yml | 22 ++- 16 files changed, 683 insertions(+), 6 deletions(-) create mode 100644 charts/core/templates/registry/configmap.yaml create mode 100644 charts/core/templates/registry/cronjob.yaml create mode 100644 charts/core/templates/registry/deployment.yaml create mode 100644 charts/core/templates/registry/hpa.yaml create mode 100644 charts/core/templates/registry/service.yaml create mode 100644 configs/registry/config.yaml create mode 100644 configs/registry/registries.conf diff --git a/.env b/.env index dbba5af2..57cb7952 100644 --- a/.env +++ b/.env @@ -192,3 +192,9 @@ SOCAT_IMAGE=alpine/socat SOCAT_VERSION=1.7.4.4-r0 SOCAT_HOST=socat SOCAT_PORT=2375 + +# registry +REGISTRY_IMAGE=registry +REGISTRY_VERSION=2.8.3 +REGISTRY_HOST=registry +REGISTRY_PORT=5000 diff --git a/Makefile b/Makefile index 
b817f4d2..45311098 100644 --- a/Makefile +++ b/Makefile @@ -47,13 +47,13 @@ HELM_RELEASE_NAME := core .PHONY: all all: ## Launch all services with their up-to-date release version + @docker inspect --type=image instill/ray:${RAY_RELEASE_TAG} >/dev/null 2>&1 || printf "\033[1;33mINFO:\033[0m This may take a while due to the enormous size of the Ray server image, but the image pulling process should be just a one-time effort.\n" && sleep 5 @make build-release BUILD=${BUILD} @if [ ! -f "$$(echo ${SYSTEM_CONFIG_PATH}/user_uid)" ]; then \ mkdir -p ${SYSTEM_CONFIG_PATH} && \ docker run --rm --name uuidgen ${INSTILL_CORE_IMAGE_NAME}:${INSTILL_CORE_VERSION} uuidgen > ${SYSTEM_CONFIG_PATH}/user_uid; \ fi ifeq (${NVIDIA_GPU_AVAILABLE}, true) - @docker inspect --type=image instill/ray:${RAY_SERVER_VERSION} >/dev/null 2>&1 || printf "\033[1;33mINFO:\033[0m This may take a while due to the enormous size of the Ray server image, but the image pulling process should be just a one-time effort.\n" && sleep 5 @cat docker-compose-nvidia.yml | yq '.services.ray_server.deploy.resources.reservations.devices[0].device_ids |= (strenv(NVIDIA_VISIBLE_DEVICES) | split(",")) | ..style="double"' | \ EDITION=$${EDITION:=local-ce} DEFAULT_USER_UID=$$(cat ${SYSTEM_CONFIG_PATH}/user_uid) RAY_RELEASE_TAG=${RAY_RELEASE_TAG} docker compose ${COMPOSE_FILES} -f - up -d --quiet-pull else @@ -62,13 +62,13 @@ endif .PHONY: latest latest: ## Lunch all dependent services with their latest codebase + @docker inspect --type=image instill/ray:${RAY_LATEST_TAG} >/dev/null 2>&1 || printf "\033[1;33mINFO:\033[0m This may take a while due to the enormous size of the Ray server image, but the image pulling process should be just a one-time effort.\n" && sleep 5 @make build-latest PROFILE=${PROFILE} BUILD=${BUILD} @if [ ! 
-f "$$(echo ${SYSTEM_CONFIG_PATH}/user_uid)" ]; then \ mkdir -p ${SYSTEM_CONFIG_PATH} && \ docker run --rm --name uuidgen ${INSTILL_CORE_IMAGE_NAME}:latest uuidgen > ${SYSTEM_CONFIG_PATH}/user_uid; \ fi ifeq (${NVIDIA_GPU_AVAILABLE}, true) - @docker inspect --type=image instill/ray:${RAY_LATEST_TAG} >/dev/null 2>&1 || printf "\033[1;33mINFO:\033[0m This may take a while due to the enormous size of the Ray server image, but the image pulling process should be just a one-time effort.\n" && sleep 5 @cat docker-compose-nvidia.yml | yq '.services.ray_server.deploy.resources.reservations.devices[0].device_ids |= (strenv(NVIDIA_VISIBLE_DEVICES) | split(",")) | ..style="double"' | \ COMPOSE_PROFILES=${PROFILE} EDITION=$${EDITION:=local-ce:latest} DEFAULT_USER_UID=$$(cat ${SYSTEM_CONFIG_PATH}/user_uid) RAY_LATEST_TAG=${RAY_LATEST_TAG} docker compose ${COMPOSE_FILES} -f docker-compose-latest.yml -f - up -d --quiet-pull else diff --git a/charts/core/templates/_helpers.tpl b/charts/core/templates/_helpers.tpl index c09a2f32..c250eb60 100644 --- a/charts/core/templates/_helpers.tpl +++ b/charts/core/templates/_helpers.tpl @@ -384,6 +384,21 @@ openfga {{- printf "%s-openfga" (include "core.fullname" .) -}} {{- end -}} +{{/* +registry +*/}} +{{- define "core.registry" -}} + {{- printf "%s-registry" (include "core.fullname" .) -}} +{{- end -}} + +{{- define "core.registry.port" -}} + {{- printf "5000" -}} +{{- end -}} + +{{- define "core.registry.metricsPort" -}} + {{- printf "5001" -}} +{{- end -}} + {{/* internal TLS secret names */}} @@ -446,6 +461,10 @@ Persistent Volume Claims {{- printf "%s-ray-conda-data-volume" (include "core.fullname" .) -}} {{- end -}} +{{- define "core.registryDataVolume" -}} + {{- printf "%s-registry-data-volume" (include "core.fullname" .) -}} +{{- end -}} + {{- define "core.databaseDataVolume" -}} {{- printf "%s-database-data-volume" (include "core.fullname" .) 
-}} {{- end -}} diff --git a/charts/core/templates/model-backend/configmap.yaml b/charts/core/templates/model-backend/configmap.yaml index b46b9345..7aab44ea 100644 --- a/charts/core/templates/model-backend/configmap.yaml +++ b/charts/core/templates/model-backend/configmap.yaml @@ -95,3 +95,6 @@ data: openfga: host: {{ template "core.openfga" . }} port: 8080 + registry: + host: {{ template "core.registry" . }} + port: {{ template "core.registry.port" . }} diff --git a/charts/core/templates/pvc.yaml b/charts/core/templates/pvc.yaml index 031299fe..399ba3c5 100644 --- a/charts/core/templates/pvc.yaml +++ b/charts/core/templates/pvc.yaml @@ -57,6 +57,37 @@ spec: {{- end }} {{- end }} {{- end }} +{{- $registry := .Values.persistence.persistentVolumeClaim.registry -}} +{{- if not $registry.existingClaim }} +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ template "core.registryDataVolume" . }} + annotations: + {{- range $key, $value := $registry.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- if eq .Values.persistence.resourcePolicy "keep" }} + helm.sh/resource-policy: keep + {{- end }} + labels: + {{- include "core.labels" . 
| nindent 4 }} + app.kubernetes.io/component: registry +spec: + accessModes: + - {{ $registry.accessMode }} + resources: + requests: + storage: {{ $registry.size }} + {{- if $registry.storageClass }} + {{- if eq "-" $registry.storageClass }} + storageClassName: "" + {{- else }} + storageClassName: {{ $registry.storageClass }} + {{- end }} + {{- end }} +{{- end }} {{- $database := .Values.persistence.persistentVolumeClaim.database -}} {{- if and (not $database.existingClaim) .Values.database.enabled }} --- diff --git a/charts/core/templates/ray-service/ray-service.yaml b/charts/core/templates/ray-service/ray-service.yaml index 58760672..24cdb197 100644 --- a/charts/core/templates/ray-service/ray-service.yaml +++ b/charts/core/templates/ray-service/ray-service.yaml @@ -55,9 +55,24 @@ spec: items: - key: cp_conda_env.sh path: cp_conda_env.sh + - name: podman-configmap + configMap: + name: podman + defaultMode: 0666 + items: + - key: registries.conf + path: registries.conf + - key: policy.json + path: policy.json + - key: storage.conf + path: storage.conf containers: - name: ray-head image: {{ .Values.rayService.image.repository }}:{{ .Values.rayService.image.tag }} + securityContext: + # for mounting /dev/fuse + # TODO: maybe implement a fuse-device-plugin-daemonset + privileged: true imagePullPolicy: Always {{- if .Values.rayService.headGroupSpec.resources }} resources: @@ -70,11 +85,15 @@ spec: value: http://core-grafana:80 - name: RAY_PROMETHEUS_HOST value: http://core-prometheus:9090 + - name: RAY_worker_register_timeout_seconds + value: "360" volumeMounts: - mountPath: /ray-conda-pack name: ray-conda - mountPath: /home/ray/script name: cp-conda-env-configmap + - mountPath: /etc/containers/ + name: podman-configmap ports: - containerPort: 6379 name: gcs-server @@ -134,6 +153,15 @@ spec: items: - key: start_ray_serve.sh path: start_ray_serve.sh + - name: podman-configmap + configMap: + name: podman + defaultMode: 0666 + items: + - key: registries.conf + path: 
registries.conf + - key: policy.json + path: policy.json containers: - name: ray-worker image: {{ $.Values.rayService.image.repository }}:{{ $.Values.rayService.image.tag }} @@ -162,6 +190,8 @@ spec: name: start-ray-serve-configmap - mountPath: /model-repository name: model-repository + - mountPath: /etc/containers/ + name: podman-configmap {{- end }} --- apiVersion: v1 @@ -212,3 +242,31 @@ data: serve start --http-host=0.0.0.0 --grpc-port 9000 --grpc-servicer-functions ray_pb2_grpc.add_RayServiceServicer_to_server echo "INFO: Start ray serve" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: podman +data: + registries.conf: | + unqualified-search-registries = ["{{ template "core.registry" . }}:{{ template "core.registry.port" . }}", "docker.io", "quay.io"] + + [[registry]] + location = "{{ template "core.registry" . }}:{{ template "core.registry.port" . }}" + insecure = true + policy.json: | + { + "default": [ + { + "type": "insecureAcceptAnything" + } + ], + "transports": { + "docker-daemon": { + "": [{ "type": "insecureAcceptAnything" }] + } + } + } + storage.conf: | + [storage] + driver = "overlay" diff --git a/charts/core/templates/registry/configmap.yaml b/charts/core/templates/registry/configmap.yaml new file mode 100644 index 00000000..0dde83c3 --- /dev/null +++ b/charts/core/templates/registry/configmap.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "core.registry" . }} + labels: + {{- include "core.labels" . 
| nindent 4 }} + app.kubernetes.io/component: registry +data: + config.yaml: |+ + version: {{ .Values.registry.config.version }} + log: + {{- toYaml .Values.registry.config.log | nindent 6 }} + storage: + {{- toYaml .Values.registry.config.storage | nindent 6 }} + http: + {{- toYaml .Values.registry.config.http | nindent 6 }} + redis: + {{- toYaml .Values.registry.config.redis | nindent 6 }} + health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 + tcp: + - addr: {{ template "core.redis.addr" . }} + timeout: 3s + interval: 30s + threshold: 3 diff --git a/charts/core/templates/registry/cronjob.yaml b/charts/core/templates/registry/cronjob.yaml new file mode 100644 index 00000000..969c9d05 --- /dev/null +++ b/charts/core/templates/registry/cronjob.yaml @@ -0,0 +1,70 @@ +{{- if .Values.registry.garbageCollect.enabled }} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ template "core.registry" . }}-garbage-collector + labels: + {{- include "core.labels" . | nindent 4 }} + app.kubernetes.io/component: registry +spec: + concurrencyPolicy: Forbid + schedule: {{ .Values.registry.garbageCollect.schedule | quote }} + jobTemplate: + metadata: + labels: + {{- include "core.matchLabels" . | nindent 8 }} + app.kubernetes.io/component: registry + annotations: + checksum/config: {{ include (print $.Template.BasePath "/registry/configmap.yaml") . | sha256sum }} + {{- with .Values.registry.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + template: + spec: + {{- if .Values.registry.serviceAccountName }} + serviceAccountName: {{ .Values.registry.serviceAccountName }} + {{- end }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 12 }} + {{- end }} + securityContext: + runAsUser: 65534 + runAsGroup: 65534 + fsGroup: 65534 + containers: + - name: garbage-collect + image: {{ .Values.registry.image.repository }}:{{ .Values.registry.image.tag }} + imagePullPolicy: {{ .Values.registry.image.pullPolicy }} + command: + - /bin/registry + - garbage-collect + - --delete-untagged={{ .Values.registry.garbageCollect.deleteUntagged }} + - /etc/docker/registry/config.yml + securityContext: + runAsUser: 65534 + runAsGroup: 65534 + fsGroup: 65534 + volumeMounts: + - name: config + mountPath: {{ .Values.registry.configPath }} + subPath: config.yaml + restartPolicy: OnFailure + {{- with .Values.registry.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.registry.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.registry.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ template "core.registry" . }} +{{- end }} \ No newline at end of file diff --git a/charts/core/templates/registry/deployment.yaml b/charts/core/templates/registry/deployment.yaml new file mode 100644 index 00000000..bc121a0f --- /dev/null +++ b/charts/core/templates/registry/deployment.yaml @@ -0,0 +1,131 @@ +{{- $registry := .Values.persistence.persistentVolumeClaim.registry -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "core.registry" . }} + labels: + {{- include "core.labels" . 
| nindent 4 }} + app.kubernetes.io/component: registry + annotations: + rollme: {{ randAlphaNum 5 | quote }} +spec: + strategy: + type: {{ .Values.updateStrategy.type }} + {{- if eq .Values.updateStrategy.type "RollingUpdate" }} + rollingUpdate: + maxSurge: {{ .Values.updateStrategy.rollingUpdate.maxSurge }} + maxUnavailable: {{ .Values.updateStrategy.rollingUpdate.maxUnavailable }} + {{- else}} + rollingUpdate: null + {{- end }} + {{- if not .Values.registry.autoscaling.enabled }} + replicas: {{ .Values.registry.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "core.matchLabels" . | nindent 6 }} + app.kubernetes.io/component: registry + template: + metadata: + labels: + {{- include "core.matchLabels" . | nindent 8 }} + app.kubernetes.io/component: registry + annotations: + checksum/config: {{ include (print $.Template.BasePath "/registry/configmap.yaml") . | sha256sum }} + {{- with .Values.registry.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + securityContext: + runAsUser: 65534 + runAsGroup: 65534 + fsGroup: 65534 + {{- if .Values.registry.serviceAccountName }} + serviceAccountName: {{ .Values.registry.serviceAccountName }} + {{- end }} + automountServiceAccountToken: {{ .Values.registry.automountServiceAccountToken | default false }} + terminationGracePeriodSeconds: 120 + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + initContainers: + - name: wait-for-redis + image: {{ .Values.redis.image.repository }}:{{ .Values.redis.image.tag }} + command: ['redis-cli', '-h'] + args: + - '$(REDIS_HOST)' + - 'ping' + env: + - name: REDIS_HOST + value: "{{ template "core.redis" . 
}}" + containers: + - name: registry + image: {{ .Values.registry.image.repository }}:{{ .Values.registry.image.tag }} + imagePullPolicy: {{ .Values.registry.image.pullPolicy }} + command: + - /bin/registry + - serve + - {{ .Values.registry.configPath }} + readinessProbe: + httpGet: + path: / + scheme: {{ ternary "https" "http" .Values.internalTLS.enabled | upper }} + port: {{ template "core.registry.port" . }} + periodSeconds: 10 + livenessProbe: + httpGet: + path: / + scheme: {{ ternary "https" "http" .Values.internalTLS.enabled | upper }} + port: {{ template "core.registry.port" . }} + periodSeconds: 10 + {{- if .Values.registry.resources }} + resources: + {{- toYaml .Values.registry.resources | nindent 12 }} + {{- end }} + ports: + - name: {{ ternary "https" "http" .Values.internalTLS.enabled }} + containerPort: {{ template "core.registry.port" . }} + protocol: TCP + - name: metrics + containerPort: {{ template "core.registry.metricsPort" . }} + protocol: TCP + volumeMounts: + - name: config + mountPath: {{ .Values.registry.configPath }} + subPath: config.yaml + - name: data-volume + mountPath: /var/lib/registry + {{- with .Values.registry.extraEnv }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ template "core.registry" . }} + - name: data-volume + {{- if not .Values.persistence.enabled }} + emptyDir: {} + {{- else if $registry.existingClaim }} + persistentVolumeClaim: + claimName: {{ $registry.existingClaim }} + {{- else }} + persistentVolumeClaim: + claimName: {{ template "core.registryDataVolume" . }} + {{- end }} + {{- with .Values.registry.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.registry.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.registry.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.registry.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} \ No newline at end of file diff --git a/charts/core/templates/registry/hpa.yaml b/charts/core/templates/registry/hpa.yaml new file mode 100644 index 00000000..f286411a --- /dev/null +++ b/charts/core/templates/registry/hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.registry.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ template "core.registry" . }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ template "core.registry" . }} + labels: + {{- include "core.labels" . | nindent 4 }} + app.kubernetes.io/component: registry + minReplicas: {{ .Values.registry.autoscaling.minReplicas }} + maxReplicas: {{ .Values.registry.autoscaling.maxReplicas }} + metrics: +{{- with .Values.registry.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ . }} +{{- end }} +{{- with .Values.registry.autoscaling.targetAverageMemoryUtilization }} + - type: Resource + resource: + name: memory + target: + type: AverageValue + averageValue: {{ . }} +{{- end }} +{{- end }} diff --git a/charts/core/templates/registry/service.yaml b/charts/core/templates/registry/service.yaml new file mode 100644 index 00000000..32a4b164 --- /dev/null +++ b/charts/core/templates/registry/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "core.registry" . }} + labels: + {{- include "core.labels" . | nindent 4 }} + app.kubernetes.io/component: registry + {{- with .Values.registry.serviceAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ports: + - name: {{ ternary "https" "http" .Values.internalTLS.enabled }} + port: {{ template "core.registry.port" . }} + targetPort: {{ template "core.registry.port" . }} + - name: metrics + port: {{ template "core.registry.metricsPort" . }} + targetPort: {{ template "core.registry.metricsPort" . 
}} + selector: + {{- include "core.matchLabels" . | nindent 4 }} + app.kubernetes.io/component: registry diff --git a/charts/core/values.yaml b/charts/core/values.yaml index f725dd94..e2df1a59 100644 --- a/charts/core/values.yaml +++ b/charts/core/values.yaml @@ -245,6 +245,13 @@ persistence: accessMode: ReadWriteOnce size: 128Mi annotations: {} + registry: + existingClaim: "" + storageClass: "" + subPath: "" + accessMode: ReadWriteOnce + size: 250Gi + annotations: {} # -- The usage collector configuration usage: usageidentifieruid: @@ -1080,7 +1087,7 @@ etcd: # -- The configuration of Redis redis: # -- If external Redis is used, set "enabled" to false - # and fill the connection informations in "external" section + # and fill the connection information in "external" section enabled: true # -- The image of redis image: @@ -1109,7 +1116,7 @@ redis: # -- The configuration of Elasticsearch elasticsearch: # -- If external Elasticsearch is used, set "enabled" to false - # and fill the connection informations in "external" section + # and fill the connection information in "external" section enabled: true clusterName: core-elasticsearch replicas: 1 @@ -1177,7 +1184,6 @@ jaeger: args: - > while [[ "$(curl -s -o /dev/null -w ''%{http_code}'' http://core-elasticsearch-master:9200)" != "200" ]]; do echo waiting for elasticsearch; sleep 1; done - # -- The configuration of OpenTelemetry Collector opentelemetry-collector: mode: deployment @@ -1379,6 +1385,97 @@ openfga: spec: minAvailable: maxUnavailable: + # -- The configuration of registry +registry: + image: + repository: registry + tag: 2.8.3 + pullPolicy: IfNotPresent + configPath: /etc/docker/registry/config.yml + serviceAccountName: "" + automountServiceAccountToken: false + replicaCount: 1 + extraEnv: [] + extraVolumes: [] + extraVolumeMounts: [] + extraInitContainers: [] + sidecarContainers: {} + podAnnotations: {} + serviceAnnotations: {} + resources: {} + nodeSelector: {} + tolerations: [] + affinity: {} + 
autoscaling: + enabled: false + minReplicas: + maxReplicas: + targetCPUUtilizationPercentage: + targetAverageMemoryUtilization: + podDisruptionBudget: + enabled: false + spec: + minAvailable: + maxUnavailable: + garbageCollect: + enabled: false + deleteUntagged: true + schedule: "@midnight" + config: + version: 0.1 + log: + level: Info + formatter: text + fields: + service: registry + environment: k8s:ce + storage: + filesystem: + rootdirectory: /var/lib/registry + maxthreads: 100 + delete: + enabled: true + redirect: + disable: false + cache: + blobdescriptor: redis + blobdescriptorsize: 10000 + maintenance: + uploadpurging: + enabled: true + age: 168h + interval: 24h + dryrun: false + readonly: + enabled: false + http: + addr: 0.0.0.0:5000 + prefix: / + secret: i-love-instill-ai + relativeurls: false + draintimeout: 60s + debug: + addr: 0.0.0.0:5001 + prometheus: + enabled: true + path: /metrics + headers: + X-Content-Type-Options: [nosniff] + http2: + disabled: false + h2c: + enabled: false + redis: + addr: core-redis:6379 + dialtimeout: 100ms + readtimeout: 100ms + writetimeout: 100ms + pool: + maxidle: 16 + maxactive: 128 + idletimeout: 300s + tls: + enabled: false tags: observability: true prometheusStack: false diff --git a/configs/registry/config.yaml b/configs/registry/config.yaml new file mode 100644 index 00000000..d00822ee --- /dev/null +++ b/configs/registry/config.yaml @@ -0,0 +1,73 @@ +version: 0.1 +log: + level: debug + formatter: text + fields: + service: registry + environment: local:ce +storage: + filesystem: + rootdirectory: /var/lib/registry + maxthreads: 100 + delete: + enabled: true + redirect: + disable: false + cache: + blobdescriptor: redis + blobdescriptorsize: 10000 + maintenance: + uploadpurging: + enabled: false + age: 168h + interval: 24h + dryrun: false + readonly: + enabled: false +http: + addr: 0.0.0.0:5000 + prefix: / + secret: asecretforlocaldevelopment + relativeurls: false + draintimeout: 60s + debug: + addr: 0.0.0.0:5001 + 
prometheus: + enabled: true + path: /metrics + headers: + X-Content-Type-Options: [nosniff] + http2: + disabled: false + h2c: + enabled: false +redis: + addr: redis:6379 + dialtimeout: 100ms + readtimeout: 100ms + writetimeout: 100ms + pool: + maxidle: 16 + maxactive: 128 + idletimeout: 300s + tls: + enabled: false +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 + http: + # Check Instill Artifact + # - uri: http://server.to.check/must/return/200 + # headers: + # Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==] + # statuscode: 200 + # timeout: 3s + # interval: 10s + # threshold: 3 + tcp: + - addr: redis:6379 + timeout: 3s + interval: 30s + threshold: 3 diff --git a/configs/registry/registries.conf b/configs/registry/registries.conf new file mode 100644 index 00000000..7478fbd1 --- /dev/null +++ b/configs/registry/registries.conf @@ -0,0 +1,82 @@ +# For more information on this configuration file, see containers-registries.conf(5). +# +# NOTE: RISK OF USING UNQUALIFIED IMAGE NAMES +# We recommend always using fully qualified image names including the registry +# server (full dns name), namespace, image name, and tag +# (e.g., registry.redhat.io/ubi8/ubi:latest). Pulling by digest (i.e., +# quay.io/repository/name@digest) further eliminates the ambiguity of tags. +# When using short names, there is always an inherent risk that the image being +# pulled could be spoofed. For example, a user wants to pull an image named +# `foobar` from a registry and expects it to come from myregistry.com. If +# myregistry.com is not first in the search list, an attacker could place a +# different `foobar` image at a registry earlier in the search list. The user +# would accidentally pull and run the attacker's image and code rather than the +# intended content. We recommend only adding registries which are completely +# trusted (i.e., registries which don't allow unknown or anonymous users to +# create accounts with arbitrary names). 
This will prevent an image from being +# spoofed, squatted or otherwise made insecure. If it is necessary to use one +# of these registries, it should be added at the end of the list. +# +# # An array of host[:port] registries to try when pulling an unqualified image, in order. +unqualified-search-registries = ["registry:5000", "docker.io", "quay.io"] + +[[registry]] +location = "registry:5000" +insecure = true + +# +# [[registry]] +# # The "prefix" field is used to choose the relevant [[registry]] TOML table; +# # (only) the TOML table with the longest match for the input image name +# # (taking into account namespace/repo/tag/digest separators) is used. +# # +# # The prefix can also be of the form: *.example.com for wildcard subdomain +# # matching. +# # +# # If the prefix field is missing, it defaults to be the same as the "location" field. +# prefix = "example.com/foo" +# +# # If true, unencrypted HTTP as well as TLS connections with untrusted +# # certificates are allowed. +# insecure = false +# +# # If true, pulling images with matching names is forbidden. +# blocked = false +# +# # The physical location of the "prefix"-rooted namespace. +# # +# # By default, this is equal to "prefix" (in which case "prefix" can be omitted +# # and the [[registry]] TOML table can only specify "location"). +# # +# # Example: Given +# # prefix = "example.com/foo" +# # location = "internal-registry-for-example.net/bar" +# # requests for the image example.com/foo/myimage:latest will actually work with the +# # internal-registry-for-example.net/bar/myimage:latest image. +# +# # The location can be empty iff prefix is in a +# # wildcarded format: "*.example.com". In this case, the input reference will +# # be used as-is without any rewrite. +# location = "internal-registry-for-example.net/bar" +# +# # (Possibly-partial) mirrors for the "prefix"-rooted namespace. 
+# # +# # The mirrors are attempted in the specified order; the first one that can be +# # contacted and contains the image will be used (and if none of the mirrors contains the image, +# # the primary location specified by the "registry.location" field, or using the unmodified +# # user-specified reference, is tried last). +# # +# # Each TOML table in the "mirror" array can contain the following fields, with the same semantics +# # as if specified in the [[registry]] TOML table directly: +# # - location +# # - insecure +# [[registry.mirror]] +# location = "example-mirror-0.local/mirror-for-foo" +# [[registry.mirror]] +# location = "example-mirror-1.local/mirrors/foo" +# insecure = true +# # Given the above, a pull of example.com/foo/image:latest will try: +# # 1. example-mirror-0.local/mirror-for-foo/image:latest +# # 2. example-mirror-1.local/mirrors/foo/image:latest +# # 3. internal-registry-for-example.net/bar/image:latest +# # in order, and use the first one that exists. \ No newline at end of file diff --git a/docker-compose-latest.yml b/docker-compose-latest.yml index a7f42a0b..1b24caa8 100644 --- a/docker-compose-latest.yml +++ b/docker-compose-latest.yml @@ -167,3 +167,7 @@ services: etcd: ports: - ${ETCD_CLIENT_PORT}:${ETCD_CLIENT_PORT} + + registry: + ports: + - ${REGISTRY_PORT}:5000 diff --git a/docker-compose.yml b/docker-compose.yml index f09693a6..c647f171 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -411,6 +411,10 @@ services: ray_server: container_name: ${RAY_SERVER_HOST} image: ${RAY_SERVER_IMAGE}:${RAY_RELEASE_TAG} + # for dind podman + privileged: true + devices: + - "/dev/fuse" restart: unless-stopped environment: - RAY_ADDRESS=0.0.0.0:6379 @@ -418,14 +422,20 @@ services: - RAY_GRAFANA_HOST=http://${GRAFANA_HOST}:${GRAFANA_PORT} - RAY_PROMETHEUS_HOST=http://${PROMETHEUS_HOST}:${PROMETHEUS_PORT} - RAY_GRAFANA_IFRAME_HOST=http://localhost:${GRAFANA_PORT} + - RAY_worker_register_timeout_seconds=360 entrypoint: ["/bin/bash", "-c"] 
command: | - 'ray start --head --node-ip-address=0.0.0.0 --dashboard-host=0.0.0.0 --metrics-export-port ${RAY_SERVER_PROMETHEUS_PORT} --disable-usage-stats && + 'if [[ $${NVIDIA_VISIBLE_DEVICES} == all ]]; then + sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml; + fi && + ray start --head --node-ip-address=0.0.0.0 --dashboard-host=0.0.0.0 --metrics-export-port ${RAY_SERVER_PROMETHEUS_PORT} --disable-usage-stats && serve start --http-host=0.0.0.0 --grpc-port ${RAY_SERVER_SERVE_GRPC_PORT} --grpc-servicer-functions ray_pb2_grpc.add_RayServiceServicer_to_server && tail -f /dev/null' volumes: - model_repository:/model-repository - ray_conda:/home/ray/anaconda3/ + - /var/lib/containers:/var/lib/containers + - ${CONFIG_DIR_PATH}/registry/registries.conf:/etc/containers/registries.conf healthcheck: test: ["CMD", "ray", "status"] start_period: 20s @@ -576,3 +586,13 @@ services: - type: bind source: /var/run/docker.sock target: /var/run/docker.sock + + registry: + container_name: ${REGISTRY_HOST} + image: ${REGISTRY_IMAGE}:${REGISTRY_VERSION} + restart: unless-stopped + volumes: + - ${CONFIG_DIR_PATH}/registry/config.yaml:/etc/docker/registry/config.yml + depends_on: + redis: + condition: service_healthy