Skip to content

Commit

Permalink
#1839: introduce script to Helm chart updating pod-deletion-cost
Browse files Browse the repository at this point in the history
* script is executed as "pre-upgrade" hook and as cron-job
* script updates the pod-deletion-cost for the "oldest" members in the Ditto cluster

Signed-off-by: Thomas Jäckle <thomas.jaeckle@beyonnex.io>
  • Loading branch information
thjaeckle committed Jan 25, 2024
1 parent f2904ef commit 92036c2
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 4 deletions.
2 changes: 1 addition & 1 deletion deployment/helm/ditto/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ description: |
A digital twin is a virtual, cloud based, representation of his real world counterpart
(real world “Things”, e.g. devices like sensors, smart heating, connected cars, smart grids, EV charging stations etc).
type: application
version: 3.4.4 # chart version is effectively set by release-job
version: 3.4.6 # chart version is effectively set by release-job
appVersion: 3.4.4
keywords:
- iot-chart
Expand Down
92 changes: 92 additions & 0 deletions deployment/helm/ditto/scripts/patch-pods-deletion-cost.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/bin/bash
# Copyright (c) 2024 Contributors to the Eclipse Foundation
#
# See the NOTICE file(s) distributed with this work for additional
# information regarding copyright ownership.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# http://www.eclipse.org/legal/epl-2.0
#
# SPDX-License-Identifier: EPL-2.0

# Patches the "controller.kubernetes.io/pod-deletion-cost" annotation of the pods forming the Ditto cluster:
#  * pods hosting an "oldest" Pekko cluster member (per role) get a cost of 100 per role they are oldest for
#  * pods which carry a cost from a previous run, but no longer host an "oldest" member, get the annotation removed
#
# Required environment: KUBERNETES_SERVICE_HOST, KUBERNETES_SERVICE_PORT and a mounted service account which is
# allowed to "list" and "patch" pods in its namespace. Required tools: bash, curl, jq.
#
# NOTE: this file is rendered via Helm "tpl" - do not use Go template action delimiters in here.

# Abort on use of unset variables and let a pipeline fail when ANY stage fails
# (otherwise "curl ... | jq ..." would only report the exit status of jq).
set -uo pipefail

# Prints the passed message to stderr and terminates the script with exit code 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

SERVICEACCOUNT=/var/run/secrets/kubernetes.io/serviceaccount
TOKEN=$(cat "${SERVICEACCOUNT}/token") || die "Reading service account token failed"
CACERT="${SERVICEACCOUNT}/ca.crt"
NAMESPACE=$(cat "${SERVICEACCOUNT}/namespace") || die "Reading service account namespace failed"
PODS_API_URL="https://${KUBERNETES_SERVICE_HOST:?must be set}:${KUBERNETES_SERVICE_PORT:?must be set}/api/v1/namespaces/${NAMESPACE}/pods"

# Lists all pods of the Ditto cluster as JSON array of { pod, ip, cost } objects on stdout.
# Returns non-zero if either the k8s API call or the jq transformation failed.
list_ditto_pods() {
  curl --fail --silent --show-error --cacert "${CACERT}" -H "Authorization: Bearer ${TOKEN}" \
    "${PODS_API_URL}" \
    | jq '.items | map(select(.metadata.labels.actorSystemName == "ditto-cluster") | { pod: .metadata.name, ip: .status.podIP, cost: .metadata.annotations."controller.kubernetes.io/pod-deletion-cost"})'
}

# Sets (or removes) the pod-deletion-cost annotation of a single pod via a JSON merge patch.
# Arguments: $1 - pod name
#            $2 - annotation value as JSON literal, e.g. '"100"' - or 'null' in order to remove the annotation
# Returns:   the exit status of curl
patch_pod_deletion_cost() {
  local pod_name=$1
  local cost_json=$2
  curl -X PATCH --silent --output /dev/null --show-error --fail --cacert "${CACERT}" \
    -H "Authorization: Bearer ${TOKEN}" -H 'Content-Type: application/merge-patch+json' \
    "${PODS_API_URL}/${pod_name}" \
    --data "{\"metadata\": {\"annotations\": {\"controller.kubernetes.io/pod-deletion-cost\": ${cost_json} }}}"
}

# mktemp guarantees a fresh, non-predictable directory; the timestamp is kept in the name for easier debugging
WORKING_FOLDER=$(mktemp -d "/tmp/pod-deletion-script-$(date +%Y-%m-%d-%H-%M-%S)-XXXXXX") \
  || die "Creating working dir failed"
cd "${WORKING_FOLDER}" || die "Changing into working dir failed: ${WORKING_FOLDER}"

echo "Retrieving current pods, ips and their deletion cost in working dir: $WORKING_FOLDER ..."
# access k8s pods information and extract "pod name", "internal pod IP" and "pod deletion cost" into file `pod_ip_cost.json`:
list_ditto_pods > pod_ip_cost.json
rc=$?
if (( rc != 0 )); then
  die "Retrieving current pods curl failed [exit-code: $rc]"
fi

# extract the first available internal IP from `pod_ip_cost.json` in order to lookup Apache Pekko cluster membership data
# (pods which did not yet get an IP assigned are skipped):
somePekkoClusterIp=$(jq -r 'map(select(.ip != null)) | .[0].ip // empty' pod_ip_cost.json) \
  || die "Extracting internal ip from pod_ip_cost.json failed"
if [[ -z "${somePekkoClusterIp}" ]]; then
  die "Found no pod of the Ditto cluster having an internal ip"
fi
echo "Accessing current Pekko Cluster members from internal ip: $somePekkoClusterIp ..."
curl --fail --silent --show-error -o pekko_cluster_members.json "http://${somePekkoClusterIp}:7626/cluster/members"
rc=$?
if (( rc != 0 )); then
  die "Accessing current Pekko Cluster members curl failed [exit-code: $rc]"
fi

echo "Finding out all oldest ..."
# find out all "oldest" pods (per role and the "overall oldest" - specifying the "cost" for deletion in here as well):
jq '.oldestPerRole | to_entries | map(.value | split("@") | last | split(":") | first | { ip: . }) | group_by(.ip) | map(.[]+{"cost":length}) | unique_by(.ip) | .[].cost *= 100' pekko_cluster_members.json \
  > ip_to_new_cost.json \
  || die "Calculating new pod deletion costs failed"

echo "Merging pods and their internal ip addresses with pod deletion cost ..."
# merge pods and their internal IP addresses together with the calculated pod deletion cost:
jq 'INDEX(.ip)' ip_to_new_cost.json > ip_to_new_cost_by_ip.json \
  || die "Indexing new pod deletion costs by ip failed"
jq 'map(del(.cost)) | INDEX(.ip)' pod_ip_cost.json > pods_by_ip.json \
  || die "Indexing pods by ip failed"
# only keep entries having both a new cost and a known pod - a cluster member without a matching pod cannot be patched
jq -s '.[0] * .[1] | to_entries | map(select(.value.cost != null and .value.pod != null).value)' pods_by_ip.json ip_to_new_cost_by_ip.json \
  > new_cost_pod_and_ip.json \
  || die "Merging pods with new pod deletion costs failed"

# clear remaining ones - which had a cost from "last run", but no longer have one now
jq -r '.[] | select(.cost != null) | .pod' pod_ip_cost.json > pods_with_old_cost.txt \
  || die "Finding pods with old pod deletion cost failed"
while IFS= read -r pod; do
  # compare the exact pod name - a substring match would e.g. treat "things-1" as contained when only "things-10" is
  if jq -e --arg pod "$pod" 'any(.[]; .pod == $pod)' new_cost_pod_and_ip.json > /dev/null; then
    echo "Not clearing pod-deletion-cost of pod: $pod"
  else
    echo "Clearing pod-deletion-cost of pod: $pod"
    patch_pod_deletion_cost "$pod" 'null'
    rc=$?
    if (( rc != 0 )); then
      die "Clearing pod-deletion-cost curl failed [exit-code: $rc]"
    fi
  fi
done < pods_with_old_cost.txt

echo "Starting to patch pods with updated pod deletion costs ..."
# the loop reads from a file instead of a pipe: inside a pipeline the loop would run in a subshell and a failure
# could only terminate that subshell instead of the script
jq -r '.[] | [.pod, .cost] | @tsv' new_cost_pod_and_ip.json > new_cost_pod_and_ip.tsv \
  || die "Preparing pods to patch failed"
while IFS=$'\t' read -r pod cost; do
  echo "Patching pod-deletion-cost of pod: $pod to: $cost"
  # annotation values have to be strings, therefore wrap the cost in quotes
  patch_pod_deletion_cost "$pod" "\"${cost}\""
  rc=$?
  if (( rc != 0 )); then
    die "Patching pod-deletion-cost curl failed [exit-code: $rc]"
  fi
done < new_cost_pod_and_ip.tsv

echo "Pod deletion costs after script finished:"
# purely informational - a failure in here must not fail the already successfully applied patching
list_ditto_pods || echo "Retrieving pod deletion costs after patching failed" >&2

echo "DONE"
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (c) 2024 Contributors to the Eclipse Foundation
#
# See the NOTICE file(s) distributed with this work for additional
# information regarding copyright ownership.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# http://www.eclipse.org/legal/epl-2.0
#
# SPDX-License-Identifier: EPL-2.0
{{- if and .Values.global.podDeletionCostPatching.enabled -}}
---
# CronJob which periodically executes "patch-pods-deletion-cost.sh" from the mounted hook-scripts ConfigMap.
# Only rendered when "global.podDeletionCostPatching.enabled" is set.
apiVersion: batch/v1
kind: CronJob
metadata:
name: {{ include "ditto.fullname" . }}-pod-deletion-cron-job
labels:
{{ include "ditto.labels" . | indent 4 }}
spec:
schedule: "*/5 * * * *" # every 5 minutes
# Forbid: do not start a new run while the previous one is still running
concurrencyPolicy: Forbid
# only keep the most recent successful and the most recent failed job
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
metadata:
name: {{ include "ditto.fullname" . }}-pod-deletion-cron-job
labels:
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
spec:
# NOTE(review): the script lists and patches pods via the k8s API - presumably this only works with the chart's
# service account; confirm the behavior when "rbac.enabled" is false
{{- if .Values.rbac.enabled }}
serviceAccountName: {{ template "ditto.serviceAccountName" . }}
{{- end }}
restartPolicy: Never
containers:
- name: {{ .Chart.Name }}-pod-deletion-cost-cronjob
# the script requires bash, curl and jq
# NOTE(review): ":latest" is a floating tag - consider pinning a fixed version
image: "public.ecr.aws/h0h9t7p1/alpine-bash-curl-jq:latest"
command:
- /bin/sh
- -c
- |
cd /opt/ditto-scripts
./patch-pods-deletion-cost.sh
volumeMounts:
- name: hook-scripts
mountPath: "/opt/ditto-scripts"
# the scripts are provided by the "-hook-scripts" ConfigMap, mode 0555 makes them readable and executable
volumes:
- name: hook-scripts
configMap:
name: {{ include "ditto.fullname" . }}-hook-scripts
defaultMode: 0555
{{- end }}
56 changes: 56 additions & 0 deletions deployment/helm/ditto/templates/hooks/pre-upgrade-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright (c) 2024 Contributors to the Eclipse Foundation
#
# See the NOTICE file(s) distributed with this work for additional
# information regarding copyright ownership.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# http://www.eclipse.org/legal/epl-2.0
#
# SPDX-License-Identifier: EPL-2.0
{{- if and .Values.global.podDeletionCostPatching.enabled -}}
---
# Job executed as Helm "pre-upgrade" hook which runs "patch-pods-deletion-cost.sh" from the mounted hook-scripts
# ConfigMap before an upgrade is applied. Only rendered when "global.podDeletionCostPatching.enabled" is set.
apiVersion: batch/v1
kind: Job
metadata:
# NOTE(review): named "-pre-update" whereas the pod template below is named "-pre-upgrade" - confirm whether intended
name: {{ include "ditto.fullname" . }}-pre-update
labels:
{{ include "ditto.labels" . | indent 4 }}
annotations:
# This is what defines this resource as a hook. Without this line, the
# job is considered part of the release.
helm.sh/hook: pre-upgrade
helm.sh/hook-weight: "-5"
# delete a previous hook job before creating a new one and delete the job again once it succeeded
helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
spec:
backoffLimit: 3
template:
metadata:
name: {{ include "ditto.fullname" . }}-pre-upgrade
labels:
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
spec:
# NOTE(review): the script lists and patches pods via the k8s API - presumably this only works with the chart's
# service account; confirm the behavior when "rbac.enabled" is false
{{- if .Values.rbac.enabled }}
serviceAccountName: {{ template "ditto.serviceAccountName" . }}
{{- end }}
restartPolicy: Never
containers:
- name: {{ .Chart.Name }}-pod-deletion-cost-pre-upgrade-hook
# the script requires bash, curl and jq
# NOTE(review): ":latest" is a floating tag - consider pinning a fixed version
image: "public.ecr.aws/h0h9t7p1/alpine-bash-curl-jq:latest"
command:
- /bin/sh
- -c
- |
cd /opt/ditto-scripts
./patch-pods-deletion-cost.sh
volumeMounts:
- name: hook-scripts
mountPath: "/opt/ditto-scripts"
# NOTE(review): the referenced ConfigMap is not annotated as hook in its template - verify that it already exists
# when this pre-upgrade hook runs (e.g. on the first upgrade introducing it)
volumes:
- name: hook-scripts
configMap:
name: {{ include "ditto.fullname" . }}-hook-scripts
defaultMode: 0555
{{- end }}
23 changes: 23 additions & 0 deletions deployment/helm/ditto/templates/hooks/scripts-configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright (c) 2024 Contributors to the Eclipse Foundation
#
# See the NOTICE file(s) distributed with this work for additional
# information regarding copyright ownership.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# http://www.eclipse.org/legal/epl-2.0
#
# SPDX-License-Identifier: EPL-2.0
{{- if and .Values.global.podDeletionCostPatching.enabled -}}
---
# ConfigMap providing the script which is mounted as volume "hook-scripts" by the pre-upgrade Job and the CronJob.
# Only rendered when "global.podDeletionCostPatching.enabled" is set.
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "ditto.fullname" . }}-hook-scripts
labels:
app.kubernetes.io/name: {{ include "ditto.name" . }}-hook-scripts
{{ include "ditto.labels" . | indent 4 }}
# the script file is rendered via "tpl": template actions contained in the script are evaluated by Helm, so the
# script must not contain unintended Go template delimiters
data:
patch-pods-deletion-cost.sh: |
{{- tpl (.Files.Get "scripts/patch-pods-deletion-cost.sh") . | nindent 4 }}
{{- end }}
12 changes: 9 additions & 3 deletions deployment/helm/ditto/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ metadata:
app.kubernetes.io/name: {{ include "ditto.name" . }}
{{ include "ditto.labels" . | indent 4 }}
rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list"]
- apiGroups:
- "" # "" indicates the core API group
resources:
- "pods"
verbs:
- "get"
- "watch"
- "list"
- "patch" # we require "patch" to annotate the pods with pod-deletion-cost
{{- end -}}
5 changes: 5 additions & 0 deletions deployment/helm/ditto/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,11 @@ global:
adaptiveSampler:
# throughput the throughput goal trying to achieve with the adaptive sampler
throughput: 600
# podDeletionCostPatching holds the configuration for whether to patch the "oldest" members of the Ditto cluster with
# higher k8s pod-deletion-cost annotations so that they are shut down last, e.g. when doing an upgrade
podDeletionCostPatching:
# enabled whether the pod-deletion-cost annotation patching should be enabled
enabled: true

## ----------------------------------------------------------------------------
## dbconfig for mongodb connections
Expand Down

0 comments on commit 92036c2

Please sign in to comment.