Skip to content

Commit c85d399

Browse files
authored
Lookup standard Helm release name for RIG Helm installation (1ff9c) (#104)
1 parent 70576ef commit c85d399

File tree

2 files changed

+52
-33
lines changed

2 files changed

+52
-33
lines changed

helm_chart/install_rig_dependencies.sh

Lines changed: 52 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,42 @@
11
#!/bin/bash
22

3-
RIG_HELM_RELEASE=rig-dependencies
4-
DATETIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
5-
SUPPORTED_REGIONS=(
6-
"us-east-1"
7-
"eu-north-1"
8-
)
9-
10-
11-
SRC_DIR="HyperPodHelmChart"
12-
OUTPUT_DIR="HyperPodHelmChartForRIG"
13-
STANDARD_HELM_RELEASE_NAME=dependencies
14-
TRAINING_OPERATORS=$STANDARD_HELM_RELEASE_NAME-training-operators # dependencies- prefix from standard Helm installation
15-
EFA=$STANDARD_HELM_RELEASE_NAME-aws-efa-k8s-device-plugin # dependencies- prefix from standard Helm installation
16-
PATCH_ONLY=(
3+
set_script_variables() {
4+
#
5+
# Some of this logic will be migrated into the standard Helm chart (e.g. patches)
6+
# For now, we will define what is needed here based on Helm Release Name lookup
177
#
18-
# These objects do not need entirely separate YAML; we just need to patch them to make them work with RIG
19-
#
20-
"$TRAINING_OPERATORS"
21-
"$EFA"
22-
)
23-
24-
# Format: "<eks|hyperpod>,namespace,<k8s_name|chart_dir>"
25-
add_ons=(
26-
"eks,kube-system,aws-node,daemonset"
27-
"eks,kube-system,coredns,deployment"
28-
#"hp,kube-system,mpi-operator,deployment"
29-
#"hp,kube-system,neuron-device-plugin,daemonset"
30-
"hp,kubeflow,$TRAINING_OPERATORS,deployment"
31-
"hp,kube-system,$EFA,daemonset"
32-
)
8+
RIG_HELM_RELEASE=rig-dependencies
9+
DATETIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
10+
SUPPORTED_REGIONS=(
11+
"us-east-1"
12+
"eu-north-1"
13+
)
14+
15+
SRC_DIR="HyperPodHelmChart"
16+
OUTPUT_DIR="HyperPodHelmChartForRIG"
17+
18+
STANDARD_HELM_RELEASE_NAME=$(get_standard_hyperpod_helm_release_name)
19+
TRAINING_OPERATORS=$STANDARD_HELM_RELEASE_NAME-training-operators
20+
EFA=$STANDARD_HELM_RELEASE_NAME-aws-efa-k8s-device-plugin
21+
PATCH_ONLY=(
22+
#
23+
# These objects do not need entirely separate YAML; we just need to patch them to make them work with RIG
24+
#
25+
"$TRAINING_OPERATORS"
26+
"$EFA"
27+
)
28+
add_ons=(
29+
#
30+
# Format: "<eks|hp>,namespace,<k8s_name|chart_dir>,<daemonset|deployment>"
31+
#
32+
"eks,kube-system,aws-node,daemonset"
33+
"eks,kube-system,coredns,deployment"
34+
#"hp,kube-system,mpi-operator,deployment"
35+
#"hp,kube-system,neuron-device-plugin,daemonset"
36+
"hp,kubeflow,$TRAINING_OPERATORS,deployment"
37+
"hp,kube-system,$EFA,daemonset"
38+
)
39+
}
3340

3441
generate_helm_chart_root() {
3542
local outdir=$1
@@ -402,7 +409,6 @@ assert_addons_enabled() {
402409
response=$(kubectl get $kind $name -n $namespace --no-headers 2>&1)
403410
if [[ "$response" == *"Error from server (NotFound)"* ]] || [ -z "$response" ]; then
404411
echo "Namespace $namespace does not exist or No $kind $name found in namespace $namespace. Please ensure CNI, CoreDNS add-ons enabled, and that standard HyperPod Helm chart is installed for this cluster before installing RIG dependencies."
405-
echo "⚠️ Note: RIG installation depends on the Helm release name. If the standard Helm installation command/release name changed (i.e. no longer 'helm install dependencies...' where 'dependencies' is the release name), then please update the RELEASE_NAME in install_rig_dependencies.sh before running"
406412
exit 1
407413
fi
408414
done
@@ -552,14 +558,29 @@ assert_not_already_installed() {
552558
fi
553559
}
554560

561+
#######################################
# Look up the Helm release name that the standard HyperPod Helm chart was
# installed with, by reading the "meta.helm.sh/release-name" annotation on the
# 'aws-hyperpod' namespace.
# Globals:   none
# Arguments: none
# Outputs:   the release name on stdout; status and error messages on stderr
# Returns:   0 on success. On failure calls `exit 1`; when this function is
#            invoked through command substitution, that terminates only the
#            substitution's subshell, so the caller sees an empty result and a
#            non-zero status and must check for it.
#######################################
get_standard_hyperpod_helm_release_name() {
  local release_name

  release_name=$(kubectl get namespace aws-hyperpod -o yaml \
    | yq '.metadata.annotations."meta.helm.sh/release-name"')

  if [ -z "$release_name" ]; then
    echo "Error: Namespace 'aws-hyperpod' does not exist. Please be sure to install the HyperPod standard Helm chart (https://github.com/aws/sagemaker-hyperpod-cli/tree/main/helm_chart#step-three)" >&2
    exit 1
  fi

  # yq prints the literal string "null" when the requested path is absent
  # (namespace present but not annotated by Helm); treating that as a release
  # name would silently produce resource names such as "null-training-operators".
  if [ "$release_name" = "null" ]; then
    echo "Error: Namespace 'aws-hyperpod' has no 'meta.helm.sh/release-name' annotation. Please be sure to install the HyperPod standard Helm chart (https://github.com/aws/sagemaker-hyperpod-cli/tree/main/helm_chart#step-three)" >&2
    exit 1
  fi

  # Status goes to stderr so that stdout carries only the release name.
  echo "Found Namespace 'aws-hyperpod' installed with Helm release name: $release_name" >&2
  echo "$release_name"
  return 0
}
573+
555574
main() {
556575
assert_not_already_installed
557576

558577
ensure_yq_installed
578+
579+
set_script_variables
559580

560581
assert_supported_region
561582
assert_addons_enabled add_ons[@]
562-
583+
563584
set -e
564585
fetch_yaml_and_enable_overrides add_ons[@]
565586

helm_chart/readme.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,6 @@ Notes:
9999
100100
If needed, please run `chmod +700 ./install_rig_dependencies.sh` to allow the script to execute.
101101
102-
⚠️ Note: RIG installation depends on the Helm release name. If the above commands were modified for standard Helm installation (i.e. no longer 'helm install dependencies...' where 'dependencies' is the release name), then please update the STANDARD_HELM_RELEASE_NAME in install_rig_dependencies.sh before running
103-
104102
⚠️ Note: This will require the yq utility with version >= 4 (e.g. https://github.com/mikefarah/yq/releases/tag/v4)
105103
106104
⚠️ Note: aws-node (AWS VPC CNI) is a critical add-on for general pod use.

0 commit comments

Comments
 (0)