|
1 | 1 | #!/bin/bash |
2 | 2 |
|
#
# Populate the script-wide settings used by the rest of the installer.
#
# Some of this logic will be migrated into the standard Helm chart (e.g. patches).
# For now, we define what is needed here based on Helm release name lookup.
#
# Globals written:
#   RIG_HELM_RELEASE, DATETIME, SUPPORTED_REGIONS, SRC_DIR, OUTPUT_DIR,
#   STANDARD_HELM_RELEASE_NAME, TRAINING_OPERATORS, EFA, PATCH_ONLY, add_ons
# Calls:
#   get_standard_hyperpod_helm_release_name (prints the release name on stdout)
# Exits:
#   1 when the release name lookup fails
#
set_script_variables() {
    RIG_HELM_RELEASE=rig-dependencies
    DATETIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    SUPPORTED_REGIONS=(
        "us-east-1"
        "eu-north-1"
    )

    SRC_DIR="HyperPodHelmChart"
    OUTPUT_DIR="HyperPodHelmChartForRIG"

    # An `exit` inside $(...) only terminates the command-substitution subshell,
    # so the lookup's failure status must be propagated explicitly; otherwise the
    # names below would be built from an empty release name.
    STANDARD_HELM_RELEASE_NAME=$(get_standard_hyperpod_helm_release_name) || exit 1

    # Resource names carry the standard Helm release name as a prefix.
    TRAINING_OPERATORS="${STANDARD_HELM_RELEASE_NAME}-training-operators"
    EFA="${STANDARD_HELM_RELEASE_NAME}-aws-efa-k8s-device-plugin"

    PATCH_ONLY=(
        #
        # These objects do not need entirely separate YAML; we just need to patch them to make them work with RIG
        #
        "$TRAINING_OPERATORS"
        "$EFA"
    )
    add_ons=(
        #
        # Format: "<eks|hyperpod>,namespace,<k8s_name|chart_dir>,type"
        #
        "eks,kube-system,aws-node,daemonset"
        "eks,kube-system,coredns,deployment"
        #"hp,kube-system,mpi-operator,deployment"
        #"hp,kube-system,neuron-device-plugin,daemonset"
        "hp,kubeflow,$TRAINING_OPERATORS,deployment"
        "hp,kube-system,$EFA,daemonset"
    )
}
33 | 40 |
|
34 | 41 | generate_helm_chart_root() { |
35 | 42 | local outdir=$1 |
@@ -402,7 +409,6 @@ assert_addons_enabled() { |
402 | 409 | response=$(kubectl get $kind $name -n $namespace --no-headers 2>&1) |
403 | 410 | if [[ "$response" == *"Error from server (NotFound)"* ]] || [ -z "$response" ]; then |
404 | 411 | echo "Namespace $namespace does not exist or No $kind $name found in namespace $namespace. Please ensure CNI, CoreDNS add-ons enabled, and that standard HyperPod Helm chart is installed for this cluster before installing RIG dependencies." |
405 | | - echo "⚠️ Note: RIG installation depends on the Helm release name. If the standard Helm installation command/release name changed (i.e. no longer 'helm install dependencies...' where 'dependencies' is the release name), then please update the RELEASE_NAME in install_rig_dependencies.sh before running" |
406 | 412 | exit 1 |
407 | 413 | fi |
408 | 414 | done |
@@ -552,14 +558,29 @@ assert_not_already_installed() { |
552 | 558 | fi |
553 | 559 | } |
554 | 560 |
|
#
# Look up the Helm release name that installed the standard HyperPod chart.
#
# Reads the "meta.helm.sh/release-name" annotation from the 'aws-hyperpod'
# namespace via kubectl and yq.
#
# Outputs:
#   stdout - the release name only (safe to capture with $(...))
#   stderr - progress and error messages
# Exits:
#   1 when the namespace cannot be fetched or the annotation is missing
#
get_standard_hyperpod_helm_release_name() {
    local namespace_yaml release_name

    # Check kubectl on its own: inside a pipeline its exit status would be
    # masked by yq's.
    if ! namespace_yaml=$(kubectl get namespace aws-hyperpod -o yaml); then
        echo "Error: Namespace 'aws-hyperpod' does not exist. Please be sure to install the HyperPod standard Helm chart (https://github.com/aws/sagemaker-hyperpod-cli/tree/main/helm_chart#step-three)" >&2
        exit 1
    fi

    release_name=$(printf '%s\n' "$namespace_yaml" | yq '.metadata.annotations."meta.helm.sh/release-name"')

    # yq prints the literal string "null" for a missing key, so treat it the
    # same as an empty result rather than returning it as a release name.
    if [ -z "$release_name" ] || [ "$release_name" = "null" ]; then
        echo "Error: Namespace 'aws-hyperpod' has no 'meta.helm.sh/release-name' annotation. Please be sure to install the HyperPod standard Helm chart (https://github.com/aws/sagemaker-hyperpod-cli/tree/main/helm_chart#step-three)" >&2
        exit 1
    fi

    echo "Found Namespace 'aws-hyperpod' installed with Helm release name: $release_name" >&2
    echo "$release_name"
    return 0
}
| 573 | + |
555 | 574 | main() { |
556 | 575 | assert_not_already_installed |
557 | 576 |
|
558 | 577 | ensure_yq_installed |
| 578 | + |
| 579 | + set_script_variables |
559 | 580 |
|
560 | 581 | assert_supported_region |
561 | 582 | assert_addons_enabled add_ons[@] |
562 | | - |
| 583 | + |
563 | 584 | set -e |
564 | 585 | fetch_yaml_and_enable_overrides add_ons[@] |
565 | 586 |
|
|
0 commit comments