From 7dff24ab9ec9f32f59ebf742241c7a75f5fa8a93 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 23 Jul 2021 10:50:48 -0400 Subject: [PATCH 1/4] Refresh cluster config on startup and remove unnecessary cluster dependency --- cmd/activator/main.go | 26 +++++++++----------------- cmd/autoscaler/main.go | 24 ++++++++---------------- manager/install.sh | 11 +++++++++++ manager/manifests/activator.yaml.j2 | 10 ---------- manager/manifests/autoscaler.yaml.j2 | 10 ---------- 5 files changed, 28 insertions(+), 53 deletions(-) diff --git a/cmd/activator/main.go b/cmd/activator/main.go index 7396bb890e..2dbe794c6b 100644 --- a/cmd/activator/main.go +++ b/cmd/activator/main.go @@ -32,7 +32,6 @@ import ( "github.com/cortexlabs/cortex/pkg/lib/k8s" "github.com/cortexlabs/cortex/pkg/lib/logging" "github.com/cortexlabs/cortex/pkg/lib/telemetry" - "github.com/cortexlabs/cortex/pkg/types/clusterconfig" "github.com/cortexlabs/cortex/pkg/types/userconfig" "go.uber.org/zap" istioinformers "istio.io/client-go/pkg/informers/externalversions" @@ -43,12 +42,11 @@ import ( func main() { var ( - port int - adminPort int - inCluster bool - autoscalerURL string - namespace string - clusterConfigPath string + port int + adminPort int + inCluster bool + autoscalerURL string + namespace string ) flag.IntVar(&port, "port", 8000, "port where the activator server will be exposed") @@ -59,7 +57,6 @@ func main() { "kubernetes namespace where the cortex APIs are deployed "+ "(can be set through the CORTEX_NAMESPACE env variable)", ) - flag.StringVar(&clusterConfigPath, "cluster-config", "", "cluster config path") flag.Parse() log := logging.GetLogger() @@ -72,16 +69,9 @@ func main() { log.Fatal("--autoscaler-url is a required option") case namespace == "": log.Fatal("--namespace is a required option") - case clusterConfigPath == "": - log.Fatal("--cluster-config flag is required") } - clusterConfig, err := clusterconfig.NewForFile(clusterConfigPath) - if err != nil { - exit(log, err) - } - - awsClient, err := aws.NewForRegion(clusterConfig.Region) + awsClient, err := aws.New() if err != nil { exit(log, err) } @@ -91,8 +81,10 @@ func main() { exit(log, err) } + telemetryEnabled := os.Getenv("CORTEX_TELEMETRY_DISABLE") != "false" + err = telemetry.Init(telemetry.Config{ - Enabled: clusterConfig.Telemetry, + Enabled: telemetryEnabled, UserID: userID, Properties: map[string]string{ "kind": userconfig.RealtimeAPIKind.String(), diff --git a/cmd/autoscaler/main.go b/cmd/autoscaler/main.go index e0aceeac79..2d85e4cead 100644 --- a/cmd/autoscaler/main.go +++ b/cmd/autoscaler/main.go @@ -32,7 +32,6 @@ import ( "github.com/cortexlabs/cortex/pkg/lib/k8s" "github.com/cortexlabs/cortex/pkg/lib/logging" "github.com/cortexlabs/cortex/pkg/lib/telemetry" - "github.com/cortexlabs/cortex/pkg/types/clusterconfig" "github.com/cortexlabs/cortex/pkg/types/userconfig" "github.com/gorilla/mux" promapi "github.com/prometheus/client_golang/api" @@ -49,11 +48,10 @@ import ( func main() { var ( - port int - inCluster bool - prometheusURL string - namespace string - clusterConfigPath string + port int + inCluster bool + prometheusURL string + namespace string ) flag.IntVar(&port, "port", 8000, "port where the autoscaler server will be exposed") @@ -65,7 +63,6 @@ func main() { "kubernetes namespace where the cortex APIs are deployed "+ "(can be set through the CORTEX_NAMESPACE env variable)", ) - flag.StringVar(&clusterConfigPath, "cluster-config", "", "cluster config path") flag.Parse() log := logging.GetLogger() @@ -78,16 +75,9 @@ func main() { log.Fatal("--prometheus-url is a required option") case namespace == "": log.Fatal("--namespace is a required option") - case clusterConfigPath == "": - log.Fatal("--cluster-config flag is required") } - clusterConfig, err := clusterconfig.NewForFile(clusterConfigPath) - if err != nil { - exit(log, err) - } - - awsClient, err := aws.NewForRegion(clusterConfig.Region) + awsClient, err := aws.New() if err != nil { exit(log, err) } @@ -97,8 +87,10 @@ func main() { exit(log, err) } + telemetryEnabled := os.Getenv("CORTEX_TELEMETRY_DISABLE") != "false" + err = telemetry.Init(telemetry.Config{ - Enabled: clusterConfig.Telemetry, + Enabled: telemetryEnabled, UserID: userID, Properties: map[string]string{ "kind": userconfig.RealtimeAPIKind.String(), diff --git a/manager/install.sh b/manager/install.sh index 64374a2596..0601f6d130 100755 --- a/manager/install.sh +++ b/manager/install.sh @@ -103,6 +103,8 @@ function cluster_configure() { python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/cluster-autoscaler.yaml.j2 | kubectl apply -f - >/dev/null echo "✓" + restart_controller_manager + restart_operator validate_cortex @@ -276,6 +278,15 @@ function start_controller_manager() { echo "✓" } +function restart_controller_manager() { + echo -n "○ restarting controller manager " + + kubectl scale deployment operator-controller-manager --replicas=0 - >/dev/null + kubectl scale deployment operator-controller-manager --replicas=1 - >/dev/null + + echo "✓" +} + function resize_nodegroups() { if [ -z "$CORTEX_NODEGROUP_NAMES_TO_SCALE" ]; then return diff --git a/manager/manifests/activator.yaml.j2 b/manager/manifests/activator.yaml.j2 index eeea185b0c..147bfb507b 100644 --- a/manager/manifests/activator.yaml.j2 +++ b/manager/manifests/activator.yaml.j2 @@ -81,7 +81,6 @@ spec: - "--port=8000" - "--autoscaler-url=http://autoscaler.default:8000" - "--namespace=default" - - "--cluster-config=/configs/cluster/cluster.yaml" ports: - name: http containerPort: 8000 @@ -111,15 +110,6 @@ spec: envFrom: - configMapRef: name: env-vars - volumeMounts: - - mountPath: /configs/cluster/cluster.yaml - name: cluster-config - subPath: cluster.yaml - volumes: - - configMap: - defaultMode: 420 - name: cluster-config - name: cluster-config --- apiVersion: v1 diff --git a/manager/manifests/autoscaler.yaml.j2 b/manager/manifests/autoscaler.yaml.j2 index 842552f31a..768929623a 100644 --- a/manager/manifests/autoscaler.yaml.j2 +++ b/manager/manifests/autoscaler.yaml.j2 @@ -102,16 +102,6 @@ spec: envFrom: - configMapRef: name: env-vars - volumeMounts: - - mountPath: /configs/cluster/cluster.yaml - name: cluster-config - subPath: cluster.yaml - volumes: - - configMap: - defaultMode: 420 - name: cluster-config - name: cluster-config - --- apiVersion: v1 From 830f1bade649ac299902bb0fe513fe23a8514b54 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 23 Jul 2021 11:38:45 -0400 Subject: [PATCH 2/4] Fix conditions --- cmd/activator/main.go | 3 ++- cmd/autoscaler/main.go | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cmd/activator/main.go b/cmd/activator/main.go index 2dbe794c6b..f59f4e446f 100644 --- a/cmd/activator/main.go +++ b/cmd/activator/main.go @@ -23,6 +23,7 @@ import ( "os" "os/signal" "strconv" + "strings" "time" "github.com/cortexlabs/cortex/pkg/activator" @@ -81,7 +82,7 @@ func main() { exit(log, err) } - telemetryEnabled := os.Getenv("CORTEX_TELEMETRY_DISABLE") != "false" + telemetryEnabled := strings.ToLower(os.Getenv("CORTEX_TELEMETRY_DISABLE")) != "true" err = telemetry.Init(telemetry.Config{ Enabled: telemetryEnabled, diff --git a/cmd/autoscaler/main.go b/cmd/autoscaler/main.go index 2d85e4cead..71e8bd034e 100644 --- a/cmd/autoscaler/main.go +++ b/cmd/autoscaler/main.go @@ -24,6 +24,7 @@ import ( "os" "os/signal" "strconv" + "strings" "time" "github.com/cortexlabs/cortex/pkg/autoscaler" @@ -87,7 +88,7 @@ func main() { exit(log, err) } - telemetryEnabled := os.Getenv("CORTEX_TELEMETRY_DISABLE") != "false" + telemetryEnabled := strings.ToLower(os.Getenv("CORTEX_TELEMETRY_DISABLE")) != "true" err = telemetry.Init(telemetry.Config{ Enabled: telemetryEnabled, From 45b1328ee5094a1721160d68712ac889aafa78ec Mon Sep 17 00:00:00 2001 From: vishal Date: Mon, 26 Jul 2021 09:47:23 -0400 Subject: [PATCH 3/4] Push fixes --- manager/install.sh | 4 ++-- manager/manifests/autoscaler.yaml.j2 | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/manager/install.sh b/manager/install.sh index 0601f6d130..64e7ece0c2 100755 --- a/manager/install.sh +++ b/manager/install.sh @@ -281,8 +281,8 @@ function start_controller_manager() { function restart_controller_manager() { echo -n "○ restarting controller manager " - kubectl scale deployment operator-controller-manager --replicas=0 - >/dev/null - kubectl scale deployment operator-controller-manager --replicas=1 - >/dev/null + kubectl scale deployment operator-controller-manager --replicas=0 >/dev/null + kubectl scale deployment operator-controller-manager --replicas=1 >/dev/null echo "✓" } diff --git a/manager/manifests/autoscaler.yaml.j2 b/manager/manifests/autoscaler.yaml.j2 index 768929623a..ce875b24c3 100644 --- a/manager/manifests/autoscaler.yaml.j2 +++ b/manager/manifests/autoscaler.yaml.j2 @@ -84,7 +84,6 @@ spec: - "--port=8000" - "--prometheus-url=http://prometheus.prometheus:9090" - "--namespace=default" - - "--cluster-config=/configs/cluster/cluster.yaml" ports: - containerPort: 8000 livenessProbe: From c4aeb5d479359c7bce2971ff30d3267c6bd8d256 Mon Sep 17 00:00:00 2001 From: vishal Date: Mon, 26 Jul 2021 14:10:57 -0400 Subject: [PATCH 4/4] In place update --- manager/install.sh | 3 +-- pkg/crds/config/manager/manager.yaml | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/manager/install.sh b/manager/install.sh index 64e7ece0c2..0f1ce822fc 100755 --- a/manager/install.sh +++ b/manager/install.sh @@ -281,8 +281,7 @@ function start_controller_manager() { function restart_controller_manager() { echo -n "○ restarting controller manager " - kubectl scale deployment operator-controller-manager --replicas=0 >/dev/null - kubectl scale deployment operator-controller-manager --replicas=1 >/dev/null + kubectl rollout restart deployments/operator-controller-manager >/dev/null echo "✓" } diff --git a/pkg/crds/config/manager/manager.yaml b/pkg/crds/config/manager/manager.yaml index 28520fd891..9134f8bd68 100644 --- a/pkg/crds/config/manager/manager.yaml +++ b/pkg/crds/config/manager/manager.yaml @@ -11,6 +11,9 @@ spec: matchLabels: control-plane: controller-manager replicas: 1 + strategy: + rollingUpdate: + maxSurge: 0 template: metadata: labels: