Merge pull request #6863 from mukundansundar/patch-operator
[release-1.11] Patch operator for Argo CD
mukundansundar committed Aug 30, 2023
2 parents 83ca1ab + 6554957 commit 9f99c6a
Showing 8 changed files with 172 additions and 38 deletions.
26 changes: 26 additions & 0 deletions docs/release_notes/v1.11.3.md
@@ -0,0 +1,26 @@
# Dapr 1.11.3

This update includes a fix for operator deployment on Kubernetes.
- [Fixed Dapr Operator CrashLoopBackOff when Argo Controller is enabled](#fixed-dapr-operator-crashloopbackoff-when-argo-controller-is-enabled)

## Fixed Dapr Operator CrashLoopBackOff when Argo Controller is enabled

### Problem

When using the Dapr Operator on Kubernetes with Argo Controller enabled, the operator would fail to start with the following error:

```
time="2023-07-11T17:22:49.937549597Z" level=fatal msg="error running operator: error running operator: failed to wait for rollout caches to sync: timed out waiting for cache to be synced" instance=dapr-operator-7cd6d96dfd-9wbj7 scop │ │ Stream closed EOF for dapr-system/dapr-operator-7cd6d96dfd-9wbj7 (dapr-operator)
```

### Impact

The Dapr Operator would fail to start when Argo Controller is enabled and an Argo rollout is performed.

### Root cause

The Dapr Operator was not creating the full Operator scheme before waiting for the controller manager to start.

### Solution

Updated the Dapr Operator to create the full Operator scheme before waiting for the controller manager to start.
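
For context, here is a minimal sketch of the pattern the fix follows (not the committed code; the actual change is the `buildScheme` function in `pkg/operator/operator.go`, shown in the diff below): every API type the operator watches, including the Argo Rollout type when that reconciler is enabled, is registered on the scheme before the controller manager is created, so the manager's caches can sync for all watched kinds.

```go
// Illustrative sketch only; names and the package main wrapper exist just for
// this example. The real implementation lives in pkg/operator/operator.go.
package main

import (
	"errors"

	argov1alpha1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
	"k8s.io/apimachinery/pkg/runtime"
	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
	ctrl "sigs.k8s.io/controller-runtime"
)

// buildScheme registers every watched type up front, before the manager exists.
func buildScheme(argoEnabled bool) (*runtime.Scheme, error) {
	scheme := runtime.NewScheme()
	errs := []error{clientgoscheme.AddToScheme(scheme)}
	if argoEnabled {
		// Registering the Rollout type here, rather than later in the
		// handler's Init, avoids the rollout-cache sync timeout above.
		errs = append(errs, argov1alpha1.AddToScheme(scheme))
	}
	return scheme, errors.Join(errs...)
}

func main() {
	scheme, err := buildScheme(true)
	if err != nil {
		panic(err)
	}
	// The fully built scheme is handed to the manager at construction time.
	if _, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{Scheme: scheme}); err != nil {
		panic(err)
	}
}
```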
1 change: 0 additions & 1 deletion pkg/operator/handlers/dapr_handler.go
@@ -136,7 +136,6 @@ func (h *DaprHandler) Init(ctx context.Context) error {
}

if h.argoRolloutServiceReconcilerEnabled {
_ = argov1alpha1.AddToScheme(h.Scheme)
err = ctrl.NewControllerManagedBy(h.mgr).
For(&argov1alpha1.Rollout{}).
Owns(&corev1.Service{}).
1 change: 1 addition & 0 deletions pkg/operator/handlers/dapr_handler_test.go
@@ -240,6 +240,7 @@ func TestInit(t *testing.T) {
mgr := dapr_testing.NewMockManager()

_ = scheme.AddToScheme(mgr.GetScheme())
_ = argov1alpha1.AddToScheme(mgr.GetScheme())

handler := NewDaprHandlerWithOptions(mgr, &Options{
ArgoRolloutServiceReconcilerEnabled: true,
44 changes: 31 additions & 13 deletions pkg/operator/operator.go
@@ -23,6 +23,7 @@ import (
"strings"
"time"

argov1alpha1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -86,26 +87,19 @@ type operator struct {
client client.Client
}

var scheme = runtime.NewScheme()

func init() {
_ = clientgoscheme.AddToScheme(scheme)

_ = componentsapi.AddToScheme(scheme)
_ = configurationapi.AddToScheme(scheme)
_ = resiliencyapi.AddToScheme(scheme)
_ = httpendpointsapi.AddToScheme(scheme)
_ = subscriptionsapiV1alpha1.AddToScheme(scheme)
_ = subscriptionsapiV2alpha1.AddToScheme(scheme)
}

// NewOperator returns a new Dapr Operator.
func NewOperator(ctx context.Context, opts Options) (Operator, error) {
conf, err := ctrl.GetConfig()
if err != nil {
return nil, fmt.Errorf("unable to get controller runtime configuration, err: %s", err)
}
watchdogPodSelector := getSideCarInjectedNotExistsSelector()

scheme, err := buildScheme(opts)
if err != nil {
return nil, fmt.Errorf("failed to build operator scheme: %w", err)
}

mgr, err := ctrl.NewManager(conf, ctrl.Options{
Scheme: scheme,
Port: 19443,
@@ -462,3 +456,27 @@ func (r nonLeaderRunnable) Start(ctx context.Context) error {
func (r nonLeaderRunnable) NeedLeaderElection() bool {
return false
}

func buildScheme(opts Options) (*runtime.Scheme, error) {
builders := []func(*runtime.Scheme) error{
clientgoscheme.AddToScheme,
componentsapi.AddToScheme,
configurationapi.AddToScheme,
resiliencyapi.AddToScheme,
httpendpointsapi.AddToScheme,
subscriptionsapiV1alpha1.AddToScheme,
subscriptionsapiV2alpha1.AddToScheme,
}

if opts.ArgoRolloutServiceReconcilerEnabled {
builders = append(builders, argov1alpha1.AddToScheme)
}

errs := make([]error, len(builders))
scheme := runtime.NewScheme()
for i, builder := range builders {
errs[i] = builder(scheme)
}

return scheme, errors.Join(errs...)
}
28 changes: 28 additions & 0 deletions tests/config/postgres_override.yaml
@@ -0,0 +1,28 @@
global:
postgresql:
auth:
username: postgres
postgresPassword: example
database: dapr_test
primary:
initdb:
scripts:
init.sql: |
CREATE TABLE IF NOT EXISTS configtable (KEY VARCHAR NOT NULL, VALUE VARCHAR NOT NULL, VERSION VARCHAR NOT NULL, METADATA JSON);
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/os
operator: In
values:
- linux
- key: kubernetes.io/arch
operator: In
values:
- amd64
persistence:
enabled: false
tls:
enabled: false
69 changes: 57 additions & 12 deletions tests/dapr_tests.mk
@@ -252,7 +252,7 @@ create-test-namespace:
delete-test-namespace:
kubectl delete namespace $(DAPR_TEST_NAMESPACE)

setup-3rd-party: setup-helm-init setup-test-env-redis setup-test-env-kafka setup-test-env-mongodb setup-test-env-zipkin
setup-3rd-party: setup-helm-init setup-test-env-redis setup-test-env-kafka setup-test-env-mongodb setup-test-env-zipkin setup-test-env-postgres

setup-pubsub-subs-perf-test-components: setup-test-env-rabbitmq setup-test-env-pulsar setup-test-env-mqtt

@@ -317,7 +317,7 @@ test-deps:
# start all e2e tests
test-e2e-all: check-e2e-env test-deps
# Note: we can set -p 2 to run two tests apps at a time, because today we do not share state between
# tests. In the future, if we add any tests that modify global state (such as dapr config), we'll
# tests. In the future, if we add any tests that modify global state (such as dapr config), we'll
# have to be sure and run them after the main test suite, so as not to alter the state of a running
# test
# Note2: use env variable DAPR_E2E_TEST to pick one e2e test to run.
@@ -441,38 +441,83 @@ delete-test-env-k6:

# install redis to the cluster without password
setup-test-env-redis:
$(HELM) upgrade --install dapr-redis bitnami/redis --wait --timeout 5m0s --namespace $(DAPR_TEST_NAMESPACE) -f ./tests/config/redis_override.yaml
$(HELM) upgrade \
--install dapr-redis bitnami/redis \
--version 17.14.5 \
--wait \
--timeout 5m0s \
--namespace $(DAPR_TEST_NAMESPACE) \
-f ./tests/config/redis_override.yaml

delete-test-env-redis:
${HELM} del dapr-redis --namespace ${DAPR_TEST_NAMESPACE}

# install kafka to the cluster
setup-test-env-kafka:
$(HELM) upgrade --install dapr-kafka bitnami/kafka -f ./tests/config/kafka_override.yaml --namespace $(DAPR_TEST_NAMESPACE) --timeout 10m0s
$(HELM) upgrade \
--install dapr-kafka bitnami/kafka \
--version 23.0.7 \
-f ./tests/config/kafka_override.yaml \
--namespace $(DAPR_TEST_NAMESPACE) \
--timeout 10m0s

# install rabbitmq to the cluster
setup-test-env-rabbitmq:
$(HELM) upgrade --install rabbitmq bitnami/rabbitmq --set auth.username='admin' --set auth.password='admin' --namespace $(DAPR_TEST_NAMESPACE) --timeout 10m0s
$(HELM) upgrade \
--install rabbitmq bitnami/rabbitmq \
--version 12.0.9 \
--set auth.username='admin' \
--set auth.password='admin' \
--namespace $(DAPR_TEST_NAMESPACE) \
--timeout 10m0s

# install mqtt to the cluster
setup-test-env-mqtt:
$(HELM) repo add emqx https://repos.emqx.io/charts
$(HELM) repo add emqx https://repos.emqx.io/charts
$(HELM) repo update
$(HELM) upgrade --install perf-test-emqx emqx/emqx --namespace $(DAPR_TEST_NAMESPACE) --timeout 10m0s
$(HELM) upgrade \
--install perf-test-emqx emqx/emqx \
--version 5.1.4 \
--namespace $(DAPR_TEST_NAMESPACE) \
--timeout 10m0s

# install pulsar to the cluster
setup-test-env-pulsar:
$(HELM) repo add apache https://pulsar.apache.org/charts
$(HELM) repo update
$(HELM) upgrade --install perf-test-pulsar apache/pulsar --namespace $(DAPR_TEST_NAMESPACE) --timeout 10m0s
$(HELM) upgrade \
--install perf-test-pulsar apache/pulsar \
--version 3.0.0 \
--namespace $(DAPR_TEST_NAMESPACE) \
--timeout 10m0s

# delete kafka from cluster
delete-test-env-kafka:
$(HELM) del dapr-kafka --namespace $(DAPR_TEST_NAMESPACE)

# install mongodb to the cluster without password
setup-test-env-mongodb:
$(HELM) upgrade --install dapr-mongodb bitnami/mongodb -f ./tests/config/mongodb_override.yaml --namespace $(DAPR_TEST_NAMESPACE) --wait --timeout 5m0s
$(HELM) upgrade \
--install dapr-mongodb bitnami/mongodb \
--version 13.16.2 \
-f ./tests/config/mongodb_override.yaml \
--namespace $(DAPR_TEST_NAMESPACE) \
--wait \
--timeout 5m0s

# install postgres to the cluster
setup-test-env-postgres:
$(HELM) upgrade \
--install dapr-postgres bitnami/postgresql \
--version 12.8.0 \
-f ./tests/config/postgres_override.yaml \
--namespace $(DAPR_TEST_NAMESPACE) \
--wait \
--timeout 5m0s

# delete postgres from cluster
delete-test-env-postgres:
$(HELM) del dapr-postgres --namespace $(DAPR_TEST_NAMESPACE)

# delete mongodb from cluster
delete-test-env-mongodb:
@@ -485,7 +530,7 @@ delete-test-env-zipkin:
$(KUBECTL) delete -f ./tests/config/zipkin.yaml -n $(DAPR_TEST_NAMESPACE)

# Setup the test environment by installing components
setup-test-env: setup-test-env-kafka setup-test-env-redis setup-test-env-mongodb setup-test-env-k6 setup-test-env-zipkin
setup-test-env: setup-test-env-kafka setup-test-env-redis setup-test-env-mongodb setup-test-env-postgres setup-test-env-k6 setup-test-env-zipkin

save-dapr-control-plane-k8s-resources:
mkdir -p '$(DAPR_CONTAINER_LOG_PATH)'
@@ -580,13 +625,13 @@ describe-kind-env:
export DAPR_TEST_REGISTRY=$${DAPR_TEST_REGISTRY:-localhost:5000/dapr}\n\
export DAPR_TAG=dev\n\
export DAPR_NAMESPACE=dapr-tests"
delete-kind:
docker stop kind-registry && docker rm kind-registry || echo "Could not delete registry."
kind delete cluster --name kind
ifeq ($(OS),Windows_NT)
ifeq ($(OS),Windows_NT)
detected_OS := windows
else
detected_OS := $(shell sh -c 'uname 2>/dev/null || echo Unknown' | tr '[:upper:]' '[:lower:]')
25 changes: 25 additions & 0 deletions tests/test-infra/azure-aks-diagnostic.bicep
@@ -43,5 +43,30 @@ resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' = {
}
}

resource storageManagementPolicies 'Microsoft.Storage/storageAccounts/managementPolicies@2023-01-01' = {
name: 'blobPolicy'
parent: storageAccount
properties: {
policy: {
rules: [
{
enabled: true
name: 'Delete blob after 15 days'
type: 'Lifecycle'
definition: {
actions: {
baseBlob: {
delete: {
daysAfterModificationGreaterThan: 15
}
}
}
}
}
]
}
}
}

output diagLogAnalyticsWorkspaceResourceId string = logAnalyticsWorkspace.id
output diagStorageResourceId string = storageAccount.id
16 changes: 4 additions & 12 deletions tests/test-infra/azure-aks.bicep
@@ -43,7 +43,7 @@ param diagStorageResourceId string = ''
var osDiskSizeGB = 0

// Version of Kubernetes
var kubernetesVersion = '1.25.5'
var kubernetesVersion = '1.26'

resource containerRegistry 'Microsoft.ContainerRegistry/registries@2019-05-01' = {
name: '${namePrefix}acr'
@@ -82,7 +82,7 @@ var networkProfileLinux = {
networkPlugin: 'kubenet'
}

resource aks 'Microsoft.ContainerService/managedClusters@2021-07-01' = {
resource aks 'Microsoft.ContainerService/managedClusters@2023-05-01' = {
location: location
name: '${namePrefix}-aks'
properties: {
@@ -184,8 +184,8 @@ resource aks 'Microsoft.ContainerService/managedClusters@2021-07-01' = {
}
tags: {}
sku: {
name: 'Basic'
tier: 'Paid'
name: 'Base'
tier: 'Standard'
}
identity: {
type: 'SystemAssigned'
@@ -252,18 +252,10 @@ resource aksDiagnosticStorage 'Microsoft.Insights/diagnosticSettings@2021-05-01-
{
category: 'kube-apiserver'
enabled: true
retentionPolicy: {
days: 15
enabled: true
}
}
{
category: 'kube-audit'
enabled: true
retentionPolicy: {
days: 15
enabled: true
}
}
]
storageAccountId: diagStorageResourceId