From 274a964c22c56a0aaf813c85b2fa6b4d1f708181 Mon Sep 17 00:00:00 2001 From: Kabir Khan Date: Tue, 28 Oct 2025 10:55:31 +0000 Subject: [PATCH 1/2] feat: Add optional entity operator wait skip for Kind Add SKIP_ENTITY_OPERATOR_WAIT environment variable to work around entity operator timeout issues in some environments (Linux + Podman). When set to 'true': - Checks only Kafka broker pod readiness (not full Kafka resource) - Polls Kafka broker directly to verify topic creation This is an opt-in workaround that doesn't affect standard deployments. Also improves user messaging: - Added timeout information (10 minutes) - Added proactive hints about workaround if timeout occurs - Set expectations for topic creation timing --- examples/cloud-deployment/README.md | 17 +++- examples/cloud-deployment/scripts/deploy.sh | 89 ++++++++++++++++----- 2 files changed, 85 insertions(+), 21 deletions(-) diff --git a/examples/cloud-deployment/README.md b/examples/cloud-deployment/README.md index 989ba173b..bf1e4cd60 100644 --- a/examples/cloud-deployment/README.md +++ b/examples/cloud-deployment/README.md @@ -90,7 +90,22 @@ cd scripts ./deploy.sh --container-tool podman ``` -Note that using Kind with Podman on Linux may have some occasional issues due to Kind's experimental support for Podman. In our testing, a reboot normally solves this. +Note that using Kind with Podman on Linux may have some occasional issues due to Kind's experimental support for Podman. In our testing, a reboot normally solves this. + +**Troubleshooting entity operator timeout:** + +In some environments (particularly Linux with Podman), the Kafka entity operator may not start properly, causing deployment to time out while waiting for Kafka to be ready. 
If you encounter this issue, you can skip the entity operator wait: + +```bash +export SKIP_ENTITY_OPERATOR_WAIT=true +./deploy.sh --container-tool podman +``` + +This tells the script to: +- Check only the Kafka broker pod (not the full Kafka resource with entity operator) +- Poll the Kafka broker directly to verify topic creation (instead of waiting for the topic operator) + +The entity operator manages topic and user resources, but the broker handles the actual message streaming. Skipping the entity operator wait does not affect the demo's core functionality. The script will: - Create Kind cluster with local registry support (if not already exists) diff --git a/examples/cloud-deployment/scripts/deploy.sh b/examples/cloud-deployment/scripts/deploy.sh index 4c7afeb4a..6b51f476f 100755 --- a/examples/cloud-deployment/scripts/deploy.sh +++ b/examples/cloud-deployment/scripts/deploy.sh @@ -219,34 +219,83 @@ echo -e "${GREEN}✓ PostgreSQL deployed${NC}" echo "" echo "Deploying Kafka..." kubectl apply -f ../k8s/02-kafka.yaml -echo "Waiting for Kafka to be ready (using KRaft mode, typically 2-3 minutes)..." +echo "Waiting for Kafka to be ready (using KRaft mode, typically 2-3 minutes. Timeout is 10 minutes)..." -# Monitor progress while waiting -for i in {1..60}; do - echo "Checking Kafka status (attempt $i/60)..." - kubectl get kafka -n kafka -o wide 2>/dev/null || true - kubectl get pods -n kafka -l strimzi.io/cluster=a2a-kafka 2>/dev/null || true +# Check if we should skip entity operator wait (workaround for some environments) +if [ "${SKIP_ENTITY_OPERATOR_WAIT}" = "true" ]; then + echo -e "${YELLOW}⚠ SKIP_ENTITY_OPERATOR_WAIT is set - checking broker pod only${NC}" - if kubectl wait --for=condition=Ready kafka/a2a-kafka -n kafka --timeout=10s 2>/dev/null; then - echo -e "${GREEN}✓ Kafka deployed${NC}" - break - fi + # Wait for broker pod to be ready (skip entity operator check) + for i in {1..60}; do + echo "Checking Kafka broker status (attempt $i/60)..." 
+ kubectl get pods -n kafka -l strimzi.io/cluster=a2a-kafka 2>/dev/null || true - if [ $i -eq 60 ]; then - echo -e "${RED}ERROR: Timeout waiting for Kafka${NC}" - kubectl describe kafka/a2a-kafka -n kafka - kubectl get events -n kafka --sort-by='.lastTimestamp' - exit 1 - fi -done + if kubectl wait --for=condition=Ready pod/a2a-kafka-broker-0 -n kafka --timeout=5s 2>/dev/null; then + echo -e "${GREEN}✓ Kafka broker pod is ready${NC}" + echo -e "${YELLOW}⚠ Entity operator may not be ready, but this does not affect functionality${NC}" + break + fi + + if [ $i -eq 60 ]; then + echo -e "${RED}ERROR: Timeout waiting for Kafka broker${NC}" + kubectl get pods -n kafka -l strimzi.io/cluster=a2a-kafka + kubectl describe pod a2a-kafka-broker-0 -n kafka 2>/dev/null || true + exit 1 + fi + + sleep 5 + done +else + echo -e "${YELLOW} If waiting for Kafka times out, run ./cleanup.sh, and retry having set 'SKIP_ENTITY_OPERATOR_WAIT=true'${NC}" + # Standard wait for full Kafka resource (includes entity operator) + for i in {1..60}; do + echo "Checking Kafka status (attempt $i/60)..." + kubectl get kafka -n kafka -o wide 2>/dev/null || true + kubectl get pods -n kafka -l strimzi.io/cluster=a2a-kafka 2>/dev/null || true + + if kubectl wait --for=condition=Ready kafka/a2a-kafka -n kafka --timeout=10s 2>/dev/null; then + echo -e "${GREEN}✓ Kafka deployed${NC}" + break + fi + + if [ $i -eq 60 ]; then + echo -e "${RED}ERROR: Timeout waiting for Kafka${NC}" + kubectl describe kafka/a2a-kafka -n kafka + kubectl get events -n kafka --sort-by='.lastTimestamp' + exit 1 + fi + done +fi # Create Kafka Topic for event replication echo "" echo "Creating Kafka topic for event replication..." kubectl apply -f ../k8s/03-kafka-topic.yaml -echo "Waiting for Kafka topic to be ready..." 
-kubectl wait --for=condition=Ready kafkatopic/a2a-replicated-events -n kafka --timeout=60s -echo -e "${GREEN}✓ Kafka topic created${NC}" + +if [ "${SKIP_ENTITY_OPERATOR_WAIT}" = "true" ]; then + echo -e "${YELLOW}⚠ SKIP_ENTITY_OPERATOR_WAIT is set - polling Kafka broker for topic${NC}" + echo " Topic operator may not be ready, waiting for broker to create topic. This check can take several minutes..." + + # Wait for topic to actually exist in Kafka broker (not just CRD) + for i in {1..30}; do + if kubectl exec a2a-kafka-broker-0 -n kafka -- \ + /opt/kafka/bin/kafka-topics.sh --list --bootstrap-server localhost:9092 2>/dev/null | \ + grep -q "a2a-replicated-events"; then + echo -e "${GREEN}✓ Topic exists in Kafka broker${NC}" + break + fi + if [ $i -eq 30 ]; then + echo -e "${RED}ERROR: Topic not found in broker after 30 attempts${NC}" + exit 1 + fi + sleep 2 + done +else + echo "Waiting for Kafka topic to be ready..." + echo -e "${YELLOW} If waiting for Kafka times out, run ./cleanup.sh, and retry having set 'SKIP_ENTITY_OPERATOR_WAIT=true'${NC}" + kubectl wait --for=condition=Ready kafkatopic/a2a-replicated-events -n kafka --timeout=60s + echo -e "${GREEN}✓ Kafka topic created${NC}" +fi # Deploy Agent ConfigMap echo "" From 81042a36e921413f1fe598ee3e23c2e65b09e572 Mon Sep 17 00:00:00 2001 From: Kabir Khan Date: Tue, 28 Oct 2025 11:14:44 +0000 Subject: [PATCH 2/2] Review comment --- examples/cloud-deployment/scripts/deploy.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/cloud-deployment/scripts/deploy.sh b/examples/cloud-deployment/scripts/deploy.sh index 6b51f476f..e267f3302 100755 --- a/examples/cloud-deployment/scripts/deploy.sh +++ b/examples/cloud-deployment/scripts/deploy.sh @@ -292,9 +292,14 @@ if [ "${SKIP_ENTITY_OPERATOR_WAIT}" = "true" ]; then done else echo "Waiting for Kafka topic to be ready..." 
- echo -e "${YELLOW} If waiting for Kafka times out, run ./cleanup.sh, and retry having set 'SKIP_ENTITY_OPERATOR_WAIT=true'${NC}" - kubectl wait --for=condition=Ready kafkatopic/a2a-replicated-events -n kafka --timeout=60s - echo -e "${GREEN}✓ Kafka topic created${NC}" + if kubectl wait --for=condition=Ready kafkatopic/a2a-replicated-events -n kafka --timeout=60s; then + echo -e "${GREEN}✓ Kafka topic created${NC}" + else + echo -e "${RED}ERROR: Timeout waiting for Kafka topic${NC}" + echo -e "${YELLOW}The topic operator may not be ready in this environment.${NC}" + echo -e "${YELLOW}Run ./cleanup.sh, then retry with: export SKIP_ENTITY_OPERATOR_WAIT=true${NC}" + exit 1 + fi fi # Deploy Agent ConfigMap