diff --git a/examples/cloud-deployment/README.md b/examples/cloud-deployment/README.md
index 989ba173b..bf1e4cd60 100644
--- a/examples/cloud-deployment/README.md
+++ b/examples/cloud-deployment/README.md
@@ -90,7 +90,22 @@ cd scripts
 ./deploy.sh --container-tool podman
 ```
 
-Note that using Kind with Podman on Linux may have some occasional issues due to Kind's experimental support for Podman. In our testing, a reboot normally solves this.
+Note that using Kind with Podman on Linux may have some occasional issues due to Kind's experimental support for Podman. In our testing, a reboot normally solves this.
+
+**Troubleshooting entity operator timeout:**
+
+In some environments (particularly Linux with Podman), the Kafka entity operator may not start properly, causing deployment to time out while waiting for Kafka to be ready. If you encounter this issue, you can skip the entity operator wait:
+
+```bash
+export SKIP_ENTITY_OPERATOR_WAIT=true
+./deploy.sh --container-tool podman
+```
+
+This tells the script to:
+- Check only the Kafka broker pod (not the full Kafka resource with entity operator)
+- Poll the Kafka broker directly to verify topic creation (instead of waiting for the topic operator)
+
+The entity operator manages topic and user resources, but the broker handles the actual message streaming. Skipping the entity operator wait does not affect the demo's core functionality.
 
 The script will:
 - Create Kind cluster with local registry support (if not already exists)
diff --git a/examples/cloud-deployment/scripts/deploy.sh b/examples/cloud-deployment/scripts/deploy.sh
index 4c7afeb4a..e267f3302 100755
--- a/examples/cloud-deployment/scripts/deploy.sh
+++ b/examples/cloud-deployment/scripts/deploy.sh
@@ -219,34 +219,93 @@ echo -e "${GREEN}✓ PostgreSQL deployed${NC}"
 echo ""
 echo "Deploying Kafka..."
 kubectl apply -f ../k8s/02-kafka.yaml
-echo "Waiting for Kafka to be ready (using KRaft mode, typically 2-3 minutes)..."
+echo "Waiting for Kafka to be ready (using KRaft mode, typically 2-3 minutes. Timeout is 10 minutes)..."
 
-# Monitor progress while waiting
-for i in {1..60}; do
-    echo "Checking Kafka status (attempt $i/60)..."
-    kubectl get kafka -n kafka -o wide 2>/dev/null || true
-    kubectl get pods -n kafka -l strimzi.io/cluster=a2a-kafka 2>/dev/null || true
+# SKIP_ENTITY_OPERATOR_WAIT=true is an opt-in workaround: only the broker pod is
+# awaited instead of the full Kafka resource (which includes the entity operator).
+# The ':-' default keeps the test valid when the variable is unset, even under 'set -u'.
+if [ "${SKIP_ENTITY_OPERATOR_WAIT:-}" = "true" ]; then
+    echo -e "${YELLOW}⚠ SKIP_ENTITY_OPERATOR_WAIT is set - checking broker pod only${NC}"
 
-    if kubectl wait --for=condition=Ready kafka/a2a-kafka -n kafka --timeout=10s 2>/dev/null; then
-        echo -e "${GREEN}✓ Kafka deployed${NC}"
-        break
-    fi
+    # Poll the broker pod: 60 attempts, each up to a 5s 'kubectl wait' plus a 5s sleep (roughly 10 minutes at most)
+    for i in {1..60}; do
+        echo "Checking Kafka broker status (attempt $i/60)..."
+        kubectl get pods -n kafka -l strimzi.io/cluster=a2a-kafka 2>/dev/null || true
 
-    if [ $i -eq 60 ]; then
-        echo -e "${RED}ERROR: Timeout waiting for Kafka${NC}"
-        kubectl describe kafka/a2a-kafka -n kafka
-        kubectl get events -n kafka --sort-by='.lastTimestamp'
-        exit 1
-    fi
-done
+        if kubectl wait --for=condition=Ready pod/a2a-kafka-broker-0 -n kafka --timeout=5s 2>/dev/null; then
+            echo -e "${GREEN}✓ Kafka broker pod is ready${NC}"
+            echo -e "${YELLOW}⚠ Entity operator may not be ready, but this does not affect functionality${NC}"
+            break
+        fi
+
+        if [ "$i" -eq 60 ]; then
+            echo -e "${RED}ERROR: Timeout waiting for Kafka broker${NC}"
+            kubectl get pods -n kafka -l strimzi.io/cluster=a2a-kafka
+            kubectl describe pod a2a-kafka-broker-0 -n kafka 2>/dev/null || true
+            exit 1
+        fi
+
+        sleep 5
+    done
+else
+    echo -e "${YELLOW} If waiting for Kafka times out, run ./cleanup.sh, and retry having set 'SKIP_ENTITY_OPERATOR_WAIT=true'${NC}"
+    # Standard wait for the full Kafka resource (includes entity operator):
+    # 60 attempts, each bounded by the 10s 'kubectl wait' timeout (about 10 minutes)
+    for i in {1..60}; do
+        echo "Checking Kafka status (attempt $i/60)..."
+        kubectl get kafka -n kafka -o wide 2>/dev/null || true
+        kubectl get pods -n kafka -l strimzi.io/cluster=a2a-kafka 2>/dev/null || true
+
+        if kubectl wait --for=condition=Ready kafka/a2a-kafka -n kafka --timeout=10s 2>/dev/null; then
+            echo -e "${GREEN}✓ Kafka deployed${NC}"
+            break
+        fi
+
+        if [ "$i" -eq 60 ]; then
+            echo -e "${RED}ERROR: Timeout waiting for Kafka${NC}"
+            kubectl describe kafka/a2a-kafka -n kafka
+            kubectl get events -n kafka --sort-by='.lastTimestamp'
+            exit 1
+        fi
+    done
+fi
 
 # Create Kafka Topic for event replication
 echo ""
 echo "Creating Kafka topic for event replication..."
 kubectl apply -f ../k8s/03-kafka-topic.yaml
-echo "Waiting for Kafka topic to be ready..."
-kubectl wait --for=condition=Ready kafkatopic/a2a-replicated-events -n kafka --timeout=60s
-echo -e "${GREEN}✓ Kafka topic created${NC}"
+
+if [ "${SKIP_ENTITY_OPERATOR_WAIT:-}" = "true" ]; then
+    echo -e "${YELLOW}⚠ SKIP_ENTITY_OPERATOR_WAIT is set - polling Kafka broker for topic${NC}"
+    echo " Topic operator may not be ready, waiting for broker to create topic. This check can take several minutes..."
+
+    # Wait for topic to actually exist in Kafka broker (not just CRD).
+    # grep -F/-x matches the whole listed name literally, so a longer topic name
+    # that merely contains "a2a-replicated-events" is not mistaken for it.
+    for i in {1..30}; do
+        if kubectl exec a2a-kafka-broker-0 -n kafka -- \
+            /opt/kafka/bin/kafka-topics.sh --list --bootstrap-server localhost:9092 2>/dev/null | \
+            grep -Fxq "a2a-replicated-events"; then
+            echo -e "${GREEN}✓ Topic exists in Kafka broker${NC}"
+            break
+        fi
+        if [ "$i" -eq 30 ]; then
+            echo -e "${RED}ERROR: Topic not found in broker after 30 attempts${NC}"
+            exit 1
+        fi
+        sleep 2
+    done
+else
+    echo "Waiting for Kafka topic to be ready..."
+    if kubectl wait --for=condition=Ready kafkatopic/a2a-replicated-events -n kafka --timeout=60s; then
+        echo -e "${GREEN}✓ Kafka topic created${NC}"
+    else
+        echo -e "${RED}ERROR: Timeout waiting for Kafka topic${NC}"
+        echo -e "${YELLOW}The topic operator may not be ready in this environment.${NC}"
+        echo -e "${YELLOW}Run ./cleanup.sh, then retry with: export SKIP_ENTITY_OPERATOR_WAIT=true${NC}"
+        exit 1
+    fi
+fi
 
 # Deploy Agent ConfigMap
 echo ""