From 2d3f6b0bbce74e23e2ce0a731c5e9e95a693e483 Mon Sep 17 00:00:00 2001
From: Felix Delattre
Date: Mon, 17 Nov 2025 14:24:12 +0100
Subject: [PATCH] Added profile for production.

---
 CHANGELOG.md                                 |   1 +
 charts/eoapi/profiles/README.md              |  30 +-
 charts/eoapi/profiles/production.yaml        | 379 +++++++++++++++++++
 charts/eoapi/samples/cloudevents-sink.yaml   |  27 --
 charts/eoapi/samples/values-with-nginx.yaml  |  21 -
 docs/autoscaling.md                          |   2 +-
 docs/examples/values-autoscaling.yaml        | 208 ----------
 docs/examples/values-full-observability.yaml | 297 ---------------
 docs/observability.md                        |   2 +-
 9 files changed, 409 insertions(+), 558 deletions(-)
 create mode 100644 charts/eoapi/profiles/production.yaml
 delete mode 100644 charts/eoapi/samples/cloudevents-sink.yaml
 delete mode 100644 charts/eoapi/samples/values-with-nginx.yaml
 delete mode 100644 docs/examples/values-autoscaling.yaml
 delete mode 100644 docs/examples/values-full-observability.yaml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 255a9e7e..2a03f2a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Tests for autoscaling [#343](https://github.com/developmentseed/eoapi-k8s/pull/343)
 - Added tests for observability stack [#342](https://github.com/developmentseed/eoapi-k8s/pull/342)
 - Added validation to require `postgrescluster.enabled: false` when using external databases [#346](https://github.com/developmentseed/eoapi-k8s/pull/346)
+- Added a production.yaml profile [#354](https://github.com/developmentseed/eoapi-k8s/pull/354)
 
 ### Changed
 
diff --git a/charts/eoapi/profiles/README.md b/charts/eoapi/profiles/README.md
index d0f8fac6..331198c7 100644
--- a/charts/eoapi/profiles/README.md
+++ b/charts/eoapi/profiles/README.md
@@ -9,7 +9,7 @@ Profiles are pre-configured values files that override the default `values.yaml`
 ## Available Profiles
 
 ### Core Profile (`core.yaml`)
-**Use Case:** Production deployments with stable, well-tested services only.
+**Use Case:** Minimal production deployment with stable services only.
 
 **Includes:**
 - PostgreSQL with PgSTAC
@@ -23,9 +23,30 @@ Profiles are pre-configured values files that override the default `values.yaml`
 - Development tools
 - Monitoring stack
 - STAC Browser UI
+- Autoscaling
 
 **Resources:** Production-optimized with higher resource allocations.
 
+### Production Profile (`production.yaml`)
+**Use Case:** Full production deployment with autoscaling and observability.
+
+**Includes:**
+- All core services
+- High availability PostgreSQL (2 replicas)
+- Autoscaling for all API services
+- Complete monitoring stack (Prometheus)
+- Grafana dashboards for observability
+- STAC Browser UI
+- Custom metrics for request-rate scaling
+
+**Configuration:**
+- Autoscaling enabled (CPU and request-rate based)
+- Persistent storage for metrics (30 days retention)
+- Production-optimized resource allocations
+- TLS enabled by default
+
+**Resources:** High resource allocations optimized for production workloads.
+
 ### Experimental Profile (`experimental.yaml`)
 **Use Case:** Development, testing, and evaluation of all eoAPI features.
 
@@ -69,10 +90,13 @@ Profiles are pre-configured values files that override the default `values.yaml`
 Deploy with a single profile:
 
 ```bash
-# Production deployment with core services only
+# Minimal production deployment
 helm install eoapi ./charts/eoapi -f profiles/core.yaml
 
-# Development deployment with all features
+# Full production with autoscaling and observability
+helm install eoapi ./charts/eoapi -f profiles/production.yaml
+
+# Development deployment with all experimental features
 helm install eoapi ./charts/eoapi -f profiles/experimental.yaml
 ```
 
diff --git a/charts/eoapi/profiles/production.yaml b/charts/eoapi/profiles/production.yaml
new file mode 100644
index 00000000..9acaa8eb
--- /dev/null
+++ b/charts/eoapi/profiles/production.yaml
@@ -0,0 +1,379 @@
+# eoAPI Production Profile
+# Production-ready deployment with autoscaling and observability
+# Includes: All core services, monitoring stack, autoscaling, and observability
+#
+# Usage:
+#   helm install eoapi ./charts/eoapi -f profiles/production.yaml
+#   helm upgrade eoapi ./charts/eoapi -f profiles/production.yaml
+#
+# Prerequisites:
+# - Configure ingress.host with your domain
+# - Ensure metrics-server is available in your cluster
+# - Review and adjust resource allocations based on your workload
+
+######################
+# DATABASE
+######################
+# Production PostgreSQL configuration
+postgresql:
+  type: "postgrescluster"
+
+postgrescluster:
+  enabled: true
+  postgresVersion: 16
+  postGISVersion: "3.4"
+  pgBouncerReplicas: 2  # HA setup
+  monitoring: true  # Enable PostgreSQL metrics
+  instances:
+    - name: eoapi
+      replicas: 2  # High availability
+      dataVolumeClaimSpec:
+        accessModes:
+          - "ReadWriteOnce"
+        resources:
+          requests:
+            storage: "50Gi"  # Production storage
+      resources:
+        requests:
+          cpu: "2048m"
+          memory: "4096Mi"
+        limits:
+          cpu: "4096m"
+          memory: "8192Mi"
+  users:
+    - name: postgres
+      databases:
+        - eoapi
+        - postgres
+      options: "SUPERUSER"
+    - name: eoapi
+      databases:
+        - eoapi
+        - postgres
+      options: "CREATEDB CREATEROLE"
+      password:
+        type: AlphaNumeric
+
+######################
+# PGSTAC BOOTSTRAP
+######################
+pgstacBootstrap:
+  enabled: true
+  settings:
+    loadSamples: false  # No samples in production
+
+    # Production PgSTAC settings
+    pgstacSettings:
+      queue_timeout: "10 minutes"
+      use_queue: "false"
+      update_collection_extent: "true"
+      context: "auto"
+      context_estimated_count: "100000"
+      context_estimated_cost: "100000"
+      context_stats_ttl: "1 day"
+
+    resources:
+      requests:
+        cpu: "1024m"
+        memory: "2048Mi"
+      limits:
+        cpu: "2048m"
+        memory: "4096Mi"
+
+######################
+# MONITORING & METRICS
+######################
+# Essential for autoscaling and observability
+monitoring:
+  metricsServer:
+    enabled: true
+    apiService:
+      create: true
+
+  prometheus:
+    enabled: true
+    alertmanager:
+      enabled: false  # Use Grafana alerting instead
+    prometheus-pushgateway:
+      enabled: false
+    kube-state-metrics:
+      enabled: true
+    prometheus-node-exporter:
+      enabled: true
+      resources:
+        limits:
+          cpu: "50m"
+          memory: "64Mi"
+        requests:
+          cpu: "50m"
+          memory: "64Mi"
+    server:
+      persistentVolume:
+        enabled: true
+        size: 30Gi
+      retention: "15d"
+      resources:
+        limits:
+          cpu: "1000m"
+          memory: "2048Mi"
+        requests:
+          cpu: "500m"
+          memory: "1024Mi"
+      service:
+        type: ClusterIP
+
+# Custom metrics for request-rate based autoscaling
+prometheusAdapter:
+  enabled: true
+  resources:
+    limits:
+      cpu: "200m"
+      memory: "256Mi"
+    requests:
+      cpu: "100m"
+      memory: "128Mi"
+
+######################
+# OBSERVABILITY
+######################
+observability:
+  grafana:
+    enabled: true
+    persistence:
+      enabled: true
+      size: 10Gi
+    service:
+      type: ClusterIP
+    resources:
+      limits:
+        cpu: "200m"
+        memory: "256Mi"
+      requests:
+        cpu: "100m"
+        memory: "128Mi"
+    datasources:
+      datasources.yaml:
+        apiVersion: 1
+        datasources:
+          - name: Prometheus
+            type: prometheus
+            url: "http://{{ .Release.Name }}-prometheus-server"
+            access: proxy
+            isDefault: true
+
+######################
+# API SERVICES WITH AUTOSCALING
+######################
+stac:
+  enabled: true
+  ingress:
+    enabled: true
+    path: "/stac"
+  autoscaling:
+    enabled: true
+    minReplicas: 2  # HA minimum
+    maxReplicas: 20
+    type: "both"  # CPU and request rate
+    behavior:
+      scaleDown:
+        stabilizationWindowSeconds: 300  # 5 minutes
+      scaleUp:
+        stabilizationWindowSeconds: 30
+    targets:
+      cpu: 70
+      requestRate: 50000m  # 50 req/sec per pod
+  settings:
+    resources:
+      requests:
+        cpu: "750m"
+        memory: "1536Mi"
+      limits:
+        cpu: "1500m"
+        memory: "3072Mi"
+    envVars:
+      HOST: "0.0.0.0"
+      PORT: "8080"
+      WEB_CONCURRENCY: "6"
+      STAC_FASTAPI_DEBUG: "False"
+      STAC_FASTAPI_CORS_ORIGINS: '["*"]'
+
+raster:
+  enabled: true
+  ingress:
+    enabled: true
+    path: "/raster"
+  autoscaling:
+    enabled: true
+    minReplicas: 2
+    maxReplicas: 15
+    type: "both"
+    behavior:
+      scaleDown:
+        stabilizationWindowSeconds: 300
+      scaleUp:
+        stabilizationWindowSeconds: 60
+    targets:
+      cpu: 60  # Lower due to resource intensity
+      requestRate: 30000m  # 30 req/sec per pod
+  settings:
+    resources:
+      requests:
+        cpu: "1024m"
+        memory: "3072Mi"
+      limits:
+        cpu: "2048m"
+        memory: "6144Mi"
+    envVars:
+      # GDAL optimization
+      GDAL_CACHEMAX: "512"
+      GDAL_DISABLE_READDIR_ON_OPEN: "EMPTY_DIR"
+      GDAL_INGESTED_BYTES_AT_OPEN: "32768"
+      GDAL_HTTP_MERGE_CONSECUTIVE_RANGES: "YES"
+      GDAL_HTTP_MULTIPLEX: "YES"
+      GDAL_HTTP_VERSION: "2"
+      GDAL_HTTP_MAX_RETRY: "3"
+      GDAL_HTTP_RETRY_DELAY: "1"
+      GDAL_SKIP: "VRT"
+      PYTHONWARNINGS: "ignore"
+      VSI_CACHE: "TRUE"
+      VSI_CACHE_SIZE: "5000000"
+      # Uvicorn settings
+      HOST: "0.0.0.0"
+      PORT: "8080"
+      WEB_CONCURRENCY: "4"
+      TITILER_DEBUG: "False"
+
+vector:
+  enabled: true
+  ingress:
+    enabled: true
+    path: "/vector"
+  autoscaling:
+    enabled: true
+    minReplicas: 2
+    maxReplicas: 10
+    type: "both"
+    behavior:
+      scaleDown:
+        stabilizationWindowSeconds: 240
+      scaleUp:
+        stabilizationWindowSeconds: 45
+    targets:
+      cpu: 75
+      requestRate: 75000m  # 75 req/sec per pod
+  settings:
+    resources:
+      requests:
+        cpu: "512m"
+        memory: "1024Mi"
+      limits:
+        cpu: "1024m"
+        memory: "2048Mi"
+    envVars:
+      TIPG_CATALOG_TTL: "300"
+      TIPG_DEBUG: "False"
+      HOST: "0.0.0.0"
+      PORT: "8080"
+      WEB_CONCURRENCY: "6"
+
+# Optional: Enable if you need multidimensional support
+multidim:
+  enabled: false
+  ingress:
+    enabled: true
+    path: "/multidim"
+  autoscaling:
+    enabled: true
+    minReplicas: 1
+    maxReplicas: 5
+    type: "cpu"  # CPU-only due to resource intensity
+    targets:
+      cpu: 50
+  settings:
+    resources:
+      requests:
+        cpu: "2048m"
+        memory: "8192Mi"
+      limits:
+        cpu: "4096m"
+        memory: "16384Mi"
+    envVars:
+      GDAL_CACHEMAX: "1024"
+      GDAL_DISABLE_READDIR_ON_OPEN: "EMPTY_DIR"
+      GDAL_INGESTED_BYTES_AT_OPEN: "32768"
+      GDAL_HTTP_MERGE_CONSECUTIVE_RANGES: "YES"
+      GDAL_HTTP_MULTIPLEX: "YES"
+      GDAL_HTTP_VERSION: "2"
+      PYTHONWARNINGS: "ignore"
+      VSI_CACHE: "TRUE"
+      VSI_CACHE_SIZE: "10000000"
+      HOST: "0.0.0.0"
+      PORT: "8080"
+      WEB_CONCURRENCY: "2"
+
+######################
+# UI COMPONENTS
+######################
+browser:
+  enabled: true
+  replicaCount: 2  # HA setup
+  settings:
+    resources:
+      requests:
+        cpu: "50m"
+        memory: "64Mi"
+      limits:
+        cpu: "100m"
+        memory: "128Mi"
+
+docServer:
+  enabled: true
+
+######################
+# OPTIONAL FEATURES
+######################
+# Disable experimental features in production
+eoapi-notifier:
+  enabled: false
+
+knative:
+  enabled: false
+
+######################
+# INGRESS
+######################
+ingress:
+  enabled: true
+  className: "nginx"
+  pathType: "Prefix"
+  host: "eoapi.example.com"  # CHANGE THIS to your domain
+  tls:
+    enabled: true
+    secretName: eoapi-tls
+    # certManager: true  # Uncomment if using cert-manager
+
+######################
+# SECURITY & RBAC
+######################
+serviceAccount:
+  create: true
+  automount: true
+  annotations: {}
+  # Add cloud provider annotations if needed
+  # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/eoapi-role
+
+######################
+# GENERAL SETTINGS
+######################
+service:
+  port: 8080
+
+# Enable autoscaling globally
+autoscaling:
+  enabled: true
+
+# Connection pooling for better performance
+database:
+  enabled: true
+  connectionPooling:
+    enabled: true
diff --git a/charts/eoapi/samples/cloudevents-sink.yaml b/charts/eoapi/samples/cloudevents-sink.yaml
deleted file mode 100644
index ec9acbe6..00000000
--- a/charts/eoapi/samples/cloudevents-sink.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-apiVersion: serving.knative.dev/v1
-kind: Service
-metadata:
-  name: eoapi-cloudevents-sink
-  namespace: eoapi
-spec:
-  template:
-    metadata:
-      annotations:
-        autoscaling.knative.dev/minScale: "1"
-        autoscaling.knative.dev/maxScale: "1"
-    spec:
-      containers:
-        - name: cloudevents-sink
-          image: gcr.io/knative-samples/helloworld-go
-          ports:
-            - containerPort: 8080
-          env:
-            - name: TARGET
-              value: "CloudEvents Sink"
-          resources:
-            requests:
-              cpu: 100m
-              memory: 128Mi
-            limits:
-              cpu: 200m
-              memory: 256Mi
diff --git a/charts/eoapi/samples/values-with-nginx.yaml b/charts/eoapi/samples/values-with-nginx.yaml
deleted file mode 100644
index fa2100ba..00000000
--- a/charts/eoapi/samples/values-with-nginx.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-ingress:
-  annotations:
-    nginx.ingress.kubernetes.io/rewrite-target: /$2
-  className: nginx
-  enabled: true
-  host: eoapi.local
-  tls:
-    certManager: false
-    enabled: true
-    secretName: eoapi-tls
-namespace: eoapi
-postgrescluster:
-  enabled: true
-  name: pgstac
-resources:
-  limits:
-    cpu: 1000m
-    memory: 1Gi
-  requests:
-    cpu: 200m
-    memory: 256Mi
diff --git a/docs/autoscaling.md b/docs/autoscaling.md
index fc2c2f14..8fe55498 100644
--- a/docs/autoscaling.md
+++ b/docs/autoscaling.md
@@ -169,7 +169,7 @@ vector:
 
 ## Configuration Examples
 
-For complete configuration examples, see the [examples directory](../examples/).
+For complete configuration examples, see the [production profile](../charts/eoapi/profiles/production.yaml).
 
 ## Resource Requirements
 
diff --git a/docs/examples/values-autoscaling.yaml b/docs/examples/values-autoscaling.yaml
deleted file mode 100644
index e971946e..00000000
--- a/docs/examples/values-autoscaling.yaml
+++ /dev/null
@@ -1,208 +0,0 @@
-# Example values for eoAPI with core monitoring and autoscaling enabled
-#
-# To use this configuration:
-#
-# 1. Update the ingress.host to your actual domain
-# 2. Adjust scaling targets based on your load testing results
-# 3. Monitor resource usage and adjust requests/limits accordingly
-# 4. Consider enabling TLS for production deployments
-#
-# IMPORTANT: This configuration enables monitoring components that are
-# disabled by default. This is required for autoscaling to work.
-#
-# For observability and dashboards, install the separate eoapi-observability chart:
-#   helm install eoapi-obs eoapi/eoapi-observability --namespace eoapi
-#
-# Load testing recommendations:
-# - Test each service endpoint individually
-# - Monitor HPA metrics: kubectl get hpa -n eoapi -w
-# - Check custom metrics: kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1"
-# - Review Prometheus targets to ensure metrics collection is working
-
-gitSha: "latest"
-
-######################
-# INGRESS
-######################
-ingress:
-  enabled: true
-  className: "nginx"
-  # IMPORTANT: Set a proper hostname for metrics collection
-  # nginx ingress controller requires a specific host (not wildcard) to expose metrics
-  host: "your-eoapi.example.com"  # Replace with your domain
-  tls:
-    enabled: true
-    secretName: eoapi-tls
-
-######################
-# DATABASE
-######################
-# Using default PostgreSQL cluster configuration
-postgrescluster:
-  enabled: true
-  instances:
-    - name: eoapi
-      replicas: 1
-      dataVolumeClaimSpec:
-        accessModes:
-          - "ReadWriteOnce"
-        resources:
-          requests:
-            storage: "50Gi"  # Increased for production workloads
-            cpu: "2048m"  # More CPU for database under load
-            memory: "4096Mi"  # More memory for database performance
-
-######################
-# MONITORING & AUTOSCALING
-######################
-# Essential monitoring components for autoscaling
-monitoring:
-  metricsServer:
-    enabled: true
-    apiService:
-      create: true
-  prometheus:
-    enabled: true
-    alertmanager:
-      enabled: false
-    prometheus-pushgateway:
-      enabled: false
-    kube-state-metrics:
-      enabled: true
-    prometheus-node-exporter:
-      enabled: true
-      resources:
-        limits:
-          cpu: 10m
-          memory: 30Mi
-        requests:
-          cpu: 10m
-          memory: 30Mi
-    server:
-      service:
-        type: ClusterIP
-
-# Custom metrics for request-rate based autoscaling
-prometheusAdapter:
-  enabled: true
-
-######################
-# SERVICE CONFIGURATION WITH AUTOSCALING
-######################
-
-# STAC API Service
-stac:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 2  # Start with 2 replicas for availability
-    maxReplicas: 20  # Scale up to handle high loads
-    type: "requestRate"  # Scale based on request rate
-    behavior:
-      scaleDown:
-        stabilizationWindowSeconds: 300  # Wait 5 minutes before scaling down
-      scaleUp:
-        stabilizationWindowSeconds: 30  # Scale up quickly (30 seconds)
-    targets:
-      requestRate: 50000m  # Scale when average > 50 requests/second
-  settings:
-    resources:
-      limits:
-        cpu: "1000m"
-        memory: "2048Mi"
-      requests:
-        cpu: "500m"  # Higher baseline for autoscaling
-        memory: "1024Mi"
-
-# Raster Service (TiTiler)
-raster:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 1
-    maxReplicas: 15
-    type: "requestRate"
-    behavior:
-      scaleDown:
-        stabilizationWindowSeconds: 180  # Scale down slower for raster (3 min)
-      scaleUp:
-        stabilizationWindowSeconds: 60  # Scale up moderately fast
-    targets:
-      requestRate: 30000m  # Scale when average > 30 requests/second (raster is more resource intensive)
-  settings:
-    resources:
-      limits:
-        cpu: "1536m"  # Raster processing needs more CPU
-        memory: "6144Mi"  # Raster processing needs more memory
-      requests:
-        cpu: "768m"
-        memory: "3072Mi"
-    envVars:
-      # Optimized GDAL settings for autoscaling
-      GDAL_CACHEMAX: "512"  # Increased cache for better performance
-      WEB_CONCURRENCY: "8"  # More workers for higher throughput
-
-# Vector Service (TIPG)
-vector:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 1
-    maxReplicas: 10
-    type: "requestRate"
-    behavior:
-      scaleDown:
-        stabilizationWindowSeconds: 240
-      scaleUp:
-        stabilizationWindowSeconds: 45
-    targets:
-      requestRate: 75000m  # Vector is typically lighter, can handle more requests
-  settings:
-    resources:
-      limits:
-        cpu: "1000m"
-        memory: "2048Mi"
-      requests:
-        cpu: "512m"
-        memory: "1024Mi"
-
-# Multidimensional Service (optional)
-multidim:
-  enabled: false  # Disabled by default
-  autoscaling:
-    enabled: true
-    minReplicas: 1
-    maxReplicas: 8
-    type: "requestRate"
-    targets:
-      requestRate: 25000m  # Conservative scaling for multidim
-  settings:
-    resources:
-      limits:
-        cpu: "2048m"  # Multidim can be very CPU intensive
-        memory: "8192Mi"  # Large memory requirements for multidim data
-      requests:
-        cpu: "1024m"
-        memory: "4096Mi"
-
-######################
-# STAC BROWSER
-######################
-browser:
-  enabled: true
-  replicaCount: 2  # Static replicas (browser is just static files)
-
-######################
-# PGSTAC BOOTSTRAP
-######################
-pgstacBootstrap:
-  enabled: true
-  settings:
-    loadSamples: false  # Disable sample data for production
-  resources:
-    requests:
-      cpu: "1024m"
-      memory: "2048Mi"
-    limits:
-      cpu: "1024m"
-      memory: "2048Mi"
diff --git a/docs/examples/values-full-observability.yaml b/docs/examples/values-full-observability.yaml
deleted file mode 100644
index da0cf202..00000000
--- a/docs/examples/values-full-observability.yaml
+++ /dev/null
@@ -1,297 +0,0 @@
-# Example values for eoAPI with full observability stack
-#
-# This configuration provides comprehensive observability including:
-# - Core metrics collection and autoscaling (included in main chart)
-# - Persistent Prometheus storage with 30-day retention
-# - Advanced HPA policies with both CPU and request-rate scaling
-# - Production-ready resource allocations
-# - High availability setup with multiple replicas
-#
-# To deploy the full stack:
-#
-# 1. Deploy main chart with monitoring:
-#    helm install eoapi eoapi/eoapi -f values-full-observability.yaml --namespace eoapi --create-namespace
-#
-# 2. Deploy observability chart separately:
-#    helm install eoapi-obs eoapi/eoapi-observability --namespace eoapi
-#
-# 3. Optional: Configure external integrations
-#    - DataDog: Set up prometheus scraping
-#    - New Relic: Deploy NR Kubernetes integration
-#    - External Grafana: Point to the exposed Prometheus service
-#
-# Monitoring endpoints (if LoadBalancer is used):
-# - Prometheus: http://<EXTERNAL-IP>:9090
-# - Grafana: http://<EXTERNAL-IP> (from observability chart)
-#
-# Security considerations:
-# - Use internal LoadBalancers for Prometheus in production
-# - Set up proper RBAC for service accounts
-# - Configure network policies to restrict access
-# - Enable TLS for all external endpoints
-#
-# Performance tuning:
-# - Monitor actual resource usage and adjust requests/limits
-# - Tune HPA scaling policies based on traffic patterns
-# - Adjust Prometheus retention based on storage costs
-# - Consider using remote storage for Prometheus (S3, GCS, etc.)
-
-# Git SHA for deployments (set via CI/CD or command line)
-gitSha: "latest"
-
-######################
-# INGRESS
-######################
-ingress:
-  enabled: true
-  className: "nginx"
-  # IMPORTANT: Set a proper hostname for metrics collection
-  host: "eoapi.example.com"  # Replace with your domain
-  tls:
-    enabled: true
-    secretName: eoapi-tls
-
-######################
-# DATABASE
-######################
-postgrescluster:
-  enabled: true
-  monitoring: true  # Enable PostgreSQL monitoring
-  instances:
-    - name: eoapi
-      replicas: 2  # HA setup for production
-      dataVolumeClaimSpec:
-        accessModes:
-          - "ReadWriteOnce"
-        resources:
-          requests:
-            storage: "100Gi"
-            cpu: "2048m"
-            memory: "8192Mi"
-
-######################
-# COMPREHENSIVE MONITORING
-######################
-monitoring:
-  # Essential components
-  metricsServer:
-    enabled: true
-    apiService:
-      create: true
-
-  # Full Prometheus setup with all collectors
-  prometheus:
-    enabled: true
-    # Keep alertmanager disabled - we'll use Grafana alerting instead
-    alertmanager:
-      enabled: false
-    # Enable pushgateway for advanced metrics
-    prometheus-pushgateway:
-      enabled: true
-    # Full metrics collection
-    kube-state-metrics:
-      enabled: true
-    prometheus-node-exporter:
-      enabled: true
-      # Production-ready resource allocation
-      resources:
-        limits:
-          cpu: 50m
-          memory: 64Mi
-        requests:
-          cpu: 50m
-          memory: 64Mi
-    # Prometheus server configuration
-    server:
-      # Expose Prometheus for external access (optional)
-      service:
-        type: LoadBalancer
-        annotations:
-          service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
-          service.beta.kubernetes.io/aws-load-balancer-internal: "true"
-      # Persistent storage for metrics
-      persistentVolume:
-        enabled: true
-        size: 50Gi
-        storageClass: "gp3"  # Adjust for your cloud provider
-      # Retention and performance settings
-      retention: "30d"  # Keep 30 days of metrics
-      resources:
-        limits:
-          cpu: "2000m"
-          memory: "4096Mi"
-        requests:
-          cpu: "1000m"
-          memory: "2048Mi"
-
-  # Advanced prometheus-adapter configuration
-  prometheusAdapter:
-    enabled: true
-    # Enhanced resource allocation
-    resources:
-      limits:
-        cpu: 250m
-        memory: 256Mi
-      requests:
-        cpu: 100m
-        memory: 128Mi
-
-######################
-# SERVICES WITH ADVANCED AUTOSCALING
-######################
-
-stac:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 3  # Higher minimum for HA
-    maxReplicas: 30
-    type: "both"  # Scale on both CPU and request rate
-    behaviour:
-      scaleDown:
-        stabilizationWindowSeconds: 600  # 10 minutes
-        policies:
-          - type: Percent
-            value: 50
-            periodSeconds: 300
-      scaleUp:
-        stabilizationWindowSeconds: 60
-        policies:
-          - type: Percent
-            value: 100
-            periodSeconds: 60
-    targets:
-      cpu: 70
-      requestRate: 40000m
-  settings:
-    resources:
-      limits:
-        cpu: "1500m"
-        memory: "3072Mi"
-      requests:
-        cpu: "750m"
-        memory: "1536Mi"
-
-raster:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 2
-    maxReplicas: 25
-    type: "both"
-    behaviour:
-      scaleDown:
-        stabilizationWindowSeconds: 900  # 15 minutes - raster workloads are bursty
-      scaleUp:
-        stabilizationWindowSeconds: 120  # 2 minutes
-    targets:
-      cpu: 60  # Lower CPU target due to intensive processing
-      requestRate: 20000m
-  settings:
-    resources:
-      limits:
-        cpu: "2048m"
-        memory: "8192Mi"
-      requests:
-        cpu: "1024m"
-        memory: "4096Mi"
-    envVars:
-      GDAL_CACHEMAX: "1024"  # 1GB cache
-      WEB_CONCURRENCY: "4"  # Conservative for memory usage
-      GDAL_HTTP_MAX_RETRY: "3"
-      GDAL_HTTP_RETRY_DELAY: "1"
-
-vector:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 2
-    maxReplicas: 15
-    type: "both"
-    targets:
-      cpu: 75
-      requestRate: 60000m
-  settings:
-    resources:
-      limits:
-        cpu: "1200m"
-        memory: "2560Mi"
-      requests:
-        cpu: "600m"
-        memory: "1280Mi"
-
-multidim:
-  enabled: true  # Enable for comprehensive setup
-  autoscaling:
-    enabled: true
-    minReplicas: 1
-    maxReplicas: 10
-    type: "cpu"  # CPU-based scaling for multidim workloads
-    targets:
-      cpu: 50  # Very conservative due to resource intensity
-  settings:
-    resources:
-      limits:
-        cpu: "4096m"
-        memory: "16384Mi"  # 16GB for large multidim datasets
-      requests:
-        cpu: "2048m"
-        memory: "8192Mi"
-
-######################
-# STAC BROWSER
-######################
-browser:
-  enabled: true
-  replicaCount: 3  # HA setup
-
-######################
-# PGSTAC BOOTSTRAP
-######################
-pgstacBootstrap:
-  enabled: true
-  settings:
-    loadSamples: false  # No samples in production
-    waitConfig:
-      timeout: 1800  # 30 minutes timeout for large migrations
-  resources:
-    requests:
-      cpu: "1024m"
-      memory: "2048Mi"
-    limits:
-      cpu: "2048m"
-      memory: "4096Mi"
-
-######################
-# INTEGRATED OBSERVABILITY
-######################
-# Grafana dashboards integrated with main chart (replaces separate eoapi-observability chart)
-observability:
-  grafana:
-    enabled: true
-    persistence:
-      enabled: true
-      size: 10Gi
-    service:
-      type: LoadBalancer
-      annotations:
-        service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
-        service.beta.kubernetes.io/aws-load-balancer-internal: "false"
-    resources:
-      limits:
-        cpu: 100m
-        memory: 200Mi
-      requests:
-        cpu: 50m
-        memory: 100Mi
-
-######################
-# ADDITIONAL PRODUCTION SETTINGS
-######################
-
-# Service account with monitoring permissions
-serviceAccount:
-  create: true
-  annotations:
-    # Add cloud provider annotations if needed
-    # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/eoapi-monitoring-role
diff --git a/docs/observability.md b/docs/observability.md
index 05b1d757..6436e3d7 100644
--- a/docs/observability.md
+++ b/docs/observability.md
@@ -47,7 +47,7 @@ For production deployments, use configuration files instead of command-line flag
 helm install eoapi eoapi/eoapi -f values-full-observability.yaml
 ```
 
-**For a complete example**: See [examples/values-full-observability.yaml](../examples/values-full-observability.yaml)
+**For a complete example**: See [production profile](../charts/eoapi/profiles/production.yaml)
 
 ## Architecture & Components
 
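A quick way to exercise the new profile after applying this patch is sketched below; it only reuses commands already quoted in the README and the removed example values files (`helm install ... -f profiles/production.yaml`, `kubectl get hpa -n eoapi -w`, `kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1"`). The `eoapi` release name and namespace are assumptions for your cluster.

```bash
# From the repository root: render the chart with the production profile
# to catch template or values errors before touching the cluster
helm template eoapi ./charts/eoapi -f charts/eoapi/profiles/production.yaml > /dev/null

# Install (or upgrade) the release with the production profile
helm upgrade --install eoapi ./charts/eoapi \
  -f charts/eoapi/profiles/production.yaml \
  --namespace eoapi --create-namespace

# Watch the HPAs created by the autoscaling settings
kubectl get hpa -n eoapi -w

# Confirm the custom request-rate metrics are exposed by prometheus-adapter
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1"
```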