From 2d3f6b0bbce74e23e2ce0a731c5e9e95a693e483 Mon Sep 17 00:00:00 2001
From: Felix Delattre
Date: Mon, 17 Nov 2025 14:24:12 +0100
Subject: [PATCH] Added profile for production.

---
 CHANGELOG.md                                 |   1 +
 charts/eoapi/profiles/README.md              |  30 +-
 charts/eoapi/profiles/production.yaml        | 379 +++++++++++++++++++
 charts/eoapi/samples/cloudevents-sink.yaml   |  27 --
 charts/eoapi/samples/values-with-nginx.yaml  |  21 -
 docs/autoscaling.md                          |   2 +-
 docs/examples/values-autoscaling.yaml        | 208 ----------
 docs/examples/values-full-observability.yaml | 297 ---------------
 docs/observability.md                        |   2 +-
 9 files changed, 409 insertions(+), 558 deletions(-)
 create mode 100644 charts/eoapi/profiles/production.yaml
 delete mode 100644 charts/eoapi/samples/cloudevents-sink.yaml
 delete mode 100644 charts/eoapi/samples/values-with-nginx.yaml
 delete mode 100644 docs/examples/values-autoscaling.yaml
 delete mode 100644 docs/examples/values-full-observability.yaml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 255a9e7e..2a03f2a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Tests for autoscaling [#343](https://github.com/developmentseed/eoapi-k8s/pull/343)
 - Added tests for observability stack [#342](https://github.com/developmentseed/eoapi-k8s/pull/342)
 - Added validation to require `postgrescluster.enabled: false` when using external databases [#346](https://github.com/developmentseed/eoapi-k8s/pull/346)
+- Added a production.yaml profile [#354](https://github.com/developmentseed/eoapi-k8s/pull/354)
 
 ### Changed
 
diff --git a/charts/eoapi/profiles/README.md b/charts/eoapi/profiles/README.md
index d0f8fac6..331198c7 100644
--- a/charts/eoapi/profiles/README.md
+++ b/charts/eoapi/profiles/README.md
@@ -9,7 +9,7 @@ Profiles are pre-configured values files that override the default `values.yaml`
 ## Available Profiles
 
 ### Core Profile (`core.yaml`)
-**Use Case:** Production deployments with stable, well-tested services only.
+**Use Case:** Minimal production deployment with stable services only.
 
 **Includes:**
 - PostgreSQL with PgSTAC
@@ -23,9 +23,30 @@ Profiles are pre-configured values files that override the default `values.yaml`
 - Development tools
 - Monitoring stack
 - STAC Browser UI
+- Autoscaling
 
 **Resources:** Production-optimized with higher resource allocations.
 
+### Production Profile (`production.yaml`)
+**Use Case:** Full production deployment with autoscaling and observability.
+
+**Includes:**
+- All core services
+- High availability PostgreSQL (2 replicas)
+- Autoscaling for all API services
+- Complete monitoring stack (Prometheus)
+- Grafana dashboards for observability
+- STAC Browser UI
+- Custom metrics for request-rate scaling
+
+**Configuration:**
+- Autoscaling enabled (CPU and request-rate based)
+- Persistent storage for metrics (30 days retention)
+- Production-optimized resource allocations
+- TLS enabled by default
+
+**Resources:** High resource allocations optimized for production workloads.
+
 ### Experimental Profile (`experimental.yaml`)
 **Use Case:** Development, testing, and evaluation of all eoAPI features.
 
@@ -69,10 +90,13 @@ Profiles are pre-configured values files that override the default `values.yaml`
 Deploy with a single profile:
 
 ```bash
-# Production deployment with core services only
+# Minimal production deployment
 helm install eoapi ./charts/eoapi -f profiles/core.yaml
 
-# Development deployment with all features
+# Full production with autoscaling and observability
+helm install eoapi ./charts/eoapi -f profiles/production.yaml
+
+# Development deployment with all experimental features
 helm install eoapi ./charts/eoapi -f profiles/experimental.yaml
 ```
 
diff --git a/charts/eoapi/profiles/production.yaml b/charts/eoapi/profiles/production.yaml
new file mode 100644
index 00000000..9acaa8eb
--- /dev/null
+++ b/charts/eoapi/profiles/production.yaml
@@ -0,0 +1,379 @@
+# eoAPI Production Profile
+# Production-ready deployment with autoscaling and observability
+# Includes: All core services, monitoring stack, autoscaling, and observability
+#
+# Usage:
+#   helm install eoapi ./charts/eoapi -f profiles/production.yaml
+#   helm upgrade eoapi ./charts/eoapi -f profiles/production.yaml
+#
+# Prerequisites:
+# - Configure ingress.host with your domain
+# - Ensure metrics-server is available in your cluster
+# - Review and adjust resource allocations based on your workload
+
+######################
+# DATABASE
+######################
+# Production PostgreSQL configuration
+postgresql:
+  type: "postgrescluster"
+
+postgrescluster:
+  enabled: true
+  postgresVersion: 16
+  postGISVersion: "3.4"
+  pgBouncerReplicas: 2  # HA setup
+  monitoring: true  # Enable PostgreSQL metrics
+  instances:
+    - name: eoapi
+      replicas: 2  # High availability
+      dataVolumeClaimSpec:
+        accessModes:
+          - "ReadWriteOnce"
+        resources:
+          requests:
+            storage: "50Gi"  # Production storage
+      resources:
+        requests:
+          cpu: "2048m"
+          memory: "4096Mi"
+        limits:
+          cpu: "4096m"
+          memory: "8192Mi"
+  users:
+    - name: postgres
+      databases:
+        - eoapi
+        - postgres
+      options: "SUPERUSER"
+    - name: eoapi
+      databases:
+        - eoapi
+        - postgres
+      options: "CREATEDB CREATEROLE"
+      password:
+        type: AlphaNumeric
+
+######################
+# PGSTAC BOOTSTRAP
+######################
+pgstacBootstrap:
+  enabled: true
+  settings:
+    loadSamples: false  # No samples in production
+
+    # Production PgSTAC settings
+    pgstacSettings:
+      queue_timeout: "10 minutes"
+      use_queue: "false"
+      update_collection_extent: "true"
+      context: "auto"
+      context_estimated_count: "100000"
+      context_estimated_cost: "100000"
+      context_stats_ttl: "1 day"
+
+    resources:
+      requests:
+        cpu: "1024m"
+        memory: "2048Mi"
+      limits:
+        cpu: "2048m"
+        memory: "4096Mi"
+
+######################
+# MONITORING & METRICS
+######################
+# Essential for autoscaling and observability
+monitoring:
+  metricsServer:
+    enabled: true
+    apiService:
+      create: true
+
+  prometheus:
+    enabled: true
+    alertmanager:
+      enabled: false  # Use Grafana alerting instead
+    prometheus-pushgateway:
+      enabled: false
+    kube-state-metrics:
+      enabled: true
+    prometheus-node-exporter:
+      enabled: true
+      resources:
+        limits:
+          cpu: "50m"
+          memory: "64Mi"
+        requests:
+          cpu: "50m"
+          memory: "64Mi"
+    server:
+      persistentVolume:
+        enabled: true
+        size: 30Gi
+      retention: "15d"
+      resources:
+        limits:
+          cpu: "1000m"
+          memory: "2048Mi"
+        requests:
+          cpu: "500m"
+          memory: "1024Mi"
+      service:
+        type: ClusterIP
+
+# Custom metrics for request-rate based autoscaling
+prometheusAdapter:
+  enabled: true
+  resources:
+    limits:
+      cpu: "200m"
+      memory: "256Mi"
+    requests:
+      cpu: "100m"
+      memory: "128Mi"
+
+######################
+# OBSERVABILITY
+######################
+observability:
+  grafana:
+    enabled: true
+    persistence:
+      enabled: true
+      size: 10Gi
+    service:
+      type: ClusterIP
+    resources:
+      limits:
+        cpu: "200m"
+        memory: "256Mi"
+      requests:
+        cpu: "100m"
+        memory: "128Mi"
+    datasources:
+      datasources.yaml:
+        apiVersion: 1
+        datasources:
+          - name: Prometheus
+            type: prometheus
+            url: "http://{{ .Release.Name }}-prometheus-server"
+            access: proxy
+            isDefault: true
+
+######################
+# API SERVICES WITH AUTOSCALING
+######################
+stac:
+  enabled: true
+  ingress:
+    enabled: true
+    path: "/stac"
+  autoscaling:
+    enabled: true
+    minReplicas: 2  # HA minimum
+    maxReplicas: 20
+    type: "both"  # CPU and request rate
+    behavior:
+      scaleDown:
+        stabilizationWindowSeconds: 300  # 5 minutes
+      scaleUp:
+        stabilizationWindowSeconds: 30
+    targets:
+      cpu: 70
+      requestRate: 50000m  # 50 req/sec per pod
+  settings:
+    resources:
+      requests:
+        cpu: "750m"
+        memory: "1536Mi"
+      limits:
+        cpu: "1500m"
+        memory: "3072Mi"
+    envVars:
+      HOST: "0.0.0.0"
+      PORT: "8080"
+      WEB_CONCURRENCY: "6"
+      STAC_FASTAPI_DEBUG: "False"
+      STAC_FASTAPI_CORS_ORIGINS: '["*"]'
+
+raster:
+  enabled: true
+  ingress:
+    enabled: true
+    path: "/raster"
+  autoscaling:
+    enabled: true
+    minReplicas: 2
+    maxReplicas: 15
+    type: "both"
+    behavior:
+      scaleDown:
+        stabilizationWindowSeconds: 300
+      scaleUp:
+        stabilizationWindowSeconds: 60
+    targets:
+      cpu: 60  # Lower due to resource intensity
+      requestRate: 30000m  # 30 req/sec per pod
+  settings:
+    resources:
+      requests:
+        cpu: "1024m"
+        memory: "3072Mi"
+      limits:
+        cpu: "2048m"
+        memory: "6144Mi"
+    envVars:
+      # GDAL optimization
+      GDAL_CACHEMAX: "512"
+      GDAL_DISABLE_READDIR_ON_OPEN: "EMPTY_DIR"
+      GDAL_INGESTED_BYTES_AT_OPEN: "32768"
+      GDAL_HTTP_MERGE_CONSECUTIVE_RANGES: "YES"
+      GDAL_HTTP_MULTIPLEX: "YES"
+      GDAL_HTTP_VERSION: "2"
+      GDAL_HTTP_MAX_RETRY: "3"
+      GDAL_HTTP_RETRY_DELAY: "1"
+      GDAL_SKIP: "VRT"
+      PYTHONWARNINGS: "ignore"
+      VSI_CACHE: "TRUE"
+      VSI_CACHE_SIZE: "5000000"
+      # Uvicorn settings
+      HOST: "0.0.0.0"
+      PORT: "8080"
+      WEB_CONCURRENCY: "4"
+      TITILER_DEBUG: "False"
+
+vector:
+  enabled: true
+  ingress:
+    enabled: true
+    path: "/vector"
+  autoscaling:
+    enabled: true
+    minReplicas: 2
+    maxReplicas: 10
+    type: "both"
+    behavior:
+      scaleDown:
+        stabilizationWindowSeconds: 240
+      scaleUp:
+        stabilizationWindowSeconds: 45
+    targets:
+      cpu: 75
+      requestRate: 75000m  # 75 req/sec per pod
+  settings:
+    resources:
+      requests:
+        cpu: "512m"
+        memory: "1024Mi"
+      limits:
+        cpu: "1024m"
+        memory: "2048Mi"
+    envVars:
+      TIPG_CATALOG_TTL: "300"
+      TIPG_DEBUG: "False"
+      HOST: "0.0.0.0"
+      PORT: "8080"
+      WEB_CONCURRENCY: "6"
+
+# Optional: Enable if you need multidimensional support
+multidim:
+  enabled: false
+  ingress:
+    enabled: true
+    path: "/multidim"
+  autoscaling:
+    enabled: true
+    minReplicas: 1
+    maxReplicas: 5
+    type: "cpu"  # CPU-only due to resource intensity
+    targets:
+      cpu: 50
+  settings:
+    resources:
+      requests:
+        cpu: "2048m"
+        memory: "8192Mi"
+      limits:
+        cpu: "4096m"
+        memory: "16384Mi"
+    envVars:
+      GDAL_CACHEMAX: "1024"
+      GDAL_DISABLE_READDIR_ON_OPEN: "EMPTY_DIR"
+      GDAL_INGESTED_BYTES_AT_OPEN: "32768"
+      GDAL_HTTP_MERGE_CONSECUTIVE_RANGES: "YES"
+      GDAL_HTTP_MULTIPLEX: "YES"
+      GDAL_HTTP_VERSION: "2"
+      PYTHONWARNINGS: "ignore"
+      VSI_CACHE: "TRUE"
+      VSI_CACHE_SIZE: "10000000"
+      HOST: "0.0.0.0"
+      PORT: "8080"
+      WEB_CONCURRENCY: "2"
+
+######################
+# UI COMPONENTS
+######################
+browser:
+  enabled: true
+  replicaCount: 2  # HA setup
+  settings:
+    resources:
+      requests:
+        cpu: "50m"
+        memory: "64Mi"
+      limits:
+        cpu: "100m"
+        memory: "128Mi"
+
+docServer:
+  enabled: true
+
+######################
+# OPTIONAL FEATURES
+######################
+# Disable experimental features in production
+eoapi-notifier:
+  enabled: false
+
+knative:
+  enabled: false
+
+######################
+# INGRESS
+######################
+ingress:
+  enabled: true
+  className: "nginx"
+  pathType: "Prefix"
+  host: "eoapi.example.com"  # CHANGE THIS to your domain
+  tls:
+    enabled: true
+    secretName: eoapi-tls
+    # certManager: true  # Uncomment if using cert-manager
+
+######################
+# SECURITY & RBAC
+######################
+serviceAccount:
+  create: true
+  automount: true
+  annotations: {}
+  # Add cloud provider annotations if needed
+  # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/eoapi-role
+
+######################
+# GENERAL SETTINGS
+######################
+service:
+  port: 8080
+
+# Enable autoscaling globally
+autoscaling:
+  enabled: true
+
+# Connection pooling for better performance
+database:
+  enabled: true
+  connectionPooling:
+    enabled: true
diff --git a/charts/eoapi/samples/cloudevents-sink.yaml b/charts/eoapi/samples/cloudevents-sink.yaml
deleted file mode 100644
index ec9acbe6..00000000
--- a/charts/eoapi/samples/cloudevents-sink.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-apiVersion: serving.knative.dev/v1
-kind: Service
-metadata:
-  name: eoapi-cloudevents-sink
-  namespace: eoapi
-spec:
-  template:
-    metadata:
-      annotations:
-        autoscaling.knative.dev/minScale: "1"
-        autoscaling.knative.dev/maxScale: "1"
-    spec:
-      containers:
-        - name: cloudevents-sink
-          image: gcr.io/knative-samples/helloworld-go
-          ports:
-            - containerPort: 8080
-          env:
-            - name: TARGET
-              value: "CloudEvents Sink"
-          resources:
-            requests:
-              cpu: 100m
-              memory: 128Mi
-            limits:
-              cpu: 200m
-              memory: 256Mi
diff --git a/charts/eoapi/samples/values-with-nginx.yaml b/charts/eoapi/samples/values-with-nginx.yaml
deleted file mode 100644
index fa2100ba..00000000
--- a/charts/eoapi/samples/values-with-nginx.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-ingress:
-  annotations:
-    nginx.ingress.kubernetes.io/rewrite-target: /$2
-  className: nginx
-  enabled: true
-  host: eoapi.local
-  tls:
-    certManager: false
-    enabled: true
-    secretName: eoapi-tls
-namespace: eoapi
-postgrescluster:
-  enabled: true
-  name: pgstac
-resources:
-  limits:
-    cpu: 1000m
-    memory: 1Gi
-  requests:
-    cpu: 200m
-    memory: 256Mi
diff --git a/docs/autoscaling.md b/docs/autoscaling.md
index fc2c2f14..8fe55498 100644
--- a/docs/autoscaling.md
+++ b/docs/autoscaling.md
@@ -169,7 +169,7 @@ vector:
 
 ## Configuration Examples
 
-For complete configuration examples, see the [examples directory](../examples/).
+For complete configuration examples, see the [production profile](../charts/eoapi/profiles/production.yaml).
 
 ## Resource Requirements
 
diff --git a/docs/examples/values-autoscaling.yaml b/docs/examples/values-autoscaling.yaml
deleted file mode 100644
index e971946e..00000000
--- a/docs/examples/values-autoscaling.yaml
+++ /dev/null
@@ -1,208 +0,0 @@
-# Example values for eoAPI with core monitoring and autoscaling enabled
-#
-# To use this configuration:
-#
-# 1. Update the ingress.host to your actual domain
-# 2. Adjust scaling targets based on your load testing results
-# 3. Monitor resource usage and adjust requests/limits accordingly
-# 4. Consider enabling TLS for production deployments
-#
-# IMPORTANT: This configuration enables monitoring components that are
-# disabled by default. This is required for autoscaling to work.
-#
-# For observability and dashboards, install the separate eoapi-observability chart:
-#   helm install eoapi-obs eoapi/eoapi-observability --namespace eoapi
-#
-# Load testing recommendations:
-# - Test each service endpoint individually
-# - Monitor HPA metrics: kubectl get hpa -n eoapi -w
-# - Check custom metrics: kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1"
-# - Review Prometheus targets to ensure metrics collection is working
-
-gitSha: "latest"
-
-######################
-# INGRESS
-######################
-ingress:
-  enabled: true
-  className: "nginx"
-  # IMPORTANT: Set a proper hostname for metrics collection
-  # nginx ingress controller requires a specific host (not wildcard) to expose metrics
-  host: "your-eoapi.example.com"  # Replace with your domain
-  tls:
-    enabled: true
-    secretName: eoapi-tls
-
-######################
-# DATABASE
-######################
-# Using default PostgreSQL cluster configuration
-postgrescluster:
-  enabled: true
-  instances:
-    - name: eoapi
-      replicas: 1
-      dataVolumeClaimSpec:
-        accessModes:
-          - "ReadWriteOnce"
-        resources:
-          requests:
-            storage: "50Gi"  # Increased for production workloads
-            cpu: "2048m"  # More CPU for database under load
-            memory: "4096Mi"  # More memory for database performance
-
-######################
-# MONITORING & AUTOSCALING
-######################
-# Essential monitoring components for autoscaling
-monitoring:
-  metricsServer:
-    enabled: true
-    apiService:
-      create: true
-  prometheus:
-    enabled: true
-    alertmanager:
-      enabled: false
-    prometheus-pushgateway:
-      enabled: false
-    kube-state-metrics:
-      enabled: true
-    prometheus-node-exporter:
-      enabled: true
-      resources:
-        limits:
-          cpu: 10m
-          memory: 30Mi
-        requests:
-          cpu: 10m
-          memory: 30Mi
-    server:
-      service:
-        type: ClusterIP
-
-# Custom metrics for request-rate based autoscaling
-prometheusAdapter:
-  enabled: true
-
-######################
-# SERVICE CONFIGURATION WITH AUTOSCALING
-######################
-
-# STAC API Service
-stac:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 2  # Start with 2 replicas for availability
-    maxReplicas: 20  # Scale up to handle high loads
-    type: "requestRate"  # Scale based on request rate
-    behavior:
-      scaleDown:
-        stabilizationWindowSeconds: 300  # Wait 5 minutes before scaling down
-      scaleUp:
-        stabilizationWindowSeconds: 30  # Scale up quickly (30 seconds)
-    targets:
-      requestRate: 50000m  # Scale when average > 50 requests/second
-  settings:
-    resources:
-      limits:
-        cpu: "1000m"
-        memory: "2048Mi"
-      requests:
-        cpu: "500m"  # Higher baseline for autoscaling
-        memory: "1024Mi"
-
-# Raster Service (TiTiler)
-raster:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 1
-    maxReplicas: 15
-    type: "requestRate"
-    behavior:
-      scaleDown:
-        stabilizationWindowSeconds: 180  # Scale down slower for raster (3 min)
-      scaleUp:
-        stabilizationWindowSeconds: 60  # Scale up moderately fast
-    targets:
-      requestRate: 30000m  # Scale when average > 30 requests/second (raster is more resource intensive)
-  settings:
-    resources:
-      limits:
-        cpu: "1536m"  # Raster processing needs more CPU
-        memory: "6144Mi"  # Raster processing needs more memory
-      requests:
-        cpu: "768m"
-        memory: "3072Mi"
-    envVars:
-      # Optimized GDAL settings for autoscaling
-      GDAL_CACHEMAX: "512"  # Increased cache for better performance
-      WEB_CONCURRENCY: "8"  # More workers for higher throughput
-
-# Vector Service (TIPG)
-vector:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 1
-    maxReplicas: 10
-    type: "requestRate"
-    behavior:
-      scaleDown:
-        stabilizationWindowSeconds: 240
-      scaleUp:
-        stabilizationWindowSeconds: 45
-    targets:
-      requestRate: 75000m  # Vector is typically lighter, can handle more requests
-  settings:
-    resources:
-      limits:
-        cpu: "1000m"
-        memory: "2048Mi"
-      requests:
-        cpu: "512m"
-        memory: "1024Mi"
-
-# Multidimensional Service (optional)
-multidim:
-  enabled: false  # Disabled by default
-  autoscaling:
-    enabled: true
-    minReplicas: 1
-    maxReplicas: 8
-    type: "requestRate"
-    targets:
-      requestRate: 25000m  # Conservative scaling for multidim
-  settings:
-    resources:
-      limits:
-        cpu: "2048m"  # Multidim can be very CPU intensive
-        memory: "8192Mi"  # Large memory requirements for multidim data
-      requests:
-        cpu: "1024m"
-        memory: "4096Mi"
-
-######################
-# STAC BROWSER
-######################
-browser:
-  enabled: true
-  replicaCount: 2  # Static replicas (browser is just static files)
-
-######################
-# PGSTAC BOOTSTRAP
-######################
-pgstacBootstrap:
-  enabled: true
-  settings:
-    loadSamples: false  # Disable sample data for production
-  resources:
-    requests:
-      cpu: "1024m"
-      memory: "2048Mi"
-    limits:
-      cpu: "1024m"
-      memory: "2048Mi"
diff --git a/docs/examples/values-full-observability.yaml b/docs/examples/values-full-observability.yaml
deleted file mode 100644
index da0cf202..00000000
--- a/docs/examples/values-full-observability.yaml
+++ /dev/null
@@ -1,297 +0,0 @@
-# Example values for eoAPI with full observability stack
-#
-# This configuration provides comprehensive observability including:
-# - Core metrics collection and autoscaling (included in main chart)
-# - Persistent Prometheus storage with 30-day retention
-# - Advanced HPA policies with both CPU and request-rate scaling
-# - Production-ready resource allocations
-# - High availability setup with multiple replicas
-#
-# To deploy the full stack:
-#
-# 1. Deploy main chart with monitoring:
-#    helm install eoapi eoapi/eoapi -f values-full-observability.yaml --namespace eoapi --create-namespace
-#
-# 2. Deploy observability chart separately:
-#    helm install eoapi-obs eoapi/eoapi-observability --namespace eoapi
-#
-# 3. Optional: Configure external integrations
-#    - DataDog: Set up prometheus scraping
-#    - New Relic: Deploy NR Kubernetes integration
-#    - External Grafana: Point to the exposed Prometheus service
-#
-# Monitoring endpoints (if LoadBalancer is used):
-# - Prometheus: http://<EXTERNAL-IP>:9090
-# - Grafana: http://<EXTERNAL-IP> (from observability chart)
-#
-# Security considerations:
-# - Use internal LoadBalancers for Prometheus in production
-# - Set up proper RBAC for service accounts
-# - Configure network policies to restrict access
-# - Enable TLS for all external endpoints
-#
-# Performance tuning:
-# - Monitor actual resource usage and adjust requests/limits
-# - Tune HPA scaling policies based on traffic patterns
-# - Adjust Prometheus retention based on storage costs
-# - Consider using remote storage for Prometheus (S3, GCS, etc.)
-
-# Git SHA for deployments (set via CI/CD or command line)
-gitSha: "latest"
-
-######################
-# INGRESS
-######################
-ingress:
-  enabled: true
-  className: "nginx"
-  # IMPORTANT: Set a proper hostname for metrics collection
-  host: "eoapi.example.com"  # Replace with your domain
-  tls:
-    enabled: true
-    secretName: eoapi-tls
-
-######################
-# DATABASE
-######################
-postgrescluster:
-  enabled: true
-  monitoring: true  # Enable PostgreSQL monitoring
-  instances:
-    - name: eoapi
-      replicas: 2  # HA setup for production
-      dataVolumeClaimSpec:
-        accessModes:
-          - "ReadWriteOnce"
-        resources:
-          requests:
-            storage: "100Gi"
-            cpu: "2048m"
-            memory: "8192Mi"
-
-######################
-# COMPREHENSIVE MONITORING
-######################
-monitoring:
-  # Essential components
-  metricsServer:
-    enabled: true
-    apiService:
-      create: true
-
-  # Full Prometheus setup with all collectors
-  prometheus:
-    enabled: true
-    # Keep alertmanager disabled - we'll use Grafana alerting instead
-    alertmanager:
-      enabled: false
-    # Enable pushgateway for advanced metrics
-    prometheus-pushgateway:
-      enabled: true
-    # Full metrics collection
-    kube-state-metrics:
-      enabled: true
-    prometheus-node-exporter:
-      enabled: true
-      # Production-ready resource allocation
-      resources:
-        limits:
-          cpu: 50m
-          memory: 64Mi
-        requests:
-          cpu: 50m
-          memory: 64Mi
-    # Prometheus server configuration
-    server:
-      # Expose Prometheus for external access (optional)
-      service:
-        type: LoadBalancer
-        annotations:
-          service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
-          service.beta.kubernetes.io/aws-load-balancer-internal: "true"
-      # Persistent storage for metrics
-      persistentVolume:
-        enabled: true
-        size: 50Gi
-        storageClass: "gp3"  # Adjust for your cloud provider
-      # Retention and performance settings
-      retention: "30d"  # Keep 30 days of metrics
-      resources:
-        limits:
-          cpu: "2000m"
-          memory: "4096Mi"
-        requests:
-          cpu: "1000m"
-          memory: "2048Mi"
-
-  # Advanced prometheus-adapter configuration
-  prometheusAdapter:
-    enabled: true
-    # Enhanced resource allocation
-    resources:
-      limits:
-        cpu: 250m
-        memory: 256Mi
-      requests:
-        cpu: 100m
-        memory: 128Mi
-
-######################
-# SERVICES WITH ADVANCED AUTOSCALING
-######################
-
-stac:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 3  # Higher minimum for HA
-    maxReplicas: 30
-    type: "both"  # Scale on both CPU and request rate
-    behaviour:
-      scaleDown:
-        stabilizationWindowSeconds: 600  # 10 minutes
-        policies:
-          - type: Percent
-            value: 50
-            periodSeconds: 300
-      scaleUp:
-        stabilizationWindowSeconds: 60
-        policies:
-          - type: Percent
-            value: 100
-            periodSeconds: 60
-    targets:
-      cpu: 70
-      requestRate: 40000m
-  settings:
-    resources:
-      limits:
-        cpu: "1500m"
-        memory: "3072Mi"
-      requests:
-        cpu: "750m"
-        memory: "1536Mi"
-
-raster:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 2
-    maxReplicas: 25
-    type: "both"
-    behaviour:
-      scaleDown:
-        stabilizationWindowSeconds: 900  # 15 minutes - raster workloads are bursty
-      scaleUp:
-        stabilizationWindowSeconds: 120  # 2 minutes
-    targets:
-      cpu: 60  # Lower CPU target due to intensive processing
-      requestRate: 20000m
-  settings:
-    resources:
-      limits:
-        cpu: "2048m"
-        memory: "8192Mi"
-      requests:
-        cpu: "1024m"
-        memory: "4096Mi"
-    envVars:
-      GDAL_CACHEMAX: "1024"  # 1GB cache
-      WEB_CONCURRENCY: "4"  # Conservative for memory usage
-      GDAL_HTTP_MAX_RETRY: "3"
-      GDAL_HTTP_RETRY_DELAY: "1"
-
-vector:
-  enabled: true
-  autoscaling:
-    enabled: true
-    minReplicas: 2
-    maxReplicas: 15
-    type: "both"
-    targets:
-      cpu: 75
-      requestRate: 60000m
-  settings:
-    resources:
-      limits:
-        cpu: "1200m"
-        memory: "2560Mi"
-      requests:
-        cpu: "600m"
-        memory: "1280Mi"
-
-multidim:
-  enabled: true  # Enable for comprehensive setup
-  autoscaling:
-    enabled: true
-    minReplicas: 1
-    maxReplicas: 10
-    type: "cpu"  # CPU-based scaling for multidim workloads
-    targets:
-      cpu: 50  # Very conservative due to resource intensity
-  settings:
-    resources:
-      limits:
-        cpu: "4096m"
-        memory: "16384Mi"  # 16GB for large multidim datasets
-      requests:
-        cpu: "2048m"
-        memory: "8192Mi"
-
-######################
-# STAC BROWSER
-######################
-browser:
-  enabled: true
-  replicaCount: 3  # HA setup
-
-######################
-# PGSTAC BOOTSTRAP
-######################
-pgstacBootstrap:
-  enabled: true
-  settings:
-    loadSamples: false  # No samples in production
-    waitConfig:
-      timeout: 1800  # 30 minutes timeout for large migrations
-  resources:
-    requests:
-      cpu: "1024m"
-      memory: "2048Mi"
-    limits:
-      cpu: "2048m"
-      memory: "4096Mi"
-
-######################
-# INTEGRATED OBSERVABILITY
-######################
-# Grafana dashboards integrated with main chart (replaces separate eoapi-observability chart)
-observability:
-  grafana:
-    enabled: true
-    persistence:
-      enabled: true
-      size: 10Gi
-    service:
-      type: LoadBalancer
-      annotations:
-        service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
-        service.beta.kubernetes.io/aws-load-balancer-internal: "false"
-    resources:
-      limits:
-        cpu: 100m
-        memory: 200Mi
-      requests:
-        cpu: 50m
-        memory: 100Mi
-
-######################
-# ADDITIONAL PRODUCTION SETTINGS
-######################
-
-# Service account with monitoring permissions
-serviceAccount:
-  create: true
-  annotations:
-    # Add cloud provider annotations if needed
-    # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/eoapi-monitoring-role
diff --git a/docs/observability.md b/docs/observability.md
index 05b1d757..6436e3d7 100644
--- a/docs/observability.md
+++ b/docs/observability.md
@@ -47,7 +47,7 @@ For production deployments, use configuration files instead of command-line flag
 helm install eoapi eoapi/eoapi -f values-full-observability.yaml
 ```
 
-**For a complete example**: See [examples/values-full-observability.yaml](../examples/values-full-observability.yaml)
+**For a complete example**: See [production profile](../charts/eoapi/profiles/production.yaml)
 
 ## Architecture & Components
 
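A quick way to exercise the new profile after applying this patch is sketched below; it only reuses commands already quoted in the README and the removed example values files (`helm install ... -f profiles/production.yaml`, `kubectl get hpa -n eoapi -w`, `kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1"`). The `eoapi` release name and namespace are assumptions for your cluster.

```bash
# From the repository root: render the chart with the production profile
# to catch template or values errors before touching the cluster
helm template eoapi ./charts/eoapi -f charts/eoapi/profiles/production.yaml > /dev/null

# Install (or upgrade) the release with the production profile
helm upgrade --install eoapi ./charts/eoapi \
  -f charts/eoapi/profiles/production.yaml \
  --namespace eoapi --create-namespace

# Watch the HPAs created by the autoscaling settings
kubectl get hpa -n eoapi -w

# Confirm the custom request-rate metrics are exposed by prometheus-adapter
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1"
```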