In [None]:
def _string_or_env_ref():
    """Build a fresh sub-schema: either a plain string or an EnvVarRef object.

    A new dict is returned on every call so the occurrences inside the
    schema never share (and cannot accidentally co-mutate) one object.
    """
    return {
        "oneOf": [
            {"type": "string"},
            {"$ref": "#/definitions/EnvVarRef"},
        ]
    }


def _connection_choice():
    """Build a fresh sub-schema: a ConnectionConfig or an S3Config (oneOf)."""
    return {
        "oneOf": [
            {"$ref": "#/definitions/ConnectionConfig"},
            {"$ref": "#/definitions/S3Config"},
        ]
    }


# JSON Schema (draft-07) titled "ETL Job Schema", kept as a plain Python dict.
# Reusable pieces live under "definitions"; the top-level object schema at the
# bottom wires them together through "$ref" pointers.
example_etl = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "ETL Job Schema",
    "definitions": {
        # Object with a required "env" string and an optional "default" string.
        "EnvVarRef": {
            "type": "object",
            "required": ["env"],
            "properties": {
                "env": {"type": "string"},
                "default": {"type": "string"},
            },
        },
        # Closed vocabularies referenced by the "type" fields further down.
        "SourceType": {
            "type": "string",
            "enum": ["postgres", "mysql", "csv", "s3", "api"],
        },
        "TransformationType": {
            "type": "string",
            "enum": ["map", "filter", "aggregate", "join", "window"],
        },
        # Host/port style connection; "password" may be inline or an EnvVarRef.
        "ConnectionConfig": {
            "type": "object",
            "required": ["host", "port", "username", "password"],
            "properties": {
                "host": {"type": "string"},
                "port": {"type": "integer"},
                "username": {"type": "string"},
                "password": _string_or_env_ref(),
                "database": {"type": "string"},
                "ssl_mode": {"type": "string", "default": "prefer"},
            },
        },
        # Bucket/key/region are mandatory; both AWS credential fields are
        # optional and may be inline strings or EnvVarRef objects.
        "S3Config": {
            "type": "object",
            "required": ["bucket", "key", "region"],
            "properties": {
                "bucket": {"type": "string"},
                "key": {"type": "string"},
                "region": {"type": "string"},
                "aws_access_key_id": _string_or_env_ref(),
                "aws_secret_access_key": _string_or_env_ref(),
            },
        },
        "Source": {
            "type": "object",
            "required": ["name", "type", "connection", "schema"],
            "properties": {
                "name": {"type": "string"},
                "type": {"$ref": "#/definitions/SourceType"},
                "connection": _connection_choice(),
                "query": {"type": "string"},
                # Free-form object whose values must all be strings.
                "schema": {
                    "type": "object",
                    "additionalProperties": {"type": "string"},
                },
            },
        },
        "Transform": {
            "type": "object",
            "required": ["name", "type", "config", "dependencies"],
            "properties": {
                "name": {"type": "string"},
                "type": {"$ref": "#/definitions/TransformationType"},
                # Arbitrary object: any additional properties are allowed.
                "config": {"type": "object", "additionalProperties": True},
                "dependencies": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
        },
        # A sink's "type" reuses the SourceType enum.
        "Sink": {
            "type": "object",
            "required": ["name", "type", "connection"],
            "properties": {
                "name": {"type": "string"},
                "type": {"$ref": "#/definitions/SourceType"},
                "connection": _connection_choice(),
                "table": {"type": "string"},
                "mode": {
                    "type": "string",
                    "default": "append",
                    "enum": ["append", "overwrite", "merge"],
                },
            },
        },
        # Only "cron" is required; the date bounds use the date-time format.
        "Schedule": {
            "type": "object",
            "required": ["cron"],
            "properties": {
                "cron": {"type": "string"},
                "timezone": {"type": "string", "default": "UTC"},
                "start_date": {"type": "string", "format": "date-time"},
                "end_date": {"type": "string", "format": "date-time"},
            },
        },
    },
    # Top-level job document.
    "type": "object",
    "required": ["name", "owner", "sources", "transforms", "sink", "schedule"],
    "properties": {
        "name": {"type": "string"},
        "description": {"type": "string"},
        "owner": {"type": "string"},
        "sources": {
            "type": "array",
            "items": {"$ref": "#/definitions/Source"},
        },
        "transforms": {
            "type": "array",
            "items": {"$ref": "#/definitions/Transform"},
        },
        "sink": {"$ref": "#/definitions/Sink"},
        "schedule": {"$ref": "#/definitions/Schedule"},
        "tags": {
            "type": "object",
            "additionalProperties": {"type": "string"},
            "default": {},
        },
    },
}


: 

In [None]:
# JSON Schema (draft-07) titled "Data Warehouse Configuration", kept as a
# plain Python dict.
#
# Two things differ from the raw JSON this was pasted from:
#   * JSON's `true` / `false` are spelled `True` / `False`; the lowercase
#     forms are undefined names in Python and raise NameError on evaluation.
#   * The literal is bound to `example_warehouse` so the schema can actually
#     be used after the cell runs, instead of being evaluated and discarded.
#
# Note: the "pattern" values are ordinary (non-raw) strings, so "\\d" below
# is the two-character regex token \d once Python has parsed the literal.
example_warehouse = {
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Data Warehouse Configuration",
  "description": "Schema for configuring a complete data warehouse deployment",
  "type": "object",
  "required": ["services"],
  "definitions": {
    # Shared by every service entry and by the top-level "resources" property.
    # Inside "requests" both cpu and memory are required; "limits" has no
    # required fields.
    "resourceRequirements": {
      "type": "object",
      "properties": {
        "requests": {
          "type": "object",
          "properties": {
            "cpu": {
              "type": "string",
              "pattern": "^\\d+m?$",
              "description": "CPU request in millicores (e.g., '200m') or cores (e.g., '1')"
            },
            "memory": {
              "type": "string",
              "pattern": "^\\d+[KMGTPEZYkmgtpezy]i?$",
              "description": "Memory request (e.g., '128Mi', '1Gi')"
            }
          },
          "required": ["cpu", "memory"]
        },
        "limits": {
          "type": "object",
          "properties": {
            "cpu": {
              "type": "string",
              "pattern": "^\\d+m?$"
            },
            "memory": {
              "type": "string",
              "pattern": "^\\d+[KMGTPEZYkmgtpezy]i?$"
            }
          }
        }
      }
    },
    # One deployable service; "service_name" is restricted to a fixed enum.
    "serviceConfig": {
      "type": "object",
      "required": ["service_name", "resources"],
      "properties": {
        "service_name": {
          "type": "string",
          "enum": ["airflow", "kafka", "bigquery", "spark", "metastore", "superset"]
        },
        "resources": { "$ref": "#/definitions/resourceRequirements" },
        "version": {
          "type": "string",
          "description": "Service version"
        },
        "config": {
          "type": "object",
          "description": "Service-specific configuration"
        },
        "enabled": {
          "type": "boolean",
          "default": True,
          "description": "Whether this service should be deployed"
        }
      }
    }
  },
  "properties": {
    # The only required top-level property; must contain at least one item.
    "services": {
      "type": "array",
      "items": { "$ref": "#/definitions/serviceConfig" },
      "minItems": 1,
      "description": "List of data warehouse services to deploy"
    },
    "image": {
      "type": "object",
      "required": ["repository", "tag"],
      "properties": {
        "repository": {
          "type": "string",
          "description": "Docker image repository"
        },
        "tag": {
          "type": "string",
          "description": "Docker image tag"
        },
        "pullPolicy": {
          "type": "string",
          "enum": ["Always", "Never", "IfNotPresent"],
          "default": "IfNotPresent"
        }
      }
    },
    "service": {
      "type": "object",
      "properties": {
        # Integers in 1..65535 with no duplicates allowed.
        "ports": {
          "type": "array",
          "items": {
            "type": "integer",
            "minimum": 1,
            "maximum": 65535
          },
          "uniqueItems": True
        },
        "type": {
          "type": "string",
          "enum": ["ClusterIP", "NodePort", "LoadBalancer"],
          "default": "ClusterIP"
        }
      }
    },
    "replicaCount": {
      "type": "integer",
      "minimum": 1,
      "default": 1,
      "description": "Number of replicas for the deployment"
    },
    "resources": { "$ref": "#/definitions/resourceRequirements" },
    "ingress": {
      "type": "object",
      "properties": {
        "enabled": {
          "type": "boolean",
          "default": False
        },
        "external": {
          "type": "boolean",
          "default": False
        },
        "annotations": {
          "type": "object",
          "additionalProperties": {
            "type": "string"
          }
        },
        "hosts": {
          "type": "array",
          "items": {
            "type": "string",
            "format": "hostname"
          }
        },
        "tls": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "secretName": {
                "type": "string"
              },
              "hosts": {
                "type": "array",
                "items": {
                  "type": "string",
                  "format": "hostname"
                }
              }
            }
          }
        }
      }
    },
    "environment": {
      "type": "string",
      "enum": ["development", "staging", "production"],
      "default": "development"
    },
    "monitoring": {
      "type": "object",
      "properties": {
        "enabled": {
          "type": "boolean",
          "default": True
        },
        "prometheus": {
          "type": "object",
          "properties": {
            "enabled": {
              "type": "boolean",
              "default": True
            },
            # Digits followed by a single s, m or h suffix (e.g. the default "30s").
            "scrapeInterval": {
              "type": "string",
              "pattern": "^\\d+[smh]$",
              "default": "30s"
            }
          }
        },
        "grafana": {
          "type": "object",
          "properties": {
            "enabled": {
              "type": "boolean",
              "default": True
            },
            "dashboards": {
              "type": "array",
              "items": {
                "type": "string"
              }
            }
          }
        }
      }
    },
    "security": {
      "type": "object",
      "properties": {
        "authentication": {
          "type": "object",
          "properties": {
            "type": {
              "type": "string",
              "enum": ["basic", "oauth2", "ldap"],
              "default": "basic"
            },
            "oauth2": {
              "type": "object",
              "properties": {
                "provider": {
                  "type": "string",
                  "enum": ["google", "github", "okta"]
                },
                "clientId": {
                  "type": "string"
                },
                "clientSecret": {
                  "type": "string"
                }
              }
            }
          }
        },
        "encryption": {
          "type": "object",
          "properties": {
            "enabled": {
              "type": "boolean",
              "default": True
            },
            "atRest": {
              "type": "boolean",
              "default": True
            },
            "inTransit": {
              "type": "boolean",
              "default": True
            }
          }
        }
      }
    }
  }
}