diff --git a/modules/aws/aws-infra/README.md b/modules/aws/aws-infra/README.md new file mode 100644 index 0000000..a1c3bd2 --- /dev/null +++ b/modules/aws/aws-infra/README.md @@ -0,0 +1,422 @@ +# AWS Infrastructure Module for Databricks + +A comprehensive, production-ready AWS infrastructure module that provides all necessary resources for Databricks workloads using official AWS Terraform modules and best practices. + +## Overview + +This module creates a complete AWS infrastructure foundation optimized for Databricks, featuring: + +- **🔧 Simplified Configuration**: Uses official `terraform-aws-modules/vpc` for networking +- **🔒 Secure Storage**: S3 buckets with encryption for workspace and Unity Catalog +- **👤 IAM Integration**: Cross-account and Unity Catalog roles with Databricks-generated policies +- **🔗 VPC Endpoints**: Private access to AWS services (S3, STS, Kinesis) +- **🛡️ Network Firewall**: Configurable FQDN and network-based filtering (optional) +- **🌐 Hub-Spoke Architecture**: Transit Gateway with centralized internet egress (optional) +- **🔐 Private Link**: Databricks Private Link endpoints (optional) + +## Architecture + +### Basic Architecture + +``` +┌─────────────────────────────────────────────┐ +│ VPC (10.0.0.0/16) │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Private Sub │ │ Private Sub │ │ +│ │ (AZ-a) │ │ (AZ-b) │ │ +│ │ Databricks │ │ Databricks │ │ +│ └──────┬───────┘ └──────┬───────┘ │ +│ │ │ │ +│ └─────────┬────────┘ │ +│ │ │ +│ ┌─────────▼─────────┐ │ +│ │ NAT Gateway │ │ +│ │ (Public Subnet) │ │ +│ └─────────┬─────────┘ │ +│ │ │ +│ ┌─────────▼─────────┐ │ +│ │ Internet Gateway │ │ +│ └───────────────────┘ │ +└─────────────────────────────────────────────┘ + │ + ▼ + Internet +``` + +### Hub-Spoke Architecture with Firewall + +``` +┌──────────────────────────────────────────────────┐ +│ Spoke VPC (Databricks - 10.0.0.0/16) │ +│ Private Subnets │ +└────────────────┬─────────────────────────────────┘ + │ Transit Gateway + ▼ 
+┌──────────────────────────────────────────────────┐ +│ Hub VPC (10.1.0.0/16) │ +│ │ +│ Private Subnet → Network Firewall → NAT → IGW │ +│ (TGW attach) (Inspection) │ +└──────────────────────────────────────────────────┘ + │ + ▼ + Internet +``` + +## Module Components + +### Core Components (Always Created) +- **networking.tf** - VPC, subnets, security groups, NAT gateway (via AWS VPC module) +- **workspacestorage.tf** - Root S3 bucket for Databricks workspace +- **ucstorage.tf** - Unity Catalog S3 buckets (metastore & data) +- **iam.tf** - IAM roles (cross-account, Unity Catalog, optional instance profiles) +- **vpc-endpoints.tf** - VPC endpoints (S3, STS, Kinesis) via AWS module + +### Conditional Components +- **private-link.tf** - Databricks Private Link (when `enable_private_link = true`) + +### Submodules +- **modules/hub-networking/** - Transit Gateway, Hub VPC, and Network Firewall (when `hub_spoke_architecture = true`) + +## Usage Examples + +### Minimal Configuration + +```hcl +module "databricks_infra" { + source = "./modules/aws/aws-infra" + + prefix = "my-databricks" + region = "us-west-2" + + networking = { + vpc_cidr = "10.0.0.0/16" + availability_zones = ["us-west-2a", "us-west-2b"] + enable_nat_gateway = true + } + + databricks_account_id = "414351767826" # Databricks AWS account + + tags = { + Environment = "production" + } +} +``` + +### With Hub-Spoke and Network Firewall + +```hcl +module "databricks_infra" { + source = "./modules/aws/aws-infra" + + prefix = "my-databricks" + region = "us-west-2" + + networking = { + vpc_cidr = "10.0.0.0/16" + availability_zones = ["us-west-2a", "us-west-2b"] + enable_nat_gateway = true + } + + databricks_account_id = "414351767826" + + # Hub-Spoke Architecture with Firewall + advanced_networking = { + hub_spoke_architecture = true + enable_transit_gateway = true + hub_vpc_cidr = "10.1.0.0/16" + } + + # Network Firewall Configuration + security = { + enable_network_firewall = true + + # Allow specific domains 
+ allowed_fqdns = [ + "*.cloud.databricks.com", + "*.s3.us-west-2.amazonaws.com", + "pypi.org", + "*.pypi.org", + "github.com" + ] + + # Allow specific network rules + allowed_network_rules = [ + { + protocol = "TCP" + source_ip = "$HOME_NET" + destination_ip = "ANY" + destination_port = "443" + }, + { + protocol = "UDP" + source_ip = "$HOME_NET" + destination_ip = "ANY" + destination_port = "53" + } + ] + } + + tags = { + Environment = "production" + } +} +``` + +### With Private Link + +```hcl +module "databricks_infra" { + source = "./modules/aws/aws-infra" + + prefix = "my-databricks" + region = "us-west-2" + + networking = { + vpc_cidr = "10.0.0.0/16" + availability_zones = ["us-west-2a", "us-west-2b"] + enable_nat_gateway = false # Not needed with Private Link + } + + databricks_account_id = "414351767826" + + # Private Link Configuration + security = { + enable_private_link = true + backend_service_name = "com.amazonaws.vpce.us-west-2.vpce-svc-0158114c0c730c3bb" + relay_service_name = "com.amazonaws.vpce.us-west-2.vpce-svc-0dc0e98e4e8a7d1f9" + } + + tags = { + Environment = "production" + } +} +``` + +### With Unity Catalog + +```hcl +module "databricks_infra" { + source = "./modules/aws/aws-infra" + + prefix = "my-databricks" + region = "us-west-2" + + networking = { + vpc_cidr = "10.0.0.0/16" + availability_zones = ["us-west-2a", "us-west-2b"] + enable_nat_gateway = true + } + + databricks_account_id = "414351767826" + + # Unity Catalog Configuration + create_metastore_bucket = true + unity_catalog_account_id = "414351767826" + external_id = "12345678-1234-1234-1234-123456789abc" + + tags = { + Environment = "production" + } +} +``` + +## Inputs + +### Core Configuration + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|----------| +| `prefix` | Prefix for all AWS resources | `string` | - | yes | +| `region` | AWS region for resource deployment | `string` | - | yes | +| `tags` | Common tags for all resources | 
`map(string)` | `{}` | no | + +### Networking Configuration + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|----------| +| `networking.vpc_cidr` | VPC CIDR block | `string` | - | yes | +| `networking.availability_zones` | List of availability zones | `list(string)` | `[]` (auto-detect: up to the first 3 available AZs in the region) | no | +| `networking.enable_nat_gateway` | Enable NAT Gateway for private subnets | `bool` | `true` | no | +| `networking.private_subnet_cidrs` | Custom private subnet CIDRs | `list(string)` | `[]` (auto-calculated from `vpc_cidr`) | no | +| `networking.public_subnet_cidrs` | Custom public subnet CIDRs | `list(string)` | `[]` (auto-calculated from `vpc_cidr`) | no | + +### Storage Configuration + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|----------| +| `create_metastore_bucket` | Create Unity Catalog metastore bucket | `bool` | `false` | no | + +### IAM Configuration + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|----------| +| `create_instance_profiles` | Create IAM instance profiles for Databricks clusters | `bool` | `false` | no | +| `databricks_account_id` | Databricks AWS account ID for cross-account role | `string` | - | yes | +| `external_id` | External ID for Unity Catalog role trust relationship | `string` | `null` | no | +| `unity_catalog_account_id` | Unity Catalog AWS account ID | `string` | `null` | no | +| `roles_to_assume` | Additional IAM role ARNs the cross-account role is allowed to pass (`iam:PassRole`) | `list(string)` | `[]` | no | + +### Security Configuration + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|----------| +| `security.enable_network_firewall` | Enable Network Firewall (also enabled automatically when `advanced_networking.hub_spoke_architecture = true`) | `bool` | `false` | no | +| `security.allowed_fqdns` | List of FQDNs to allow through firewall | `list(string)` | `[]` | no | +| `security.allowed_network_rules` | List of network rules (IP, protocol, port) | `list(object)` | `[]` | no | +| 
`security.enable_private_link` | Enable Databricks Private Link | `bool` | `false` | no | +| `security.backend_service_name` | Backend Private Link service name | `string` | `null` | no | +| `security.relay_service_name` | Relay Private Link service name | `string` | `null` | no | + +### Advanced Networking Configuration + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|----------| +| `advanced_networking.enable_transit_gateway` | Enable Transit Gateway | `bool` | `false` | no | +| `advanced_networking.hub_spoke_architecture` | Enable hub-spoke architecture | `bool` | `false` | no | +| `advanced_networking.hub_vpc_cidr` | CIDR block for hub VPC | `string` | `null` | yes, when `hub_spoke_architecture` or `enable_transit_gateway` is `true` | + +## Outputs + +| Name | Description | +|------|-------------| +| `vpc_id` | ID of the Spoke VPC | +| `root_bucket_name` | Name of the root storage bucket | +| `metastore_bucket_name` | Name of the Unity Catalog metastore bucket (if created) | +| `data_bucket_name` | Name of the Unity Catalog data bucket | +| `cross_account_role_arn` | ARN of the cross-account IAM role for Databricks | +| `cross_account_role_name` | Name of the cross-account IAM role | +| `unity_catalog_role_arn` | ARN of the Unity Catalog IAM role | +| `unity_catalog_role_name` | Name of the Unity Catalog IAM role | + +## Network Firewall Rules + +### FQDN Rules +The firewall filters HTTP and TLS traffic against a domain allowlist, matching each FQDN on the TLS SNI and the HTTP Host header. 
Pass your allowed domains via `security.allowed_fqdns`: + +```hcl +allowed_fqdns = [ + "*.cloud.databricks.com", + "*.s3.us-west-2.amazonaws.com", + "pypi.org", + "*.pypi.org", + "files.pythonhosted.org", + "repo1.maven.org", + "github.com" +] +``` + +### Network Rules +For IP/Protocol/Port-based rules, use `security.allowed_network_rules`: + +```hcl +allowed_network_rules = [ + { + protocol = "TCP" + source_ip = "$HOME_NET" + destination_ip = "ANY" + destination_port = "443" + }, + { + protocol = "UDP" + source_ip = "$HOME_NET" + destination_ip = "ANY" + destination_port = "53" + } +] +``` + +### Default Deny +The firewall includes a default deny rule at the lowest priority. Only explicitly allowed traffic passes through. + +## Traffic Flow + +### Hub-Spoke with Firewall + +1. **Spoke VPC Private Subnet** → Route to Hub VPC via Transit Gateway +2. **Transit Gateway** → Forward to Hub VPC Private Subnet +3. **Hub Private Subnet** → Route to NAT Gateway +4. **NAT Gateway** → Performs SNAT +5. **Hub Public Subnet** → Route to Firewall (if enabled) or IGW +6. **Network Firewall** → Inspect traffic (FQDN, IP, Port rules) +7. **Firewall Subnet** → Route to Internet Gateway +8. 
**Internet Gateway** → Forward to internet + +## Module Dependencies + +This module uses the following official AWS Terraform modules: + +- **[terraform-aws-modules/vpc/aws](https://registry.terraform.io/modules/terraform-aws-modules/vpc/aws)** (~> 5.0) + - VPC, subnets, NAT Gateway, Internet Gateway, route tables +- **[terraform-aws-modules/vpc/aws//modules/vpc-endpoints](https://registry.terraform.io/modules/terraform-aws-modules/vpc/aws)** (~> 5.0) + - VPC endpoints for S3, STS, Kinesis + +## Provider Requirements + +```hcl +terraform { + required_version = ">= 1.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.57.0" + } + databricks = { + source = "databricks/databricks" + version = ">= 1.0.0" + } + time = { + source = "hashicorp/time" + version = ">= 0.9.0" + } + } +} +``` + +## Best Practices + +### Security +- ✅ Use Private Link for maximum security and reduced data egress costs +- ✅ Enable Network Firewall with allowlist-based FQDN rules +- ✅ Use Unity Catalog IAM roles with least privilege +- ✅ Enable VPC endpoints for S3, STS, and Kinesis + +### Networking +- ✅ Use hub-spoke architecture for centralized internet egress and inspection +- ✅ Deploy NAT Gateway for private subnet internet access +- ✅ Use multiple availability zones for high availability +- ✅ Implement proper subnet sizing for growth + +### Cost Optimization +- ✅ Use single NAT Gateway (default) instead of per-AZ for dev/test +- ✅ Consider Private Link to reduce data egress costs +- ✅ Use VPC endpoints to avoid internet gateway data transfer charges + +## Troubleshooting + +### Common Issues + +**Issue**: Terraform validation fails with "Reference to undeclared resource" +- **Solution**: Run `terraform init -upgrade` to download required modules + +**Issue**: Network Firewall blocks Databricks traffic +- **Solution**: Ensure `allowed_fqdns` includes `*.cloud.databricks.com` and required AWS services + +**Issue**: Unity Catalog role trust relationship fails +- 
**Solution**: Verify `external_id` matches your Databricks Unity Catalog configuration + +**Issue**: Private Link endpoints not accessible +- **Solution**: Check security group rules allow traffic from Databricks subnets on ports 443, 5432, 8443-8451 + +## Support + +For issues, questions, or contributions: +- Open an issue in the repository +- Refer to [Databricks AWS documentation](https://docs.databricks.com/administration-guide/cloud-configurations/aws/index.html) +- Check [AWS VPC module documentation](https://registry.terraform.io/modules/terraform-aws-modules/vpc/aws) + +## License + +This module is provided as-is for use with Databricks on AWS. diff --git a/modules/aws/aws-infra/components/iam.tf b/modules/aws/aws-infra/components/iam.tf new file mode 100644 index 0000000..cb64d58 --- /dev/null +++ b/modules/aws/aws-infra/components/iam.tf @@ -0,0 +1,159 @@ +# IAM Component +# Creates cross-account roles, Unity Catalog roles, and associated policies + +# Databricks-generated Cross-Account Assume Role Policy +data "databricks_aws_assume_role_policy" "cross_account" { + external_id = var.databricks_config.account_id +} + +# Cross-Account Role for Databricks (Always created) +resource "aws_iam_role" "cross_account" { + name = local.iam_config.cross_account_role_name + assume_role_policy = data.databricks_aws_assume_role_policy.cross_account.json + + tags = merge(local.common_tags, { + Name = local.iam_config.cross_account_role_name + Purpose = "Databricks Cross-Account Access" + Type = "CrossAccount" + }) +} + +# Cross-Account Role Policy +data "aws_iam_policy_document" "cross_account_policy" { + + # Databricks standard permissions + statement { + sid = "Databricks" + effect = "Allow" + + actions = [ + "ec2:AssociateIamInstanceProfile", + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupEgress", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CancelSpotInstanceRequests", + "ec2:CreateKeyPair", + "ec2:CreateSecurityGroup", + "ec2:CreateTags", + 
"ec2:CreateVolume", + "ec2:DeleteKeyPair", + "ec2:DeleteSecurityGroup", + "ec2:DeleteTags", + "ec2:DeleteVolume", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeInstanceAttribute", + "ec2:DescribeInstanceStatus", + "ec2:DescribeInstances", + "ec2:DescribeInternetGateways", + "ec2:DescribeKeyPairs", + "ec2:DescribeNetworkAcls", + "ec2:DescribePrefixLists", + "ec2:DescribeReservedInstancesOfferings", + "ec2:DescribeRouteTables", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSpotInstanceRequests", + "ec2:DescribeSpotPriceHistory", + "ec2:DescribeSubnets", + "ec2:DescribeVolumes", + "ec2:DescribeVpcAttribute", + "ec2:DescribeVpcs", + "ec2:DetachVolume", + "ec2:DisassociateIamInstanceProfile", + "ec2:ModifyVpcAttribute", + "ec2:ReplaceIamInstanceProfileAssociation", + "ec2:RequestSpotInstances", + "ec2:RevokeSecurityGroupEgress", + "ec2:RevokeSecurityGroupIngress", + "ec2:RunInstances", + "ec2:TerminateInstances" + ] + + resources = ["*"] + } + + # IAM permissions for instance profiles (only if roles_to_assume is populated) + dynamic "statement" { + for_each = length(var.roles_to_assume) > 0 ? 
[1] : [] + + content { + sid = "AllowPassRoleInstanceProfile" + effect = "Allow" + + actions = [ + "iam:PassRole" + ] + + resources = concat( + # Allow passing the cross-account role itself + ["arn:aws:iam::${local.account_id}:role/${local.iam_config.cross_account_role_name}"], + # Allow passing additional roles specified in variables + var.roles_to_assume + ) + } + } +} + +# Attach policy to cross-account role +resource "aws_iam_role_policy" "cross_account_inline" { + name = "databricks-cross-account-policy" + role = aws_iam_role.cross_account.id + policy = data.aws_iam_policy_document.cross_account_policy.json +} + +# Databricks-generated Unity Catalog Assume Role Policy +data "databricks_aws_unity_catalog_assume_role_policy" "unity_catalog" { + aws_account_id = local.account_id + role_name = local.iam_config.unity_catalog_role_name + external_id = var.external_id +} + +# Unity Catalog Role (Always created) +resource "aws_iam_role" "unity_catalog" { + name = local.iam_config.unity_catalog_role_name + assume_role_policy = data.databricks_aws_unity_catalog_assume_role_policy.unity_catalog.json + + tags = merge(local.common_tags, { + Name = local.iam_config.unity_catalog_role_name + Purpose = "Unity Catalog Metastore Access" + Type = "UnityCatalog" + }) +} + +# Databricks-generated Unity Catalog IAM Policy +data "databricks_aws_unity_catalog_policy" "unity_catalog" { + aws_account_id = local.account_id + role_name = local.iam_config.unity_catalog_role_name + bucket_name = var.create_metastore_bucket ? aws_s3_bucket.metastore[0].bucket : "" +} + +# Attach policy to Unity Catalog role +resource "aws_iam_role_policy" "unity_catalog_inline" { + name = "unity-catalog-metastore-policy" + role = aws_iam_role.unity_catalog.id + policy = data.databricks_aws_unity_catalog_policy.unity_catalog.json +} + +# Instance Profiles (optional) +resource "aws_iam_instance_profile" "databricks" { + count = var.create_instance_profiles ? 
1 : 0 + + name = "${var.prefix}-databricks-instance-profile" + role = aws_iam_role.cross_account.name + + tags = merge(local.common_tags, { + Name = "${var.prefix}-databricks-instance-profile" + Purpose = "Databricks Compute Instance Profile" + }) +} + +# Wait for IAM role propagation (Always runs since roles are always created) +resource "time_sleep" "iam_propagation_wait" { + create_duration = "20s" + + depends_on = [ + aws_iam_role.cross_account, + aws_iam_role.unity_catalog, + aws_iam_role_policy.cross_account_inline, + aws_iam_role_policy.unity_catalog_inline + ] +} diff --git a/modules/aws/aws-infra/iam.tf b/modules/aws/aws-infra/iam.tf new file mode 100644 index 0000000..cb64d58 --- /dev/null +++ b/modules/aws/aws-infra/iam.tf @@ -0,0 +1,159 @@ +# IAM Component +# Creates cross-account roles, Unity Catalog roles, and associated policies + +# Databricks-generated Cross-Account Assume Role Policy +data "databricks_aws_assume_role_policy" "cross_account" { + external_id = var.databricks_config.account_id +} + +# Cross-Account Role for Databricks (Always created) +resource "aws_iam_role" "cross_account" { + name = local.iam_config.cross_account_role_name + assume_role_policy = data.databricks_aws_assume_role_policy.cross_account.json + + tags = merge(local.common_tags, { + Name = local.iam_config.cross_account_role_name + Purpose = "Databricks Cross-Account Access" + Type = "CrossAccount" + }) +} + +# Cross-Account Role Policy +data "aws_iam_policy_document" "cross_account_policy" { + + # Databricks standard permissions + statement { + sid = "Databricks" + effect = "Allow" + + actions = [ + "ec2:AssociateIamInstanceProfile", + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupEgress", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CancelSpotInstanceRequests", + "ec2:CreateKeyPair", + "ec2:CreateSecurityGroup", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:DeleteKeyPair", + "ec2:DeleteSecurityGroup", + "ec2:DeleteTags", + "ec2:DeleteVolume", + 
"ec2:DescribeAvailabilityZones", + "ec2:DescribeInstanceAttribute", + "ec2:DescribeInstanceStatus", + "ec2:DescribeInstances", + "ec2:DescribeInternetGateways", + "ec2:DescribeKeyPairs", + "ec2:DescribeNetworkAcls", + "ec2:DescribePrefixLists", + "ec2:DescribeReservedInstancesOfferings", + "ec2:DescribeRouteTables", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSpotInstanceRequests", + "ec2:DescribeSpotPriceHistory", + "ec2:DescribeSubnets", + "ec2:DescribeVolumes", + "ec2:DescribeVpcAttribute", + "ec2:DescribeVpcs", + "ec2:DetachVolume", + "ec2:DisassociateIamInstanceProfile", + "ec2:ModifyVpcAttribute", + "ec2:ReplaceIamInstanceProfileAssociation", + "ec2:RequestSpotInstances", + "ec2:RevokeSecurityGroupEgress", + "ec2:RevokeSecurityGroupIngress", + "ec2:RunInstances", + "ec2:TerminateInstances" + ] + + resources = ["*"] + } + + # IAM permissions for instance profiles (only if roles_to_assume is populated) + dynamic "statement" { + for_each = length(var.roles_to_assume) > 0 ? [1] : [] + + content { + sid = "AllowPassRoleInstanceProfile" + effect = "Allow" + + actions = [ + "iam:PassRole" + ] + + resources = concat( + # Allow passing the cross-account role itself + ["arn:aws:iam::${local.account_id}:role/${local.iam_config.cross_account_role_name}"], + # Allow passing additional roles specified in variables + var.roles_to_assume + ) + } + } +} + +# Attach policy to cross-account role +resource "aws_iam_role_policy" "cross_account_inline" { + name = "databricks-cross-account-policy" + role = aws_iam_role.cross_account.id + policy = data.aws_iam_policy_document.cross_account_policy.json +} + +# Databricks-generated Unity Catalog Assume Role Policy +data "databricks_aws_unity_catalog_assume_role_policy" "unity_catalog" { + aws_account_id = local.account_id + role_name = local.iam_config.unity_catalog_role_name + external_id = var.external_id +} + +# Unity Catalog Role (Always created) +resource "aws_iam_role" "unity_catalog" { + name = 
local.iam_config.unity_catalog_role_name + assume_role_policy = data.databricks_aws_unity_catalog_assume_role_policy.unity_catalog.json + + tags = merge(local.common_tags, { + Name = local.iam_config.unity_catalog_role_name + Purpose = "Unity Catalog Metastore Access" + Type = "UnityCatalog" + }) +} + +# Databricks-generated Unity Catalog IAM Policy +data "databricks_aws_unity_catalog_policy" "unity_catalog" { + aws_account_id = local.account_id + role_name = local.iam_config.unity_catalog_role_name + bucket_name = var.create_metastore_bucket ? aws_s3_bucket.metastore[0].bucket : "" +} + +# Attach policy to Unity Catalog role +resource "aws_iam_role_policy" "unity_catalog_inline" { + name = "unity-catalog-metastore-policy" + role = aws_iam_role.unity_catalog.id + policy = data.databricks_aws_unity_catalog_policy.unity_catalog.json +} + +# Instance Profiles (optional) +resource "aws_iam_instance_profile" "databricks" { + count = var.create_instance_profiles ? 1 : 0 + + name = "${var.prefix}-databricks-instance-profile" + role = aws_iam_role.cross_account.name + + tags = merge(local.common_tags, { + Name = "${var.prefix}-databricks-instance-profile" + Purpose = "Databricks Compute Instance Profile" + }) +} + +# Wait for IAM role propagation (Always runs since roles are always created) +resource "time_sleep" "iam_propagation_wait" { + create_duration = "20s" + + depends_on = [ + aws_iam_role.cross_account, + aws_iam_role.unity_catalog, + aws_iam_role_policy.cross_account_inline, + aws_iam_role_policy.unity_catalog_inline + ] +} diff --git a/modules/aws/aws-infra/locals.tf b/modules/aws/aws-infra/locals.tf new file mode 100644 index 0000000..1249d64 --- /dev/null +++ b/modules/aws/aws-infra/locals.tf @@ -0,0 +1,71 @@ +# Data sources +data "aws_availability_zones" "available" { + state = "available" +} + +data "aws_caller_identity" "current" {} + +data "aws_region" "current" {} + +locals { + # Common tags applied to all resources + common_tags = merge(var.tags, { + 
"ManagedBy" = "terraform" + "Module" = "aws-infra" + "Prefix" = var.prefix + "Region" = var.region + # No "CreatedDate" tag here: timestamp() is re-evaluated on every run, so the tag would show a change on every tagged resource in each plan + }) + + # Availability Zones + availability_zones = length(var.networking.availability_zones) > 0 ? var.networking.availability_zones : slice(data.aws_availability_zones.available.names, 0, min(length(data.aws_availability_zones.available.names), 3)) + + # Subnet CIDR calculations + private_subnet_cidrs = length(var.networking.private_subnet_cidrs) > 0 ? var.networking.private_subnet_cidrs : [ + for i in range(length(local.availability_zones)) : cidrsubnet(var.networking.vpc_cidr, 8, i + 1) + ] + + public_subnet_cidrs = length(var.networking.public_subnet_cidrs) > 0 ? var.networking.public_subnet_cidrs : [ + for i in range(length(local.availability_zones)) : cidrsubnet(var.networking.vpc_cidr, 8, i + 101) + ] + + # Storage configuration - hardcoded bucket names + root_bucket_name = "${var.prefix}-rootbucket" + metastore_bucket_name = "${var.prefix}-metastore" + data_bucket_name = "${var.prefix}-data" + + + # IAM configuration + iam_config = { + cross_account_role_name = "${var.prefix}-cross-account-role" + unity_catalog_role_name = "${var.prefix}-unity-catalog-role" + + # Databricks trust relationship principal + databricks_principals = ["arn:aws:iam::${var.databricks_account_id}:root"] + + # Unity Catalog specific configuration + unity_catalog_external_id = var.external_id + unity_catalog_principal = "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL" + } + + # Enable firewall if explicitly enabled OR if hub-spoke architecture is enabled + enable_firewall = var.security.enable_network_firewall || var.advanced_networking.hub_spoke_architecture + + # Advanced networking configuration + transit_gateway_config = var.advanced_networking.enable_transit_gateway ? 
{ + name = "${var.prefix}-transit-gateway" + hub_vpc_cidr = var.advanced_networking.hub_vpc_cidr + spoke_vpc_cidr = var.networking.vpc_cidr + + # Hub VPC subnets (single subnet for each type) + hub_public_subnet_cidr = cidrsubnet(var.advanced_networking.hub_vpc_cidr, 8, 1) + hub_private_subnet_cidr = cidrsubnet(var.advanced_networking.hub_vpc_cidr, 8, 10) + hub_firewall_subnet_cidr = cidrsubnet(var.advanced_networking.hub_vpc_cidr, 8, 20) + } : null + + # Current account ID + account_id = data.aws_caller_identity.current.account_id + + # Current region name + current_region = data.aws_region.current.id +} diff --git a/modules/aws/aws-infra/main.tf b/modules/aws/aws-infra/main.tf new file mode 100644 index 0000000..d63346a --- /dev/null +++ b/modules/aws/aws-infra/main.tf @@ -0,0 +1,48 @@ +# AWS Infrastructure Module +# This module provides comprehensive AWS infrastructure for Databricks workloads +# All .tf files in this directory are automatically loaded by Terraform + +# Core Components (Always Created): +# - networking.tf - VPC, Subnets, Security Groups, NAT Gateway +# - workspacestorage.tf - Root S3 Bucket for Databricks workspace +# - ucstorage.tf - Unity Catalog S3 Buckets (metastore & data) +# - iam.tf - IAM Roles (cross-account, Unity Catalog, instance profiles) +# - vpc-endpoints.tf - VPC Endpoints (S3, STS, Kinesis) + +# Conditional Components (Created based on variables): +# - private-link.tf - Databricks Private Link (when enable_private_link = true) + +# Submodules: +# - modules/hub-networking - Transit Gateway, Hub VPC, and Network Firewall (when hub_spoke_architecture = true) + +# Configuration: +# - variables.tf - Input variables +# - locals.tf - Local values and computed configurations +# - outputs.tf - Module outputs +# - versions.tf - Provider version requirements + +# Hub Networking Module (Transit Gateway + Firewall) +module "hub_networking" { + count = var.advanced_networking.hub_spoke_architecture ? 
1 : 0 + source = "./modules/hub-networking" + + prefix = var.prefix + region = var.region + + common_tags = local.common_tags + + # Spoke VPC configuration + spoke_vpc_id = module.vpc.vpc_id + spoke_vpc_cidr = var.networking.vpc_cidr + spoke_private_subnet_ids = module.vpc.private_subnets + spoke_route_table_ids = module.vpc.private_route_table_ids + + # Hub VPC configuration + hub_vpc_cidr = var.advanced_networking.hub_vpc_cidr + availability_zones = local.availability_zones + + # Network Firewall configuration + enable_firewall = local.enable_firewall + allowed_fqdns = var.security.allowed_fqdns + allowed_network_rules = var.security.allowed_network_rules +} diff --git a/modules/aws/aws-infra/modules/hub-networking/firewall.tf b/modules/aws/aws-infra/modules/hub-networking/firewall.tf new file mode 100644 index 0000000..64e5d6a --- /dev/null +++ b/modules/aws/aws-infra/modules/hub-networking/firewall.tf @@ -0,0 +1,175 @@ +# Network Firewall Component +# Creates AWS Network Firewall in the Hub VPC with configurable rule groups for advanced security +# Note: Firewall subnets are created in transit-gateway.tf as part of the Hub VPC + +# Rule Group - Allow FQDNs (Domain-based filtering) +resource "aws_networkfirewall_rule_group" "allow_fqdns" { + count = length(var.allowed_fqdns) > 0 ? 1 : 0 + capacity = 100 + name = "${var.prefix}-allow-fqdns-rg" + type = "STATEFUL" + + rule_group { + rule_variables { + ip_sets { + key = "HOME_NET" + ip_set { + definition = [var.spoke_vpc_cidr] + } + } + } + + rules_source { + # Domain-based rules + rules_source_list { + generated_rules_type = "ALLOWLIST" + target_types = ["TLS_SNI", "HTTP_HOST"] + targets = var.allowed_fqdns + } + } + } + + tags = merge(var.common_tags, { + Name = "${var.prefix}-allow-fqdns-rg" + }) +} + +# Rule Group - Allow Network Rules (IP, Protocol, Port based filtering) +resource "aws_networkfirewall_rule_group" "allow_network" { + count = length(var.allowed_network_rules) > 0 ? 
1 : 0 + capacity = 100 + name = "${var.prefix}-allow-network-rg" + type = "STATEFUL" + + rule_group { + rule_variables { + ip_sets { + key = "HOME_NET" + ip_set { + definition = [var.spoke_vpc_cidr] + } + } + } + + rules_source { + # Network-level rules from variable + dynamic "stateful_rule" { + for_each = var.allowed_network_rules + content { + action = "PASS" + header { + direction = "FORWARD" + protocol = upper(stateful_rule.value.protocol) + source = stateful_rule.value.source_ip + source_port = "ANY" + destination = stateful_rule.value.destination_ip + destination_port = stateful_rule.value.destination_port + } + rule_option { + keyword = "sid" + settings = [tostring(stateful_rule.key + 1)] + } + } + } + } + } + + tags = merge(var.common_tags, { + Name = "${var.prefix}-allow-network-rg" + }) +} + +# Rule Group - Deny All Other Traffic (Default Deny) +resource "aws_networkfirewall_rule_group" "deny_all" { + capacity = 10 + name = "${var.prefix}-deny-all-rg" + type = "STATEFUL" + + rule_group { + rule_variables { + ip_sets { + key = "HOME_NET" + ip_set { + definition = [var.spoke_vpc_cidr] + } + } + } + + rules_source { + stateful_rule { + action = "DROP" + header { + direction = "FORWARD" + protocol = "IP" + source = "$HOME_NET" + source_port = "ANY" + destination = "ANY" + destination_port = "ANY" + } + rule_option { + keyword = "sid" + settings = ["100"] + } + } + } + } + + tags = merge(var.common_tags, { + Name = "${var.prefix}-deny-all-rg" + }) +} + +# Firewall Policy +resource "aws_networkfirewall_firewall_policy" "main" { + name = "${var.prefix}-firewall-policy" + + firewall_policy { + # Reference FQDN rule group if FQDNs are provided + dynamic "stateful_rule_group_reference" { + for_each = length(var.allowed_fqdns) > 0 ? 
[1] : [] + content { + priority = 1 + resource_arn = aws_networkfirewall_rule_group.allow_fqdns[0].arn + } + } + + # Reference Network rule group if network rules are provided + dynamic "stateful_rule_group_reference" { + for_each = length(var.allowed_network_rules) > 0 ? [1] : [] + content { + priority = 2 + resource_arn = aws_networkfirewall_rule_group.allow_network[0].arn + } + } + + # Deny all - lowest priority (always applied) + stateful_rule_group_reference { + priority = 100 + resource_arn = aws_networkfirewall_rule_group.deny_all.arn + } + + # Default action for stateless rules - forward to stateful engine + stateless_default_actions = ["aws:forward_to_sfe"] + stateless_fragment_default_actions = ["aws:forward_to_sfe"] + } + + tags = merge(var.common_tags, { + Name = "${var.prefix}-firewall-policy" + }) +} + +# Network Firewall +resource "aws_networkfirewall_firewall" "main" { + name = "${var.prefix}-network-firewall" + firewall_policy_arn = aws_networkfirewall_firewall_policy.main.arn + vpc_id = aws_vpc.hub.id + + # Deploy firewall endpoint in hub VPC firewall subnet + subnet_mapping { + subnet_id = aws_subnet.hub_firewall.id + } + + tags = merge(var.common_tags, { + Name = "${var.prefix}-network-firewall" + }) +} diff --git a/modules/aws/aws-infra/modules/hub-networking/locals.tf b/modules/aws/aws-infra/modules/hub-networking/locals.tf new file mode 100644 index 0000000..69b0f94 --- /dev/null +++ b/modules/aws/aws-infra/modules/hub-networking/locals.tf @@ -0,0 +1,15 @@ +# Hub Networking Module Locals + +locals { + # Transit Gateway configuration + transit_gateway_name = "${var.prefix}-transit-gateway" + + # Hub VPC subnet CIDRs + hub_public_subnet_cidr = cidrsubnet(var.hub_vpc_cidr, 8, 1) + hub_private_subnet_cidr = cidrsubnet(var.hub_vpc_cidr, 8, 10) + hub_firewall_subnet_cidr = cidrsubnet(var.hub_vpc_cidr, 8, 20) + + # Current region (for firewall rules) + current_region = var.region +} + diff --git 
a/modules/aws/aws-infra/modules/hub-networking/outputs.tf b/modules/aws/aws-infra/modules/hub-networking/outputs.tf
new file mode 100644
index 0000000..07ee733
--- /dev/null
+++ b/modules/aws/aws-infra/modules/hub-networking/outputs.tf
@@ -0,0 +1,6 @@
+# Hub Networking Module Outputs - only the hub VPC ID is exported
+
+output "hub_vpc_id" {
+  description = "ID of the hub VPC"
+  value       = aws_vpc.hub.id
+}
diff --git a/modules/aws/aws-infra/modules/hub-networking/transit-gateway.tf b/modules/aws/aws-infra/modules/hub-networking/transit-gateway.tf
new file mode 100644
index 0000000..5dfc0e9
--- /dev/null
+++ b/modules/aws/aws-infra/modules/hub-networking/transit-gateway.tf
@@ -0,0 +1,251 @@
+# Transit Gateway Component
+# Creates the Transit Gateway, the hub VPC (IGW, three subnets, NAT gateway, route tables) and the hub/spoke attachments and routes
+# Transit Gateway - default route table association/propagation are disabled; custom TGW route tables are defined later in this file
+resource "aws_ec2_transit_gateway" "main" {
+  description                     = "Transit Gateway for ${var.prefix}"
+  default_route_table_association = "disable"
+  default_route_table_propagation = "disable"
+  dns_support                     = "enable"
+  tags = merge(var.common_tags, {
+    Name = local.transit_gateway_name
+  })
+}
+# Hub VPC (when hub-spoke architecture is enabled) - no count here; always created once this module is instantiated
+resource "aws_vpc" "hub" {
+  cidr_block           = var.hub_vpc_cidr
+  enable_dns_hostnames = true
+  enable_dns_support   = true
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-vpc"
+    Type = "Hub"
+  })
+}
+# Hub VPC - Internet Gateway
+resource "aws_internet_gateway" "hub" {
+  vpc_id = aws_vpc.hub.id
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-igw"
+  })
+}
+# Hub VPC - Public Subnet (single, in the first availability zone; hosts the NAT gateway)
+resource "aws_subnet" "hub_public" {
+  vpc_id                  = aws_vpc.hub.id
+  cidr_block              = local.hub_public_subnet_cidr
+  availability_zone       = var.availability_zones[0]
+  map_public_ip_on_launch = true
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-public-subnet"
+    Type = "HubPublic"
+    AZ   = var.availability_zones[0]
+  })
+}
+# Hub VPC - Private Subnet (single, in the first availability zone; used for the hub Transit Gateway attachment)
+resource "aws_subnet" "hub_private" {
+  vpc_id            = aws_vpc.hub.id
+  cidr_block        = local.hub_private_subnet_cidr
+  availability_zone = var.availability_zones[0]
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-private-subnet"
+    Type = "HubPrivate"
+    AZ   = var.availability_zones[0]
+  })
+}
+# Hub VPC - Firewall Subnet (single, in the first availability zone; hosts the Network Firewall endpoint)
+resource "aws_subnet" "hub_firewall" {
+  vpc_id            = aws_vpc.hub.id
+  cidr_block        = local.hub_firewall_subnet_cidr
+  availability_zone = var.availability_zones[0]
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-firewall-subnet"
+    Type = "HubFirewall"
+    AZ   = var.availability_zones[0]
+  })
+}
+# Hub VPC - NAT Gateway Elastic IP (single); created only after the Internet Gateway exists
+resource "aws_eip" "hub_nat" {
+  domain = "vpc"
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-nat-eip"
+  })
+  depends_on = [aws_internet_gateway.hub]
+}
+# Hub VPC - NAT Gateway (single), placed in the hub public subnet
+resource "aws_nat_gateway" "hub" {
+  allocation_id = aws_eip.hub_nat.id
+  subnet_id     = aws_subnet.hub_public.id
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-nat"
+    AZ   = var.availability_zones[0]
+  })
+  depends_on = [aws_internet_gateway.hub]
+}
+# Hub VPC Route Tables - hub_public has no inline routes (its default route is a separate aws_route resource in this file)
+resource "aws_route_table" "hub_public" {
+  vpc_id = aws_vpc.hub.id
+
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-public-rt"
+    Type = "HubPublic"
+  })
+}
+
+resource "aws_route_table" "hub_private" {
+  vpc_id = aws_vpc.hub.id
+
+  route {
+    cidr_block         = var.spoke_vpc_cidr # back to the spoke via the Transit Gateway
+    transit_gateway_id = aws_ec2_transit_gateway.main.id
+  }
+
+  route {
+    cidr_block     = "0.0.0.0/0" # everything else to the hub NAT gateway
+    nat_gateway_id = aws_nat_gateway.hub.id
+  }
+
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-private-rt"
+    Type = "HubPrivate"
+  })
+  depends_on = [aws_ec2_transit_gateway_vpc_attachment.hub] # wait for the hub TGW attachment before creating the TGW route
+}
+
+resource "aws_route_table" "hub_firewall" {
+  vpc_id = aws_vpc.hub.id
+
+  route {
+    cidr_block = "0.0.0.0/0" # firewall subnet egresses straight to the Internet Gateway
+    gateway_id = aws_internet_gateway.hub.id
+  }
+
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-firewall-rt"
+    Type = "HubFirewall"
+  })
+}
+# Hub 
VPC Route Table Associations
+resource "aws_route_table_association" "hub_public" {
+  subnet_id      = aws_subnet.hub_public.id
+  route_table_id = aws_route_table.hub_public.id
+}
+resource "aws_route_table_association" "hub_private" {
+  subnet_id      = aws_subnet.hub_private.id
+  route_table_id = aws_route_table.hub_private.id
+}
+resource "aws_route_table_association" "hub_firewall" {
+  subnet_id      = aws_subnet.hub_firewall.id
+  route_table_id = aws_route_table.hub_firewall.id
+}
+
+# Default route of the hub public (NAT) route table -> Network Firewall endpoint; created only when var.enable_firewall is true
+resource "aws_route" "hub_public_to_firewall" {
+  count = var.enable_firewall ? 1 : 0
+
+  route_table_id         = aws_route_table.hub_public.id
+  destination_cidr_block = "0.0.0.0/0"
+  vpc_endpoint_id        = one([for k, v in aws_networkfirewall_firewall.main.firewall_status[0].sync_states : v.attachment[0].endpoint_id]) # one() fails if more than one endpoint is reported
+
+  depends_on = [aws_networkfirewall_firewall.main]
+}
+
+# Default route of the hub public route table -> Internet Gateway; created only when var.enable_firewall is false
+resource "aws_route" "hub_public_to_igw" {
+  count = var.enable_firewall ? 0 : 1
+
+  route_table_id         = aws_route_table.hub_public.id
+  destination_cidr_block = "0.0.0.0/0"
+  gateway_id             = aws_internet_gateway.hub.id
+}
+
+# Transit Gateway VPC Attachment - Spoke (main VPC), using the spoke private subnets passed in by the caller
+resource "aws_ec2_transit_gateway_vpc_attachment" "spoke" {
+  subnet_ids         = var.spoke_private_subnet_ids
+  transit_gateway_id = aws_ec2_transit_gateway.main.id
+  vpc_id             = var.spoke_vpc_id
+  dns_support        = "enable"
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-spoke-tgw-attachment"
+    Type = "Spoke"
+  })
+}
+# Transit Gateway VPC Attachment - Hub, using the single hub private subnet
+resource "aws_ec2_transit_gateway_vpc_attachment" "hub" {
+  subnet_ids         = [aws_subnet.hub_private.id]
+  transit_gateway_id = aws_ec2_transit_gateway.main.id
+  vpc_id             = aws_vpc.hub.id
+  dns_support        = "enable"
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-tgw-attachment"
+    Type = "Hub"
+  })
+}
+# Transit Gateway Route Tables (Custom routing - always required since default propagation is disabled)
+resource "aws_ec2_transit_gateway_route_table" "spoke" {
+  transit_gateway_id = aws_ec2_transit_gateway.main.id
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-spoke-tgw-rt"
+  })
+}
+resource "aws_ec2_transit_gateway_route_table" "hub" {
+  transit_gateway_id = aws_ec2_transit_gateway.main.id
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-tgw-rt"
+  })
+}
+# Route Table Associations - each attachment is associated with its own TGW route table
+resource "aws_ec2_transit_gateway_route_table_association" "spoke" {
+  transit_gateway_attachment_id  = aws_ec2_transit_gateway_vpc_attachment.spoke.id
+  transit_gateway_route_table_id = aws_ec2_transit_gateway_route_table.spoke.id
+}
+resource "aws_ec2_transit_gateway_route_table_association" "hub" {
+  transit_gateway_attachment_id  = aws_ec2_transit_gateway_vpc_attachment.hub.id
+  transit_gateway_route_table_id = aws_ec2_transit_gateway_route_table.hub.id
+}
+# Transit Gateway Routes - spoke table: hub_vpc_cidr via the hub attachment; hub table: spoke_vpc_cidr via the spoke attachment
+resource "aws_ec2_transit_gateway_route" "spoke_to_hub" {
+  destination_cidr_block         = var.hub_vpc_cidr
+  transit_gateway_attachment_id  = aws_ec2_transit_gateway_vpc_attachment.hub.id
+  transit_gateway_route_table_id = aws_ec2_transit_gateway_route_table.spoke.id
+}
+resource "aws_ec2_transit_gateway_route" "hub_to_spoke" {
+  destination_cidr_block         = var.spoke_vpc_cidr
+  transit_gateway_attachment_id  = aws_ec2_transit_gateway_vpc_attachment.spoke.id
+  transit_gateway_route_table_id = aws_ec2_transit_gateway_route_table.hub.id
+}
+# Spoke (main VPC) route tables: one route per entry in spoke_route_table_ids, sending only hub_vpc_cidr traffic to the Transit Gateway
+resource "aws_route" "private_to_tgw" {
+  count                  = length(var.spoke_route_table_ids)
+  route_table_id         = var.spoke_route_table_ids[count.index]
+  destination_cidr_block = var.hub_vpc_cidr
+  transit_gateway_id     = aws_ec2_transit_gateway.main.id
+  depends_on             = [aws_ec2_transit_gateway_vpc_attachment.spoke]
+}
+# Security Group for Hub VPC - all egress; ingress limited to TCP/UDP from the spoke VPC CIDR
+resource "aws_security_group" "hub_default" {
+  name_prefix = "${var.prefix}-hub-"
+  vpc_id      = aws_vpc.hub.id
+  description = "Default security group for hub VPC"
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+    description = "Allow all outbound traffic"
+  }
+  ingress {
+    from_port   = 0
+    to_port     = 65535
+    protocol    = "tcp"
+    cidr_blocks = [var.spoke_vpc_cidr]
+    description = "Allow traffic from spoke VPC"
+  }
+  ingress {
+    from_port   = 0
+    to_port     = 65535
+    protocol    = "udp"
+    cidr_blocks = [var.spoke_vpc_cidr]
+    description = "Allow UDP traffic from spoke VPC"
+  }
+  tags = merge(var.common_tags, {
+    Name = "${var.prefix}-hub-default-sg"
+  })
+}
diff --git a/modules/aws/aws-infra/modules/hub-networking/variables.tf b/modules/aws/aws-infra/modules/hub-networking/variables.tf
new file mode 100644
index 0000000..8e1081f
--- /dev/null
+++ b/modules/aws/aws-infra/modules/hub-networking/variables.tf
@@ -0,0 +1,72 @@
+# Hub Networking Module Variables
+# Inputs for the Transit Gateway, Hub VPC, and Network Firewall created by this module
+
+variable "prefix" {
+  description = "Prefix for resource names"
+  type        = string
+}
+
+variable "region" {
+  description = "AWS region"
+  type        = string
+}
+
+variable "common_tags" {
+  description = "Common tags to apply to all resources"
+  type        = map(string)
+  default     = {}
+}
+
+variable "spoke_vpc_id" {
+  description = "ID of the spoke (main) VPC"
+  type        = string
+}
+
+variable "spoke_vpc_cidr" {
+  description = "CIDR block of the spoke (main) VPC"
+  type        = string
+}
+
+variable "spoke_private_subnet_ids" {
+  description = "IDs of the spoke VPC private subnets"
+  type        = list(string)
+}
+
+variable "spoke_route_table_ids" {
+  description = "IDs of the spoke VPC private route tables"
+  type        = list(string)
+}
+
+variable "hub_vpc_cidr" {
+  description = "CIDR block for the hub VPC"
+  type        = string
+}
+
+variable "availability_zones" {
+  description = "List of availability zones"
+  type        = list(string) # only element [0] is read by the hub subnets in this module
+}
+
+variable "enable_firewall" {
+  description = "Enable Network Firewall in the hub VPC"
+  type        = bool
+  default     = true
+}
+
+variable "allowed_fqdns" {
+  description = "List of FQDNs to allow through the firewall"
+  type        = list(string)
+  default     = []
+}
+
+variable "allowed_network_rules" {
+  description = "List of network-level rules (IP, protocol, port)"
+  type = list(object({
+    protocol         = string # upper-cased before use in the firewall rule header
+    source_ip        = string
+    destination_ip   = string
+    destination_port = string
+  }))
+  default = []
+}
+
diff --git a/modules/aws/aws-infra/networking.tf b/modules/aws/aws-infra/networking.tf
new file mode 100644
index 0000000..01a1fb5
--- /dev/null
+++ b/modules/aws/aws-infra/networking.tf
@@ -0,0 +1,116 @@
+# Networking Component
+# Creates the spoke VPC via the terraform-aws-modules/vpc module, plus the Databricks workspace security group
+
+module "vpc" {
+  source  = "terraform-aws-modules/vpc/aws"
+  version = "~> 5.0"
+
+  name = "${var.prefix}-vpc"
+  cidr = var.networking.vpc_cidr
+
+  azs             = local.availability_zones
+  private_subnets = local.private_subnet_cidrs
+  public_subnets  = var.networking.enable_nat_gateway ? local.public_subnet_cidrs : [] # public subnets exist only to host the NAT gateway
+
+  # DNS
+  enable_dns_hostnames = true
+  enable_dns_support   = true
+
+  # NAT Gateway - a single shared gateway whenever NAT is enabled
+  enable_nat_gateway = var.networking.enable_nat_gateway
+  single_nat_gateway = true
+
+  # Tags
+  tags = local.common_tags
+
+  vpc_tags = {
+    Name = "${var.prefix}-vpc"
+    Type = "Main"
+  }
+
+  private_subnet_tags = {
+    Type = "Private"
+  }
+
+  public_subnet_tags = {
+    Type = "Public"
+  }
+
+  private_route_table_tags = {
+    Type = "Private"
+  }
+
+  public_route_table_tags = {
+    Type = "Public"
+  }
+
+  igw_tags = {
+    Name = "${var.prefix}-igw"
+  }
+
+  nat_gateway_tags = {
+    Name = "${var.prefix}-nat-gateway"
+  }
+
+  nat_eip_tags = {
+    Name = "${var.prefix}-nat-eip"
+  }
+}
+
+# Security Group for Databricks
+resource "aws_security_group" "default" {
+  name_prefix = "${var.prefix}-databricks-"
+  vpc_id      = module.vpc.vpc_id
+  description = "Security group for Databricks workspace"
+
+  # One TCP egress rule per listed port; note each is limited to the VPC CIDR, not 0.0.0.0/0
+  dynamic "egress" {
+    for_each = toset([443, 2443, 6666, 5432, 8443, 8444, 8445, 8446, 8447, 8448, 8449, 8450, 8451])
+    content {
+      description = "Databricks - Workspace SG - REST (443), Secure Cluster Connectivity (2443/6666), Lakebase PostgreSQL (5432), Compute Plane to Control Plane Internal Calls (8443), Unity Catalog Logging and Lineage Data Streaming (8444), Future Extendability (8445-8451)"
+      from_port   = egress.value
+      to_port     = egress.value
+      protocol    = "tcp"
+      cidr_blocks = [var.networking.vpc_cidr]
+    }
+  }
+
+  # Outbound rules to self (required for Databricks clusters)
+  egress {
+    from_port   = 0
+    to_port     = 65535
+    protocol    = "tcp"
+    self        = true
+    description = "Allow all internal TCP traffic to self"
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 65535
+    protocol    = "udp"
+    self        = true
+    description = "Allow all internal UDP traffic to self"
+  }
+
+  # Inbound rules from self (required for internal cluster communication)
+  ingress {
+    from_port   = 0
+    to_port     = 65535
+    protocol    = "tcp"
+    self        = true
+    description = "Allow all internal TCP traffic from self"
+  }
+
+  ingress {
+    from_port   = 0
+    to_port     = 65535
+    protocol    = "udp"
+    self        = true
+    description = "Allow all internal UDP traffic from self"
+  }
+
+  tags = merge(local.common_tags, {
+    Name = "${var.prefix}-databricks-sg"
+    Type = "Databricks"
+  })
+}
diff --git a/modules/aws/aws-infra/outputs.tf b/modules/aws/aws-infra/outputs.tf
new file mode 100644
index 0000000..b135ede
--- /dev/null
+++ b/modules/aws/aws-infra/outputs.tf
@@ -0,0 +1,42 @@
+# VPC Output
+output "vpc_id" {
+  description = "ID of the Spoke VPC"
+  value       = module.vpc.vpc_id
+}
+
+# S3 Bucket Names
+output "root_bucket_name" {
+  description = "Name of the root storage bucket"
+  value       = aws_s3_bucket.root.bucket
+}
+
+output "metastore_bucket_name" {
+  description = "Name of the Unity Catalog metastore bucket (if created)"
+  value       = var.create_metastore_bucket ? aws_s3_bucket.metastore[0].bucket : null # null when the bucket is not created
+}
+
+output "data_bucket_name" {
+  description = "Name of the Unity Catalog data bucket"
+  value       = aws_s3_bucket.data.bucket
+}
+
+# IAM Roles
+output "cross_account_role_arn" {
+  description = "ARN of the cross-account IAM role for Databricks"
+  value       = aws_iam_role.cross_account.arn
+}
+
+output "cross_account_role_name" {
+  description = "Name of the cross-account IAM role"
+  value       = aws_iam_role.cross_account.name
+}
+
+output "unity_catalog_role_arn" {
+  description = "ARN of the Unity Catalog IAM role"
+  value       = aws_iam_role.unity_catalog.arn
+}
+
+output "unity_catalog_role_name" {
+  description = "Name of the Unity Catalog IAM role"
+  value       = aws_iam_role.unity_catalog.name
+}
diff --git a/modules/aws/aws-infra/private-link.tf b/modules/aws/aws-infra/private-link.tf
new file mode 100644
index 0000000..3318943
--- /dev/null
+++ b/modules/aws/aws-infra/private-link.tf
@@ -0,0 +1,159 @@
+# Private Link Component
+# Creates VPC endpoints for Databricks private connectivity
+
+# Private Link Subnets (dedicated subnets for Databricks VPC 
endpoints)
+resource "aws_subnet" "private_link" {
+  count = var.security.enable_private_link ? length(local.availability_zones) : 0 # one subnet per AZ, only when Private Link is enabled
+
+  vpc_id            = module.vpc.vpc_id
+  cidr_block        = cidrsubnet(var.networking.vpc_cidr, 8, count.index + 200) # subnet numbers 200, 201, ... with 8 extra prefix bits
+  availability_zone = local.availability_zones[count.index]
+
+  tags = merge(local.common_tags, {
+    Name = "${var.prefix}-private-link-subnet-${count.index + 1}"
+    Type = "PrivateLink"
+    AZ   = local.availability_zones[count.index]
+  })
+}
+
+# Route Table for Private Link Subnets (no routes are declared on it here)
+resource "aws_route_table" "private_link" {
+  count = var.security.enable_private_link ? 1 : 0
+
+  vpc_id = module.vpc.vpc_id
+
+  tags = merge(local.common_tags, {
+    Name = "${var.prefix}-private-link-rt"
+    Type = "PrivateLink"
+  })
+}
+
+# Route Table Association - every Private Link subnet shares the single route table above
+resource "aws_route_table_association" "private_link" {
+  count = var.security.enable_private_link ? length(aws_subnet.private_link) : 0
+
+  subnet_id      = aws_subnet.private_link[count.index].id
+  route_table_id = aws_route_table.private_link[0].id
+}
+
+# Security Group for Private Link Endpoints - each port is opened twice: from the workspace SG and from the VPC CIDR
+resource "aws_security_group" "private_link" {
+  count = var.security.enable_private_link ? 1 : 0
+
+  name_prefix = "${var.prefix}-private-link-"
+  vpc_id      = module.vpc.vpc_id
+  description = "Security group for Databricks Private Link endpoints"
+
+  ingress {
+    from_port       = 443
+    to_port         = 443
+    protocol        = "tcp"
+    security_groups = [aws_security_group.default.id]
+    description     = "HTTPS from Databricks clusters"
+  }
+
+  ingress {
+    from_port   = 443
+    to_port     = 443
+    protocol    = "tcp"
+    cidr_blocks = [var.networking.vpc_cidr]
+    description = "HTTPS from VPC"
+  }
+
+  # Port 6666 for Databricks communication
+  ingress {
+    from_port       = 6666
+    to_port         = 6666
+    protocol        = "tcp"
+    security_groups = [aws_security_group.default.id]
+    description     = "Databricks internal communication"
+  }
+
+  ingress {
+    from_port   = 6666
+    to_port     = 6666
+    protocol    = "tcp"
+    cidr_blocks = [var.networking.vpc_cidr]
+    description = "Databricks internal communication from VPC"
+  }
+
+  # PostgreSQL port for Lakebase
+  ingress {
+    from_port       = 5432
+    to_port         = 5432
+    protocol        = "tcp"
+    security_groups = [aws_security_group.default.id]
+    description     = "Lakebase PostgreSQL from Databricks clusters"
+  }
+
+  ingress {
+    from_port   = 5432
+    to_port     = 5432
+    protocol    = "tcp"
+    cidr_blocks = [var.networking.vpc_cidr]
+    description = "Lakebase PostgreSQL from VPC"
+  }
+
+  # Control Plane, Unity Catalog, and Future Extendability ports
+  ingress {
+    from_port       = 8443
+    to_port         = 8451
+    protocol        = "tcp"
+    security_groups = [aws_security_group.default.id]
+    description     = "Databricks Control Plane (8443), Unity Catalog (8444), Future Extendability (8445-8451) from clusters"
+  }
+
+  ingress {
+    from_port   = 8443
+    to_port     = 8451
+    protocol    = "tcp"
+    cidr_blocks = [var.networking.vpc_cidr]
+    description = "Databricks Control Plane (8443), Unity Catalog (8444), Future Extendability (8445-8451) from VPC"
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+    description = "All outbound traffic"
+  }
+
+  tags = merge(local.common_tags, {
+    Name = "${var.prefix}-private-link-sg"
+  })
+}
+
+# Databricks Backend Private Link Endpoint - created only when Private Link is enabled AND a backend service name is supplied
+resource "aws_vpc_endpoint" "backend" {
+  count = var.security.enable_private_link && var.security.backend_service_name != null ? 1 : 0
+
+  vpc_id              = module.vpc.vpc_id
+  service_name        = var.security.backend_service_name
+  vpc_endpoint_type   = "Interface"
+  subnet_ids          = aws_subnet.private_link[*].id
+  security_group_ids  = [aws_security_group.private_link[0].id]
+  private_dns_enabled = false # NOTE(review): Databricks' PrivateLink guidance uses private DNS on this endpoint - confirm false is intended
+
+  tags = merge(local.common_tags, {
+    Name = "${var.prefix}-databricks-backend-endpoint"
+    Type = "DatabricksPrivateLink"
+  })
+}
+
+# Databricks Relay Private Link Endpoint - created only when Private Link is enabled AND a relay service name is supplied
+resource "aws_vpc_endpoint" "relay" {
+  count = var.security.enable_private_link && var.security.relay_service_name != null ? 1 : 0
+
+  vpc_id              = module.vpc.vpc_id
+  service_name        = var.security.relay_service_name
+  vpc_endpoint_type   = "Interface"
+  subnet_ids          = aws_subnet.private_link[*].id
+  security_group_ids  = [aws_security_group.private_link[0].id]
+  private_dns_enabled = false # NOTE(review): same question as the backend endpoint - confirm
+
+  tags = merge(local.common_tags, {
+    Name = "${var.prefix}-databricks-relay-endpoint"
+    Type = "DatabricksPrivateLink"
+  })
+}
diff --git a/modules/aws/aws-infra/ucstorage.tf b/modules/aws/aws-infra/ucstorage.tf
new file mode 100644
index 0000000..d60c78b
--- /dev/null
+++ b/modules/aws/aws-infra/ucstorage.tf
@@ -0,0 +1,75 @@
+# Unity Catalog S3 Buckets Component
+# Creates the optional metastore bucket and the always-present data bucket, each with SSE (AES256) and a full public-access block
+
+# Metastore Bucket (for Unity Catalog) - only when var.create_metastore_bucket is true
+resource "aws_s3_bucket" "metastore" {
+  count = var.create_metastore_bucket ? 1 : 0
+
+  bucket = local.metastore_bucket_name
+
+  tags = merge(local.common_tags, {
+    Name       = local.metastore_bucket_name
+    BucketType = "metastore"
+    Purpose    = "Metastore"
+  })
+}
+
+resource "aws_s3_bucket" "data" {
+  bucket = local.data_bucket_name # data bucket has no count: always created
+
+  tags = merge(local.common_tags, {
+    Name       = local.data_bucket_name
+    BucketType = "data"
+    Purpose    = "Data"
+  })
+}
+
+# S3 Bucket Server-Side Encryption Configuration - Metastore Bucket
+resource "aws_s3_bucket_server_side_encryption_configuration" "metastore" {
+  count = var.create_metastore_bucket ? 1 : 0
+
+  bucket = aws_s3_bucket.metastore[0].id
+
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
+
+# S3 Bucket Server-Side Encryption Configuration - Data Bucket
+resource "aws_s3_bucket_server_side_encryption_configuration" "data" {
+  bucket = aws_s3_bucket.data.id
+
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
+
+# Note: Versioning is disabled by default (no versioning configuration resources needed)
+
+# S3 Bucket Public Access Block - Metastore Bucket
+resource "aws_s3_bucket_public_access_block" "metastore" {
+  count = var.create_metastore_bucket ? 1 : 0
+
+  bucket = aws_s3_bucket.metastore[0].id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+# S3 Bucket Public Access Block - Data Bucket
+resource "aws_s3_bucket_public_access_block" "data" {
+  bucket = aws_s3_bucket.data.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+
diff --git a/modules/aws/aws-infra/variables.tf b/modules/aws/aws-infra/variables.tf
new file mode 100644
index 0000000..1d32cd5
--- /dev/null
+++ b/modules/aws/aws-infra/variables.tf
@@ -0,0 +1,137 @@
+# Core Configuration Variables
+variable "prefix" {
+  description = "Prefix for all AWS resources"
+  type        = string
+}
+
+variable "region" {
+  description = "AWS region for resource deployment"
+  type        = string
+}
+
+variable "tags" {
+  description = "Common tags for all resources"
+  type        = map(string)
+  default     = {}
+}
+
+# Networking Configuration - only vpc_cidr is mandatory; optional() defaults require Terraform >= 1.3
+variable "networking" {
+  description = "VPC and networking configuration"
+  type = object({
+    vpc_cidr             = string
+    availability_zones   = optional(list(string), [])
+    enable_nat_gateway   = optional(bool, true)
+    private_subnet_cidrs = optional(list(string), [])
+    public_subnet_cidrs  = optional(list(string), [])
+  })
+}
+
+# Storage Configuration - Individual Variables
+variable "create_metastore_bucket" {
+  description = "Create Unity Catalog metastore bucket"
+  type        = bool
+  default     = false
+}
+
+# IAM Configuration - Split into individual variables
+
+# Instance Profiles (Optional)
+variable "create_instance_profiles" {
+  description = "Create IAM instance profiles for Databricks clusters"
+  type        = bool
+  default     = false
+}
+
+# Cross-Account Configuration (Always created)
+variable "databricks_account_id" {
+  description = "Databricks AWS account ID for cross-account role trust relationship"
+  type        = string
+  default     = null
+}
+
+variable "external_id" {
+  description = "External ID for Unity Catalog role trust relationship"
+  type        = string
+  default     = null
+}
+
+# Unity Catalog Configuration (Always created)
+variable "unity_catalog_account_id" {
+  description = "Unity Catalog AWS account ID (Databricks account for Unity Catalog)"
+  type        = string
+  default     = null
+}
+
+# Additional IAM Permissions
+variable "roles_to_assume" {
+  description = "Additional IAM role ARNs that the cross-account role should be able to assume"
+  type        = list(string)
+  default     = []
+}
+
+# Security Configuration - every attribute is optional, so `security = {}` disables firewall and Private Link
+variable "security" {
+  description = "Advanced security configuration"
+  type = object({
+    # Firewall configuration
+    enable_network_firewall = optional(bool, false)
+    allowed_fqdns           = optional(list(string), [])
+    allowed_network_rules = optional(list(object({
+      protocol         = string
+      source_ip        = string
+      destination_ip   = string
+      destination_port = string
+    })), [])
+
+    # Private Link configuration
+    enable_private_link  = optional(bool, false)
+    backend_service_name = optional(string, null)
+    relay_service_name   = optional(string, null)
+  })
+
+  default = {}
+}
+
+# Advanced Networking Configuration
+variable "advanced_networking" {
+  description = "Advanced networking features"
+  type = object({
+    # Transit Gateway
+    enable_transit_gateway = optional(bool, false)
+    hub_spoke_architecture = optional(bool, false)
+
+    # Hub VPC configuration (when hub-spoke enabled)
+    hub_vpc_cidr = optional(string, "10.1.0.0/16")
+
+    # Additional VPC attachments
+    additional_vpc_attachments = optional(list(object({
+      vpc_id     = string
+      vpc_cidr   = string
+      route_cidr = string
+      subnet_ids = list(string)
+    })), [])
+
+    # Routing configuration
+    propagate_default_routes = optional(bool, false)
+    enable_dns_support       = optional(bool, true)
+  })
+
+  default = {}
+
+  validation {
+    condition     = !var.advanced_networking.hub_spoke_architecture || var.advanced_networking.enable_transit_gateway # hub-spoke implies transit gateway
+    error_message = "Transit Gateway must be enabled when using hub-spoke architecture."
+  }
+}
+
+# Data Sources Configuration
+variable "databricks_config" {
+  description = "Databricks-specific configuration for policy generation"
+  type = object({
+    account_id = optional(string, null)
+    # This helps generate proper Databricks policies but doesn't create Databricks resources
+  })
+
+  default = {}
+}
diff --git a/modules/aws/aws-infra/versions.tf b/modules/aws/aws-infra/versions.tf
new file mode 100644
index 0000000..532cd80
--- /dev/null
+++ b/modules/aws/aws-infra/versions.tf
@@ -0,0 +1,20 @@
+terraform {
+  required_version = ">= 1.3" # variables.tf uses optional(type, default), available from Terraform 1.3
+
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 5.0" # minimum AWS provider for the terraform-aws-modules/vpc ~> 5.0 modules used here
+    }
+    databricks = {
+      source  = "databricks/databricks"
+      version = ">= 1.0.0"
+    }
+    time = {
+      source  = "hashicorp/time"
+      version = ">= 0.9.0"
+    }
+  }
+}
+
+
diff --git a/modules/aws/aws-infra/vpc-endpoints.tf b/modules/aws/aws-infra/vpc-endpoints.tf
new file mode 100644
index 0000000..dbf885b
--- /dev/null
+++ b/modules/aws/aws-infra/vpc-endpoints.tf
@@ -0,0 +1,53 @@
+# VPC Endpoints Component
+# Creates an S3 Gateway endpoint and STS / Kinesis Interface endpoints via the terraform-aws-modules vpc-endpoints submodule
+
+module "vpc_endpoints" {
+  source  = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints"
+  version = "~> 5.0"
+
+  vpc_id = module.vpc.vpc_id
+
+  # Attach the Databricks workspace security group to the Interface endpoints
+  # (STS, Kinesis). Without an explicit group AWS attaches the VPC's default
+  # security group, which is not the group defined for the workspace.
+  security_group_ids = [aws_security_group.default.id]
+
+  endpoints = {
+    s3 = {
+      service      = "s3"
+      service_type = "Gateway"
+      # Gateway endpoint is wired into the private route tables, plus the public ones when they exist
+      route_table_ids = concat(
+        module.vpc.private_route_table_ids,
+        var.networking.enable_nat_gateway ? module.vpc.public_route_table_ids : []
+      )
+      tags = {
+        Name = "${var.prefix}-s3-vpc-endpoint"
+        Type = "Gateway"
+      }
+    }
+
+    sts = {
+      service             = "sts"
+      service_type        = "Interface"
+      private_dns_enabled = true
+      subnet_ids          = module.vpc.private_subnets
+      tags = {
+        Name = "${var.prefix}-sts-vpc-endpoint"
+        Type = "Interface"
+      }
+    }
+
+    kinesis-streams = {
+      service             = "kinesis-streams"
+      service_type        = "Interface"
+      private_dns_enabled = true
+      subnet_ids          = module.vpc.private_subnets
+      tags = {
+        Name = "${var.prefix}-kinesis-streams-vpc-endpoint"
+        Type = "Interface"
+      }
+    }
+  }
+
+  tags = local.common_tags
+}
diff --git a/modules/aws/aws-infra/workspacestorage.tf b/modules/aws/aws-infra/workspacestorage.tf
new file mode 100644
index 0000000..427d8f5
--- /dev/null
+++ b/modules/aws/aws-infra/workspacestorage.tf
@@ -0,0 +1,51 @@
+# Workspace S3 Bucket Component
+# Creates the root S3 bucket for the Databricks workspace with SSE (AES256), a public-access block and a Databricks-generated bucket policy
+
+# Root Storage Bucket (for Databricks workspace) - Always created
+resource "aws_s3_bucket" "root" {
+  bucket = local.root_bucket_name
+
+  tags = merge(local.common_tags, {
+    Name       = local.root_bucket_name
+    BucketType = "root"
+    Purpose    = "Root"
+  })
+}
+
+# S3 Bucket Server-Side Encryption Configuration - Root Bucket
+resource "aws_s3_bucket_server_side_encryption_configuration" "root" {
+  bucket = aws_s3_bucket.root.id
+
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
+
+# Note: Versioning is disabled by default (no versioning configuration resources needed)
+
+# S3 Bucket Public Access Block - Root Bucket
+resource "aws_s3_bucket_public_access_block" "root" {
+  bucket = aws_s3_bucket.root.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+# Databricks-generated Root Bucket Policy (JSON produced by the databricks provider data source)
+data "databricks_aws_bucket_policy" "root" {
+  bucket = aws_s3_bucket.root.bucket
+}
+
+# Root Storage Bucket Policy (for 
Databricks workspace)
+resource "aws_s3_bucket_policy" "root" {
+  depends_on = [aws_s3_bucket_public_access_block.root] # attach the policy only after the public-access block exists
+
+  policy = data.databricks_aws_bucket_policy.root.json # policy document generated by the Databricks data source
+  bucket = aws_s3_bucket.root.id
+}
+
+