Skip to content

data-platform-hq/terraform-azurerm-databricks-ws

Repository files navigation

Azure Databricks Workspace Terraform module

Terraform module for creating an Azure Databricks Workspace

Usage

This module provides the ability to deploy an Azure Databricks Workspace. Here is an example of how to provision an Azure Databricks Workspace in a managed network.

Currently, this module only supports provisioning a Databricks Workspace in a managed network.

# Prerequisite resources for Databricks Workspace Deployment
# Existing virtual network whose first address space is split into the
# Databricks subnets below. This data source is looked up by `name` and
# `resource_group_name` only; `location` is an exported (read-only)
# attribute and cannot be set as an argument.
data "azurerm_virtual_network" "example" {
  name                = "example-vnet"
  resource_group_name = "example-rg"
}

# Existing network security group; its ID is passed as `nsg_id` to both
# Databricks subnet modules.
data "azurerm_network_security_group" "default_nsg" {
  resource_group_name = "example-rg"
  name                = "example-eastus-sg"
}

# Existing Key Vault that holds the customer-managed key (CMK) referenced
# by the workspace module.
data "azurerm_key_vault" "example" {
  resource_group_name = "example-rg"
  name                = "example-key-vault"
}

# Customer-managed key stored in the Key Vault above; its ID is reused for
# managed services, managed disk and DBFS encryption in this example.
data "azurerm_key_vault_key" "example" {
  key_vault_id = data.azurerm_key_vault.example.id
  name         = "cmk-example"
}

# Existing Log Analytics Workspace; its name and ID form the
# `log_analytics_workspace` map passed to the workspace module.
data "azurerm_log_analytics_workspace" "example" {
  resource_group_name = "example-rg"
  name                = "example"
}

# Subnet handed to the workspace module as `public_subnet_name`,
# delegated to Microsoft.Databricks/workspaces.
module "databricks_public" {
  source  = "data-platform-hq/subnet/azurerm"
  version = "~> 1.0"

  name                = "databricks-public"
  resource_group_name = "example-rg"
  network             = data.azurerm_virtual_network.example.name
  nsg_id              = data.azurerm_network_security_group.default_nsg.id

  # Network number 0 after extending the VNet's first address space prefix by 6 bits.
  cidr = cidrsubnet(data.azurerm_virtual_network.example.address_space[0], 6, 0)

  delegations = [
    {
      name = "Microsoft.Databricks/workspaces"
      actions = [
        "Microsoft.Network/virtualNetworks/subnets/join/action",
        "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action",
        "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action"
      ]
    }
  ]
}

# Subnet handed to the workspace module as `private_subnet_name`,
# delegated to Microsoft.Databricks/workspaces.
module "databricks_private" {
  source  = "data-platform-hq/subnet/azurerm"
  version = "~> 1.0"

  name                = "databricks-private"
  resource_group_name = "example-rg"
  network             = data.azurerm_virtual_network.example.name
  nsg_id              = data.azurerm_network_security_group.default_nsg.id

  # Network number 1 after extending the VNet's first address space prefix by 6 bits.
  cidr = cidrsubnet(data.azurerm_virtual_network.example.address_space[0], 6, 1)

  delegations = [
    {
      name = "Microsoft.Databricks/workspaces"
      actions = [
        "Microsoft.Network/virtualNetworks/subnets/join/action",
        "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action",
        "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action"
      ]
    }
  ]
}

# Databricks Workspace deployed with the prerequisite lookups and subnet
# modules declared above.
module "databricks_workspace" {
  source  = "data-platform-hq/databricks-ws/azurerm"
  version = "~> 1.0"

  # Workspace identity and placement
  workspace_name              = "example-workspace"
  resource_group              = "example-rg"
  managed_resource_group_name = "example-managed-rg"
  location                    = "eastus"
  sku                         = "premium"

  # Optional custom names for the companion resources
  access_connector_name = "example-databricks-connector"
  diagnostics_name      = "example-databricks-diagnostics"

  # VNet injection: virtual network, both subnets and their NSG associations
  network_id                        = data.azurerm_virtual_network.example.id
  public_subnet_name                = module.databricks_public.name
  public_subnet_nsg_association_id  = module.databricks_public.nsg_association_id
  private_subnet_name               = module.databricks_private.name
  private_subnet_nsg_association_id = module.databricks_private.nsg_association_id
  nsg_rules_required                = "AllRules"

  # Key Vault that stores the customer-managed key (CMK)
  key_vault_id = data.azurerm_key_vault.example.id

  # CMK encryption of Databricks managed services
  managed_services_cmk_enabled          = true
  managed_services_cmk_key_vault_key_id = data.azurerm_key_vault_key.example.id

  # CMK encryption of data plane cluster disks, including the Key Vault policy
  managed_disk_cmk_enabled          = true
  managed_disk_cmk_policy_enabled   = true
  managed_disk_cmk_key_vault_key_id = data.azurerm_key_vault_key.example.id

  # CMK encryption of DBFS
  managed_dbfs_cmk_enabled          = true
  managed_dbfs_cmk_key_vault_key_id = data.azurerm_key_vault_key.example.id

  # Access Connector and diagnostics destination (workspace name => workspace ID)
  access_connector_enabled = true
  log_analytics_workspace = {
    (data.azurerm_log_analytics_workspace.example.name) = data.azurerm_log_analytics_workspace.example.id
  }
}

Requirements

Name Version
terraform >= 1.0.0
azurerm >= 3.104.2

Providers

Name Version
azurerm >= 3.104.2

Modules

No modules.

Resources

Name Type
azurerm_databricks_access_connector.this resource
azurerm_databricks_workspace.this resource
azurerm_databricks_workspace_root_dbfs_customer_managed_key.this resource
azurerm_key_vault_access_policy.databricks_storage_account_msi resource
azurerm_key_vault_access_policy.databricks_ws_disk resource
azurerm_key_vault_access_policy.databricks_ws_service resource
azurerm_monitor_diagnostic_setting.this resource
azurerm_client_config.current data source
azurerm_monitor_diagnostic_categories.this data source
azurerm_storage_account.this data source
azurerm_user_assigned_identity.this data source

Inputs

Name Description Type Default Required
access_connector_enabled Provisions the Databricks Access Connector, which is required for the Unity Catalog feature bool true no
access_connector_name Databricks Access Connector optional name string null no
analytics_destination_type Log analytics destination type string "Dedicated" no
diagnostics_name Diagnostic Settings optional name string null no
global_databricks_object_id Global 'AzureDatabricks' SP object id string "9b38785a-6e08-4087-a0c4-20634343f21f" no
key_vault_id Key Vault ID string null no
key_vault_key_permissions List of key vault key permissions for Databricks Global Service Principal list(string)
[
"Get",
"List",
"Encrypt",
"Decrypt",
"WrapKey",
"UnwrapKey",
"GetRotationPolicy"
]
no
key_vault_secret_permissions List of key vault secret permissions for Databricks Global Service Principal list(string)
[
"Get",
"List"
]
no
location Azure location string n/a yes
log_analytics_workspace Log Analytics Workspace Name to ID map map(string) {} no
managed_dbfs_cmk_enabled Boolean flag that determines whether Workspace DBFS is encrypted with CMK key bool false no
managed_dbfs_cmk_key_vault_key_id Key Vault key ID used for Databricks DBFS encryption string null no
managed_disk_cmk_enabled Boolean flag that determines whether Data Plane Disks are encrypted with CMK key bool false no
managed_disk_cmk_key_vault_key_id Key Vault key ID used for Data Plane Disks encryption string null no
managed_disk_cmk_policy_enabled Create Key Vault Policy for Databricks Workspace Managed Disk identity.
Upon initial creation of a Workspace with Disk CMK encryption, a Disk Encryption Set with a managed identity is created; it is used to encrypt the clusters' disks.

However, if the Workspace is already provisioned and has to be updated to use Managed Disk encryption, the 'Disk Encryption Set' is only known after it has been created.
This means that you first have to apply with only 'managed_disk_cmk_enabled = true' and with 'managed_disk_cmk_policy_enabled' set to false, because the identity is not yet known.
On the next apply, set 'managed_disk_cmk_policy_enabled' to true, because the Managed Disk identity has been created and is now known.
bool true no
managed_resource_group_name The name of the managed resource group string null no
managed_services_cmk_enabled Encrypts Databricks Workspace Services such as Notebooks and Queries. Once CMK encryption is enabled, it is not possible to switch back to the default Microsoft Managed Encryption. bool false no
managed_services_cmk_key_vault_key_id Key Vault key ID used for Databricks Managed Services encryption string null no
managed_storage_account_identity_enabled Prerequisite for DBFS encryption. Enables the managed Storage Account identity so that a Key Vault Policy can be created for it to access encryption keys bool true no
network_id The ID of a Virtual Network where this Databricks Cluster should be created string n/a yes
no_public_ip Are public IP Addresses not allowed?: [true|false] bool true no
nsg_rules_required Does the data plane to control plane communication happen over private link endpoint only or publicly?: [AllRules, NoAzureDatabricksRules, NoAzureServiceRules] string "AllRules" no
private_subnet_name The name of the Private Subnet within the Virtual Network. Required if network_id is set string n/a yes
private_subnet_nsg_association_id The resource ID of the azurerm_subnet_network_security_group_association resource which is referred to by the private_subnet_name field. Required if network_id is set string n/a yes
public_network_access_enabled Allow public access for accessing workspace: [true|false] bool true no
public_subnet_name The name of the Public Subnet within the Virtual Network. Required if network_id is set string n/a yes
public_subnet_nsg_association_id The resource ID of the azurerm_subnet_network_security_group_association resource which is referred to by the public_subnet_name field. Required if network_id is set string n/a yes
resource_group The name of the resource group in which to create the Databricks Workspace string n/a yes
sku The sku to use for the Databricks Workspace: [standard|premium|trial] string "premium" no
storage_firewall_enabled Boolean flag that determines whether public access is disallowed bool false no
tags A mapping of tags to assign to the resource map(string) {} no
workspace_name Databricks Workspace name string n/a yes

Outputs

Name Description
access_connector_id Databricks Access Connector's Id
access_connector_identity Databricks Access Connector's Identities list
databricks_client_id_identity The Client ID of the User Assigned Identity.
databricks_managed_storage_account_id Azure Databricks Workspace Managed Storage Account ID.
databricks_principal_id_identity The Service Principal ID of the User Assigned Identity.
id Azure Databricks Resource ID
sku Azure Databricks Workspace SKU type
workspace_id Azure Databricks Workspace ID
workspace_url Azure Databricks Workspace URL

License

Apache 2 Licensed. For more information please see LICENSE