Terraform module for creating an Azure Databricks Workspace
This module provides the ability to deploy an Azure Databricks Workspace. Here is an example of how to provision an Azure Databricks Workspace in a managed network.
Currently, this module only supports provisioning a Databricks Workspace in a managed network.
# Prerequisite resources for Databricks Workspace Deployment
# Existing virtual network that the Databricks subnets are created in.
# The lookup is keyed by `name` and `resource_group_name` only; `location` is a
# read-only attribute exported by this data source and cannot be configured.
data "azurerm_virtual_network" "example" {
  name                = "example-vnet"
  resource_group_name = "example-rg"
}
# Existing network security group; its ID is passed as `nsg_id` to both
# Databricks subnet modules in this example.
data "azurerm_network_security_group" "default_nsg" {
  resource_group_name = "example-rg"
  name                = "example-eastus-sg"
}
# Existing Key Vault; supplies `key_vault_id` for the workspace module and is
# the parent of the customer-managed key looked up in this example.
data "azurerm_key_vault" "example" {
  resource_group_name = "example-rg"
  name                = "example-key-vault"
}
# Customer-managed key stored in the example Key Vault; the workspace module
# in this example reuses it for services, disk and DBFS encryption.
data "azurerm_key_vault_key" "example" {
  key_vault_id = data.azurerm_key_vault.example.id
  name         = "cmk-example"
}
# Existing Log Analytics Workspace; its name and ID form the
# `log_analytics_workspace` map passed to the workspace module.
data "azurerm_log_analytics_workspace" "example" {
  resource_group_name = "example-rg"
  name                = "example"
}
# Subnet whose name is handed to the workspace module as `public_subnet_name`.
module "databricks_public" {
  source  = "data-platform-hq/subnet/azurerm"
  version = "~> 1.0"

  # Placement
  resource_group_name = "example-rg"
  network             = data.azurerm_virtual_network.example.name
  name                = "databricks-public"

  # Subnet number 0 obtained by extending the first VNet address prefix by 6 bits
  cidr = cidrsubnet(data.azurerm_virtual_network.example.address_space[0], 6, 0)

  nsg_id = data.azurerm_network_security_group.default_nsg.id

  # Delegate the subnet to the Databricks workspaces service
  delegations = [
    {
      name = "Microsoft.Databricks/workspaces"
      actions = [
        "Microsoft.Network/virtualNetworks/subnets/join/action",
        "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action",
        "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action",
      ]
    },
  ]
}
# Subnet whose name is handed to the workspace module as `private_subnet_name`.
module "databricks_private" {
  source  = "data-platform-hq/subnet/azurerm"
  version = "~> 1.0"

  # Placement
  resource_group_name = "example-rg"
  network             = data.azurerm_virtual_network.example.name
  name                = "databricks-private"

  # Subnet number 1 obtained by extending the first VNet address prefix by 6 bits
  cidr = cidrsubnet(data.azurerm_virtual_network.example.address_space[0], 6, 1)

  nsg_id = data.azurerm_network_security_group.default_nsg.id

  # Delegate the subnet to the Databricks workspaces service
  delegations = [
    {
      name = "Microsoft.Databricks/workspaces"
      actions = [
        "Microsoft.Network/virtualNetworks/subnets/join/action",
        "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action",
        "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action",
      ]
    },
  ]
}
# Databricks Workspace module usage with prerequisite resources mentioned above
module "databricks_workspace" {
  source  = "data-platform-hq/databricks-ws/azurerm"
  version = "~> 1.0"

  # General workspace settings
  workspace_name              = "example-workspace"
  resource_group              = "example-rg"
  managed_resource_group_name = "example-managed-rg"
  location                    = "eastus"
  sku                         = "premium"

  # Optional custom names for auxiliary resources
  access_connector_name = "example-databricks-connector"
  diagnostics_name      = "example-databricks-diagnostics"

  # VNet injection: network, subnets and their NSG associations
  network_id                        = data.azurerm_virtual_network.example.id
  public_subnet_name                = module.databricks_public.name
  public_subnet_nsg_association_id  = module.databricks_public.nsg_association_id
  private_subnet_name               = module.databricks_private.name
  private_subnet_nsg_association_id = module.databricks_private.nsg_association_id
  nsg_rules_required                = "AllRules"

  # Key Vault that holds the customer-managed key (CMK)
  key_vault_id = data.azurerm_key_vault.example.id

  # CMK encryption of Databricks managed services
  managed_services_cmk_enabled          = true
  managed_services_cmk_key_vault_key_id = data.azurerm_key_vault_key.example.id

  # CMK encryption of data plane cluster disks
  managed_disk_cmk_enabled          = true
  managed_disk_cmk_policy_enabled   = true
  managed_disk_cmk_key_vault_key_id = data.azurerm_key_vault_key.example.id

  # CMK encryption of DBFS
  managed_dbfs_cmk_enabled          = true
  managed_dbfs_cmk_key_vault_key_id = data.azurerm_key_vault_key.example.id

  # Access connector and diagnostics destination
  access_connector_enabled = true
  log_analytics_workspace = {
    (data.azurerm_log_analytics_workspace.example.name) = data.azurerm_log_analytics_workspace.example.id
  }
}
Name | Version |
---|---|
terraform | >= 1.0.0 |
azurerm | >= 4.0.1 |
Name | Version |
---|---|
azurerm | >= 4.0.1 |
No modules.
Name | Type |
---|---|
azurerm_databricks_access_connector.this | resource |
azurerm_databricks_workspace.this | resource |
azurerm_databricks_workspace_root_dbfs_customer_managed_key.this | resource |
azurerm_key_vault_access_policy.databricks_storage_account_msi | resource |
azurerm_key_vault_access_policy.databricks_ws_disk | resource |
azurerm_key_vault_access_policy.databricks_ws_service | resource |
azurerm_monitor_diagnostic_setting.this | resource |
azurerm_client_config.current | data source |
azurerm_monitor_diagnostic_categories.this | data source |
azurerm_storage_account.this | data source |
azurerm_user_assigned_identity.this | data source |
Name | Description | Type | Default | Required |
---|---|---|---|---|
access_connector_enabled | Provides an ability to provision Databricks Access Connector which is required for Unity Catalog feature | bool |
true |
no |
access_connector_name | Databricks Access Connector optional name | string |
null |
no |
analytics_destination_type | Log analytics destination type | string |
"Dedicated" |
no |
diagnostics_name | Diagnostic Settings optional name | string |
null |
no |
global_databricks_object_id | Global 'AzureDatabricks' SP object id | string |
"9b38785a-6e08-4087-a0c4-20634343f21f" |
no |
key_vault_id | Key Vault ID | string |
null |
no |
key_vault_key_permissions | List of key vault key permissions for Databricks Global Service Principal | list(string) |
[ |
no |
key_vault_secret_permissions | List of key vault secret permissions for Databricks Global Service Principal | list(string) |
[ |
no |
location | Azure location | string |
n/a | yes |
log_analytics_workspace | Log Analytics Workspace Name to ID map | map(string) |
{} |
no |
managed_dbfs_cmk_enabled | Boolean flag that determines whether Workspace DBFS is encrypted with CMK key | bool |
false |
no |
managed_dbfs_cmk_key_vault_key_id | Key Vault key ID used for Databricks DBFS encryption | string |
null |
no |
managed_disk_cmk_enabled | Boolean flag that determines whether Data Plane Disks are encrypted with CMK key | bool |
false |
no |
managed_disk_cmk_key_vault_key_id | Key Vault key ID used for Data Plane Disks encryption | string |
null |
no |
managed_disk_cmk_policy_enabled | Create Key Vault Policy for Databricks Workspace Managed Disk identity. Upon initial creation of a Workspace with Disk CMK encryption, a Disk Encryption Set with a managed identity is created and used for encryption of cluster disks. However, if the Workspace is already provisioned and has to be updated to use Managed Disk encryption, the 'Disk Encryption Set' identity is known only after creation. This means that you first have to apply with 'managed_disk_cmk_enabled = true' and 'managed_disk_cmk_policy_enabled = false', because the identity is still unknown. On the next apply, set 'managed_disk_cmk_policy_enabled' to true, because the Managed Disk identity has been created and is known. |
bool |
true |
no |
managed_resource_group_name | The name of the managed resource group | string |
null |
no |
managed_services_cmk_enabled | Encrypts Databricks Workspace Services such as Notebooks and Queries. Once CMK encryption is enabled, it is not possible to switch back to the default Microsoft-managed encryption. | bool |
false |
no |
managed_services_cmk_key_vault_key_id | Key Vault key ID used for Databricks Managed Services encryption | string |
null |
no |
managed_storage_account_identity_enabled | Prerequisite for DBFS encryption. Enables the managed Storage Account identity, which is used to create the Key Vault Policy for access to encryption keys | bool |
true |
no |
network_id | The ID of a Virtual Network where this Databricks Cluster should be created | string |
n/a | yes |
no_public_ip | Are public IP Addresses not allowed?: [true|false] | bool |
true |
no |
nsg_rules_required | Does the data plane to control plane communication happen over private link endpoint only or publicly?: [AllRules, NoAzureDatabricksRules, NoAzureServiceRules] | string |
"AllRules" |
no |
private_subnet_name | The name of the Private Subnet within the Virtual Network. Required if network_id is set | string |
n/a | yes |
private_subnet_nsg_association_id | The resource ID of the azurerm_subnet_network_security_group_association resource which is referred to by the private_subnet_name field. Required if network_id is set | string |
n/a | yes |
public_network_access_enabled | Allow public access for accessing workspace: [true|false] | bool |
true |
no |
public_subnet_name | The name of the Public Subnet within the Virtual Network. Required if network_id is set | string |
n/a | yes |
public_subnet_nsg_association_id | The resource ID of the azurerm_subnet_network_security_group_association resource which is referred to by the public_subnet_name field. Required if network_id is set | string |
n/a | yes |
resource_group | The name of the resource group in which to create the Databricks Workspace | string |
n/a | yes |
sku | The sku to use for the Databricks Workspace: [standard|premium|trial] | string |
"premium" |
no |
storage_firewall_enabled | Boolean flag that determines whether public access is disallowed | bool |
false |
no |
tags | A mapping of tags to assign to the resource | map(string) |
{} |
no |
workspace_name | Databricks Workspace name | string |
n/a | yes |
Name | Description |
---|---|
access_connector_id | Databricks Access Connector's Id |
access_connector_identity | Databricks Access Connector's Identities list |
databricks_client_id_identity | The Client ID of the User Assigned Identity. |
databricks_disk_encryption_set_id | The ID of Managed Disk Encryption Set created by the Databricks Workspace. |
databricks_managed_storage_account_id | Azure Databricks Workspace Managed Storage Account ID. |
databricks_principal_id_identity | The Service Principal ID of the User Assigned Identity. |
id | Azure Databricks Resource ID |
sku | Azure Databricks Workspace SKU type |
workspace_id | Azure Databricks Workspace ID |
workspace_url | Azure Databricks Workspace URL |
Apache 2 Licensed. For more information please see LICENSE