Skip to content

Commit

Permalink
Add Azure MSI auth documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
nfx committed Oct 1, 2021
1 parent b1a6c45 commit a1ef8d6
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 28 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## 0.3.8

* Added `databricks_repo` resource to manage [Databricks Repos](https://docs.databricks.com/repos.html) ([#771](https://github.com/databrickslabs/terraform-provider-databricks/pull/771))
* Added support for Azure MSI authentication ([#743](https://github.com/databrickslabs/terraform-provider-databricks/pull/743))
* Already deleted `databricks_token` doesn't fail the apply ([#808](https://github.com/databrickslabs/terraform-provider-databricks/pull/808))
* Multiple documentation improvements

Expand Down
5 changes: 4 additions & 1 deletion common/azure_auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,12 @@ func (aa *DatabricksClient) configureWithAzureManagedIdentity(ctx context.Contex
if !aa.IsAzure() {
return nil, nil
}
if !adal.MSIAvailable(ctx, aa.httpClient.HTTPClient) {
if !aa.AzureUseMSI {
return nil, nil
}
if !adal.MSIAvailable(ctx, aa.httpClient.HTTPClient) {
return nil, fmt.Errorf("managed identity is not available")
}
log.Printf("[INFO] Using Azure Managed Identity authentication")
return aa.simpleAADRequestVisitor(ctx, func(resource string) (autorest.Authorizer, error) {
return auth.MSIConfig{
Expand Down
8 changes: 6 additions & 2 deletions common/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ type DatabricksClient struct {
// Deprecated - to be removed in v0.4.0
AzureUsePATForSPN bool `name:"azure_use_pat_for_spn"`

// Use Azure Managed Service Identity authentication
AzureUseMSI bool `name:"azure_use_msi" env:"ARM_USE_MSI" auth:"azure"`

AzureClientSecret string `name:"azure_client_secret" env:"DATABRICKS_AZURE_CLIENT_SECRET,ARM_CLIENT_SECRET" auth:"azure"`
AzureClientID string `name:"azure_client_id" env:"DATABRICKS_AZURE_CLIENT_ID,ARM_CLIENT_ID" auth:"azure"`
AzureTenantID string `name:"azure_tenant_id" env:"DATABRICKS_AZURE_TENANT_ID,ARM_TENANT_ID" auth:"azure"`
Expand Down Expand Up @@ -216,8 +219,8 @@ func (c *DatabricksClient) Authenticate(ctx context.Context) error {
authorizers := []func(context.Context) (func(*http.Request) error, error){
c.configureWithDirectParams,
c.configureWithAzureClientSecret,
c.configureWithAzureCLI,
c.configureWithAzureManagedIdentity,
c.configureWithAzureCLI,
c.configureWithGoogleForAccountsAPI,
c.configureWithGoogleForWorkspace,
c.configureWithDatabricksCfg,
Expand All @@ -241,6 +244,7 @@ func (c *DatabricksClient) Authenticate(ctx context.Context) error {
func (c *DatabricksClient) niceError(message string) error {
info := ""
if len(c.configAttributesUsed) > 0 {
// TODO: first show env vars and filter out the attrs after
info = fmt.Sprintf(" Attributes used: %s", strings.Join(c.configAttributesUsed, ", "))
envVars := envVariablesUsed()
if envVars != "" {
Expand Down Expand Up @@ -386,7 +390,7 @@ func (c *DatabricksClient) configureHTTPCLient() {

// IsAzure returns true if client is configured for Azure Databricks - either by using AAD auth or with host+token combination
func (c *DatabricksClient) IsAzure() bool {
return c.resourceID() != "" || strings.Contains(c.Host, ".azuredatabricks.net")
return c.resourceID() != "" || strings.Contains(c.Host, ".azuredatabricks.net") || c.AzureUseMSI
}

// IsAws returns true if client is configured for AWS
Expand Down
2 changes: 1 addition & 1 deletion common/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ func TestDatabricksClient_FormatURL(t *testing.T) {

func TestClientAttributes(t *testing.T) {
ca := ClientAttributes()
assert.Len(t, ca, 24)
assert.Len(t, ca, 25)
}

func TestEnvVarsUsed(t *testing.T) {
Expand Down
62 changes: 39 additions & 23 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ There are currently three supported methods to [authenticate](https://docs.datab

* [PAT Tokens](https://docs.databricks.com/dev-tools/api/latest/authentication.html)
* Username and password pair
* Azure Active Directory Tokens via [Azure CLI](#authenticating-with-azure-cli) or [Service Principals](#authenticating-with-azure-service-principal)
* Azure Active Directory Tokens via [Azure CLI](#authenticating-with-azure-cli), [Service Principals](#authenticating-with-azure-service-principal), or [Managed Service Identities](#authenticating-with-azure-msi)

### Authenticating with Databricks CLI credentials

Expand Down Expand Up @@ -177,18 +177,28 @@ Alternatively, you can provide this value as an environment variable `DATABRICKS

## Special configurations for Azure

The provider works with [Azure CLI authentication](https://docs.microsoft.com/en-us/cli/azure/authenticate-azure-cli?view=azure-cli-latest) to facilitate local development workflows, though for automated scenarios a service principal auth is necessary (and specification of `azure_client_id`, `azure_client_secret` and `azure_tenant_id` parameters).
The provider works with [Azure CLI authentication](https://docs.microsoft.com/en-us/cli/azure/authenticate-azure-cli?view=azure-cli-latest) to facilitate local development workflows, though for automated scenarios either managed identity authentication (by specifying `azure_use_msi`) or service principal authentication (by specifying the `azure_client_id`, `azure_client_secret` and `azure_tenant_id` parameters) is necessary.

### Authenticating with Azure Service Principal
### Authenticating with Azure MSI

!> **Warning** Please note that the azure service principal authentication currently (since v0.3.7) uses the AAD token for the authentication (SPN should have **Contributor** role on Databricks workspace). You can restore previous functionality (generating the PAT for service principal) by setting `azure_use_pat_for_spn` to `true` (you can regulate the lifetime of generated PAT with `pat_token_duration_seconds` setting). Azure Databricks does not yet support AAD tokens for [secret scopes](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/secrets#--create-secret-scope). Databricks Labs team will refactor it transparently once that support is available. The only impacted field is `pat_token_duration_seconds`, which will be deprecated and fully supported after AAD support.
Since v0.3.8, it's possible to leverage [Azure Managed Service Identity](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/guides/managed_service_identity) authentication, which uses the same environment variables as the `azurerm` provider. Both `SystemAssigned` and `UserAssigned` identities work, as long as they have the `Contributor` role at the subscription level and created the workspace resource, or are directly added to the workspace through [databricks_service_principal](resources/service_principal.md).

```hcl
provider "databricks" {
host = data.azurerm_databricks_workspace.this.workspace_url
# ARM_USE_MSI environment variable is recommended
azure_use_msi = true
}
```

### Authenticating with Azure CLI

It's possible to use [Azure CLI](https://docs.microsoft.com/cli/azure/) authentication, where the provider would rely on access token cached by `az login` command so that local development scenarios are possible. Technically, the provider will call `az account get-access-token` each time before an access token is about to expire.

```hcl
provider "azurerm" {
client_id = var.client_id
client_secret = var.client_secret
tenant_id = var.tenant_id
subscription_id = var.subscription_id
features {}
}
resource "azurerm_databricks_workspace" "this" {
Expand All @@ -199,24 +209,25 @@ resource "azurerm_databricks_workspace" "this" {
}
provider "databricks" {
host = azurerm_databricks_workspace.this.workspace_url
azure_client_id = var.client_id
azure_client_secret = var.client_secret
azure_tenant_id = var.tenant_id
host = azurerm_databricks_workspace.this.workspace_url
}
resource "databricks_user" "my-user" {
user_name = "test-user@databricks.com"
user_name = "test-user@databricks.com"
display_name = "Test User"
}
```

### Authenticating with Azure CLI
### Authenticating with Azure Service Principal

It's possible to use [Azure CLI](https://docs.microsoft.com/cli/azure/) authentication, where the provider would rely on access token cached by `az login` command so that local development scenarios are possible. Technically, the provider will call `az account get-access-token` each time before an access token is about to expire.
!> **Warning** Please note that the azure service principal authentication currently (since v0.3.7) uses the AAD token for the authentication (SPN should have **Contributor** role on Databricks workspace). You can restore previous functionality (generating the PAT for service principal) by setting `azure_use_pat_for_spn` to `true` (you can regulate the lifetime of generated PAT with `pat_token_duration_seconds` setting). Azure Databricks does not yet support AAD tokens for [secret scopes](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/secrets#--create-secret-scope). Databricks Labs team will refactor it transparently once that support is available. The only impacted field is `pat_token_duration_seconds`, which will be deprecated and fully supported after AAD support.

```hcl
provider "azurerm" {
features {}
client_id = var.client_id
client_secret = var.client_secret
tenant_id = var.tenant_id
subscription_id = var.subscription_id
}
resource "azurerm_databricks_workspace" "this" {
Expand All @@ -227,12 +238,14 @@ resource "azurerm_databricks_workspace" "this" {
}
provider "databricks" {
host = azurerm_databricks_workspace.this.workspace_url
host = azurerm_databricks_workspace.this.workspace_url
azure_client_id = var.client_id
azure_client_secret = var.client_secret
azure_tenant_id = var.tenant_id
}
resource "databricks_user" "my-user" {
user_name = "test-user@databricks.com"
display_name = "Test User"
user_name = "test-user@databricks.com"
}
```

Expand All @@ -245,6 +258,7 @@ resource "databricks_user" "my-user" {
* `azure_tenant_id` - (optional) This is the Azure Active Directory Tenant id in which the Enterprise Application (Service Principal)
resides. Alternatively, you can provide this value as an environment variable `DATABRICKS_AZURE_TENANT_ID` or `ARM_TENANT_ID`.
* `azure_environment` - (optional) This is the Azure Environment which defaults to the `public` cloud. Other options are `german`, `china` and `usgovernment`. Alternatively, you can provide this value as an environment variable `ARM_ENVIRONMENT`.
* `azure_use_msi` - (optional) Use [Azure Managed Service Identity](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/guides/managed_service_identity) authentication. Alternatively, you can provide this value as an environment variable `ARM_USE_MSI`.
* `pat_token_duration_seconds` - The current implementation of the azure auth via sp requires the provider to create a temporary personal access token within Databricks. The current AAD implementation does not cover all the APIs for Authentication. This field determines the duration in which that temporary PAT token is alive. It is measured in seconds and will default to `3600` seconds. **Deprecated since v0.3.8**.

There are multiple environment variable options, the `DATABRICKS_AZURE_*` environment variables take precedence, and the `ARM_*` environment variables provide a way to share authentication configuration using the `databricks` provider alongside the `azurerm` provider.
Expand Down Expand Up @@ -274,6 +288,7 @@ The following configuration attributes can be passed via environment variables:
| `azure_client_secret` | `ARM_CLIENT_SECRET` |
| `azure_client_id` | `ARM_CLIENT_ID` |
| `azure_tenant_id` | `ARM_TENANT_ID` |
| `azure_use_msi` | `ARM_USE_MSI` |
| `azure_environment` | `ARM_ENVIRONMENT` |
| `debug_truncate_bytes` | `DATABRICKS_DEBUG_TRUNCATE_BYTES` |
| `debug_headers` | `DATABRICKS_DEBUG_HEADERS` |
Expand All @@ -293,10 +308,11 @@ provider "databricks" {}
3. Will check for the presence of `host` + `token` pair, continue trying otherwise.
4. Will check for `host` + `username` + `password` presence, continue trying otherwise.
5. Will check for Azure workspace ID, `azure_client_secret` + `azure_client_id` + `azure_tenant_id` presence, continue trying otherwise.
6. Will check for Azure workspace ID presence, and if `AZ CLI` returns an access token, continue trying otherwise.
7. Will check for the `~/.databrickscfg` file in the home directory, will fail otherwise.
8. Will check for `profile` presence and try picking from that file will fail otherwise.
9. Will check for `host` and `token` or `username`+`password` combination, will fail if nothing of these exist.
6. Will check for availability of Azure MSI, if enabled via `azure_use_msi`, continue trying otherwise.
7. Will check for Azure workspace ID presence, and if `AZ CLI` returns an access token, continue trying otherwise.
8. Will check for the `~/.databrickscfg` file in the home directory, will fail otherwise.
9. Will check for `profile` presence and try picking from that file, will fail otherwise.
10. Will check for `host` and `token` or `username`+`password` combination, will fail if none of these exist.

## Data resources and Authentication is not configured errors

Expand Down
1 change: 1 addition & 0 deletions identity/resource_service_principal.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ func ResourceServicePrincipal() *schema.Resource {
}
client := c.(*common.DatabricksClient)
if client.IsAzure() && sp.ApplicationID == "" {
// TODO: verify cases for non-existing resources
return fmt.Errorf("application_id is required for service principals in Azure Databricks")
}
if client.IsAws() && sp.DisplayName == "" {
Expand Down
3 changes: 2 additions & 1 deletion provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ func providerSchema() map[string]*schema.Schema {
ps["azure_client_secret"].Sensitive = true

azCoordinatesDeprecation := "`%s` is deprecated and would be removed in v0.4.0. Please rewrite provider configuration " +
"with `host = data.azurerm_databricks_workspace.example.workspace_url` to achieve the same effect. Please check " +
"with `host = data.azurerm_databricks_workspace.example.workspace_url` to achieve the same effect. " +
"ARM_* environment variables would continue to be used as they're used by `azurerm` provider. See " +
"https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/databricks_workspace#workspace_url for details"
ps["azure_workspace_name"].Deprecated = fmt.Sprintf(azCoordinatesDeprecation, "azure_workspace_name")
ps["azure_resource_group"].Deprecated = fmt.Sprintf(azCoordinatesDeprecation, "azure_resource_group")
Expand Down

0 comments on commit a1ef8d6

Please sign in to comment.