Destination Databricks: use hive_metastore if catalog not set (airbytehq#25366)

* use hive_metastore if catalog not set

* Automated Change

* add test case

* version bump + changelog

* auto-bump connector version

---------

Co-authored-by: edgao <edgao@users.noreply.github.com>
Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
3 people authored and btkcodedev committed Apr 26, 2023
1 parent 75947fe commit 2ad5603
Showing 9 changed files with 45 additions and 16 deletions.
@@ -119,7 +119,7 @@
- name: Databricks Lakehouse
destinationDefinitionId: 072d5540-f236-4294-ba7c-ade8fd918496
dockerRepository: airbyte/destination-databricks
dockerImageTag: 1.0.1
dockerImageTag: 1.0.2
documentationUrl: https://docs.airbyte.com/integrations/destinations/databricks
icon: databricks.svg
releaseStage: alpha
@@ -1820,7 +1820,7 @@
- "overwrite"
- "append"
- "append_dedup"
- dockerImage: "airbyte/destination-databricks:1.0.1"
- dockerImage: "airbyte/destination-databricks:1.0.2"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/destinations/databricks"
connectionSpecification:
@@ -1698,7 +1698,7 @@
"destinationDefinitionId": "072d5540-f236-4294-ba7c-ade8fd918496",
"name": "Databricks Lakehouse",
"dockerRepository": "airbyte/destination-databricks",
"dockerImageTag": "1.0.1",
"dockerImageTag": "1.0.2",
"documentationUrl": "https://docs.airbyte.com/integrations/destinations/databricks",
"icon": "databricks.svg",
"spec": {
@@ -16,5 +16,5 @@ ENV APPLICATION destination-databricks

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.1
LABEL io.airbyte.version=1.0.2
LABEL io.airbyte.name=airbyte/destination-databricks
@@ -6,7 +6,7 @@
"databricks_personal_access_token": "dapi0123456789abcdefghij0123456789AB",
"database_schema": "public",
"data_source": {
"data_source_type": "S3",
"data_source_type": "S3_STORAGE",
"s3_bucket_name": "required",
"s3_bucket_path": "required",
"s3_bucket_region": "required",
@@ -26,6 +26,7 @@ public record DatabricksDestinationConfig(String serverHostname,
DatabricksStorageConfigProvider storageConfig) {
static final String DEFAULT_DATABRICKS_PORT = "443";
static final String DEFAULT_DATABASE_SCHEMA = "default";
static final String DEFAULT_CATALOG = "hive_metastore";
static final boolean DEFAULT_PURGE_STAGING_DATA = true;

public static DatabricksDestinationConfig get(final JsonNode config) {
@@ -38,7 +39,7 @@ public static DatabricksDestinationConfig get(final JsonNode config) {
config.get(DATABRICKS_HTTP_PATH_KEY).asText(),
config.has(DATABRICKS_PORT_KEY) ? config.get(DATABRICKS_PORT_KEY).asText() : DEFAULT_DATABRICKS_PORT,
config.get(DATABRICKS_PERSONAL_ACCESS_TOKEN_KEY).asText(),
config.has(DATABRICKS_CATALOG_KEY) ? config.get(DATABRICKS_CATALOG_KEY).asText() : null,
config.has(DATABRICKS_CATALOG_KEY) ? config.get(DATABRICKS_CATALOG_KEY).asText() : DEFAULT_CATALOG,
config.has(DATABRICKS_SCHEMA_KEY) ? config.get(DATABRICKS_SCHEMA_KEY).asText() : DEFAULT_DATABASE_SCHEMA,
config.has(DATABRICKS_PURGE_STAGING_DATA_KEY) ? config.get(DATABRICKS_PURGE_STAGING_DATA_KEY).asBoolean() : DEFAULT_PURGE_STAGING_DATA,
DatabricksStorageConfigProvider.getDatabricksStorageConfig(config.get(DATABRICKS_DATA_SOURCE_KEY)));
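In effect, a connector config that omits `databricks_catalog` now resolves to the `hive_metastore` catalog instead of `null`. The following is a minimal standalone sketch of that fallback, using Jackson directly; the `resolveCatalog` helper and class name are illustrative, not part of the connector:

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class CatalogDefaultSketch {

  static final String DEFAULT_CATALOG = "hive_metastore";

  // Mirrors the has()/get() fallback shown in the hunk above.
  static String resolveCatalog(final JsonNode config) {
    return config.has("databricks_catalog")
        ? config.get("databricks_catalog").asText()
        : DEFAULT_CATALOG;
  }

  public static void main(final String[] args) throws Exception {
    final ObjectMapper mapper = new ObjectMapper();
    final JsonNode withoutCatalog = mapper.readTree("{\"database_schema\": \"public\"}");
    final JsonNode withCatalog = mapper.readTree("{\"databricks_catalog\": \"main\"}");

    System.out.println(resolveCatalog(withoutCatalog)); // hive_metastore
    System.out.println(resolveCatalog(withCatalog));    // main
  }
}
```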
@@ -9,6 +9,7 @@

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.destination.databricks.azure.DatabricksAzureBlobStorageConfigProvider;
import io.airbyte.integrations.destination.databricks.s3.DatabricksS3StorageConfigProvider;
import org.junit.jupiter.api.Test;
@@ -83,4 +84,30 @@ public void testConfigCreationFromJsonAzure() {
assertEquals(DatabricksAzureBlobStorageConfigProvider.class, config2.storageConfig().getClass());
}

@Test
public void testDefaultCatalog() {
final DatabricksDestinationConfig databricksDestinationConfig = DatabricksDestinationConfig.get(Jsons.deserialize(
"""
{
"accept_terms": true,
"databricks_server_hostname": "abc-12345678-wxyz.cloud.databricks.com",
"databricks_http_path": "sql/protocolvx/o/1234567489/0000-1111111-abcd90",
"databricks_port": "443",
"databricks_personal_access_token": "dapi0123456789abcdefghij0123456789AB",
"database_schema": "public",
"data_source": {
"data_source_type": "S3_STORAGE",
"s3_bucket_name": "required",
"s3_bucket_path": "required",
"s3_bucket_region": "required",
"s3_access_key_id": "required",
"s3_secret_access_key": "required"
}
}
"""
));

assertEquals("hive_metastore", databricksDestinationConfig.catalog());
}

}
2 changes: 1 addition & 1 deletion connectors.md
@@ -305,7 +305,7 @@
| **Convex** | <img alt="Convex icon" src="https://raw.githubusercontent.com/airbytehq/airbyte /master/airbyte-config-oss/init-oss/src/main/resources/icons/convex.svg" height="30" height="30"/> | Destination | airbyte/destination-convex:0.1.0 | alpha | [docs](https://docs.airbyte.io/integrations/destinations/convex) | [connectors/destination/convex](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/destination/convex) | [destination-convex](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/destination-convex) | <small>`3eb4d99c-11fa-4561-a259-fc88e0c2f8f4`</small> |
| **Cumul.io** | <img alt="Cumul.io icon" src="https://raw.githubusercontent.com/airbytehq/airbyte /master/airbyte-config-oss/init-oss/src/main/resources/icons/cumulio.svg" height="30" height="30"/> | Destination | airbyte/destination-cumulio:0.1.0 | alpha | [docs](https://docs.airbyte.com/integrations/destinations/cumulio) | [connectors/destination/cumulio](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/destination/cumulio) | [destination-cumulio](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/destination-cumulio) | <small>`e088acb6-9780-4568-880c-54c2dd7f431b`</small> |
| **Databend** | <img alt="Databend icon" src="https://raw.githubusercontent.com/airbytehq/airbyte /master/airbyte-config-oss/init-oss/src/main/resources/icons/databend.svg" height="30" height="30"/> | Destination | airbyte/destination-databend:0.1.2 | alpha | [docs](https://docs.airbyte.com/integrations/destinations/databend) | [connectors/destination/databend](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/destination/databend) | [destination-databend](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/destination-databend) | <small>`302e4d8e-08d3-4098-acd4-ac67ca365b88`</small> |
| **Databricks Lakehouse** | <img alt="Databricks Lakehouse icon" src="https://raw.githubusercontent.com/airbytehq/airbyte /master/airbyte-config-oss/init-oss/src/main/resources/icons/databricks.svg" height="30" height="30"/> | Destination | airbyte/destination-databricks:1.0.1 | alpha | [docs](https://docs.airbyte.com/integrations/destinations/databricks) | [connectors/destination/databricks](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/destination/databricks) | [destination-databricks](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/destination-databricks) | <small>`072d5540-f236-4294-ba7c-ade8fd918496`</small> |
| **Databricks Lakehouse** | <img alt="Databricks Lakehouse icon" src="https://raw.githubusercontent.com/airbytehq/airbyte /master/airbyte-config-oss/init-oss/src/main/resources/icons/databricks.svg" height="30" height="30"/> | Destination | airbyte/destination-databricks:1.0.2 | alpha | [docs](https://docs.airbyte.com/integrations/destinations/databricks) | [connectors/destination/databricks](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/destination/databricks) | [destination-databricks](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/destination-databricks) | <small>`072d5540-f236-4294-ba7c-ade8fd918496`</small> |
| **DuckDB** | <img alt="DuckDB icon" src="https://raw.githubusercontent.com/airbytehq/airbyte /master/airbyte-config-oss/init-oss/src/main/resources/icons/duckdb.svg" height="30" height="30"/> | Destination | airbyte/destination-duckdb:0.1.0 | alpha | [docs](https://docs.airbyte.io/integrations/destinations/duckdb) | [connectors/destination/duckdb](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/destination/duckdb) | [destination-duckdb](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/destination-duckdb) | <small>`94bd199c-2ff0-4aa2-b98e-17f0acb72610`</small> |
| **DynamoDB** | <img alt="DynamoDB icon" src="https://raw.githubusercontent.com/airbytehq/airbyte /master/airbyte-config-oss/init-oss/src/main/resources/icons/dynamodb.svg" height="30" height="30"/> | Destination | airbyte/destination-dynamodb:0.1.7 | alpha | [docs](https://docs.airbyte.com/integrations/destinations/dynamodb) | [connectors/destination/dynamodb](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/destination/dynamodb) | [destination-dynamodb](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/destination-dynamodb) | <small>`8ccd8909-4e99-4141-b48d-4984b70b2d89`</small> |
| **E2E Testing** | <img alt="E2E Testing icon" src="https://raw.githubusercontent.com/airbytehq/airbyte /master/airbyte-config-oss/init-oss/src/main/resources/icons/airbyte.svg" height="30" height="30"/> | Destination | airbyte/destination-e2e-test:0.2.4 | unknown | [docs](https://docs.airbyte.com/integrations/destinations/e2e-test) | [connectors/destination/e2e-test](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/destination/e2e-test) | [destination-e2e-test](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/destination-e2e-test) | <small>`2eb65e87-983a-4fd7-b3e3-9d9dc6eb8537`</small> |
19 changes: 10 additions & 9 deletions docs/integrations/destinations/databricks.md
@@ -21,7 +21,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing
### 2. Create a metastore and attach it to workspace
> **_IMPORTANT:_** The metastore should be in the same region as the workspaces you want to use to access the data. Make sure that this matches the region of the cloud storage bucket you created earlier.
#### Setup storage bucket and IAM role in AWS
#### Setup storage bucket and IAM role in AWS
Follow [Configure a storage bucket and IAM role in AWS](https://docs.databricks.com/data-governance/unity-catalog/get-started.html#configure-a-storage-bucket-and-iam-role-in-aws) to setup AWS bucket with necessary permissions.

#### Create metastore
@@ -47,7 +47,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing
![](../../.gitbook/assets/destination/databricks/databricks_open_worspace.png)

- Create SQL warehouse:

- ![](../../.gitbook/assets/destination/databricks/databricks_new_warehouse.png)
- Switch to SQL tab
- Click New button
@@ -66,7 +66,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing

![](../../.gitbook/assets/destination/databricks/databricks_sql_warehouse_connection_details.png)

> **_IMPORTANT:_** `Server hostname`, `Port`, `HTTP path` are used for Airbyte connection
> **_IMPORTANT:_** `Server hostname`, `Port`, `HTTP path` are used for Airbyte connection
### 5. Create Databricks Cluster
> **_TIP:_** If you use Databricks SQL Warehouse skip this step
@@ -102,7 +102,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing

![](../../.gitbook/assets/destination/databricks/dtabricks_token_user_new.png)

- In the new window put a comment (Optional) and lifetime:
- In the new window put a comment (Optional) and lifetime:

![](../../.gitbook/assets/destination/databricks/databricks_generate_token.png)

@@ -120,7 +120,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing

![](../../.gitbook/assets/destination/databricks/databricks_new_external_location.png)

> **_TIP:_** The new `Storage credential` can be added in the `Storage Credentials` tab or use same as for Metastore.
> **_TIP:_** The new `Storage credential` can be added in the `Storage Credentials` tab or use same as for Metastore.
## Airbyte Setup
### Databricks fields
Expand All @@ -136,7 +136,7 @@ You could choose a data source type
- Amazon S3 (External storage)
- Azure Blob Storage (External storage)

#### Managed tables data source type
#### Managed tables data source type

Please check Databricks documentation about [What is managed tables](https://docs.databricks.com/lakehouse/data-objects.html#what-is-a-managed-table)

@@ -145,13 +145,13 @@ Please check Databricks documentation about [What is managed tables](https://doc
#### Amazon S3 data source type (External storage)
> **_IMPORTANT:_** Make sure the `External Locations` has been added to the workspace. Check [Adding External Locations](#8-adding-external-locations-optional) step.
Provide your Amazon S3 data:
Provide your Amazon S3 data:
- `S3 Bucket Name` - The bucket name
- `S3 Bucket Path` - Subdirectory under the above bucket to sync the data into
- `S3 Bucket Region` - See [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions) for all region codes.
> **_IMPORTANT:_** The metastore should be in the same region as the workspaces you want to use to access the data. Make sure that this matches the region of the cloud storage bucket you created earlier.
- `S3 Access Key ID` - Corresponding key to the above key id
- `S3 Secret Access Key` -
- `S3 Secret Access Key` -
- See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key.
- We recommend creating an Airbyte-specific user. This user will require [read and write permissions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_s3_rw-bucket.html) to objects in the bucket.
- `S3 Filename pattern` - The pattern allows you to set the file-name format for the S3 staging file(s), next placeholders combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please, don't use empty space and not supportable placeholders, as they won't be recognized
@@ -261,7 +261,8 @@ Suppose you are interested in learning more about the Databricks connector or de

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------|
| 1.0.1 | 2023-03-30 | [\#23965](https://github.com/airbytehq/airbyte/pull/24657) | Fix support for external tables on S3 |
| 1.0.2 | 2023-04-20 | [\#25366](https://github.com/airbytehq/airbyte/pull/25366) | Fix default catalog to be `hive_metastore` |
| 1.0.1 | 2023-03-30 | [\#24657](https://github.com/airbytehq/airbyte/pull/24657) | Fix support for external tables on S3 |
| 1.0.0 | 2023-03-21 | [\#23965](https://github.com/airbytehq/airbyte/pull/23965) | Added: Managed table storage type, Databricks Catalog field |
| 0.3.1 | 2022-10-15 | [\#18032](https://github.com/airbytehq/airbyte/pull/18032) | Add `SSL=1` to the JDBC URL to ensure SSL connection. |
| 0.3.0 | 2022-10-14 | [\#15329](https://github.com/airbytehq/airbyte/pull/15329) | Add support for Azure storage. |
