Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add quoting support #65

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 0 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,25 +191,6 @@ Due to the nature of AWS Athena, not all core dbt functionality is supported.
The following features of dbt are not implemented on Athena:
* Snapshots

#### Known issues

* Quoting is not currently supported
* If you need to quote your sources, escape the quote characters in your source definitions:

```yaml
version: 2

sources:
- name: my_source
tables:
- name: first_table
identifier: "first table" # Not like that
- name: second_table
identifier: "\"second table\"" # Like this
```

* Tables, schemas and databases should only be lowercase

### Contributing

This connector works with Python from 3.7 to 3.10.
Expand Down
17 changes: 16 additions & 1 deletion dbt/adapters/athena/relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,24 @@ class AthenaIncludePolicy(Policy):

@dataclass(frozen=True, eq=False, repr=False)
class AthenaRelation(BaseRelation):
quote_character: str = ""
quote_character: str = '"' # Presto quote character
include_policy: Policy = AthenaIncludePolicy()

def render_hive(self):
    """
    Render relation with Hive format. Athena uses Hive format for some DDL statements.

    The relation's ``quote_character`` is temporarily switched to a backtick
    while ``self.render()`` runs and is restored afterwards, including when
    ``render()`` raises.

    See:
    - https://aws.amazon.com/athena/faqs/ "Q: How do I create tables and schemas for my data on Amazon S3?"
    - https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL

    Returns:
        Whatever ``self.render()`` returns while the backtick quote
        character is in effect.
    """
    old_value = self.quote_character
    # The dataclass is declared frozen, so the temporary override has to go
    # through object.__setattr__ rather than plain attribute assignment.
    object.__setattr__(self, "quote_character", "`")  # Hive quote char
    try:
        return self.render()
    finally:
        # Always put the original quote character back; without this a failing
        # render() would leave the instance stuck with Hive quoting.
        # NOTE(review): the instance is mutated in place for the duration of
        # the call — confirm relations are not rendered concurrently.
        object.__setattr__(self, "quote_character", old_value)


class AthenaSchemaSearchMap(Dict[InformationSchema, Dict[str, Set[Optional[str]]]]):
"""A utility class to keep track of what information_schema tables to
Expand Down
5 changes: 4 additions & 1 deletion dbt/include/athena/macros/adapters/relation.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
{%- do adapter.clean_up_table(relation.schema, relation.table) -%}
{% endif %}
{% call statement('drop_relation', auto_begin=False) -%}
drop {{ relation.type }} if exists {{ relation }}
drop {{ relation.type }} if exists
{%- if relation.type == 'table' %} {{ relation.render_hive() -}}
{%- else %} {{ relation -}}
{%- endif -%}
{%- endcall %}
{% endmacro %}
12 changes: 12 additions & 0 deletions dbt/include/athena/macros/adapters/schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{# Issue `create schema if not exists` for the schema part of `relation`: the identifier is dropped with without_identifier() and the remainder is rendered through render_hive(). #}
{% macro default__create_schema(relation) -%}
{%- call statement('create_schema') -%}
create schema if not exists {{ relation.without_identifier().render_hive() }}
{% endcall %}
{% endmacro %}


{# Issue `drop schema if exists ... cascade` for the schema part of `relation`: the identifier is dropped with without_identifier() and the remainder is rendered through render_hive(). #}
{% macro default__drop_schema(relation) -%}
{%- call statement('drop_schema') -%}
drop schema if exists {{ relation.without_identifier().render_hive() }} cascade
{% endcall %}
{% endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
{%- set format = config.get('format', default=default_value) -%}

{% call statement('set_table_classification', auto_begin=False) -%}
alter table {{ relation }} set tblproperties ('classification' = '{{ format }}')
alter table {{ relation.render_hive() }} set tblproperties ('classification' = '{{ format }}')
{%- endcall %}
{%- endmacro %}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% macro drop_iceberg(relation) -%}
drop table if exists {{ relation }}
drop table if exists {{ relation.render_hive() }}
{% endmacro %}

{% macro create_table_iceberg(relation, old_relation, tmp_relation, sql) -%}
Expand Down Expand Up @@ -77,7 +77,7 @@

{%- set dest_columns_with_type_csv = dest_columns_with_type | join(', ') -%}

CREATE TABLE {{ relation }} (
CREATE TABLE {{ relation.render_hive() }} (
{{ dest_columns_with_type_csv }}
)
{%- if partitioned_by is not none %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
{%- set s3_data_naming = model['config'].get('s3_data_naming', target.s3_data_naming) -%}

{% set sql %}
create external table {{ this.render() }} (
create external table {{ this.render_hive() }} (
{%- for col_name in agate_table.column_names -%}
{%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%}
{%- set type = column_override.get(col_name, inferred_type) -%}
Expand Down
1 change: 1 addition & 0 deletions tests/unit/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
DATABASE_NAME = "test_dbt_athena"
BUCKET = "test-dbt-athena-test-delete-partitions"
AWS_REGION = "eu-west-1"
TABLE_NAME = "test-table"
22 changes: 22 additions & 0 deletions tests/unit/test_relation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from dbt.adapters.athena.relation import AthenaRelation

from .constants import DATA_CATALOG_NAME, DATABASE_NAME, TABLE_NAME


class TestAthenaRelation:
    """Checks on the quoting applied by ``AthenaRelation`` rendering."""

    @staticmethod
    def _make_relation():
        """Build the relation shared by the tests from the unit-test constants."""
        return AthenaRelation.create(
            database=DATA_CATALOG_NAME,
            schema=DATABASE_NAME,
            identifier=TABLE_NAME,
        )

    def test_render_hive_uses_hive_style_quotation(self):
        # Hive rendering is expected to wrap each rendered part in backticks.
        rendered = self._make_relation().render_hive()
        expected = f"`{DATABASE_NAME}`.`{TABLE_NAME}`"
        assert rendered == expected

    def test_render_hive_resets_quote_character_after_call(self):
        # A Hive render must not leak its quote character into later renders.
        relation = self._make_relation()
        relation.render_hive()
        expected = f'"{DATABASE_NAME}"."{TABLE_NAME}"'
        assert relation.render() == expected