Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions awswrangler/catalog/_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ def create_database(
name: str,
description: Optional[str] = None,
catalog_id: Optional[str] = None,
exist_ok: bool = False,
boto3_session: Optional[boto3.Session] = None,
) -> None:
"""Create a database in AWS Glue Catalog.
Expand All @@ -486,6 +487,9 @@ def create_database(
catalog_id : str, optional
The ID of the Data Catalog from which to retrieve Databases.
If none is provided, the AWS account ID is used by default.
exist_ok : bool
If set to True, no Exception is raised if a Database with the same name already exists.
In this case the description will be updated if it is different from the current one.
boto3_session : boto3.Session(), optional
Boto3 Session. The default boto3 session will be used if boto3_session receives None.

Expand All @@ -501,16 +505,19 @@ def create_database(
... name='awswrangler_test'
... )
"""
args: Dict[str, str] = {}
client_glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
args["Name"] = name
args: Dict[str, str] = {"Name": name}
if description is not None:
args["Description"] = description

if catalog_id is not None:
Copy link

@bschaatsbergen bschaatsbergen May 4, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this conditional removed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By using https://github.com/awslabs/aws-data-wrangler/blob/main/awswrangler/catalog/_utils.py#L17 in the create_database call, the catalog id is used for the creation if it is given but not used otherwise. Thereby the check is not required but included directly in the create call.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm I figured so but wanted to double check with you. Thanks @maxispeicher :)

client_glue.create_database(CatalogId=catalog_id, DatabaseInput=args)
else:
client_glue.create_database(DatabaseInput=args)
try:
r = client_glue.get_database(Name=name)
if not exist_ok:
raise exceptions.AlreadyExists(f"Database {name} already exists and <exist_ok> is set to False.")
if description and description != r["Database"].get("Description", ""):
client_glue.update_database(**_catalog_id(catalog_id=catalog_id, Name=name, DatabaseInput=args))
except client_glue.exceptions.EntityNotFoundException:
client_glue.create_database(**_catalog_id(catalog_id=catalog_id, DatabaseInput=args))


@apply_configs
Expand Down
4 changes: 4 additions & 0 deletions awswrangler/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,7 @@ class InvalidFile(Exception):

class FailedQualityCheck(Exception):
    """Error type signalling a failed quality check."""


class AlreadyExists(Exception):
    """Raised when a resource that is about to be created already exists.

    For example, creating a Glue Catalog database whose name is already taken
    while ``exist_ok`` is False.
    """
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ def redshift_external_schema(cloudformation_outputs, databases_parameters, glue_
return "aws_data_wrangler_external"


@pytest.fixture(scope="session")
def account_id():
    """Return the "Account" field of the STS caller identity (session-scoped fixture)."""
    sts_client = boto3.client("sts")
    caller_identity = sts_client.get_caller_identity()
    return caller_identity.get("Account")


@pytest.fixture(scope="function")
def glue_ctas_database():
name = f"db_{get_time_str_with_random_suffix()}"
Expand Down Expand Up @@ -282,3 +287,10 @@ def assert_filename_prefix(filename, filename_prefix, test_prefix):
assert not filename.startswith(test_prefix)

return assert_filename_prefix


@pytest.fixture(scope="function")
def random_glue_database():
    """Yield a freshly generated database name and delete that database on teardown.

    The fixture only produces the name; it does not create the database itself.
    """
    generated_name = get_time_str_with_random_suffix()
    yield generated_name
    # Teardown: executed once the test using this fixture has finished.
    wr.catalog.delete_database(generated_name)
Loading