Skip to content

Commit

Permalink
Truncate Wasb storage account name if it's more than 24 characters (#…
Browse files Browse the repository at this point in the history
…33851)

* Truncate Wasb storage account name if it's more than 24 characters

Storage account names must be between 3 and 24 characters in length, but for
reasons that I can't explain, we saw a situation where the storage account name was more than 24 characters
and had to be truncated before it could work. Maybe it was possible in the past to have more than 24 characters, or
the name could come from the cluster; either way, the solution that worked was truncating the account name to
24 characters.

* Apply suggestions from code review

Co-authored-by: Jed Cunningham <66968678+jedcunningham@users.noreply.github.com>

* Also add the change to the async part

---------

Co-authored-by: Jed Cunningham <66968678+jedcunningham@users.noreply.github.com>
  • Loading branch information
ephraimbuddy and jedcunningham committed Aug 29, 2023
1 parent 6c943ca commit 9e2d607
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 10 deletions.
32 changes: 24 additions & 8 deletions airflow/providers/microsoft/azure/hooks/wasb.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,18 @@ def get_conn(self) -> BlobServiceClient:
account_url = conn.host if conn.host else f"https://{conn.login}.blob.core.windows.net/"
parsed_url = urlparse(account_url)

if not parsed_url.netloc and "." not in parsed_url.path:
# if there's no netloc and no dots in the path, then user only
# provided the Active Directory ID, not the full URL or DNS name
account_url = f"https://{conn.login}.blob.core.windows.net/"
if not parsed_url.netloc:
if "." not in parsed_url.path:
# if there's no netloc and no dots in the path, then user only
# provided the Active Directory ID, not the full URL or DNS name
account_url = f"https://{conn.login}.blob.core.windows.net/"
else:
# if there's no netloc but there are dots in the path, then user
# provided the DNS name without the https:// prefix.
# Azure storage account name can only be 3 to 24 characters in length
# https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#storage-account-name
acc_name = account_url.split(".")[0][:24]
account_url = f"https://{acc_name}." + ".".join(account_url.split(".")[1:])

tenant = self._get_field(extra, "tenant_id")
if tenant:
Expand Down Expand Up @@ -568,10 +576,18 @@ async def get_async_conn(self) -> AsyncBlobServiceClient:
account_url = conn.host if conn.host else f"https://{conn.login}.blob.core.windows.net/"
parsed_url = urlparse(account_url)

if not parsed_url.netloc and "." not in parsed_url.path:
# if there's no netloc and no dots in the path, then user only
# provided the Active Directory ID, not the full URL or DNS name
account_url = f"https://{conn.login}.blob.core.windows.net/"
if not parsed_url.netloc:
if "." not in parsed_url.path:
# if there's no netloc and no dots in the path, then user only
# provided the Active Directory ID, not the full URL or DNS name
account_url = f"https://{conn.login}.blob.core.windows.net/"
else:
# if there's no netloc but there are dots in the path, then user
# provided the DNS name without the https:// prefix.
# Azure storage account name can only be 3 to 24 characters in length
# https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#storage-account-name
acc_name = account_url.split(".")[0][:24]
account_url = f"https://{acc_name}." + ".".join(account_url.split(".")[1:])

tenant = self._get_field(extra, "tenant_id")
if tenant:
Expand Down
8 changes: 6 additions & 2 deletions tests/providers/microsoft/azure/hooks/test_wasb.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,12 @@ def test_extra_client_secret_auth_config_ad_connection(self):
"https://testaccountname.blob.core.windows.net",
),
("testhost", "https://accountlogin.blob.core.windows.net/"),
("testhost.dns", "testhost.dns"),
("testhost.blob.net", "testhost.blob.net"),
("testhost.dns", "https://testhost.dns"),
("testhost.blob.net", "https://testhost.blob.net"),
(
"testhostakjhdisdfbearioyo.blob.core.windows.net",
"https://testhostakjhdisdfbearioy.blob.core.windows.net",
), # more than 24 characters
],
)
def test_proper_account_url_update(
Expand Down

0 comments on commit 9e2d607

Please sign in to comment.