Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fs] Drop support for the deprecated hail-az scheme #14293

Merged
merged 1 commit into from Feb 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
44 changes: 12 additions & 32 deletions hail/python/hailtop/aiocloud/aioazure/fs.py
Expand Up @@ -2,7 +2,6 @@
from types import TracebackType

import aiohttp
import abc
import re
import os
import asyncio
Expand Down Expand Up @@ -304,6 +303,10 @@ def __init__(self, account: str, container: str, path: str, query: Optional[str]
self._path = path
self._query = query

@property
def scheme(self) -> str:
return 'https'

def __repr__(self):
return f'AzureAsyncFSURL({self._account}, {self._container}, {self._path}, {self._query})'

Expand All @@ -328,9 +331,8 @@ def query(self) -> Optional[str]:
return self._query

@property
@abc.abstractmethod
def base(self) -> str:
pass
return f'https://{self._account}.blob.core.windows.net/{self._container}/{self._path}'

def with_path(self, path) -> 'AzureAsyncFSURL':
return self.__class__(self._account, self._container, path, self._query)
Expand All @@ -342,26 +344,6 @@ def __str__(self) -> str:
return self.base if not self._query else f'{self.base}?{self._query}'


class AzureAsyncFSHailAzURL(AzureAsyncFSURL):
@property
def scheme(self) -> str:
return 'hail-az'

@property
def base(self) -> str:
return f'hail-az://{self._account}/{self._container}/{self._path}'


class AzureAsyncFSHttpsURL(AzureAsyncFSURL):
@property
def scheme(self) -> str:
return 'https'

@property
def base(self) -> str:
return f'https://{self._account}.blob.core.windows.net/{self._container}/{self._path}'


# ABS errors if you attempt credentialed access for a public container,
# so we try once with credentials, if that fails use anonymous access for
# that container going forward.
Expand Down Expand Up @@ -416,7 +398,7 @@ def __init__(

@staticmethod
def schemes() -> Set[str]:
return {'hail-az', 'https'}
return {'https'}

@staticmethod
def valid_url(url: str) -> bool:
Expand All @@ -427,7 +409,7 @@ def valid_url(url: str) -> bool:
return False
_, suffix = authority.split('.', maxsplit=1)
return suffix == 'blob.core.windows.net'
return url.startswith('hail-az://')
return False

async def generate_sas_token(
self,
Expand Down Expand Up @@ -466,7 +448,7 @@ def _parse_url(url: str) -> AzureAsyncFSURL:

scheme = url[:colon_index]
if scheme not in AzureAsyncFS.schemes():
raise ValueError(f'invalid scheme, expected hail-az or https: {scheme}')
raise ValueError(f'invalid scheme, expected https: {scheme}')

rest = url[(colon_index + 1) :]
if not rest.startswith('//'):
Expand All @@ -478,7 +460,9 @@ def _parse_url(url: str) -> AzureAsyncFSURL:

match = AzureAsyncFS.PATH_REGEX.fullmatch(container_and_name)
if match is None:
raise ValueError(f'invalid path name, expected hail-az://account/container/blob_name: {container_and_name}')
raise ValueError(
f'invalid path name, expected https://account.blob.core.windows.net/container/blob_name: {container_and_name}'
)

container = match.groupdict()['container']

Expand All @@ -489,14 +473,10 @@ def _parse_url(url: str) -> AzureAsyncFSURL:

name, token = AzureAsyncFS.get_name_parts(name)

if scheme == 'hail-az':
account = authority
return AzureAsyncFSHailAzURL(account, container, name, token)

assert scheme == 'https'
assert len(authority) > len('.blob.core.windows.net')
account = authority[: -len('.blob.core.windows.net')]
return AzureAsyncFSHttpsURL(account, container, name, token)
return AzureAsyncFSURL(account, container, name, token)

@staticmethod
def get_name_parts(name: str) -> Tuple[str, str]:
Expand Down
2 changes: 1 addition & 1 deletion hail/python/hailtop/config/user_config.py
Expand Up @@ -138,7 +138,7 @@ def get_remote_tmpdir(
)
remote_tmpdir = f'gs://{bucket}/batch'
else:
schemes = {'gs', 'hail-az', 'https'}
schemes = {'gs', 'https'}
found_scheme = any(remote_tmpdir.startswith(f'{scheme}://') for scheme in schemes)
if not found_scheme:
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion hail/python/test/hail/fs/test_worker_driver_fs.py
Expand Up @@ -151,7 +151,7 @@ def test_requester_pays_with_project_more_than_one_partition():
@run_if_azure
@fails_local_backend
def test_can_access_public_blobs():
public_mt = 'hail-az://azureopendatastorage/gnomad/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset.mt'
public_mt = 'https://azureopendatastorage.blob.core.windows.net/gnomad/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset.mt'
assert hl.hadoop_exists(public_mt)
with hl.hadoop_open(f'{public_mt}/README.txt') as readme:
assert len(readme.read()) > 0
Expand Down
62 changes: 7 additions & 55 deletions hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala
Expand Up @@ -28,7 +28,7 @@ import java.time.Duration
import org.json4s.Formats
import org.json4s.jackson.JsonMethods

abstract class AzureStorageFSURL(
class AzureStorageFSURL(
val account: String,
val container: String,
val path: String,
Expand All @@ -41,10 +41,13 @@ abstract class AzureStorageFSURL(
else
withPath(s"$path/$c")

def withPath(newPath: String): AzureStorageFSURL
def fromString(s: String): AzureStorageFSURL = AzureStorageFS.parseUrl(s)

def prefix: String
def withPath(newPath: String): AzureStorageFSURL =
new AzureStorageFSURL(account, container, newPath, sasToken)

def prefix: String = s"https://$account.blob.core.windows.net/$container"

def getPath: String = path

override def toString(): String = {
Expand All @@ -55,73 +58,22 @@ abstract class AzureStorageFSURL(
}
}

class AzureStorageFSHailAzURL(
account: String,
container: String,
path: String,
sasToken: Option[String],
) extends AzureStorageFSURL(account, container, path, sasToken) {

override def withPath(newPath: String): AzureStorageFSHailAzURL =
new AzureStorageFSHailAzURL(account, container, newPath, sasToken)

override def prefix: String = s"hail-az://$account/$container"
}

class AzureStorageFSHttpsURL(
account: String,
container: String,
path: String,
sasToken: Option[String],
) extends AzureStorageFSURL(account, container, path, sasToken) {

override def withPath(newPath: String): AzureStorageFSHttpsURL =
new AzureStorageFSHttpsURL(account, container, newPath, sasToken)

override def prefix: String = s"https://$account.blob.core.windows.net/$container"
}

object AzureStorageFS {
private val HAIL_AZ_URI_REGEX = "^hail-az:\\/\\/([a-z0-9_\\-\\.]+)\\/([a-z0-9_\\-\\.]+)(\\/.*)?".r

private val AZURE_HTTPS_URI_REGEX =
"^https:\\/\\/([a-z0-9_\\-\\.]+)\\.blob\\.core\\.windows\\.net\\/([a-z0-9_\\-\\.]+)(\\/.*)?".r

def parseUrl(filename: String): AzureStorageFSURL = {
val scheme = filename.split(":")(0)
if (scheme == "hail-az") {
parseHailAzUrl(filename)
} else if (scheme == "https") {
parseHttpsUrl(filename)
} else {
throw new IllegalArgumentException(s"Invalid scheme, expected hail-az or https: $scheme")
}
}

private[this] def parseHttpsUrl(filename: String): AzureStorageFSHttpsURL = {
AZURE_HTTPS_URI_REGEX
.findFirstMatchIn(filename)
.map { m =>
val (path, sasToken) = parsePathAndQuery(m.group(3))
new AzureStorageFSHttpsURL(m.group(1), m.group(2), path, sasToken)
new AzureStorageFSURL(m.group(1), m.group(2), path, sasToken)
}
.getOrElse(throw new IllegalArgumentException(
"ABS URI must be of the form https://<ACCOUNT>.blob.core.windows.net/<CONTAINER>/<PATH>"
))
}

private[this] def parseHailAzUrl(filename: String): AzureStorageFSHailAzURL = {
HAIL_AZ_URI_REGEX
.findFirstMatchIn(filename)
.map { m =>
val (path, sasToken) = parsePathAndQuery(m.group(3))
new AzureStorageFSHailAzURL(m.group(1), m.group(2), path, sasToken)
}
.getOrElse(throw new IllegalArgumentException(
"hail-az URI must be of the form hail-az://<ACCOUNT>/<CONTAINER>/<PATH>"
))
}

private[this] def parsePathAndQuery(maybeNullPath: String): (String, Option[String]) = {
val pathAndMaybeQuery = Paths.get(if (maybeNullPath == null) ""
else maybeNullPath.stripPrefix("/")).normalize.toString
Expand Down