Skip to content

Commit

Permalink
Allow configuring bucket location (#455)
Browse files Browse the repository at this point in the history
* allow to configure bucket location

* add docstring about location

* change location default to None

Co-authored-by: Martin Durant <martin.durant@alumni.utoronto.ca>
  • Loading branch information
janjagusch and martindurant committed Mar 22, 2022
1 parent 9119d3d commit 07e99a8
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 7 deletions.
26 changes: 23 additions & 3 deletions gcsfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ class GCSFileSystem(AsyncFileSystem):
path part) for communication. If not given, defaults to the value
of environment variable "STORAGE_EMULATOR_HOST"; if that is not set
either, will use the standard Google endpoint.
default_location: str
Default location where buckets are created, like 'US' or 'EUROPE-WEST3'.
You can find a list of all available locations here:
https://cloud.google.com/storage/docs/locations#available-locations
"""

scopes = {"read_only", "read_write", "full_control"}
Expand All @@ -241,6 +245,7 @@ def __init__(
loop=None,
timeout=None,
endpoint_url=None,
default_location=None,
**kwargs,
):
super().__init__(
Expand All @@ -264,6 +269,7 @@ def __init__(
self._session = None
self._endpoint = endpoint_url
self.session_kwargs = session_kwargs or {}
self.default_location = default_location

if check_connection:
warnings.warn(
Expand Down Expand Up @@ -570,7 +576,8 @@ async def _list_buckets(self):
next_page_token = page.get("nextPageToken", None)

buckets = [
{"name": i["name"] + "/", "size": 0, "type": "directory"} for i in items
{**i, "name": i["name"] + "/", "size": 0, "type": "directory"}
for i in items
]
self.dircache[""] = buckets
return buckets
Expand All @@ -597,7 +604,11 @@ def invalidate_cache(self, path=None):
path = self._parent(path)

async def _mkdir(
self, bucket, acl="projectPrivate", default_acl="bucketOwnerFullControl"
self,
bucket,
acl="projectPrivate",
default_acl="bucketOwnerFullControl",
location=None,
):
"""
New bucket
Expand All @@ -611,18 +622,27 @@ async def _mkdir(
access for the bucket itself
default_acl: str, one of ACLs
default ACL for objects created in this bucket
location: Optional[str]
Location where buckets are created, like 'US' or 'EUROPE-WEST3'.
If not provided, defaults to `self.default_location`.
You can find a list of all available locations here:
https://cloud.google.com/storage/docs/locations#available-locations
"""
if bucket in ["", "/"]:
raise ValueError("Cannot create root bucket")
if "/" in bucket:
return
json_data = {"name": bucket}
location = location or self.default_location
if location:
json_data["location"] = location
await self._call(
method="POST",
path="b",
predefinedAcl=acl,
project=self.project,
predefinedDefaultObjectAcl=default_acl,
json={"name": bucket},
json=json_data,
json_out=True,
)
self.invalidate_cache(bucket)
Expand Down
16 changes: 13 additions & 3 deletions gcsfs/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,19 @@ def docker_gcs():


@pytest.fixture
def gcs(docker_gcs, populate=True):
GCSFileSystem.clear_instance_cache()
gcs = fsspec.filesystem("gcs", endpoint_url=docker_gcs)
def gcs_factory(docker_gcs):
def factory(default_location=None):
GCSFileSystem.clear_instance_cache()
return fsspec.filesystem(
"gcs", endpoint_url=docker_gcs, default_location=default_location
)

return factory


@pytest.fixture
def gcs(gcs_factory, populate=True):
gcs = gcs_factory()
try:
# ensure we're empty.
try:
Expand Down
47 changes: 46 additions & 1 deletion gcsfs/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
from itertools import chain
from unittest import mock
from urllib.parse import urlparse, parse_qs, unquote
from uuid import uuid4

import pytest
import requests

from fsspec.utils import seek_delimiter
from fsspec.asyn import sync

from gcsfs.tests.settings import TEST_BUCKET, TEST_PROJECT, TEST_REQUESTER_PAYS_BUCKET
from gcsfs.tests.conftest import (
Expand Down Expand Up @@ -770,7 +772,7 @@ def test_bigger_than_block_read(gcs):

def test_current(gcs):
assert GCSFileSystem.current() is gcs
gcs2 = GCSFileSystem(endpoint_url=gcs._endpoint)
gcs2 = GCSFileSystem(endpoint_url=gcs._endpoint, default_location=None)
assert gcs2.session is gcs.session


Expand Down Expand Up @@ -980,6 +982,49 @@ def test_percent_file_name(gcs):
assert set(gcs.ls(parent)) == set([fn, fn2])


@pytest.mark.parametrize(
"location",
[
(None),
("US"),
("EUROPE-WEST3"),
("europe-west3"),
],
)
def test_bucket_location(gcs_factory, location):
gcs = gcs_factory(default_location=location)
if not gcs.on_google:
pytest.skip("emulator can only create buckets in the 'US-CENTRAL1' location.")
bucket_name = str(uuid4())
try:
gcs.mkdir(bucket_name)
bucket = [
b
for b in sync(gcs.loop, gcs._list_buckets, timeout=gcs.timeout)
if b["name"] == bucket_name + "/"
][0]
assert bucket["location"] == (location or "US").upper()
finally:
gcs.rm(bucket_name, recursive=True)


def test_bucket_default_location_overwrite(gcs_factory):
gcs = gcs_factory(default_location="US")
if not gcs.on_google:
pytest.skip("emulator can only create buckets in the 'US-CENTRAL1' location.")
bucket_name = str(uuid4())
try:
gcs.mkdir(bucket_name, location="EUROPE-WEST3")
bucket = [
b
for b in sync(gcs.loop, gcs._list_buckets, timeout=gcs.timeout)
if b["name"] == bucket_name + "/"
][0]
assert bucket["location"] == "EUROPE-WEST3"
finally:
gcs.rm(bucket_name, recursive=True)


def test_dir_marker(gcs):
gcs.touch(f"{TEST_BUCKET}/placeholder/")
gcs.touch(f"{TEST_BUCKET}/placeholder/inner")
Expand Down

0 comments on commit 07e99a8

Please sign in to comment.