From 07e99a824d7093e61fc6f0ecc6a4ff795282217e Mon Sep 17 00:00:00 2001 From: Jan-Benedikt Jagusch Date: Tue, 22 Mar 2022 18:55:32 +0100 Subject: [PATCH] Allow configuring bucket location (#455) * allow to configure bucket location * add docstring about location * change location default to None Co-authored-by: Martin Durant --- gcsfs/core.py | 26 +++++++++++++++++++--- gcsfs/tests/conftest.py | 16 +++++++++++--- gcsfs/tests/test_core.py | 47 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 82 insertions(+), 7 deletions(-) diff --git a/gcsfs/core.py b/gcsfs/core.py index 9db3052b..e1dff9f7 100644 --- a/gcsfs/core.py +++ b/gcsfs/core.py @@ -216,6 +216,10 @@ class GCSFileSystem(AsyncFileSystem): path part) for communication. If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. + default_location: str + Default location where buckets are created, like 'US' or 'EUROPE-WEST3'. + You can find a list of all available locations here: + https://cloud.google.com/storage/docs/locations#available-locations """ scopes = {"read_only", "read_write", "full_control"} @@ -241,6 +245,7 @@ def __init__( loop=None, timeout=None, endpoint_url=None, + default_location=None, **kwargs, ): super().__init__( @@ -264,6 +269,7 @@ def __init__( self._session = None self._endpoint = endpoint_url self.session_kwargs = session_kwargs or {} + self.default_location = default_location if check_connection: warnings.warn( @@ -570,7 +576,8 @@ async def _list_buckets(self): next_page_token = page.get("nextPageToken", None) buckets = [ - {"name": i["name"] + "/", "size": 0, "type": "directory"} for i in items + {**i, "name": i["name"] + "/", "size": 0, "type": "directory"} + for i in items ] self.dircache[""] = buckets return buckets @@ -597,7 +604,11 @@ def invalidate_cache(self, path=None): path = self._parent(path) async def _mkdir( - self, bucket, acl="projectPrivate", default_acl="bucketOwnerFullControl" + self, + bucket, + acl="projectPrivate", + default_acl="bucketOwnerFullControl", + location=None, ): """ New bucket @@ -611,18 +622,27 @@ async def _mkdir( access for the bucket itself default_acl: str, one of ACLs default ACL for objects created in this bucket + location: Optional[str] + Location where buckets are created, like 'US' or 'EUROPE-WEST3'. + If not provided, defaults to `self.default_location`. + You can find a list of all available locations here: + https://cloud.google.com/storage/docs/locations#available-locations """ if bucket in ["", "/"]: raise ValueError("Cannot create root bucket") if "/" in bucket: return + json_data = {"name": bucket} + location = location or self.default_location + if location: + json_data["location"] = location await self._call( method="POST", path="b", predefinedAcl=acl, project=self.project, predefinedDefaultObjectAcl=default_acl, - json={"name": bucket}, + json=json_data, json_out=True, ) self.invalidate_cache(bucket) diff --git a/gcsfs/tests/conftest.py b/gcsfs/tests/conftest.py index d27bfcfa..75bb8f3c 100644 --- a/gcsfs/tests/conftest.py +++ b/gcsfs/tests/conftest.py @@ -84,9 +84,19 @@ def docker_gcs(): @pytest.fixture -def gcs(docker_gcs, populate=True): - GCSFileSystem.clear_instance_cache() - gcs = fsspec.filesystem("gcs", endpoint_url=docker_gcs) +def gcs_factory(docker_gcs): + def factory(default_location=None): + GCSFileSystem.clear_instance_cache() + return fsspec.filesystem( + "gcs", endpoint_url=docker_gcs, default_location=default_location + ) + + return factory + + +@pytest.fixture +def gcs(gcs_factory, populate=True): + gcs = gcs_factory() try: # ensure we're empty. try: diff --git a/gcsfs/tests/test_core.py b/gcsfs/tests/test_core.py index e6bcece7..19dfbaf6 100644 --- a/gcsfs/tests/test_core.py +++ b/gcsfs/tests/test_core.py @@ -5,11 +5,13 @@ from itertools import chain from unittest import mock from urllib.parse import urlparse, parse_qs, unquote +from uuid import uuid4 import pytest import requests from fsspec.utils import seek_delimiter +from fsspec.asyn import sync from gcsfs.tests.settings import TEST_BUCKET, TEST_PROJECT, TEST_REQUESTER_PAYS_BUCKET from gcsfs.tests.conftest import ( @@ -770,7 +772,7 @@ def test_bigger_than_block_read(gcs): def test_current(gcs): assert GCSFileSystem.current() is gcs - gcs2 = GCSFileSystem(endpoint_url=gcs._endpoint) + gcs2 = GCSFileSystem(endpoint_url=gcs._endpoint, default_location=None) assert gcs2.session is gcs.session @@ -980,6 +982,49 @@ def test_percent_file_name(gcs): assert set(gcs.ls(parent)) == set([fn, fn2]) +@pytest.mark.parametrize( + "location", + [ + (None), + ("US"), + ("EUROPE-WEST3"), + ("europe-west3"), + ], +) +def test_bucket_location(gcs_factory, location): + gcs = gcs_factory(default_location=location) + if not gcs.on_google: + pytest.skip("emulator can only create buckets in the 'US-CENTRAL1' location.") + bucket_name = str(uuid4()) + try: + gcs.mkdir(bucket_name) + bucket = [ + b + for b in sync(gcs.loop, gcs._list_buckets, timeout=gcs.timeout) + if b["name"] == bucket_name + "/" + ][0] + assert bucket["location"] == (location or "US").upper() + finally: + gcs.rm(bucket_name, recursive=True) + + +def test_bucket_default_location_overwrite(gcs_factory): + gcs = gcs_factory(default_location="US") + if not gcs.on_google: + pytest.skip("emulator can only create buckets in the 'US-CENTRAL1' location.") + bucket_name = str(uuid4()) + try: + gcs.mkdir(bucket_name, location="EUROPE-WEST3") + bucket = [ + b + for b in sync(gcs.loop, gcs._list_buckets, timeout=gcs.timeout) + if b["name"] == bucket_name + "/" + ][0] + assert bucket["location"] == "EUROPE-WEST3" + finally: + gcs.rm(bucket_name, recursive=True) + + def test_dir_marker(gcs): gcs.touch(f"{TEST_BUCKET}/placeholder/") gcs.touch(f"{TEST_BUCKET}/placeholder/inner")