Skip to content

Commit

Permalink
Inventory report: fix nit and typo (#578)
Browse files Browse the repository at this point in the history
Co-authored-by: Hans Easton <hanseaston@google.com>
  • Loading branch information
hanseaston and Hans Easton committed Sep 12, 2023
1 parent 329f6d8 commit f3c1eb0
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 31 deletions.
12 changes: 3 additions & 9 deletions gcsfs/core.py
Expand Up @@ -539,7 +539,6 @@ async def _get_object(self, path):
return self._process_object(bucket, res)

async def _list_objects(self, path, prefix="", versions=False, **kwargs):

bucket, key, generation = self.split_path(path)
path = path.rstrip("/")

Expand Down Expand Up @@ -583,10 +582,8 @@ async def _list_objects(self, path, prefix="", versions=False, **kwargs):
return []
out = pseudodirs + items

use_snapshot_listing = (
False
if not inventory_report_info
else inventory_report_info.get("use_snapshot_listing")
use_snapshot_listing = inventory_report_info and inventory_report_info.get(
"use_snapshot_listing"
)

# Don't cache prefixed/partial listings, in addition to
Expand All @@ -598,7 +595,6 @@ async def _list_objects(self, path, prefix="", versions=False, **kwargs):
async def _do_list_objects(
self, path, max_results=None, delimiter="/", prefix="", versions=False, **kwargs
):

"""Object listing for the given {bucket}/{prefix}/ path."""
bucket, _path, generation = self.split_path(path)
_path = "" if not _path else _path.rstrip("/") + "/"
Expand Down Expand Up @@ -659,8 +655,7 @@ async def _concurrent_list_objects_helper(
"""

# Extract out the names of the objects fetched from the inventory report.
snapshot_object_names = [item["name"] for item in items]
snapshot_object_names = sorted(snapshot_object_names)
snapshot_object_names = sorted([item["name"] for item in items])

# Determine the number of coroutines needed for concurrent listing.
# Ideally, want each coroutine to fetch a single page of objects.
Expand Down Expand Up @@ -755,7 +750,6 @@ async def _sequential_list_objects_helper(
next_page_token = page.get("nextPageToken", None)

while next_page_token is not None:

page = await self._call(
"GET",
"b/{}/o",
Expand Down
11 changes: 2 additions & 9 deletions gcsfs/inventory_report.py
Expand Up @@ -106,14 +106,14 @@ class (see 'core.py').
use_snapshot_listing=use_snapshot_listing,
)

# Use the config to fetch all inventory report medadata.
# Use the config to fetch all inventory report metadata.
unsorted_inventory_report_metadata = await cls._fetch_inventory_report_metadata(
gcs_file_system=gcs_file_system,
inventory_report_config=inventory_report_config,
)

# Sort the metadata based on reverse created time order.
inventory_report_metadata = cls._sort_inventory_report_medatada(
inventory_report_metadata = cls._sort_inventory_report_metadata(
unsorted_inventory_report_metadata=unsorted_inventory_report_metadata
)

Expand Down Expand Up @@ -397,13 +397,11 @@ class (see 'core.py').
# will be many inventory reports on the same day. But including this
# logic for robustness.
for metadata in inventory_report_metadata:

inventory_report_date = InventoryReport._convert_str_to_datetime(
metadata["timeCreated"]
).date()

if inventory_report_date == most_recent_date:

# Download the raw inventory report if the date matches.
# Header is not needed, we only need to process and store
# the content.
Expand Down Expand Up @@ -454,7 +452,6 @@ class (see 'core.py').
objects = []

for content in inventory_report_content:

# Split the content into lines based on the specified separator.
lines = content.split(record_separator)

Expand All @@ -464,7 +461,6 @@ class (see 'core.py').

# Parse each line of the inventory report.
for line in lines:

obj = InventoryReport._parse_inventory_report_line(
inventory_report_line=line,
use_snapshot_listing=use_snapshot_listing,
Expand Down Expand Up @@ -554,18 +550,15 @@ def _construct_final_snapshot(objects, prefix, use_snapshot_listing):
# Filter by the prefix and return the list if the user does not want to use
# the snapshot for listing.
if use_snapshot_listing is False:

return [obj for obj in objects if obj.get("name").startswith(prefix)], []

else:

# If the user wants to use the snapshot, generate both the items and
# prefixes manually.
items = []
prefixes = set()

for obj in objects:

# Fetch the name of the object.
obj_name = obj.get("name")

Expand Down
12 changes: 0 additions & 12 deletions gcsfs/tests/test_inventory_report.py
Expand Up @@ -66,7 +66,6 @@ def test_validate_inventory_report_info(
async def test_fetch_raw_inventory_report_config(
self, location, id, exception, expected_result
):

# Mocking the gcs_file_system.
gcs_file_system = mock.MagicMock()
gcs_file_system.project = "project"
Expand Down Expand Up @@ -95,7 +94,6 @@ async def test_fetch_raw_inventory_report_config(
assert result == expected_result

def test_parse_raw_inventory_report_config_invalid_date(self):

today = datetime.today().date()

# Get tomorrow's date.
Expand Down Expand Up @@ -130,7 +128,6 @@ def test_parse_raw_inventory_report_config_invalid_date(self):
)

def test_parse_raw_inventory_report_config_missing_metadata_fields(self):

raw_inventory_report_config = {
"frequencyOptions": mock.MagicMock(),
"objectMetadataReportOptions": {
Expand All @@ -149,7 +146,6 @@ def test_parse_raw_inventory_report_config_missing_metadata_fields(self):
)

def test_parse_raw_inventory_report_config_returns_correct_config(self):

bucket = "bucket"
destination_path = "path/to/inventory-report"
metadata_fields = ["project", "bucket", "name", "size"]
Expand Down Expand Up @@ -209,7 +205,6 @@ def test_parse_raw_inventory_report_config_returns_correct_config(self):

@pytest.mark.asyncio
async def test_fetch_inventory_report_metadata_no_reports(self):

# Create a mock for GCSFileSystem.
gcs_file_system = mock.MagicMock(spec=GCSFileSystem)

Expand All @@ -233,7 +228,6 @@ async def test_fetch_inventory_report_metadata_no_reports(self):

@pytest.mark.asyncio
async def test_fetch_inventory_report_metadata_multiple_calls(self):

# Create a mock for GCSFileSystem.
gcs_file_system = mock.MagicMock(spec=GCSFileSystem)

Expand Down Expand Up @@ -367,7 +361,6 @@ def download_inventory_report_content_setup(self, request):
async def test_download_inventory_report_content(
self, download_inventory_report_content_setup
):

(
gcs_file_system,
inventory_report_metadata,
Expand Down Expand Up @@ -418,7 +411,6 @@ def test_parse_inventory_report_line(
bucket,
expected,
):

# Mock InventoryReportConfig.
inventory_report_config = mock.MagicMock(spec=InventoryReportConfig)
inventory_report_config.obj_name_idx = inventory_report_config_attrs.get(
Expand Down Expand Up @@ -482,7 +474,6 @@ def test_parse_inventory_report_line(
]
)
def parse_inventory_report_content_setup(self, request):

# Mock the necessary parameters.
gcs_file_system = mock.MagicMock()
bucket = mock.MagicMock()
Expand Down Expand Up @@ -515,7 +506,6 @@ def parse_inventory_report_content_setup(self, request):
)

def test_parse_inventory_reports(self, parse_inventory_report_content_setup):

(
gcs_file_system,
inventory_report_content,
Expand Down Expand Up @@ -723,7 +713,6 @@ def test_parse_inventory_reports(self, parse_inventory_report_content_setup):
def test_construct_final_snapshot(
self, use_snapshot_listing, prefix, mock_objects, expected_result
):

# Construct the final snapshot.
result = InventoryReport._construct_final_snapshot(
objects=mock_objects,
Expand All @@ -741,7 +730,6 @@ def test_construct_final_snapshot(
# Test that the fields of the inventory report config are correctly stored.
class TestInventoryReportConfig:
def test_inventory_report_config_creation(self):

csv_options = {}
bucket = "bucket"
destination_path = ""
Expand Down
1 change: 0 additions & 1 deletion gcsfs/tests/test_inventory_report_listing.py
Expand Up @@ -7,7 +7,6 @@

# Basic integration test to ensure listing returns the correct result.
def test_ls_base(monkeypatch, gcs):

# First get results from original listing.
items = gcs.ls(TEST_BUCKET)

Expand Down

0 comments on commit f3c1eb0

Please sign in to comment.