From daea7d1ebb61d2e7ab3fa769cf580adbb9834952 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 13 Nov 2025 08:12:13 +0000 Subject: [PATCH 1/4] Initial plan From f28f9edf8f689396475f524c634c3ec0203c2731 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 13 Nov 2025 08:17:43 +0000 Subject: [PATCH 2/4] Allow users to override seekable parameter in get_file and cat_file methods Co-authored-by: OneSizeFitsQuorum <32640567+OneSizeFitsQuorum@users.noreply.github.com> --- fsspec/implementations/arrow.py | 4 +- fsspec/implementations/tests/test_arrow.py | 50 ++++++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/arrow.py b/fsspec/implementations/arrow.py index 19eaa9b96..4c1d7d206 100644 --- a/fsspec/implementations/arrow.py +++ b/fsspec/implementations/arrow.py @@ -205,11 +205,11 @@ def modified(self, path): return self.fs.get_file_info(path).mtime def cat_file(self, path, start=None, end=None, **kwargs): - kwargs["seekable"] = start not in [None, 0] + kwargs.setdefault("seekable", start not in [None, 0]) return super().cat_file(path, start=None, end=None, **kwargs) def get_file(self, rpath, lpath, **kwargs): - kwargs["seekable"] = False + kwargs.setdefault("seekable", False) super().get_file(rpath, lpath, **kwargs) diff --git a/fsspec/implementations/tests/test_arrow.py b/fsspec/implementations/tests/test_arrow.py index d564cc4ce..f40c42c07 100644 --- a/fsspec/implementations/tests/test_arrow.py +++ b/fsspec/implementations/tests/test_arrow.py @@ -268,3 +268,53 @@ def test_get_kwargs_from_urls_hadoop_fs(): assert kwargs["host"] == "localhost" assert kwargs["port"] == 8020 assert "replication" not in kwargs + + +def test_get_file_seekable_default(fs, remote_dir, tmp_path): + """Test that get_file defaults to seekable=False but allows override.""" + data = b"test data for seekable" + + # Create a test file + with fs.open(remote_dir + "/test_file.txt", "wb") as f: + f.write(data) + + # Test default behavior (seekable=False) + local_file = tmp_path / "test_default.txt" + fs.get_file(remote_dir + "/test_file.txt", str(local_file)) + with open(local_file, "rb") as f: + assert f.read() == data + + # Test with explicit seekable=True + local_file_seekable = tmp_path / "test_seekable.txt" + fs.get_file(remote_dir + "/test_file.txt", str(local_file_seekable), seekable=True) + with open(local_file_seekable, "rb") as f: + assert f.read() == data + + # Test with explicit seekable=False + local_file_not_seekable = tmp_path / "test_not_seekable.txt" + fs.get_file( + remote_dir + "/test_file.txt", str(local_file_not_seekable), seekable=False + ) + with open(local_file_not_seekable, "rb") as f: + assert f.read() == data + + +def test_cat_file_seekable_override(fs, remote_dir): + """Test that cat_file allows seekable to be overridden.""" + data = b"test data for cat_file seekable" + + # Create a test file + with fs.open(remote_dir + "/test_cat.txt", "wb") as f: + f.write(data) + + # Test default behavior - when start is None, seekable should default to False + result = fs.cat_file(remote_dir + "/test_cat.txt") + assert result == data + + # Test with explicit seekable=True even when start is None + result = fs.cat_file(remote_dir + "/test_cat.txt", seekable=True) + assert result == data + + # Test with explicit seekable=False + result = fs.cat_file(remote_dir + "/test_cat.txt", seekable=False) + assert result == data From 8887739575a9ef7fc95757b365af9fe2a3241ca1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 14 Nov 2025 09:03:40 +0000 Subject: [PATCH 3/4] Initial plan From 233bd014fc205dab0f02a31140dbe7f43a390513 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 14 Nov 2025 09:11:20 +0000 Subject: [PATCH 4/4] Add tests to verify size() behavior with seekable parameter Co-authored-by: OneSizeFitsQuorum <32640567+OneSizeFitsQuorum@users.noreply.github.com> --- fsspec/implementations/tests/test_arrow.py | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/fsspec/implementations/tests/test_arrow.py b/fsspec/implementations/tests/test_arrow.py index f40c42c07..edf48eb6d 100644 --- a/fsspec/implementations/tests/test_arrow.py +++ b/fsspec/implementations/tests/test_arrow.py @@ -318,3 +318,41 @@ def test_cat_file_seekable_override(fs, remote_dir): # Test with explicit seekable=False result = fs.cat_file(remote_dir + "/test_cat.txt", seekable=False) assert result == data + + +def test_seekable_true_allows_size_method(fs, remote_dir): + """Test that size() method works when seekable=True.""" + data = b"test data for size method" * 10 + + # Create a test file + test_file = remote_dir + "/test_size.txt" + with fs.open(test_file, "wb") as f: + f.write(data) + + # Open with seekable=True - size() should work + with fs.open(test_file, "rb", seekable=True) as f: + assert f.seekable() is True + # Verify size() method works and returns correct size + file_size = f.size() + assert file_size == len(data) + # Also verify we can read the data + assert f.read() == data + + +def test_seekable_false_prevents_size_method(fs, remote_dir): + """Test that size() method raises OSError when seekable=False.""" + data = b"test data for size method" * 10 + + # Create a test file + test_file = remote_dir + "/test_size.txt" + with fs.open(test_file, "wb") as f: + f.write(data) + + # Open with seekable=False - size() should raise OSError + with fs.open(test_file, "rb", seekable=False) as f: + assert f.seekable() is False + # Verify size() raises OSError + with pytest.raises(OSError, match="only valid on seekable files"): + f.size() + # Verify we can still read the data + assert f.read() == data