geopandas · jorisvandenbossche · Sep 23, 2021 · Sep 10, 2021 · Sep 13, 2021 · Sep 13, 2021
diff --git a/geopandas/io/arrow.py b/geopandas/io/arrow.py
@@ -32,6 +32,14 @@
 # }
 
 
+def _is_fsspec_url(url):
+    return (
+        isinstance(url, str)
+        and "://" in url
+        and not url.startswith(("http://", "https://"))
+    )
+
+
 def _create_metadata(df):
     """Create and encode geo metadata dict.
 
@@ -339,7 +347,45 @@ def _arrow_to_geopandas(table):
     return GeoDataFrame(df, geometry=geometry)
 
 
-def _read_parquet(path, columns=None, **kwargs):
+def _get_filesystem_path(path, filesystem=None, storage_options=None):
+    """
+    Get the filesystem and path for a given filesystem and path.
+
+    If the filesystem is not None then it's just returned as is.
+    """
+    import pyarrow
+
+    if (
+        isinstance(path, str)
+        and storage_options is None
+        and filesystem is None
+        and LooseVersion(pyarrow.__version__) >= "5.0.0"
+    ):
+        # Use the native pyarrow filesystem if possible.
+        try:
+            from pyarrow.fs import FileSystem
+
+            filesystem, path = FileSystem.from_uri(path)
+        except Exception:
+            # fallback to use get_handle / fsspec for filesystems
+            # that pyarrow doesn't support
+            pass
+
+    if _is_fsspec_url(path) and filesystem is None:
+        fsspec = import_optional_dependency(
+            "fsspec", extra="fsspec is requred for 'storage_options'."
+        )
+        filesystem, path = fsspec.core.url_to_fs(path, **(storage_options or {}))
+
+    if filesystem is None and storage_options:
+        raise ValueError(
+            "Cannot provide 'storage_options' with non-fsspec path '{}'".format(path)
+        )
+
+    return filesystem, path
+
+
+def _read_parquet(path, columns=None, storage_options=None, **kwargs):
     """
     Load a Parquet object from the file path, returning a GeoDataFrame.
 
@@ -366,6 +412,17 @@ def _read_parquet(path, columns=None, **kwargs):
         geometry read from the file will be set as the geometry column
         of the returned GeoDataFrame.  If no geometry columns are present,
         a ``ValueError`` will be raised.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g. host,
+        port, username, password, etc. For HTTP(S) URLs the key-value pairs are
+        forwarded to urllib as header options. For other URLs (e.g. starting with
+        "s3://", and "gcs://") the key-value pairs are forwarded to fsspec. Please
+        see fsspec and urllib for more details.
+
+        When no storage options are provided and a filesystem is implemented by
+        both ``pyarrow.fs`` and ``fsspec`` (e.g. "s3://") then the ``pyarrow.fs``
+        filesystem is preferred. Provide the instantiated fsspec filesystem using
+        the ``filesystem`` keyword if you wish to use its implementation.
     **kwargs
         Any additional kwargs passed to pyarrow.parquet.read_table().
 
@@ -388,9 +445,15 @@ def _read_parquet(path, columns=None, **kwargs):
     parquet = import_optional_dependency(
         "pyarrow.parquet", extra="pyarrow is required for Parquet support."
     )
+    # TODO(https://github.com/pandas-dev/pandas/pull/41194): see if pandas
+    # adds filesystem as a keyword and match that.
+    filesystem = kwargs.pop("filesystem", None)
+    filesystem, path = _get_filesystem_path(
+        path, filesystem=filesystem, storage_options=storage_options
+    )
 
     kwargs["use_pandas_metadata"] = True
-    table = parquet.read_table(path, columns=columns, **kwargs)
+    table = parquet.read_table(path, columns=columns, filesystem=filesystem, **kwargs)
 
     return _arrow_to_geopandas(table)
 

diff --git a/geopandas/io/tests/test_arrow.py b/geopandas/io/tests/test_arrow.py
@@ -16,6 +16,7 @@
     _create_metadata,
     _decode_metadata,
     _encode_metadata,
+    _get_filesystem_path,
     _validate_dataframe,
     _validate_metadata,
     METADATA_VERSION,
@@ -498,3 +499,45 @@ def test_feather_arrow_version(tmpdir):
         ImportError, match="pyarrow >= 0.17 required for Feather support"
     ):
         df.to_feather(filename)
+
+
+def test_fsspec_url():
+    fsspec = pytest.importorskip("fsspec")
+    import fsspec.implementations.memory
+
+    class MyMemoryFileSystem(fsspec.implementations.memory.MemoryFileSystem):
+        # Simple fsspec filesystem that adds a required keyword.
+        # Attempting to use this filesystem without the keyword will raise an exception.
+        def __init__(self, is_set, *args, **kwargs):
+            self.is_set = is_set
+            super().__init__(*args, **kwargs)
+
+    fsspec.register_implementation("memory", MyMemoryFileSystem, clobber=True)
+    memfs = MyMemoryFileSystem(is_set=True)
+
+    test_dataset = "naturalearth_lowres"
+    df = read_file(get_path(test_dataset))
+
+    with memfs.open("data.parquet", "wb") as f:
+        df.to_parquet(f)
+
+    result = read_parquet("memory://data.parquet", storage_options=dict(is_set=True))
+    assert_geodataframe_equal(result, df)
+
+    result = read_parquet("memory://data.parquet", filesystem=memfs)
+    assert_geodataframe_equal(result, df)
+
+
+def test_non_fsspec_url_with_storage_options_raises():
+    with pytest.raises(ValueError, match="storage_options"):
+        test_dataset = "naturalearth_lowres"
+        read_parquet(get_path(test_dataset), storage_options={"foo": "bar"})
+
+
+@pytest.mark.skipif(
+    pyarrow.__version__ < LooseVersion("5.0.0"),
+    reason="pyarrow.fs requires pyarrow>=5.0.0",
+)
+def test_prefers_pyarrow_fs():
+    filesystem, _ = _get_filesystem_path("file:///data.parquet")
+    assert isinstance(filesystem, pyarrow.fs.LocalFileSystem)