# Explore PyArrow, GeoArrow, GeoParquet for Optimal Performance

Work on Issue [#1 Leverage GeoArrow to speedup vector data processing & viz](https://github.com/WikiWatershed/global-hydrography/issues/1)


# Imports & Setup

In [1]:
from pathlib import Path

import fsspec
import numpy as np
import pandas as pd
import geopandas as gpd
import pyogrio
import pyarrow as pa

## Importing geoarrow slows down some funcitons, so we do it later for testing
# import geoarrow.pyarrow as ga
# import geoarrow.pandas as _

In [2]:
# Confirm your current working directory (cwd) and repo/project directory
working_dir = Path.cwd()
project_dir = working_dir.parent
data_dir = project_dir / 'data_temp' # a temporary data directory that we .gitignore
data_dir

PosixPath('/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp')

In [3]:
# Create local file system using fsspec library
local_fs = fsspec.filesystem('local') 

In [4]:
tdx_dir = data_dir / 'nga'
local_fs.ls(tdx_dir)

['/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/TDX_streamnet_1020011530_01.gpkg',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/TDX_streamreach_basins_7020038340_01.gpkg',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/.DS_Store',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/test.json',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/download.php?file=hydrobasins_level2',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/test.zip',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/TDX_streamreach_basins_1020011530_01.gpkg',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/hydrobasins_level2.geojson',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/TDX_streamreach_basins_1020040190_01.gpkg',
 '/Users/aaufdenkampe/Documents/Python/global-hydrography/data_temp/nga/TDX_stream

In [5]:
tdx_basins_7020038340_fp = tdx_dir / 'TDX_streamreach_basins_7020038340_01.gpkg'
tdx_stream_7020038340_fp = tdx_dir / 'TDX_streamnet_7020038340_01.gpkg'

In [43]:
local_fs.info(tdx_stream_7020038340_fp)['size']

702283776

# Read Arrow directly

In [51]:
# pyogrio.read_arrow() avoids conversion to GeoDataframe
# Takes about 3 sec for Anthony
pa_table = pyogrio.read_arrow(
    tdx_stream_7020038340_fp,
)
pa_table

({'crs': 'EPSG:4326',
  'encoding': 'UTF-8',
  'fields': array(['LINKNO', 'DSLINKNO', 'USLINKNO1', 'USLINKNO2', 'DSNODEID',
         'strmOrder', 'Length', 'Magnitude', 'DSContArea', 'strmDrop',
         'Slope', 'StraightL', 'USContArea', 'WSNO', 'DOUTEND', 'DOUTSTART',
         'DOUTMID'], dtype=object),
  'geometry_type': 'LineString',
  'geometry_name': 'geom',
  'fid_column': 'fid'},
 pyarrow.Table
 LINKNO: int32
 DSLINKNO: int32
 USLINKNO1: int32
 USLINKNO2: int32
 DSNODEID: int64
 strmOrder: int32
 Length: double
 Magnitude: int32
 DSContArea: double
 strmDrop: double
 Slope: double
 StraightL: double
 USContArea: double
 WSNO: int32
 DOUTEND: double
 DOUTSTART: double
 DOUTMID: double
 geom: extension<geoarrow.wkb<WkbType>>
 ----
 LINKNO: [[0,1,593,1777,2,...,114546,114547,115730,115731,116915],[117507,118099,118691,119283,119876,...,478,1070,1071,1662,2254],[2845,2846,2847,3438,3439,...,587,1180,1772,588,589]]
 DSLINKNO: [[1777,2369,1777,2369,4146,...,146515,115139,164275,1163

In [52]:
pa_table?

[0;31mType:[0m        tuple
[0;31mString form:[0m
({'crs': 'EPSG:4326', 'encoding': 'UTF-8', 'fields': array(['LINKNO', 'DSLINKNO', 'USLINKNO1', 'U <...> 36.19],[25.08,8.53,0,0,8.44,...,2.2,8.47,9.2,1.15,0],[1.4,7.99,1.8,0,2.67,...,0,0,0,0.76,0]]
           ...)
[0;31mLength:[0m      2
[0;31mDocstring:[0m  
Built-in immutable sequence.

If no argument is given, the constructor returns an empty tuple.
If iterable is specified the tuple is initialized from iterable's items.

If the argument is a tuple, the return value is the same object.

In [53]:
type(pa_table[0])

dict

In [54]:
pa_table[0]

{'crs': 'EPSG:4326',
 'encoding': 'UTF-8',
 'fields': array(['LINKNO', 'DSLINKNO', 'USLINKNO1', 'USLINKNO2', 'DSNODEID',
        'strmOrder', 'Length', 'Magnitude', 'DSContArea', 'strmDrop',
        'Slope', 'StraightL', 'USContArea', 'WSNO', 'DOUTEND', 'DOUTSTART',
        'DOUTMID'], dtype=object),
 'geometry_type': 'LineString',
 'geometry_name': 'geom',
 'fid_column': 'fid'}

In [55]:
pa_table[0]['crs']

'EPSG:4326'

In [56]:
type(pa_table[1])

pyarrow.lib.Table

# Benchmark Read Methods on local file

`timeit` is more accurate than `time`, for three reasons:

- it repeats the tests many times to eliminate the influence of other tasks on your machine, such as disk flushing and OS scheduling.
- it disables the garbage collector to prevent that process from skewing the results by scheduling a collection run at an inopportune moment.
- it picks the most accurate timer for your OS. See timeit.default_timer.

From https://stackoverflow.com/questions/17579357/time-time-vs-timeit-timeit


## Before GeoArrow Import

In [6]:
%%timeit
pyogrio.list_layers(tdx_stream_7020038340_fp)

5.54 ms ± 112 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
%%timeit
pyogrio.read_info(tdx_stream_7020038340_fp, layer=0)

5.87 ms ± 151 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [28]:
%%timeit
pyogrio.read_arrow(tdx_stream_7020038340_fp)

549 ms ± 43.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
pa_table = pyogrio.read_arrow(tdx_stream_7020038340_fp)
pa_table

({'crs': 'EPSG:4326',
  'encoding': 'UTF-8',
  'fields': array(['LINKNO', 'DSLINKNO', 'USLINKNO1', 'USLINKNO2', 'DSNODEID',
         'strmOrder', 'Length', 'Magnitude', 'DSContArea', 'strmDrop',
         'Slope', 'StraightL', 'USContArea', 'WSNO', 'DOUTEND', 'DOUTSTART',
         'DOUTMID'], dtype=object),
  'geometry_type': 'LineString',
  'geometry_name': 'geom',
  'fid_column': 'fid'},
 pyarrow.Table
 LINKNO: int32
 DSLINKNO: int32
 USLINKNO1: int32
 USLINKNO2: int32
 DSNODEID: int64
 strmOrder: int32
 Length: double
 Magnitude: int32
 DSContArea: double
 strmDrop: double
 Slope: double
 StraightL: double
 USContArea: double
 WSNO: int32
 DOUTEND: double
 DOUTSTART: double
 DOUTMID: double
 geom: binary
 ----
 LINKNO: [[0,1,593,1777,2,...,114546,114547,115730,115731,116915],[117507,118099,118691,119283,119876,...,478,1070,1071,1662,2254],[2845,2846,2847,3438,3439,...,587,1180,1772,588,589]]
 DSLINKNO: [[1777,2369,1777,2369,4146,...,146515,115139,164275,116323,117507],[118099,141779,

In [10]:
%%timeit
pyogrio.read_dataframe(
    tdx_stream_7020038340_fp, 
    layer=0,
    use_arrow=True, # 50% faster, but doesn't seem to work with s3
)

2.15 s ± 32.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [105]:
pyo_gdf = pyogrio.read_dataframe(
    tdx_stream_7020038340_fp, 
    layer=0,
    use_arrow=True, # 50% faster, but doesn't seem to work with s3
)
pyo_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 140097 entries, 0 to 140096
Data columns (total 18 columns):
 #   Column      Non-Null Count   Dtype   
---  ------      --------------   -----   
 0   LINKNO      140097 non-null  int32   
 1   DSLINKNO    140097 non-null  int32   
 2   USLINKNO1   140097 non-null  int32   
 3   USLINKNO2   140097 non-null  int32   
 4   DSNODEID    140097 non-null  int64   
 5   strmOrder   140097 non-null  int32   
 6   Length      140097 non-null  float64 
 7   Magnitude   140097 non-null  int32   
 8   DSContArea  140097 non-null  float64 
 9   strmDrop    140097 non-null  float64 
 10  Slope       140097 non-null  float64 
 11  StraightL   140097 non-null  float64 
 12  USContArea  140097 non-null  float64 
 13  WSNO        140097 non-null  int32   
 14  DOUTEND     140097 non-null  float64 
 15  DOUTSTART   140097 non-null  float64 
 16  DOUTMID     140097 non-null  float64 
 17  geometry    140097 non-null  geometry
dtypes: float64(9), g

In [None]:
%%timeit
gpd.read_file(
    tdx_stream_7020038340_fp, 
    engine='pyogrio',
    use_arrow=True,
)

1.57 s ± 103 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
gpd_gdf = gpd.read_file(
    tdx_stream_7020038340_fp, 
    engine='pyogrio',
    use_arrow=True,
)
gpd_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 140097 entries, 0 to 140096
Data columns (total 18 columns):
 #   Column      Non-Null Count   Dtype   
---  ------      --------------   -----   
 0   LINKNO      140097 non-null  int32   
 1   DSLINKNO    140097 non-null  int32   
 2   USLINKNO1   140097 non-null  int32   
 3   USLINKNO2   140097 non-null  int32   
 4   DSNODEID    140097 non-null  int64   
 5   strmOrder   140097 non-null  int32   
 6   Length      140097 non-null  float64 
 7   Magnitude   140097 non-null  int32   
 8   DSContArea  140097 non-null  float64 
 9   strmDrop    140097 non-null  float64 
 10  Slope       140097 non-null  float64 
 11  StraightL   140097 non-null  float64 
 12  USContArea  140097 non-null  float64 
 13  WSNO        140097 non-null  int32   
 14  DOUTEND     140097 non-null  float64 
 15  DOUTSTART   140097 non-null  float64 
 16  DOUTMID     140097 non-null  float64 
 17  geometry    140097 non-null  geometry
dtypes: float64(9), g

In [None]:
%%timeit
gpd.read_file(
    tdx_stream_7020038340_fp, 
    engine='pyogrio',
    use_arrow=False,
)

2.81 s ± 90.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## After GeoArrow Import

In [None]:
import geoarrow.pyarrow as ga
import geoarrow.pandas as _

In [None]:
%%timeit
pyogrio.read_arrow(tdx_stream_7020038340_fp)

552 ms ± 18.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
ga_pa_table = pyogrio.read_arrow(tdx_stream_7020038340_fp)
ga_pa_table

({'crs': 'EPSG:4326',
  'encoding': 'UTF-8',
  'fields': array(['LINKNO', 'DSLINKNO', 'USLINKNO1', 'USLINKNO2', 'DSNODEID',
         'strmOrder', 'Length', 'Magnitude', 'DSContArea', 'strmDrop',
         'Slope', 'StraightL', 'USContArea', 'WSNO', 'DOUTEND', 'DOUTSTART',
         'DOUTMID'], dtype=object),
  'geometry_type': 'LineString',
  'geometry_name': 'geom',
  'fid_column': 'fid'},
 pyarrow.Table
 LINKNO: int32
 DSLINKNO: int32
 USLINKNO1: int32
 USLINKNO2: int32
 DSNODEID: int64
 strmOrder: int32
 Length: double
 Magnitude: int32
 DSContArea: double
 strmDrop: double
 Slope: double
 StraightL: double
 USContArea: double
 WSNO: int32
 DOUTEND: double
 DOUTSTART: double
 DOUTMID: double
 geom: extension<geoarrow.wkb<WkbType>>
 ----
 LINKNO: [[0,1,593,1777,2,...,114546,114547,115730,115731,116915],[117507,118099,118691,119283,119876,...,478,1070,1071,1662,2254],[2845,2846,2847,3438,3439,...,587,1180,1772,588,589]]
 DSLINKNO: [[1777,2369,1777,2369,4146,...,146515,115139,164275,1163

NOTE: same read speed, but  geom is now extension<geoarrow.wkb<WkbType>>

In [None]:
%%timeit
pyogrio.read_dataframe(
    tdx_stream_7020038340_fp, 
    layer=0,
    use_arrow=True, # 50% faster, but doesn't seem to work with s3
)

13.7 s ± 391 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


NOTE: this is 6.4 times slower!  13.7 sec vs 2.15 s before importing geoarrow

In [None]:
%%timeit
gpd.read_file(
    tdx_stream_7020038340_fp, 
    engine='pyogrio',
    use_arrow=True,
)

14.1 s ± 1.11 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


NOTE: this is 9.0 times slower!  14.1 sec vs 1.57 s before importign geoarrow

## Conclusions on Read Benchmarks

`pyogrio.read_arrow()` is ~4x faster than the fastest alternative method.

`gpd.read_file(fp, engine='pyogrio', use_arrow=True)` is the 2nd fastest method, but only before importing GeoArrow.

Importing GeoArrow massively slows down `pyogrio.read_dataframe()` and `gpd.read_file()`. Read speeds for `pyogrio.read_arrow()` do not change.

# Convert Arrow Dtypes

In [None]:
# Try converting to arrow dtypes to save storage and speed computation
# NOTE: Can't convert geometry to geoarrow using this menthod
df = gpd_gdf.drop(columns='geometry')
df.info()

pa_df = df.convert_dtypes(dtype_backend='pyarrow')
pa_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 140097 entries, 0 to 140096
Data columns (total 17 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   LINKNO      140097 non-null  int32  
 1   DSLINKNO    140097 non-null  int32  
 2   USLINKNO1   140097 non-null  int32  
 3   USLINKNO2   140097 non-null  int32  
 4   DSNODEID    140097 non-null  int64  
 5   strmOrder   140097 non-null  int32  
 6   Length      140097 non-null  float64
 7   Magnitude   140097 non-null  int32  
 8   DSContArea  140097 non-null  float64
 9   strmDrop    140097 non-null  float64
 10  Slope       140097 non-null  float64
 11  StraightL   140097 non-null  float64
 12  USContArea  140097 non-null  float64
 13  WSNO        140097 non-null  int32  
 14  DOUTEND     140097 non-null  float64
 15  DOUTSTART   140097 non-null  float64
 16  DOUTMID     140097 non-null  float64
dtypes: float64(9), int32(7), int64(1)
memory usage: 14.4 MB
<class 'pandas.core.frame.Data

In [None]:
type(df.LINKNO[0])

numpy.int32

PyArrow doesn't seem to save memory for non-geomery fields

### Convert GeoArrow PyArrow Table

In [None]:
# Get the table
ga_pa_table[1]

pyarrow.Table
LINKNO: int32
DSLINKNO: int32
USLINKNO1: int32
USLINKNO2: int32
DSNODEID: int64
strmOrder: int32
Length: double
Magnitude: int32
DSContArea: double
strmDrop: double
Slope: double
StraightL: double
USContArea: double
WSNO: int32
DOUTEND: double
DOUTSTART: double
DOUTMID: double
geom: extension<geoarrow.wkb<WkbType>>
----
LINKNO: [[0,1,593,1777,2,...,114546,114547,115730,115731,116915],[117507,118099,118691,119283,119876,...,478,1070,1071,1662,2254],[2845,2846,2847,3438,3439,...,587,1180,1772,588,589]]
DSLINKNO: [[1777,2369,1777,2369,4146,...,146515,115139,164275,116323,117507],[118099,141779,119875,119875,128163,...,35406,33630,35406,38366,39550],[33630,38958,41326,41918,42510,...,-1,-1,-1,-1,-1]]
USLINKNO1: [[-1,-1,-1,0,-1,...,113954,113955,62448,106852,108034],[116915,1476,100930,16867,119284,...,-1,-1,-1,-1,-1],[-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1]]
USLINKNO2: [[-1,-1,-1,593,-1,...,91459,89682,63042,89091,883],[882,117507,12130,16275,42915,...,-1,-1,-1,-1,-1],[-1,-1,-1,-1

In [None]:
%%time
# Looks like the conversion to pandas is fast
pa_df = pa_table[1].to_pandas()

CPU times: user 16.8 ms, sys: 61.2 ms, total: 78 ms
Wall time: 10.4 ms


In [None]:
type(pa_df.LINKNO[0])

numpy.int32

In [None]:
type(pa_df.geom[0])

geoarrow.pandas.lib.GeoArrowExtensionScalar

In [None]:
type(pa_df.geom)

pandas.core.series.Series

In [None]:
pa_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 140097 entries, 0 to 140096
Data columns (total 18 columns):
 #   Column      Non-Null Count   Dtype                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  

# Benchmark Write Methods

## Compare Numpy vs Arrow backed dataframes with no geometry

from otherwise identical Pandas dataframes created above.

In [None]:
%%time
file_path = data_dir / 'test_df.parquet'
df.to_parquet(file_path,compression='brotli',)
local_fs.info(file_path)['size']

CPU times: user 1.14 s, sys: 69.5 ms, total: 1.21 s
Wall time: 1.24 s


8216670

In [None]:
%%timeit
pd.read_parquet(file_path)

17.6 ms ± 468 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%%time
file_path = data_dir / 'test_pa_df.parquet'
pa_df.to_parquet(file_path,compression='brotli',)
local_fs.info(file_path)['size']

CPU times: user 1.15 s, sys: 71 ms, total: 1.23 s
Wall time: 1.25 s


8217006

In [None]:
%%timeit
pd.read_parquet(file_path)

16.6 ms ± 111 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


Conclusion: about the same write speed and storage size!

## Compare geometry as GeoArrow

In [None]:
gdf.geometry.dtype

<geopandas.array.GeometryDtype at 0x168fce190>

In [None]:
%%time
file_path = data_dir / 'test_gdf.parquet'
gdf.to_parquet(file_path,compression='brotli',)
local_fs.info(file_path)['size']

CPU times: user 51.4 s, sys: 3.66 s, total: 55 s
Wall time: 55.6 s


109243077

In [None]:
%%timeit
gpd.read_parquet(data_dir / 'test_gdf.parquet')

3.26 s ± 50.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Save DF with GeoArrow Geom

In [None]:
pa_df.geom.dtype

GeoArrowExtensionDtype(geoarrow.wkb <PROJJSON:{
  "$schema": "https://p...>)

In [None]:
%%time
file_path = data_dir / 'test_pa_df.parquet'
pa_df.to_parquet(file_path,compression='brotli',)
local_fs.info(file_path)['size']

CPU times: user 49.7 s, sys: 2.18 s, total: 51.9 s
Wall time: 52.7 s


109248267

In [None]:
%%timeit
pd.read_parquet(data_dir / 'test_pa_df.parquet')

ValueError: format number 1 of "geoarrow.wkb{"crs":{
  "$schema": "https://proj.org/schemas/v0.7/projjson.schema.json",
  "type": "GeographicCRS",
  "name": "WGS 84",
  "datum_ensemble": {
    "name": "World Geodetic System 1984 ensemble",
    "members": [
      {
        "name": "World Geodetic System 1984 (Transit)",
        "id": {
          "authority": "EPSG",
          "code": 1166
        }
      },
      {
        "name": "World Geodetic System 1984 (G730)",
        "id": {
          "authority": "EPSG",
          "code": 1152
        }
      },
      {
        "name": "World Geodetic System 1984 (G873)",
        "id": {
          "authority": "EPSG",
          "code": 1153
        }
      },
      {
        "name": "World Geodetic System 1984 (G1150)",
        "id": {
          "authority": "EPSG",
          "code": 1154
        }
      },
      {
        "name": "World Geodetic System 1984 (G1674)",
        "id": {
          "authority": "EPSG",
          "code": 1155
        }
      },
      {
        "name": "World Geodetic System 1984 (G1762)",
        "id": {
          "authority": "EPSG",
          "code": 1156
        }
      },
      {
        "name": "World Geodetic System 1984 (G2139)",
        "id": {
          "authority": "EPSG",
          "code": 1309
        }
      }
    ],
    "ellipsoid": {
      "name": "WGS 84",
      "semi_major_axis": 6378137,
      "inverse_flattening": 298.257223563
    },
    "accuracy": "2.0",
    "id": {
      "authority": "EPSG",
      "code": 6326
    }
  },
  "coordinate_system": {
    "subtype": "ellipsoidal",
    "axis": [
      {
        "name": "Geodetic latitude",
        "abbreviation": "Lat",
        "direction": "north",
        "unit": "degree"
      },
      {
        "name": "Geodetic longitude",
        "abbreviation": "Lon",
        "direction": "east",
        "unit": "degree"
      }
    ]
  },
  "scope": "Horizontal component of 3D system.",
  "area": "World.",
  "bbox": {
    "south_latitude": -90,
    "west_longitude": -180,
    "north_latitude": 90,
    "east_longitude": 180
  },
  "id": {
    "authority": "EPSG",
    "code": 4326
  }
}}" is not recognized

## GeoArrow only works with Pandas, not GeoPandas!

In [None]:
gdf.geometry

0         LINESTRING (-69.67822 46.41356, -69.67822 46.4...
1         LINESTRING (-69.68589 46.40778, -69.68600 46.4...
2         LINESTRING (-69.67822 46.41356, -69.67811 46.4...
3         LINESTRING (-69.68589 46.40778, -69.68589 46.4...
4         LINESTRING (-69.68700 46.37911, -69.68700 46.3...
                                ...                        
140092    LINESTRING (-81.59922 24.64033, -81.59911 24.6...
140093    LINESTRING (-81.63022 24.61767, -81.63011 24.6...
140094    LINESTRING (-81.60144 24.58478, -81.60156 24.5...
140095    LINESTRING (-81.64478 24.57489, -81.64489 24.5...
140096    LINESTRING (-81.68000 24.55900, -81.68011 24.5...
Name: geometry, Length: 140097, dtype: geometry

In [None]:
# Convert to geoarrow
ga_gdf = gdf.copy(deep=True)

In [None]:
ga_gdf.geometry.geoarrow.as_geoarrow()

0         LINESTRING (-69.67822222222121 46.413555555555...
1         LINESTRING (-69.68588888888787 46.407777777777...
2         LINESTRING (-69.67822222222121 46.413555555555...
3         LINESTRING (-69.68588888888787 46.407777777777...
4         LINESTRING (-69.68699999999899 46.379111111111...
                                ...                        
140092    LINESTRING (-81.59922222222181 24.640333333332...
140093    LINESTRING (-81.6302222222218 24.6176666666655...
140094    LINESTRING (-81.60144444444403 24.584777777776...
140095    LINESTRING (-81.64477777777736 24.574888888887...
140096    LINESTRING (-81.67999999999958 24.558999999998...
Length: 140097, dtype: geoarrow.linestring{"crs":{"$schema":"https://proj.org/schemas/v0.7/projjson.schema.json","type":"GeographicCRS","name":"WGS 84","datum_ensemble":{"name":"World Geodetic System 1984 ensemble","members":[{"name":"World Geodetic System 1984 (Transit)","id":{"authority":"EPSG","code":1166}},{"name":"World Geodetic Syst