-
Notifications
You must be signed in to change notification settings - Fork 0
/
ogr_to_geoarrow.py
79 lines (66 loc) · 2.27 KB
/
ogr_to_geoarrow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pyarrow as pa
import pyarrow.feather as feather
import geoarrow.pyarrow as ga
import pyogrio
import pyproj
import glob
import sys
import os
# Convert every file matching a glob pattern into one or more GeoArrow-encoded
# Arrow/Feather files written next to the input file.
#
# e.g. ogr_to_geoarrow.py separate,interleaved,wkb,wkt "*.gpkg"
#
# Positional arguments:
#   1. comma-separated list of output encodings
#      (any of: separate, interleaved, wkb, wkt)
#   2. glob pattern selecting the input files
#
# Optional flags (looked up anywhere on the command line):
#   --strip-crs   do not attach the source CRS to the geometry column
#   --compress    write zstd-compressed files instead of uncompressed ones
if len(sys.argv) < 3:
    sys.exit(
        f"usage: {sys.argv[0]} separate,interleaved,wkb,wkt PATTERN "
        "[--strip-crs] [--compress]"
    )

formats = sys.argv[1].lower().split(",")
pattern = sys.argv[2]
strip_crs = "--strip-crs" in sys.argv
compression = "zstd" if "--compress" in sys.argv else "uncompressed"

# Maps each supported format name to (output file-name suffix, geometry
# encoder). Dict order is the order in which the outputs are written.
encoders = {
    "separate": ("", lambda geom: geom),
    "interleaved": (
        "-interleaved",
        lambda geom: ga.with_coord_type(geom, ga.CoordType.INTERLEAVED),
    ),
    "wkb": ("-wkb", ga.as_wkb),
    "wkt": ("-wkt", ga.as_wkt),
}

for f in glob.glob(pattern):
    print(f"Reading {f}...")
    info, table = pyogrio.raw.read_arrow(f)

    # Fall back to "wkb_geometry" when the reader reports no geometry
    # column name.
    geometry_name = info["geometry_name"] or "wkb_geometry"
    geometry = ga.as_geoarrow(table.column(geometry_name))

    # Only parse the CRS when it is actually going to be attached, so that
    # --strip-crs (or a source without a CRS) never fails on CRS parsing.
    if not strip_crs and info["crs"] is not None:
        crs_json = pyproj.CRS(info["crs"]).to_json()
        # Need to fix ga.with_crs() to work with ChunkedArray
        geometry = pa.chunked_array(
            [ga.with_crs(chunk, crs_json) for chunk in geometry.chunks]
        )

    # Drop the original geometry column; each output re-adds it as "geometry"
    # in the requested encoding.
    table = table.remove_column(table.column_names.index(geometry_name))

    assert (
        geometry.type.coord_type == ga.CoordType.SEPARATE
    ), "expected ga.as_geoarrow() to produce separate (struct) coordinates"

    # Build output names from the path without its extension. Using
    # str.replace() on the extension would also rewrite matching text in
    # directory names, and would corrupt paths that have no extension.
    root, _ = os.path.splitext(f)
    for fmt, (suffix, encode) in encoders.items():
        if fmt not in formats:
            continue
        out = f"{root}{suffix}.arrow"
        print(f"Writing {out}...")
        feather.write_feather(
            table.append_column("geometry", encode(geometry)),
            out,
            compression=compression,
        )