-
Notifications
You must be signed in to change notification settings - Fork 902
/
io.py
109 lines (82 loc) · 3.12 KB
/
io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import shutil
import tempfile
import warnings
import numpy as np
from shapely.geometry import Point
from geopandas import GeoDataFrame, GeoSeries, read_file, read_parquet, read_feather
# TEMP: silence the warning raised by to_parquet about its initial
# Parquet implementation so it does not clutter benchmark output.
warnings.filterwarnings(
    action="ignore",
    message=".*initial implementation of Parquet.*",
)
# Maps a benchmark format name to a ``(extension, writer, reader)`` triple:
#   extension -- file suffix appended to the benchmark file name
#   writer    -- callable ``(gdf, filename)`` that serializes ``gdf``
#   reader    -- callable ``(filename)`` that loads the file back
# The callables are lambdas so the geopandas functions are only looked up
# when a benchmark actually invokes them.
format_dict = {
    "ESRI Shapefile": (
        ".shp",
        lambda gdf, filename: gdf.to_file(filename, driver="ESRI Shapefile"),
        lambda filename: read_file(filename, driver="ESRI Shapefile"),
    ),
    "GeoJSON": (
        ".json",
        lambda gdf, filename: gdf.to_file(filename, driver="GeoJSON"),
        lambda filename: read_file(filename, driver="GeoJSON"),
    ),
    "GPKG": (
        ".gpkg",
        # Must use the GPKG driver here; passing "GeoJSON" would make this
        # entry benchmark GeoJSON I/O under a ".gpkg" file name.
        lambda gdf, filename: gdf.to_file(filename, driver="GPKG"),
        lambda filename: read_file(filename, driver="GPKG"),
    ),
    "Parquet": (
        ".parquet",
        lambda gdf, filename: gdf.to_parquet(filename),
        lambda filename: read_parquet(filename),
    ),
    "Feather": (
        ".feather",
        lambda gdf, filename: gdf.to_feather(filename),
        lambda filename: read_feather(filename),
    ),
}
class Bench:
    """Shared fixture for the file I/O benchmarks, parametrized by format.

    ``setup`` builds a GeoDataFrame of random points and writes it once into
    a temporary directory; ``teardown`` removes that directory again.
    """

    params = ["ESRI Shapefile", "GeoJSON", "GPKG", "Parquet", "Feather"]
    param_names = ["file_format"]

    def setup(self, file_format):
        """Generate the sample data and write the reference file.

        ``file_format`` must be a key of ``format_dict``; the matching
        extension, writer and reader are stored on the instance.
        """
        extension, write, read = format_dict[file_format]
        self.ext = extension
        self.writer = write
        self.reader = read

        num_points = 20000
        xs = np.random.rand(num_points)
        ys = np.random.rand(num_points)
        self.points = GeoSeries(list(map(Point, xs, ys)))

        columns = {
            "geometry": self.points,
            "x": xs,
            "y": ys,
            "s": np.zeros(num_points, dtype="object"),
        }
        self.df = GeoDataFrame(columns)

        # Write the frame once up front so the read benchmarks have a file
        # to load without paying for the write.
        self.tmpdir = tempfile.mkdtemp()
        self.filename = os.path.join(self.tmpdir, "frame" + self.ext)
        self.writer(self.df, self.filename)

    def teardown(self, file_format):
        """Delete the temporary directory created by ``setup``."""
        shutil.rmtree(self.tmpdir)
class BenchFrame(Bench):
    """Time writing and reading the full GeoDataFrame in each format."""

    params = ["ESRI Shapefile", "GeoJSON", "GPKG", "Parquet", "Feather"]
    param_names = ["file_format"]

    def time_write(self, file_format):
        """Write ``self.df`` into a fresh temporary directory.

        The directory is created and removed inside this method, so that
        work is part of what gets timed.
        """
        with tempfile.TemporaryDirectory() as scratch:
            target = os.path.join(scratch, "frame" + self.ext)
            self.writer(self.df, target)

    def time_read(self, file_format):
        """Read back the file that ``setup`` wrote."""
        self.reader(self.filename)
class BenchSeries(Bench):
    """Time GeoSeries I/O for the formats written through ``to_file``."""

    params = ["ESRI Shapefile", "GeoJSON", "GPKG"]
    param_names = ["file_format"]

    def setup(self, file_format):
        """Run the shared setup, then also write the bare point series."""
        super().setup(file_format)
        series_path = os.path.join(self.tmpdir, "series" + self.ext)
        self.filename_series = series_path
        self.writer(self.points, series_path)

    def time_write_series(self, file_format):
        """Write ``self.points`` into a fresh temporary directory."""
        with tempfile.TemporaryDirectory() as scratch:
            target = os.path.join(scratch, "series" + self.ext)
            self.writer(self.points, target)

    def time_read_series(self, file_format):
        """Load a GeoSeries from the file holding only the series."""
        GeoSeries.from_file(self.filename_series)

    def time_read_series_from_frame(self, file_format):
        """Load a GeoSeries from the file holding the full frame."""
        GeoSeries.from_file(self.filename)