-
Notifications
You must be signed in to change notification settings - Fork 19
/
rasterizer.py
329 lines (261 loc) · 13.3 KB
/
rasterizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# Functions for rasterizing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyfor import gisexport
from pyfor import ground_filter
from pyfor import plot
class Grid:
"""The Grid object is a representation of a point cloud that has been sorted into X and Y dimensional bins. It is \
not quite a raster yet. A raster has only one value per cell, whereas the Grid object merely sorts all points \
into their respective cells.
:param cloud: The "parent" cloud object.
:param cell_size: The size of the cell for sorting in the units of the input cloud object.
:return: Returns a dataframe with sorted x and y with associated bins in a new columns
"""
def __init__(self, cloud, cell_size):
self.cloud = cloud
self.cell_size = cell_size
min_x, max_x = self.cloud.data.min[0], self.cloud.data.max[0]
min_y, max_y = self.cloud.data.min[1], self.cloud.data.max[1]
self.m = int(np.floor((max_y - min_y) / cell_size))
self.n = int(np.floor((max_x - min_x) / cell_size))
# Create bins
bins_x = np.searchsorted(np.linspace(min_x, max_x, self.n), self.cloud.data.points["x"])
bins_y = np.searchsorted(np.linspace(min_y, max_y, self.m), self.cloud.data.points["y"])
self.cloud.data.points["bins_x"] = bins_x
self.cloud.data.points["bins_y"] = bins_y
self.cells = self.cloud.data.points.groupby(['bins_x', 'bins_y'])
def _update(self):
self.cloud.data._update()
self.__init__(self.cloud, self.cell_size)
def raster(self, func, dim, **kwargs):
"""
Generates an m x n matrix with values as calculated for each cell in func. This is a raw array without \
missing cells interpolated. See self.interpolate for interpolation methods.
:param func: A function string, i.e. "max" or a function itself, i.e. np.max. This function must be able to \
take a 1D array of the given dimension as an input and produce a single value as an output. This single value \
will become the value of each cell in the array.
:param dim: The dimension to calculate on as a string, see the column names of self.data for a full list of \
options
:return: A 2D numpy array where the value of each cell is the result of the passed function.
"""
bin_summary = self.cells.agg({dim: func}, **kwargs).reset_index()
array = np.full((self.m, self.n), np.nan)
array[bin_summary["bins_y"], bin_summary["bins_x"]] = bin_summary[dim]
return Raster(array, self)
@property
def empty_cells(self):
"""
Retrieves the cells with no returns in self.data
return: An N x 2 numpy array where each row cooresponds to the [y x] coordinate of the empty cell.
"""
array = self.raster("count", "z").array
emptys = np.argwhere(np.isnan(array))
return emptys
def interpolate(self, func, dim, interp_method="nearest"):
"""
Interpolates missing cells in the grid. This function uses scipy.griddata as a backend. Please see \
documentation for that function for more details.
:param func: The function (or function string) to calculate an array on the gridded data.
:param dim: The dimension (i.e. column name of self.cells) to cast func onto.
:param interp_method: The interpolation method call for scipy.griddata, one of any: "nearest", "cubic", \
"linear"
:return: An interpolated array.
"""
from scipy.interpolate import griddata
# Get points and values that we already have
cell_values = self.cells[dim].agg(func).reset_index()
points = cell_values[['bins_x', 'bins_y']].values
values = cell_values[dim].values
# https://stackoverflow.com/questions/12864445/numpy-meshgrid-points
# TODO assumes a raster occupies a square/rectangular space. Is it possible to not assume this and increase performance?
X, Y = np.mgrid[1:self.n+1, 1:self.m+1]
# TODO generally a slow approach
interp_grid = griddata(points, values, (X, Y), method=interp_method).T
return Raster(interp_grid, self)
def metrics(self, func_dict, as_raster = False):
"""
Calculates summary statistics for each grid cell in the Grid.
:param func_dict: A dictionary containing keys corresponding to the columns of self.data and values that \
correspond to the functions to be called on those columns.
:return: A pandas dataframe with the aggregated metrics.
"""
# Aggregate on the function
aggregate = self.cells.agg(func_dict)
if as_raster == False:
return aggregate
else:
rasters = []
for column in aggregate:
array = np.asarray(aggregate[column].reset_index().pivot('bins_y', 'bins_x'))
raster = Raster(array, self)
rasters.append(raster)
# Get list of dimension names
dims = [tup[0] for tup in list(aggregate)]
# Get list of metric names
metrics = [tup[1] for tup in list(aggregate)]
return pd.DataFrame({'dim': dims, 'metric': metrics, 'raster': rasters}).set_index(['dim', 'metric'])
class Raster:
def __init__(self, array, grid):
self.array = array
self.grid = grid
self.cell_size = self.grid.cell_size
@property
def _affine(self):
"""Constructs the affine transformation, used for plotting and exporting polygons and rasters."""
from rasterio.transform import from_origin
affine = from_origin(self.grid.cloud.data.min[0], self.grid.cloud.data.max[1], self.grid.cell_size, self.grid.cell_size)
return affine
@property
def _convex_hull_mask(self):
"""
Calculates an m x n boolean numpy array where the value of each cell represents whether or not that cell lies \
within the convex hull of the "parent" cloud object. This is used when plotting and writing interpolated \
rasters.
:return:
"""
import fiona
import rasterio
from rasterio.mask import mask
# TODO for now this uses temp files. I would like to change this.
self.grid.cloud.convex_hull.to_file("temp.shp")
with fiona.open("temp.shp", "r") as shapefile:
features = [feature["geometry"] for feature in shapefile]
self.write("temp.tif")
with rasterio.open("temp.tif") as rast:
out_image = mask(rast, features, nodata = np.nan, crop=True)
return out_image[0].data
def plot(self, cmap = "viridis", block = False, return_plot = False):
"""
Default plotting method for the Raster object.
:param block: An optional parameter, mostly for debugging purposes.
"""
#TODO implement cmap
fig = plt.figure()
ax = fig.add_subplot(111)
caz = ax.matshow(self.array)
fig.colorbar(caz)
fig.gca().invert_yaxis()
ax.xaxis.tick_bottom()
ax.set_xticks(np.linspace(0, self.grid.n, 3))
ax.set_yticks(np.linspace(0, self.grid.m, 3))
x_ticks, y_ticks = np.rint(np.linspace(self.grid.cloud.data.min[0], self.grid.cloud.data.max[0], 3)), \
np.rint(np.linspace(self.grid.cloud.data.min[1], self.grid.cloud.data.max[1], 3))
ax.set_xticklabels(x_ticks)
ax.set_yticklabels(y_ticks)
if return_plot == True:
return(ax)
else:
plt.show(block = block)
def iplot3d(self, colorscale="Viridis"):
"""
Plots the raster as a surface using Plotly.
"""
plot.iplot3d_surface(self.array, colorscale)
def local_maxima(self, min_distance=2, threshold_abs=2, as_coordinates=False):
"""
Returns a new Raster object with tops detected using a local maxima filtering method. See
skimage.feature.peak_local_maxima for more information on the filter.
:param min_distance:
:param threshold_abs:
:param multi_top: If multi_top is true, a top can consist of more than one pixel.
:param as_coordinates: Not yet implemented
:return:
"""
from skimage.feature import peak_local_max, corner_peaks
from scipy.ndimage import label
tops = peak_local_max(np.flipud(self.array), indices=False, min_distance=min_distance, threshold_abs=threshold_abs)
tops = label(tops)[0]
# TODO Had to take out corner filter to remove duplicate tops.
tops_raster = DetectedTops(tops, self.grid, self)
return(tops_raster)
def watershed_seg(self, min_distance=2, threshold_abs=2, classify=False):
"""
Returns the watershed segmentation of the Raster as a geopandas dataframe.
:param min_distance: The minimum distance between local height maxima in the same units as the input point \
cloud.
:param threshold_abs: The minimum threshold needed to be called a peak in peak_local_max.
:param classify: If true, sets the `user_data` of the original point cloud data to the segment ID. The \
segment ID is an arbitrary identification number generated by the labels function. This can be useful for \
plotting point clouds where each segment color is unique.
:return: A geopandas data frame, each record is a crown segment.
"""
return CrownSegments(self.array, self.grid, min_distance=min_distance, threshold_abs=threshold_abs)
#if classify == True:
# xy = self.grid.cloud.data.points[["bins_x", "bins_y"]].values
# tree_id = labels[xy[:, 1], xy[:, 0]]
# # Update the CloudData and Grid objects
# self.grid.cloud.data.points["user_data"] = tree_id
# self.grid.cells = self.grid.cloud.data.points.groupby(['bins_x', 'bins_y'])
def pit_filter(self, kernel_size):
"""
Filters pits in the raster. Intended for use with canopy height models (i.e. grid(0.5).interpolate("max", "z").
This function modifies the raster array **in place**.
:param kernel_size: The size of the kernel window to pass over the array. For example 3 -> 3x3 kernel window.
"""
from scipy.signal import medfilt
self.array = medfilt(self.array, kernel_size=kernel_size)
def write(self, path):
"""
Writes the raster to a geotiff. Requires the Cloud.crs attribute to be filled by a projection string (ideally \
wkt or proj4).
:param path: The path to write to.
"""
if not self.grid.cloud.crs:
from warnings import warn
warn('No coordinate reference system defined. Please set the .crs attribute of the Cloud object.', UserWarning)
gisexport.array_to_raster(self.array, self.cell_size, self.grid.cloud.data.min[0], self.grid.cloud.data.max[1],
self.grid.cloud.crs, path)
class DetectedTops(Raster):
"""
This class is for visualization of detected tops with a raster object. Generally created internally via
Raster.local_maxima
"""
def __init__(self, array, grid, chm):
super().__init__(array, grid)
self.chm = chm
def plot(self):
"""
Plots the detected tops against the original input raster.
# https://matplotlib.org/gallery/images_contours_and_fields/image_transparency_blend.html
"""
fig, ax = plt.subplots()
caz = ax.matshow(np.flipud(self.chm.array))
fig.colorbar(caz)
# TODO I might not need to repeat this code from raster
fig.gca().invert_yaxis()
ax.xaxis.tick_bottom()
ax.set_xticks(np.linspace(0, self.grid.n, 3))
ax.set_yticks(np.linspace(0, self.grid.m, 3))
x_ticks, y_ticks = np.rint(np.linspace(self.grid.cloud.data.min[0], self.grid.cloud.data.max[0], 3)), \
np.rint(np.linspace(self.grid.cloud.data.min[1], self.grid.cloud.data.max[1], 3))
ax.set_xticklabels(x_ticks)
ax.set_yticklabels(y_ticks)
container = np.zeros((self.grid.m, self.grid.n, 4))
tops_binary = (self.array > 0).astype(np.int)
container[:, :, 0][tops_binary >0] = 1
container[:, :, 3][tops_binary >0] = 1
ax.imshow(container)
class CrownSegments(Raster):
"""
This class is for visualization of detected crown segments with a raster object.
"""
def __init__(self, array, grid, min_distance, threshold_abs):
from skimage.morphology import watershed
super().__init__(array, grid)
watershed_array = np.flipud(self.array)
tops = self.local_maxima(min_distance=min_distance, threshold_abs=threshold_abs).array
labels = watershed(-watershed_array, tops, mask=watershed_array)
self.segments = gisexport.array_to_polygons(labels, affine=None)
def plot(self):
from matplotlib.collections import PatchCollection
from descartes.patch import PolygonPatch
geoms = self.segments['geometry'].translate(xoff=-0.5, yoff=-0.5).values
fig, ax = plt.subplots()
ax.imshow(np.flipud(self.array))
ax.add_collection(PatchCollection([PolygonPatch(poly) for poly in geoms], facecolors=(1,0,0,0), edgecolors='#e8e8e8'))
plt.xlim((0, self.array.shape[1]))
plt.ylim((0, self.array.shape[0]))
ax.invert_yaxis()
plt.show()