Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accommodate MS and Bands Beyond Default RGB #178

Merged
merged 16 commits into from
Aug 19, 2020
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/parameters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,7 @@ Here is the full list of configuration parameters you can specify in a ``config.

**over_zoom**: int
An integer greater than 0. If set for XYZ tiles, it will fetch tiles from `zoom` + `over_zoom`, to create higher resolution tiles which fill out the bounds of the original zoom level.

**band_indices**: list
A list of band indices to pull from a TIF. Defaults to (1, 2, 3) if not specified. For non-RGB TIFs, i.e. multispectral TIFs, this should be overridden,
for example with [5, 3, 2, 7] to extract the Red, Green, Blue, and NIR bands from the multispectral SpaceNet challenge imagery.
16 changes: 12 additions & 4 deletions label_maker/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from os import path as op
from urllib.parse import urlparse
import numpy as np
import rasterio
from PIL import Image

from label_maker.utils import is_tif, get_image_format
Expand Down Expand Up @@ -73,25 +74,29 @@ def package_directory(dest_folder, classes, imagery, ml_type, seed=False,

# open the images and load those plus the labels into the final arrays
if is_tif(imagery): # if a TIF is provided, use jpg as tile format
image_format = '.jpg'
img_dtype = rasterio.open(imagery).profile['dtype']
image_format = '.tif'

else:
img_dtype = np.uint8
image_format = get_image_format(imagery, kwargs)

for tile in tiles:
image_file = op.join(dest_folder, 'tiles', '{}{}'.format(tile, image_format))
try:
img = Image.open(image_file)
img = rasterio.open(image_file)
except FileNotFoundError:
# we often don't download images for each label (e.g. background tiles)
continue
except OSError:
print('Couldn\'t open {}, skipping'.format(image_file))
continue

np_image = np.array(img)
i = np.array(img.read())
np_image = np.moveaxis(i, 0, 2)
img.close()


x_vals.append(np_image)
if ml_type == 'classification':
y_vals.append(labels[tile])
Expand All @@ -103,7 +108,9 @@ def package_directory(dest_folder, classes, imagery, ml_type, seed=False,
y_vals.append(labels[tile][..., np.newaxis]) # Add grayscale channel

# Convert lists to numpy arrays
x_vals = np.array(x_vals, dtype=np.uint8)

#TO-DO flexible x_val dtype
x_vals = np.array(x_vals, dtype=img_dtype)
y_vals = np.array(y_vals, dtype=np.uint8)

# Get number of data samples per split from the float proportions
Expand All @@ -128,3 +135,4 @@ def package_directory(dest_folder, classes, imagery, ml_type, seed=False,

np.savez(op.join(dest_folder, 'data.npz'), **save_dict)
print('Saving packaged file to {}'.format(op.join(dest_folder, 'data.npz')))
print('Image dtype written in npz matches input image dtype: {}'.format(img_dtype))
19 changes: 12 additions & 7 deletions label_maker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def get_tile_tif(tile, imagery, folder, kwargs):
bound = bounds(*[int(t) for t in tile.split('-')])
imagery_offset = kwargs.get('imagery_offset') or [0, 0]
with rasterio.open(imagery) as src:
profile = src.profile
x_res, y_res = src.transform[0], src.transform[4]

# offset our imagery in the "destination pixel" space
Expand Down Expand Up @@ -137,15 +138,19 @@ def get_tile_tif(tile, imagery, folder, kwargs):
window = ((top, bottom), (left, right))

# read the requested bands (by default the first three, assumed RGB) of the TIF into an array
data = np.empty(shape=(3, 256, 256)).astype(src.profile['dtype'])
for k in (1, 2, 3):
src.read(k, window=window, out=data[k - 1], boundless=True)
band_indices = kwargs.get('band_indices', (1, 2, 3))
band_count = len(band_indices)

# save
tile_img = op.join(folder, '{}{}'.format(tile, '.jpg'))
img = Image.fromarray(np.moveaxis(data, 0, -1), mode='RGB')
img.save(tile_img)
arr_shape = (band_count, 256, 256)
data = np.empty(shape=(arr_shape)).astype(src.profile['dtype'])

for i, k in enumerate(band_indices):
src.read(k, window=window, out=data[i], boundless=True)
# save
tile_img = op.join(folder, '{}{}'.format(tile, '.tif'))
with rasterio.open(tile_img, 'w', driver='png', height=256,
martham93 marked this conversation as resolved.
Show resolved Hide resolved
width=256, count=band_count, dtype=profile['dtype']) as w:
w.write(data)
return tile_img

def get_tile_wms(tile, imagery, folder, kwargs):
Expand Down
3 changes: 2 additions & 1 deletion label_maker/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@
'split_vals': {'type': 'list', 'schema': {'type': 'float'}},
'split_names': {'type': 'list', 'schema': {'type': 'string'}},
'tms_image_format': {'type': 'string'},
'over_zoom': {'type': 'integer', 'min': 1}
'over_zoom': {'type': 'integer', 'min': 1},
'band_indices': {'type': 'list'}
}
12 changes: 12 additions & 0 deletions test/fixtures/integration/config.integration.bands.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"zoom": 18,
"classes": [
{"name": "paved", "filter": ["in", "paved", "1"]},
{"name": "unpaved", "filter": ["in", "paved", "2"]}
],
"geojson": "integration-ms/ms-roads.geojson",
"imagery": "integration-ms/ms-img.tif",
"background_ratio": 0,
"ml_type": "classification",
"band_indices": [5, 7]
}
Binary file added test/fixtures/integration/labels-ms.npz
Binary file not shown.
Binary file added test/fixtures/integration/ms_img.tif
Binary file not shown.
Loading