Supertiles (#172)

* super tile start * writing windows * working super tiles * clean up supertiles take 1 * generalize over zoom to work for going up x number of zoom levels as specified in config * remove prints * test for overzoom * clean up main * fix test * fix test again * circle ci env variable for config * fix images command * config format * circle token * change how tokens are read * remove print * config env variable * config env variable * fix circler ci yaml * fix how env is injected * fix config * fix environment variables in tox * option to read access token as environment variable * update docs about access token * Minor supertiling cleanup Co-authored-by: Drew Bollinger <drew@developmentseed.org>
developmentseed · Jul 1, 2020 · 37eeeb5 · 37eeeb5
1 parent 5f15bcd
commit 37eeeb5
Show file tree

Hide file tree

Showing 7 changed files with 107 additions and 8 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -11,7 +11,9 @@ common: &common
         command: .circleci/install_tippecanoe.sh
     - run:
         name: run tox
-        command: ~/.local/bin/tox
+        command: |
+          ~/.local/bin/tox
+
 jobs:
   "python-3.6":
     <<: *common
@@ -94,4 +96,4 @@ workflows:
             tags:
               only: /^[0-9]+.*/
             branches:
-              ignore: /.*/
+              ignore: /.*/
diff --git a/docs/parameters.rst b/docs/parameters.rst
@@ -29,6 +29,7 @@ Here is the full list of configuration parameters you can specify in a ``config.
 	Label Maker expects to receive imagery tiles that are 256 x 256 pixels. You can specify the source of the imagery with one of:
 
  		A template string for a tiled imagery service. Note that you will generally need an API key to obtain images and there may be associated costs. The above example requires a `Mapbox access token <https://www.mapbox.com/help/how-access-tokens-work/>`_. Also see `OpenAerialMap <https://openaerialmap.org/>`_ for open imagery.
+		The access token for TMS imagery can be read from the ``ACCESS_TOKEN`` environment variable by leaving a placeholder in the template, e.g. ``"https://api.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token={ACCESS_TOKEN}"``, or it can be added directly to the imagery string.
 
  		A GeoTIFF file location. Works with local files: ``'http://oin-hotosm.s3.amazonaws.com/593ede5ee407d70011386139/0/3041615b-2bdb-40c5-b834-36f580baca29.tif'``
 
@@ -67,4 +68,7 @@ Here is the full list of configuration parameters you can specify in a ``config.
 	An optional list of integers representing the number of pixels to offset imagery. For example ``[15, -5]`` will move the images 15 pixels right and 5 pixels up relative to the requested tile bounds.
 
 **tms_image_format**: string
- An option string that has the downloaded imagery's format such as `.jpg` or `.png` when it isn't provided by the endpoint
+    An optional string giving the downloaded imagery's format, such as ``.jpg`` or ``.png``, for use when it isn't provided by the endpoint.
+
+**over_zoom**: int
+ 	An integer greater than 0. If set for XYZ tiles, imagery is fetched at zoom level ``zoom + over_zoom`` and combined into a single higher-resolution image that fills out the bounds of each tile at the original zoom level.
diff --git a/label_maker/utils.py b/label_maker/utils.py
@@ -1,18 +1,25 @@
 # pylint: disable=unused-argument
 """Provide utility functions"""
+import os
 from os import path as op
 from urllib.parse import urlparse, parse_qs
 
-from mercantile import bounds
+from mercantile import bounds, Tile, children
 from PIL import Image
+import io
 import numpy as np
 import requests
 import rasterio
 from rasterio.crs import CRS
 from rasterio.warp import transform, transform_bounds
+from rasterio.windows import Window
 
 WGS84_CRS = CRS.from_epsg(4326)
 
+class SafeDict(dict):  # dict whose missing keys resolve to their own '{key}' placeholder (used with str.format_map)
+    def __missing__(self, key):  # called by dict lookup when `key` is absent
+        return '{' + key + '}'  # re-emit the placeholder unchanged, e.g. 'x' -> '{x}'; requires a str key
+
 def url(tile, imagery):
     """Return a tile url provided an imagery template and a tile"""
     return imagery.replace('{x}', tile[0]).replace('{y}', tile[1]).replace('{z}', tile[2])  # tile is indexed (x, y, z); each item must already be a str for str.replace
@@ -40,11 +47,50 @@ def download_tile_tms(tile, imagery, folder, kwargs):
 
     image_format = get_image_format(imagery, kwargs)
 
+    if os.environ.get('ACCESS_TOKEN'):
+        token = os.environ.get('ACCESS_TOKEN')
+        imagery = imagery.format_map(SafeDict(ACCESS_TOKEN=token))
+
     r = requests.get(url(tile.split('-'), imagery),
                      auth=kwargs.get('http_auth'))
     tile_img = op.join(folder, '{}{}'.format(tile, image_format))
-    with open(tile_img, 'wb')as w:
-        w.write(r.content)
+    tile = tile.split('-')
+
+    over_zoom = kwargs.get('over_zoom')
+    if over_zoom:
+        new_zoom = over_zoom + kwargs.get('zoom')
+        # get children
+        child_tiles = children(int(tile[0]), int(tile[1]), int(tile[2]), zoom=new_zoom)
+        child_tiles.sort()
+
+        new_dim = 256 * (2 * over_zoom)
+
+        w_lst = []
+        for i in range (2 * over_zoom):
+            for j in range(2 * over_zoom):
+                window = Window(i * 256, j * 256, 256, 256)
+                w_lst.append(window)
+
+        # request children
+        with rasterio.open(tile_img, 'w', driver='jpeg', height=new_dim,
+                        width=new_dim, count=3, dtype=rasterio.uint8) as w:
+                for num, t in enumerate(child_tiles):
+                    t = [str(t[0]), str(t[1]), str(t[2])]
+                    r = requests.get(url(t, imagery),
+                                    auth=kwargs.get('http_auth'))
+                    img = np.array(Image.open(io.BytesIO(r.content)), dtype=np.uint8)
+                    try:
+                        img = img.reshape((256, 256, 3)) # 4 channels returned from some endpoints, but not all
+                    except ValueError:
+                        img = img.reshape((256, 256, 4))
+                    img = img[:, :, :3]
+                    img = np.rollaxis(img, 2, 0)
+                    w.write(img, window=w_lst[num])
+    else:
+        r = requests.get(url(tile, imagery),
+                         auth=kwargs.get('http_auth'))
+        with open(tile_img, 'wb')as w:
+            w.write(r.content)
     return tile_img
 
 def get_tile_tif(tile, imagery, folder, kwargs):

diff --git a/label_maker/validate.py b/label_maker/validate.py
@@ -34,5 +34,6 @@
     'imagery_offset': {'type': 'list', 'schema': {'type': 'integer'}, 'minlength': 2, 'maxlength': 2},
     'split_vals': {'type': 'list', 'schema': {'type': 'float'}},
     'split_names': {'type': 'list', 'schema': {'type': 'string'}},
-    'tms_image_format': {'type': 'string'}
+    'tms_image_format': {'type': 'string'},
+    'over_zoom': {'type': 'integer', 'min': 1}
 }
diff --git a/test/fixtures/integration/config_overzoom.integration.json b/test/fixtures/integration/config_overzoom.integration.json
@@ -0,0 +1,24 @@
+{"country": "portugal",
+  "bounding_box": [
+    -9.4575,
+    38.8467,
+    -9.4510,
+    38.8513
+  ],
+  "zoom": 17,
+  "classes": [
+    { "name": "Water Tower", "filter": ["==", "man_made", "water_tower"] },
+    { "name": "Building", "filter": ["has", "building"] },
+    { "name": "Farmland", "filter": ["==", "landuse", "farmland"] },
+    { "name": "Ruins", "filter": ["==", "historic", "ruins"] },
+    { "name": "Parking", "filter": ["==", "amenity", "parking"] },
+    { "name": "Roads", "filter": ["has", "highway"] }
+  ],
+  "imagery": "https://api.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token={ACCESS_TOKEN}", 
+  "background_ratio": 1,
+  "ml_type": "classification",
+  "seed": 19,
+  "split_names": ["train", "test", "val"],
+  "split_vals": [0.7, 0.2, 0.1],
+  "over_zoom": 1
+}
diff --git a/test/integration/test_classification_package.py b/test/integration/test_classification_package.py
@@ -21,6 +21,10 @@ def setUpClass(cls):
         copyfile('test/fixtures/integration/labels-cl.npz', 'integration-cl-split/labels.npz')
         copytree('test/fixtures/integration/tiles', 'integration-cl-split/tiles')
 
+
+        makedirs('integration-cl-overzoom')
+        copyfile('test/fixtures/integration/labels-cl.npz', 'integration-cl-overzoom/labels.npz')
+
         makedirs('integration-cl-img-f')
         copyfile('test/fixtures/integration/labels-cl-img-f.npz', 'integration-cl-img-f/labels.npz')
         copytree('test/fixtures/integration/tiles_png', 'integration-cl-img-f/tiles')
@@ -29,6 +33,7 @@ def setUpClass(cls):
     def tearDownClass(cls):
         rmtree('integration-cl')
         rmtree('integration-cl-split')
+        rmtree('integration-cl-overzoom')
         rmtree('integration-cl-img-f')
 
     def test_cli(self):
@@ -80,6 +85,22 @@ def test_cli_3way_split(self):
         self.assertEqual(data['y_test'].shape, (2, 7))
         self.assertEqual(data['y_val'].shape, (1, 7))
 
+    def test_overzoom(self):
+        """Verify data.npz produced by CLI when overzoom is used"""
+        cmd = 'label-maker images --dest integration-cl-overzoom --config test/fixtures/integration/config_overzoom.integration.json'  # step 1: run the `images` subcommand; NOTE(review): the config's imagery URL contains an {ACCESS_TOKEN} placeholder, so this presumably needs ACCESS_TOKEN set and network access
+        cmd = cmd.split(' ')  # argv list, so no shell is involved
+        subprocess.run(cmd, universal_newlines=True)  # NOTE(review): exit status is discarded (no check=True); a failed run only surfaces later
+
+        cmd = 'label-maker package --dest integration-cl-overzoom --config test/fixtures/integration/config_overzoom.integration.json'  # step 2: run the `package` subcommand against the same --dest folder
+        cmd = cmd.split(' ')
+        subprocess.run(cmd, universal_newlines=True)  # NOTE(review): exit status is discarded here as well
+
+        data = np.load('integration-cl-overzoom/data.npz')  # raises if packaging did not produce data.npz
+
+        self.assertEqual(data['x_train'].shape, (6, 512, 512, 3))  # 512 = 256 * 2: the config sets over_zoom to 1
+        self.assertEqual(data['x_test'].shape, (2, 512, 512, 3))
+        self.assertEqual(data['x_val'].shape, (1, 512, 512, 3))
+
     def test_tms_img_format(self):
         """Verify data.npz produced by CLI"""
 

diff --git a/tox.ini b/tox.ini
@@ -2,6 +2,7 @@
 envlist = py37,py36
 
 [testenv]
+passenv = ACCESS_TOKEN
 extras = test
 commands=
     python -m pytest --cov label_maker --cov-report term-missing --ignore=venv
@@ -46,4 +47,4 @@ include_trailing_comma = True
 multi_line_output = 3
 line_length = 90
 known_first_party = label_maker
-default_section = THIRDPARTY
+default_section = THIRDPARTY