diff --git a/CHANGES.txt b/CHANGES.txt index 0ea38fc..162a8c1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -10,6 +10,7 @@ Bug fixes: Breacking Changes: +- rio cogeo now has subcommands: 'create' and 'validate' (#6). - internal mask creation is now optional (--add-mask). - internal nodata or alpha channel can be forwarded to the output dataset. - removed default overview blocksize to be equal to the raw data blocksize (#60) diff --git a/README.rst b/README.rst index 0febfce..1aa807b 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ rio-cogeo ========= -Cloud Optimized GeoTIFF (COG) creation plugin for rasterio +Cloud Optimized GeoTIFF (COG) creation and validation plugin for Rasterio .. image:: https://badge.fury.io/py/rio-cogeo.svg :target: https://badge.fury.io/py/rio-cogeo @@ -17,14 +17,14 @@ Cloud Optimized GeoTIFF (COG) creation plugin for rasterio Install ======= -.. code-block:: console +.. code-block:: console $ pip install -U pip $ pip install rio-cogeo Or install from source: -.. code-block:: console +.. code-block:: console $ git clone https://github.com/cogeotiff/rio-cogeo.git $ cd rio-cogeo @@ -34,7 +34,23 @@ Or install from source: Usage ===== -.. code-block:: +.. code-block:: console + + $ rio cogeo --help + Usage: rio cogeo [OPTIONS] COMMAND [ARGS]... + + Rasterio cogeo subcommands. + + Options: + --help Show this message and exit. + + Commands: + create Create COGEO + validate Validate COGEO + +- Create a Cloud Optimized Geotiff. + +.. code-block:: console $ rio cogeo --help Usage: rio cogeo [OPTIONS] INPUT OUTPUT @@ -55,23 +71,36 @@ Usage -q, --quiet Suppress progress bar and other non-error output. --help Show this message and exit. +- Check if a Cloud Optimized Geotiff is valid. + +.. code-block:: console + + $ rio cogeo validate --help + Usage: rio cogeo validate [OPTIONS] INPUT + + Validate Cloud Optimized Geotiff. + + Options: + --help Show this message and exit. + + Examples ======== -.. code-block:: console +.. 
code-block:: console # Create a COGEO with JPEG profile and the first 3 bands of the data - $ rio cogeo mydataset.tif mydataset_jpeg.tif -b 1,2,3 + $ rio cogeo create mydataset.tif mydataset_jpeg.tif -b 1,2,3 - # Create a COGEO with JPEG profile and the first 3 bands of the data and add internal mask - $ rio cogeo mydataset.tif mydataset_jpeg.tif -b 1,2,3 --add-mask + # Validate COGEO + $ rio cogeo validate mydataset_jpeg.tif - # Create a COGEO without compression and with 1024x1024 block size - $ rio cogeo mydataset.tif mydataset_raw.tif --co BLOCKXSIZE=1024 --co BLOCKYSIZE=1024 --cog-profile raw + # Create a COGEO with JPEG profile and the first 3 bands of the data and add internal mask + $ rio cogeo create mydataset.tif mydataset_jpeg.tif -b 1,2,3 --add-mask # Create a COGEO without compression and with 1024x1024 block size and 256 overview blocksize - $ rio cogeo mydataset.tif mydataset_raw.tif --co BLOCKXSIZE=1024 --co BLOCKYSIZE=1024 --cog-profile raw --overview-blocksize 256 - $ GDAL_TIFF_OVR_BLOCKSIZE=256 rio cogeo mydataset.tif mydataset_raw.tif --co BLOCKXSIZE=1024 --co BLOCKYSIZE=1024 --cog-profile raw + $ rio cogeo create mydataset.tif mydataset_raw.tif --co BLOCKXSIZE=1024 --co BLOCKYSIZE=1024 --cog-profile raw --overview-blocksize 256 + $ GDAL_TIFF_OVR_BLOCKSIZE=256 rio cogeo create mydataset.tif mydataset_raw.tif --co BLOCKXSIZE=1024 --co BLOCKYSIZE=1024 --cog-profile raw Default COGEO profiles @@ -160,7 +189,7 @@ to replace the Nodata value or Alpha band in output dataset (supported by most G Note: when adding a `mask` with an input dataset having an alpha band you'll need to use the `bidx` options to remove it from the output dataset. -.. code-block:: console +.. code-block:: console # Replace the alpha band by an internal mask $ rio cogeo mydataset_withalpha.tif mydataset_withmask.tif --cog-profile raw --add-mask --bidx 1,2,3 @@ -180,7 +209,7 @@ Issues and pull requests are more than welcome. **dev install** -.. code-block:: console +.. 
code-block:: console $ git clone https://github.com/cogeotiff/rio-cogeo.git $ cd rio-cogeo @@ -190,13 +219,13 @@ Issues and pull requests are more than welcome. This repo is set to use `pre-commit` to run *flake8*, *pydocstring* and *black* ("uncompromising Python code formatter") when commiting new code. -.. code-block:: console +.. code-block:: console $ pre-commit install Extras ====== -Blog post good and bad COG formats: https://medium.com/@_VincentS_/do-you-really-want-people-using-your-data-ec94cd94dc3f +Blog post on good and bad COG formats: https://medium.com/@_VincentS_/do-you-really-want-people-using-your-data-ec94cd94dc3f Checkout **rio-glui** (https://github.com/mapbox/rio-glui/) rasterio plugin to explore COG locally in your web browser. diff --git a/rio_cogeo/cogeo.py b/rio_cogeo/cogeo.py index 2063603..e14e52a 100644 --- a/rio_cogeo/cogeo.py +++ b/rio_cogeo/cogeo.py @@ -8,6 +8,7 @@ import rasterio from rasterio.io import MemoryFile +from rasterio.env import GDALVersion from rasterio.vrt import WarpedVRT from rasterio.enums import Resampling from rasterio.shutil import copy @@ -147,3 +148,197 @@ def cog_translate( "Writing output to: {}".format(dst_path), err=True ) copy(mem, dst_path, copy_src_overviews=True, **dst_kwargs)
+
+
+def _tag_offset(src, name, ovr=None):
+    """Return band-1 TIFF tag `name` as an int, or None when GDAL omits it."""
+    value = src.get_tag_item(name, "TIFF", bidx=1, ovr=ovr)
+    return None if value is None else int(value)
+
+
+def _precedes(first, second):
+    """Return True when both offsets are known and `first` is below `second`."""
+    return first is not None and second is not None and first < second
+
+
+def cog_validate(src_path):
+    """
+    Validate Cloud Optimized Geotiff.
+
+    Warnings and errors are echoed to stderr; only errors make the result False.
+
+    Parameters
+    ----------
+    src_path : str or PathLike object
+        A dataset path or URL. Will be opened in "r" mode.
+
+    Returns
+    -------
+    bool
+        True when no error was recorded, False otherwise.
+
+    Raises
+    ------
+    Exception
+        If the GDAL runtime is older than 2.2 or the driver is not GTiff.
+
+    This script is the rasterio equivalent of
+    https://svn.osgeo.org/gdal/trunk/gdal/swig/python/samples/validate_cloud_optimized_geotiff.py
+
+    """
+    errors = []
+    warnings = []
+    details = {}  # offsets gathered during the checks; not returned
+
+    if not GDALVersion.runtime().at_least("2.2"):
+        raise Exception("GDAL 2.2 or above required")
+
+    config = dict(GDAL_DISABLE_READDIR_ON_OPEN="FALSE")
+    with rasterio.Env(**config):
+        with rasterio.open(src_path) as src:
+            if not src.driver == "GTiff":
+                raise Exception("The file is not a GeoTIFF")
+
+            filelist = [os.path.basename(f) for f in src.files]
+            src_bname = os.path.basename(src_path)
+            if len(filelist) > 1 and src_bname + ".ovr" in filelist:
+                errors.append(
+                    "Overviews found in external .ovr file. They should be internal"
+                )
+
+            # Bind the overview list before the size test: every later check
+            # iterates over it, including for rasters smaller than 512 pixels.
+            overviews = src.overviews(1)
+
+            if src.width >= 512 or src.height >= 512:
+                if not src.is_tiled:
+                    errors.append(
+                        "The file is greater than 512xH or 512xW, but is not tiled"
+                    )
+
+                if not overviews:
+                    warnings.append(
+                        "The file is greater than 512xH or 512xW, it is recommended "
+                        "to include internal overviews"
+                    )
+
+            ifd_offset = _tag_offset(src, "IFD_OFFSET")
+            ifd_offsets = [ifd_offset]
+            if ifd_offset is None:
+                errors.append("Missing IFD_OFFSET")
+            elif ifd_offset not in (8, 16):
+                errors.append(
+                    "The offset of the main IFD should be 8 for ClassicTIFF "
+                    "or 16 for BigTIFF. It is {} instead".format(ifd_offset)
+                )
+
+            details["ifd_offsets"] = {}
+            details["ifd_offsets"]["main"] = ifd_offset
+
+            if not overviews == sorted(overviews):
+                errors.append("Overviews should be sorted")
+
+            for ix, dec in enumerate(overviews):
+
+                # NOTE: Size check is handled in rasterio `src.overviews` methods
+                # https://github.com/mapbox/rasterio/blob/4ebdaa08cdcc65b141ed3fe95cf8bbdd9117bc0b/rasterio/_base.pyx
+                # We just need to make sure the decimation level is > 1
+                if not dec > 1:
+                    errors.append(
+                        "Invalid Decimation {} for overview level {}".format(dec, ix)
+                    )
+
+                # Check that the IFD of descending overviews are sorted by increasing
+                # offsets
+                ifd_offset = _tag_offset(src, "IFD_OFFSET", ovr=ix)
+                ifd_offsets.append(ifd_offset)
+
+                details["ifd_offsets"]["overview_{}".format(ix)] = ifd_offset
+                if ifd_offset is None:
+                    errors.append(
+                        "Missing IFD_OFFSET for overview of index {}".format(ix)
+                    )
+                elif _precedes(ifd_offsets[-1], ifd_offsets[-2]):
+                    if ix == 0:
+                        errors.append(
+                            "The offset of the IFD for overview of index {} is {}, "
+                            "whereas it should be greater than the one of the main "
+                            "image, which is at byte {}".format(
+                                ix, ifd_offsets[-1], ifd_offsets[-2]
+                            )
+                        )
+                    else:
+                        errors.append(
+                            "The offset of the IFD for overview of index {} is {}, "
+                            "whereas it should be greater than the one of index {}, "
+                            "which is at byte {}".format(
+                                ix, ifd_offsets[-1], ix - 1, ifd_offsets[-2]
+                            )
+                        )
+
+            data_offset = _tag_offset(src, "BLOCK_OFFSET_0_0")
+            if not data_offset:
+                # An absent tag and a zero offset are both reported as missing.
+                errors.append("Missing BLOCK_OFFSET_0_0")
+                data_offset = None
+
+            data_offsets = [data_offset]
+            details["data_offsets"] = {}
+            details["data_offsets"]["main"] = data_offset
+
+            for ix, _ in enumerate(overviews):
+                data_offset = _tag_offset(src, "BLOCK_OFFSET_0_0", ovr=ix)
+                if data_offset is None:
+                    errors.append(
+                        "Missing BLOCK_OFFSET_0_0 for overview of index {}".format(ix)
+                    )
+                data_offsets.append(data_offset)
+                details["data_offsets"]["overview_{}".format(ix)] = data_offset
+
+            if _precedes(data_offsets[-1], ifd_offsets[-1]):
+                if overviews:
+                    errors.append(
+                        "The offset of the first block of the smallest overview "
+                        "should be after its IFD"
+                    )
+                else:
+                    errors.append(
+                        "The offset of the first block of the image should "
+                        "be after its IFD"
+                    )
+
+            for i in range(len(data_offsets) - 2, 0, -1):
+                if _precedes(data_offsets[i], data_offsets[i + 1]):
+                    errors.append(
+                        "The offset of the first block of overview of index {} should "
+                        "be after the one of the overview of index {}".format(i - 1, i)
+                    )
+
+            if len(data_offsets) >= 2 and _precedes(data_offsets[0], data_offsets[1]):
+                errors.append(
+                    "The offset of the first block of the main resolution image "
+                    "should be after the one of the overview of index {}".format(
+                        len(overviews) - 1
+                    )
+                )
+
+        for ix, _ in enumerate(overviews):
+            with rasterio.open(src_path, OVERVIEW_LEVEL=ix) as ovr_dst:
+                if ovr_dst.width >= 512 or ovr_dst.height >= 512:
+                    if not ovr_dst.is_tiled:
+                        errors.append("Overview of index {} is not tiled".format(ix))
+
+    if warnings:
+        click.secho("The following warnings were found:", fg="yellow", err=True)
+        for w in warnings:
+            click.echo("- " + w, err=True)
+        click.echo(err=True)
+
+    if errors:
+        click.secho("The following errors were found:", fg="red", err=True)
+        for e in errors:
+            click.echo("- " + e, err=True)
+
+        return False
+
+    return True
diff --git a/rio_cogeo/scripts/cli.py b/rio_cogeo/scripts/cli.py index d877f22..dd30bbe 100644 --- a/rio_cogeo/scripts/cli.py +++ b/rio_cogeo/scripts/cli.py @@ -8,7 +8,7 @@ from rasterio.rio import options from rasterio.enums import Resampling -from rio_cogeo.cogeo import cog_translate +from rio_cogeo.cogeo import cog_translate, cog_validate from rio_cogeo.profiles import cog_profiles @@ -49,7 +49,13 @@ def convert(self, value, param, ctx): raise click.ClickException("{} is not a valid nodata value.".format(value)) -@click.command() +@click.group(short_help="Create and Validate COGEO") +def cogeo(): + """Rasterio cogeo subcommands.""" + pass + + +@cogeo.command(short_help="Create COGEO") @options.file_in_arg @options.file_out_arg 
@click.option("--bidx", "-b", type=BdxParamType(), help="Band indexes to copy.") @@ -98,7 +104,7 @@ def convert(self, value, param, ctx): help="Suppress progress bar and other non-error output.", is_flag=True, ) -def cogeo( +def create( input, output, bidx, @@ -136,3 +142,13 @@ def cogeo( config, quiet, ) + + +@cogeo.command(short_help="Validate COGEO") +@options.file_in_arg +def validate(input): + """Validate Cloud Optimized Geotiff.""" + if cog_validate(input): + click.echo("{} is a valid cloud optimized GeoTIFF".format(input)) + else: + click.echo("{} is NOT a valid cloud optimized GeoTIFF".format(input)) diff --git a/tests/fixtures/image_2000px.tif b/tests/fixtures/image_2000px.tif new file mode 100644 index 0000000..e564cb3 Binary files /dev/null and b/tests/fixtures/image_2000px.tif differ diff --git a/tests/fixtures/validate/image_dec.tif b/tests/fixtures/validate/image_dec.tif new file mode 100644 index 0000000..5d33e2b Binary files /dev/null and b/tests/fixtures/validate/image_dec.tif differ diff --git a/tests/fixtures/validate/image_external.tif b/tests/fixtures/validate/image_external.tif new file mode 100644 index 0000000..67c6391 Binary files /dev/null and b/tests/fixtures/validate/image_external.tif differ diff --git a/tests/fixtures/validate/image_external.tif.ovr b/tests/fixtures/validate/image_external.tif.ovr new file mode 100644 index 0000000..f55d3e7 Binary files /dev/null and b/tests/fixtures/validate/image_external.tif.ovr differ diff --git a/tests/fixtures/validate/image_sorted.tif b/tests/fixtures/validate/image_sorted.tif new file mode 100644 index 0000000..2841b95 Binary files /dev/null and b/tests/fixtures/validate/image_sorted.tif differ diff --git a/tests/fixtures/validate/nontiff.jpg b/tests/fixtures/validate/nontiff.jpg new file mode 100644 index 0000000..9bd8ade Binary files /dev/null and b/tests/fixtures/validate/nontiff.jpg differ diff --git a/tests/test_cli.py b/tests/test_cli.py index c1ceadf..d264e57 100644 --- 
a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,6 +20,9 @@ raster_path_missingnodata = os.path.join( os.path.dirname(__file__), "fixtures", "image_missing_nodata.tif" ) +raster_jpeg = os.path.join( + os.path.dirname(__file__), "fixtures", "validate", "nontiff.jpg" +) @pytest.fixture(autouse=True) @@ -35,7 +38,7 @@ def test_cogeo_valid(): runner = CliRunner() with runner.isolated_filesystem(): result = runner.invoke( - cogeo, [raster_path_rgb, "output.tif", "--add-mask", "--quiet"] + cogeo, ["create", raster_path_rgb, "output.tif", "--add-mask", "--quiet"] ) assert not result.exception assert result.exit_code == 0 @@ -60,7 +63,9 @@ def test_cogeo_valid_external_mask(monkeypatch): runner = CliRunner() with runner.isolated_filesystem(): - result = runner.invoke(cogeo, [raster_path_rgb, "output.tif", "--add-mask"]) + result = runner.invoke( + cogeo, ["create", raster_path_rgb, "output.tif", "--add-mask"] + ) assert not result.exception assert result.exit_code == 0 with rasterio.open("output.tif") as src: @@ -73,7 +78,17 @@ def test_cogeo_validbidx(): runner = CliRunner() with runner.isolated_filesystem(): result = runner.invoke( - cogeo, [raster_path_rgb, "output.tif", "-b", "1", "-p", "raw", "--add-mask"] + cogeo, + [ + "create", + raster_path_rgb, + "output.tif", + "-b", + "1", + "-p", + "raw", + "--add-mask", + ], ) assert not result.exception assert result.exit_code == 0 @@ -86,7 +101,9 @@ def test_cogeo_invalidbidx(): """Should exit with invalid band indexes.""" runner = CliRunner() with runner.isolated_filesystem(): - result = runner.invoke(cogeo, [raster_path_rgb, "output.tif", "-b", "0"]) + result = runner.invoke( + cogeo, ["create", raster_path_rgb, "output.tif", "-b", "0"] + ) assert result.exception assert result.exit_code == 1 @@ -95,7 +112,9 @@ def test_cogeo_invalidbidxString(): """Should exit with invalid band indexes.""" runner = CliRunner() with runner.isolated_filesystem(): - result = runner.invoke(cogeo, [raster_path_rgb, "output.tif", "-b", "a"]) 
+ result = runner.invoke( + cogeo, ["create", raster_path_rgb, "output.tif", "-b", "a"] + ) assert result.exception assert result.exit_code == 1 @@ -106,7 +125,7 @@ def test_cogeo_validnodata(): with runner.isolated_filesystem(): with pytest.warns(LossyCompression): result = runner.invoke( - cogeo, [raster_path_rgb, "output.tif", "--nodata", "0"] + cogeo, ["create", raster_path_rgb, "output.tif", "--nodata", "0"] ) assert not result.exception assert result.exit_code == 0 @@ -117,6 +136,7 @@ def test_cogeo_validnodata(): result = runner.invoke( cogeo, [ + "create", raster_path_nodata, "output.tif", "--co", @@ -140,7 +160,15 @@ def test_cogeo_validGdalOptions(): with runner.isolated_filesystem(): result = runner.invoke( cogeo, - [raster_path_rgb, "output.tif", "-p", "raw", "--co", "COMPRESS=DEFLATE"], + [ + "create", + raster_path_rgb, + "output.tif", + "-p", + "raw", + "--co", + "COMPRESS=DEFLATE", + ], ) assert not result.exception assert result.exit_code == 0 @@ -155,6 +183,7 @@ def test_cogeo_validOvrOption(): result = runner.invoke( cogeo, [ + "create", raster_path_rgb, "output.tif", "--overview-level", @@ -180,6 +209,7 @@ def test_cogeo_overviewTilesize(monkeypatch): result = runner.invoke( cogeo, [ + "create", raster_path_rgb, "output.tif", "--quiet", @@ -202,6 +232,7 @@ def test_cogeo_overviewTilesize(monkeypatch): result = runner.invoke( cogeo, [ + "create", raster_path_rgb, "output.tif", "--quiet", @@ -223,6 +254,7 @@ def test_cogeo_overviewTilesize(monkeypatch): result = runner.invoke( cogeo, [ + "create", raster_path_rgb, "output.tif", "--quiet", @@ -245,6 +277,7 @@ def test_cogeo_validgdalBlockOption(): result = runner.invoke( cogeo, [ + "create", raster_path_rgb, "output.tif", "--co", @@ -267,6 +300,7 @@ def test_cogeo_validNodataCustom(): result = runner.invoke( cogeo, [ + "create", raster_path_nan, "output.tif", "--cog-profile", @@ -291,6 +325,7 @@ def test_cogeo_validNodataCustom(): result = runner.invoke( cogeo, [ + "create", 
raster_path_missingnodata, "output.tif", "--cog-profile", @@ -315,6 +350,7 @@ def test_cogeo_validNodataCustom(): result = runner.invoke( cogeo, [ + "create", raster_path_missingnodata, "output.tif", "--cog-profile", @@ -336,6 +372,7 @@ def test_cogeo_validNodataCustom(): result = runner.invoke( cogeo, [ + "create", raster_path_nan, "output.tif", "--cog-profile", @@ -346,3 +383,11 @@ def test_cogeo_validNodataCustom(): ) assert result.exception assert result.exit_code == 1 + + +def test_cogeo_validate(): + """Should work as expected.""" + runner = CliRunner() + result = runner.invoke(cogeo, ["validate", raster_path_rgb]) + assert not result.exception + assert result.exit_code == 0 diff --git a/tests/test_validate.py b/tests/test_validate.py new file mode 100644 index 0000000..8ea8e74 --- /dev/null +++ b/tests/test_validate.py @@ -0,0 +1,71 @@ +"""tests rio_cogeo.cogeo.""" + +import os + +import pytest +from click.testing import CliRunner + +from rio_cogeo.cogeo import cog_validate, cog_translate +from rio_cogeo.profiles import cog_profiles + + +raster_rgb = os.path.join(os.path.dirname(__file__), "fixtures", "image_rgb.tif") +raster_external = os.path.join( + os.path.dirname(__file__), "fixtures", "validate", "image_external.tif" +) +raster_ovrsorted = os.path.join( + os.path.dirname(__file__), "fixtures", "validate", "image_sorted.tif" +) +raster_decim = os.path.join( + os.path.dirname(__file__), "fixtures", "validate", "image_dec.tif" +) +raster_jpeg = os.path.join( + os.path.dirname(__file__), "fixtures", "validate", "nontiff.jpg" +) +raster_big = os.path.join(os.path.dirname(__file__), "fixtures", "image_2000px.tif") + + +jpeg_profile = cog_profiles.get("jpeg") +jpeg_profile.update({"blockxsize": 256, "blockysize": 256}) + + +def test_cog_validate_valid(monkeypatch): + """Should work as expected (validate cogeo file).""" + # not tiled + assert not cog_validate(raster_rgb) + + # external overview + assert not cog_validate(raster_external) + + # non-sorted 
overview + assert not cog_validate(raster_ovrsorted) + + # invalid decimation + assert not cog_validate(raster_decim) + + with pytest.raises(Exception): + cog_validate(raster_jpeg) + + +def test_cog_validate_validCreatioValid(monkeypatch): + """Should work as expected (validate cogeo file).""" + runner = CliRunner() + with runner.isolated_filesystem(): + cog_translate(raster_rgb, "cogeo.tif", jpeg_profile, quiet=True) + assert cog_validate("cogeo.tif") + + cog_translate( + raster_rgb, "cogeo.tif", jpeg_profile, overview_level=0, quiet=True + ) + assert cog_validate("cogeo.tif") + + config = dict(GDAL_TIFF_OVR_BLOCKSIZE="1024") + cog_translate( + raster_big, + "cogeo.tif", + jpeg_profile, + overview_level=1, + config=config, + quiet=True, + ) + assert not cog_validate("cogeo.tif")