From 66f3781df3cd41e7619854f6d77d125b77753377 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 23 Sep 2022 14:08:02 -0400 Subject: [PATCH 1/8] Enable opening datasets with gcps even if no valid crs is present Some datasets like Sentinel-1 GRD files don't have a coordinate reference system (crs), but they may have ground control points (gcps) and can still be opened by rasterio. --- stackstac/rio_reader.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/stackstac/rio_reader.py b/stackstac/rio_reader.py index 226c455..7424a62 100644 --- a/stackstac/rio_reader.py +++ b/stackstac/rio_reader.py @@ -344,12 +344,16 @@ def _open(self) -> ThreadsafeRioDataset: ) # Only make a VRT if the dataset doesn't match the spatial spec we want - if self.spec.vrt_params != { - "crs": ds.crs.to_epsg(), - "transform": ds.transform, - "height": ds.height, - "width": ds.width, - }: + if ( + hasattr(ds.crs, "to_epsg") + and self.spec.vrt_params + != { + "crs": ds.crs.to_epsg(), + "transform": ds.transform, + "height": ds.height, + "width": ds.width, + } + ) or ds.gcps is not None: with self.gdal_env.open_vrt: vrt = WarpedVRT( ds, From 4f656fe0c8fabcafa0ffeb301c7fb4f22a6af7cf Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 23 Sep 2022 14:43:30 -0400 Subject: [PATCH 2/8] Get src_crs from last index position of gcps tuple Need to reproject from a proper source coordinate reference system (src_crs) instead of arbitrarily. --- stackstac/rio_reader.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/stackstac/rio_reader.py b/stackstac/rio_reader.py index 7424a62..640a819 100644 --- a/stackstac/rio_reader.py +++ b/stackstac/rio_reader.py @@ -344,16 +344,12 @@ def _open(self) -> ThreadsafeRioDataset: ) # Only make a VRT if the dataset doesn't match the spatial spec we want - if ( - hasattr(ds.crs, "to_epsg") - and self.spec.vrt_params - != { - "crs": ds.crs.to_epsg(), - "transform": ds.transform, - "height": ds.height, - "width": ds.width, - } - ) or ds.gcps is not None: + if hasattr(ds.crs, "to_epsg") and self.spec.vrt_params != { + "crs": ds.crs.to_epsg(), + "transform": ds.transform, + "height": ds.height, + "width": ds.width, + }: with self.gdal_env.open_vrt: vrt = WarpedVRT( ds, @@ -361,6 +357,16 @@ def _open(self) -> ThreadsafeRioDataset: resampling=self.resampling, **self.spec.vrt_params, ) + elif ds.gcps is not None: + with self.gdal_env.open_vrt: + src_crs = ds.gcps[-1] + vrt = WarpedVRT( + ds, + src_crs=src_crs, + sharing=False, + resampling=self.resampling, + **self.spec.vrt_params, + ) else: logger.info(f"Skipping VRT for {self.url!r}") vrt = None From edbf3a95e3e29c292c88ac875b4dff9ffdf723f2 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 23 Sep 2022 15:44:36 -0400 Subject: [PATCH 3/8] Add a unit test to check that reading GeoTIFFs with gcps work Not very well written, but it's a start I guess. Synthetic GeoTIFF generation code modified from https://github.com/corteva/rioxarray/blob/bfd616594bb82886133b5f4b9356ed16d39014fc/test/integration/test_integration_rioxarray.py#L1053. --- stackstac/tests/test_rio_reader.py | 52 ++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 stackstac/tests/test_rio_reader.py diff --git a/stackstac/tests/test_rio_reader.py b/stackstac/tests/test_rio_reader.py new file mode 100644 index 0000000..ecbf8be --- /dev/null +++ b/stackstac/tests/test_rio_reader.py @@ -0,0 +1,52 @@ +import tempfile + +import numpy as np +from rasterio.control import GroundControlPoint +from rasterio.crs import CRS +from rasterio.enums import Resampling +from rasterio.windows import Window +from stackstac.raster_spec import RasterSpec +from stackstac.rio_reader import AutoParallelRioReader + + +def test_dataset_read_with_gcps(): + """ + Ensure that GeoTIFFs with ground control points (gcps) can be read using + AutoParallelRioReader. + + Regression test for https://github.com/gjoseph92/stackstac/issues/181. + """ + with tempfile.NamedTemporaryFile(suffix=".tif") as tmpfile: + src_gcps = [ + GroundControlPoint(row=0, col=0, x=156113, y=2818720, z=0), + GroundControlPoint(row=0, col=800, x=338353, y=2785790, z=0), + GroundControlPoint(row=800, col=800, x=297939, y=2618518, z=0), + GroundControlPoint(row=800, col=0, x=115698, y=2651448, z=0), + ] + crs = CRS.from_epsg(32618) + with rasterio.open( + tmpfile.name, + mode="w", + height=800, + width=800, + count=1, + dtype=np.uint8, + driver="GTiff", + ) as source: + source.gcps = (src_gcps, crs) + + reader = AutoParallelRioReader( + url=tmpfile.name, + spec=RasterSpec( + epsg=4326, bounds=(90, -10, 110, 10), resolutions_xy=(10, 10) + ), + resampling=rasterio.enums.Resampling.bilinear, + dtype=np.float32, + fill_value=np.nan, + rescale=True, + ) + array = reader.read(window=Window(col_off=0, row_off=0, width=10, height=10)) + + np.testing.assert_allclose( + actual=array, desired=np.array([[0.0, 0.0], [0.0, 0.0]], dtype=np.float32) + ) From cc0cd1fb550273a75e32942decdf6a07ef68c6f9 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 23 Sep 2022 15:50:51 -0400 Subject: [PATCH 4/8] Add an inline comment about trying to make VRT when gcps are present --- stackstac/rio_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stackstac/rio_reader.py b/stackstac/rio_reader.py index 640a819..37c84d1 100644 --- a/stackstac/rio_reader.py +++ b/stackstac/rio_reader.py @@ -357,6 +357,7 @@ def _open(self) -> ThreadsafeRioDataset: resampling=self.resampling, **self.spec.vrt_params, ) + # Alternatively, try to make VRT when ground control points (gcps) are present elif ds.gcps is not None: with self.gdal_env.open_vrt: src_crs = ds.gcps[-1] From 1b3c47c6bd0d3ec9dd17b25dc8cb1c8496a131a7 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 23 Sep 2022 18:29:36 -0400 Subject: [PATCH 5/8] Remove duplicated WarpedVRT instantiation Put the src_crs in the vrt_params dict to avoid an elif statement. --- stackstac/rio_reader.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/stackstac/rio_reader.py b/stackstac/rio_reader.py index 37c84d1..70ed0b0 100644 --- a/stackstac/rio_reader.py +++ b/stackstac/rio_reader.py @@ -343,27 +343,24 @@ def _open(self) -> ThreadsafeRioDataset: "a separate STAC asset), so you'll need to exclude this asset from your analysis." ) + # Set source crs from ground control points (gcps) if present + if not hasattr(ds.crs, "to_epsg") and ds.gcps is not None: + self.spec.vrt_params["src_crs"] = ds.gcps[-1] + # Only make a VRT if the dataset doesn't match the spatial spec we want - if hasattr(ds.crs, "to_epsg") and self.spec.vrt_params != { - "crs": ds.crs.to_epsg(), - "transform": ds.transform, - "height": ds.height, - "width": ds.width, - }: - with self.gdal_env.open_vrt: - vrt = WarpedVRT( - ds, - sharing=False, - resampling=self.resampling, - **self.spec.vrt_params, - ) - # Alternatively, try to make VRT when ground control points (gcps) are present - elif ds.gcps is not None: + if "src_crs" in self.spec.vrt_params or ( + hasattr(ds.crs, "to_epsg") + and self.spec.vrt_params + != { + "crs": ds.crs.to_epsg(), + "transform": ds.transform, + "height": ds.height, + "width": ds.width, + } + ): with self.gdal_env.open_vrt: - src_crs = ds.gcps[-1] vrt = WarpedVRT( ds, - src_crs=src_crs, sharing=False, resampling=self.resampling, **self.spec.vrt_params, From 1fb37c7ee3ca965ebdd8340637c17a4d4fdf1fc5 Mon Sep 17 00:00:00 2001 From: Gabe Joseph Date: Mon, 24 Oct 2022 15:23:51 -0400 Subject: [PATCH 6/8] avoid mutating `spec.vrt_params` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not a good idea in dask to mutate inputs. This object could be shared with other tasks. Also, I think we just need to check `ds.crs is None`—if `ds.crs` is a CRS, it will always have a `to_epsg` method. (Though that could fail, so I've added handling for that case too.) --- stackstac/rio_reader.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/stackstac/rio_reader.py b/stackstac/rio_reader.py index 70ed0b0..5289260 100644 --- a/stackstac/rio_reader.py +++ b/stackstac/rio_reader.py @@ -7,6 +7,7 @@ import numpy as np import rasterio as rio +import rasterio.errors from rasterio.vrt import WarpedVRT from .rio_env import LayeredEnv @@ -323,9 +324,7 @@ def _open(self) -> ThreadsafeRioDataset: with self.gdal_env.open: with time(f"Initial read for {self.url!r} on {_curthread()}: {{t}}"): try: - ds = SelfCleaningDatasetReader( - self.url, sharing=False - ) + ds = SelfCleaningDatasetReader(self.url, sharing=False) except Exception as e: msg = f"Error opening {self.url!r}: {e!r}" if exception_matches(e, self.errors_as_nodata): @@ -343,26 +342,30 @@ def _open(self) -> ThreadsafeRioDataset: "a separate STAC asset), so you'll need to exclude this asset from your analysis." ) - # Set source crs from ground control points (gcps) if present - if not hasattr(ds.crs, "to_epsg") and ds.gcps is not None: - self.spec.vrt_params["src_crs"] = ds.gcps[-1] + ds_epsg: int | None + try: + ds_epsg = ds.crs.to_epsg() if ds.crs is not None else None + except rasterio.errors.CRSError: + ds_epsg = None # Only make a VRT if the dataset doesn't match the spatial spec we want - if "src_crs" in self.spec.vrt_params or ( - hasattr(ds.crs, "to_epsg") - and self.spec.vrt_params - != { - "crs": ds.crs.to_epsg(), - "transform": ds.transform, - "height": ds.height, - "width": ds.width, - } - ): + if ds_epsg is not None and self.spec.vrt_params != { + "crs": ds_epsg, + "transform": ds.transform, + "height": ds.height, + "width": ds.width, + }: + # Set source crs from ground control points (gcps) if present + src_crs = ( + ds.gcps[-1] if ds.crs is None and ds.gcps is not None else None + ) + with self.gdal_env.open_vrt: vrt = WarpedVRT( ds, sharing=False, resampling=self.resampling, + src_crs=src_crs, **self.spec.vrt_params, ) else: From fca79613303e25124cd30b2646e5f61dcf10ae11 Mon Sep 17 00:00:00 2001 From: Gabe Joseph Date: Mon, 24 Oct 2022 15:27:49 -0400 Subject: [PATCH 7/8] fix up test imports The test is still failing for me though. --- stackstac/tests/test_rio_reader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stackstac/tests/test_rio_reader.py b/stackstac/tests/test_rio_reader.py index ecbf8be..1735515 100644 --- a/stackstac/tests/test_rio_reader.py +++ b/stackstac/tests/test_rio_reader.py @@ -1,9 +1,10 @@ import tempfile import numpy as np +import rasterio +import rasterio.enums from rasterio.control import GroundControlPoint from rasterio.crs import CRS -from rasterio.enums import Resampling from rasterio.windows import Window from stackstac.raster_spec import RasterSpec from stackstac.rio_reader import AutoParallelRioReader @@ -41,7 +42,7 @@ def test_dataset_read_with_gcps(): epsg=4326, bounds=(90, -10, 110, 10), resolutions_xy=(10, 10) ), resampling=rasterio.enums.Resampling.bilinear, - dtype=np.float32, + dtype=np.dtype("float32"), fill_value=np.nan, rescale=True, ) From d7b08203ab754295bf5644fa0a661df9fa94e8b9 Mon Sep 17 00:00:00 2001 From: Gabe Joseph Date: Mon, 24 Oct 2022 16:56:01 -0400 Subject: [PATCH 8/8] fix conditional this also gets the test passing --- stackstac/rio_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stackstac/rio_reader.py b/stackstac/rio_reader.py index 5289260..8771ebf 100644 --- a/stackstac/rio_reader.py +++ b/stackstac/rio_reader.py @@ -349,7 +349,7 @@ def _open(self) -> ThreadsafeRioDataset: ds_epsg = None # Only make a VRT if the dataset doesn't match the spatial spec we want - if ds_epsg is not None and self.spec.vrt_params != { + if ds_epsg is None or self.spec.vrt_params != { "crs": ds_epsg, "transform": ds.transform, "height": ds.height,