Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable opening datasets with gcps even if no valid crs is present #182

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
20 changes: 14 additions & 6 deletions stackstac/rio_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,13 +343,21 @@ def _open(self) -> ThreadsafeRioDataset:
"a separate STAC asset), so you'll need to exclude this asset from your analysis."
)

# Set source crs from ground control points (gcps) if present
if not hasattr(ds.crs, "to_epsg") and ds.gcps is not None:
self.spec.vrt_params["src_crs"] = ds.gcps[-1]

# Only make a VRT if the dataset doesn't match the spatial spec we want
if self.spec.vrt_params != {
"crs": ds.crs.to_epsg(),
"transform": ds.transform,
"height": ds.height,
"width": ds.width,
}:
if "src_crs" in self.spec.vrt_params or (
hasattr(ds.crs, "to_epsg")
and self.spec.vrt_params
!= {
"crs": ds.crs.to_epsg(),
"transform": ds.transform,
"height": ds.height,
"width": ds.width,
}
):
with self.gdal_env.open_vrt:
vrt = WarpedVRT(
ds,
Expand Down
52 changes: 52 additions & 0 deletions stackstac/tests/test_rio_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import tempfile

import numpy as np
from rasterio.control import GroundControlPoint
from rasterio.crs import CRS
from rasterio.enums import Resampling
from rasterio.windows import Window
from stackstac.raster_spec import RasterSpec
from stackstac.rio_reader import AutoParallelRioReader


def test_dataset_read_with_gcps():
"""
Ensure that GeoTIFFs with ground control points (gcps) can be read using
AutoParallelRioReader.

Regression test for https://github.com/gjoseph92/stackstac/issues/181.
"""
with tempfile.NamedTemporaryFile(suffix=".tif") as tmpfile:
src_gcps = [
GroundControlPoint(row=0, col=0, x=156113, y=2818720, z=0),
GroundControlPoint(row=0, col=800, x=338353, y=2785790, z=0),
GroundControlPoint(row=800, col=800, x=297939, y=2618518, z=0),
GroundControlPoint(row=800, col=0, x=115698, y=2651448, z=0),
]
crs = CRS.from_epsg(32618)
with rasterio.open(
tmpfile.name,
mode="w",
height=800,
width=800,
count=1,
dtype=np.uint8,
driver="GTiff",
) as source:
source.gcps = (src_gcps, crs)

reader = AutoParallelRioReader(
url=tmpfile.name,
spec=RasterSpec(
epsg=4326, bounds=(90, -10, 110, 10), resolutions_xy=(10, 10)
),
resampling=rasterio.enums.Resampling.bilinear,
dtype=np.float32,
fill_value=np.nan,
rescale=True,
)
array = reader.read(window=Window(col_off=0, row_off=0, width=10, height=10))

np.testing.assert_allclose(
actual=array, desired=np.array([[0.0, 0.0], [0.0, 0.0]], dtype=np.float32)
)
Comment on lines +51 to +53
Copy link
Author

@weiji14 weiji14 Sep 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks a bunch for working on this @weiji14, and thanks for the test too!

Don't thank me yet, this is a really bad test 😅 It only checks that things can run (and that a GeoTIFF with gcps works without an error message), but I don't like this assert statement... Any pointers?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, maybe we could read it back using rasterio with (what we think are) the right parameters, and check that the results match?

Only other thing I can think of would be to write some data into the file (probably like np.linspace(...).reshape(...)). Then we could make a more meaningful assertion about the values we get out.