Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable opening datasets with gcps even if no valid crs is present #182

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
21 changes: 16 additions & 5 deletions stackstac/rio_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import numpy as np
import rasterio as rio
import rasterio.errors
from rasterio.vrt import WarpedVRT

from .rio_env import LayeredEnv
Expand Down Expand Up @@ -323,9 +324,7 @@ def _open(self) -> ThreadsafeRioDataset:
with self.gdal_env.open:
with time(f"Initial read for {self.url!r} on {_curthread()}: {{t}}"):
try:
ds = SelfCleaningDatasetReader(
self.url, sharing=False
)
ds = SelfCleaningDatasetReader(self.url, sharing=False)
except Exception as e:
msg = f"Error opening {self.url!r}: {e!r}"
if exception_matches(e, self.errors_as_nodata):
Expand All @@ -343,18 +342,30 @@ def _open(self) -> ThreadsafeRioDataset:
"a separate STAC asset), so you'll need to exclude this asset from your analysis."
)

ds_epsg: int | None
try:
ds_epsg = ds.crs.to_epsg() if ds.crs is not None else None
except rasterio.errors.CRSError:
ds_epsg = None

# Only make a VRT if the dataset doesn't match the spatial spec we want
if self.spec.vrt_params != {
"crs": ds.crs.to_epsg(),
if ds_epsg is None or self.spec.vrt_params != {
"crs": ds_epsg,
"transform": ds.transform,
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MIght need to set src_transform in addition to transform as mentioned by @vincentsarago in #181 (comment)?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So looking at the code it might be a bit more complex.

What I do in rio-tiler, is automatically create a WarpedVRT when we have gcps (which in stackstac case should replace the ds) and then I create another WarpedVRT on top of it if I need warping

"height": ds.height,
"width": ds.width,
}:
# Set source crs from ground control points (gcps) if present
src_crs = (
ds.gcps[-1] if ds.crs is None and ds.gcps is not None else None
)

with self.gdal_env.open_vrt:
vrt = WarpedVRT(
ds,
sharing=False,
resampling=self.resampling,
src_crs=src_crs,
gjoseph92 marked this conversation as resolved.
Show resolved Hide resolved
**self.spec.vrt_params,
)
else:
Expand Down
53 changes: 53 additions & 0 deletions stackstac/tests/test_rio_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import tempfile

import numpy as np
import rasterio
import rasterio.enums
from rasterio.control import GroundControlPoint
from rasterio.crs import CRS
from rasterio.windows import Window
from stackstac.raster_spec import RasterSpec
from stackstac.rio_reader import AutoParallelRioReader


def test_dataset_read_with_gcps():
"""
Ensure that GeoTIFFs with ground control points (gcps) can be read using
AutoParallelRioReader.

Regression test for https://github.com/gjoseph92/stackstac/issues/181.
"""
with tempfile.NamedTemporaryFile(suffix=".tif") as tmpfile:
src_gcps = [
GroundControlPoint(row=0, col=0, x=156113, y=2818720, z=0),
GroundControlPoint(row=0, col=800, x=338353, y=2785790, z=0),
GroundControlPoint(row=800, col=800, x=297939, y=2618518, z=0),
GroundControlPoint(row=800, col=0, x=115698, y=2651448, z=0),
]
crs = CRS.from_epsg(32618)
with rasterio.open(
tmpfile.name,
mode="w",
height=800,
width=800,
count=1,
dtype=np.uint8,
driver="GTiff",
) as source:
source.gcps = (src_gcps, crs)

reader = AutoParallelRioReader(
url=tmpfile.name,
spec=RasterSpec(
epsg=4326, bounds=(90, -10, 110, 10), resolutions_xy=(10, 10)
),
resampling=rasterio.enums.Resampling.bilinear,
dtype=np.dtype("float32"),
fill_value=np.nan,
rescale=True,
)
array = reader.read(window=Window(col_off=0, row_off=0, width=10, height=10))

np.testing.assert_allclose(
actual=array, desired=np.array([[0.0, 0.0], [0.0, 0.0]], dtype=np.float32)
)
Comment on lines +51 to +53
Copy link
Author

@weiji14 weiji14 Sep 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks a bunch for working on this @weiji14, and thanks for the test too!

Don't thank me yet, this is a really bad test 😅 It only checks that things can run (and that a GeoTIFF with gcps works without an error message), but I don't like this assert statement... Any pointers?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, maybe we could read it back using rasterio with (what we think are) the right parameters, and check that the results match?

Only other thing I can think of would be to write some data into the file (probably like np.linspace(...).reshape(...)). Then we could make a more meaningful assertion about the values we get out.