add five geotiff function

1. get_header 2. get_imarray 3. geo2pixel 4. pixel2geo 5. tifffile_crop
UTokyo-FieldPhenomics-Lab · Jul 2, 2022 · f02cd19 · f02cd19
1 parent 85627c8
commit f02cd19
Show file tree

Hide file tree

Showing 2 changed files with 548 additions and 0 deletions.
diff --git a/easyidp/geotiff.py b/easyidp/geotiff.py
@@ -0,0 +1,383 @@
+import os
+import pyproj
+import numpy as np
+import tifffile as tf
+
+from pyproj.exceptions import CRSError
+
+
+def get_header(tif_path):
+    """Read the necessary meta infomation from TIFF file
+
+    Parameters
+    ----------
+    tif_path : str
+        the path to the geotiff file
+
+    Returns
+    -------
+    header: dict
+        the container of acquired meta info
+    """
+    with tf.TiffFile(tif_path) as tif:
+        header = {'width': None, 'height': None, 'dim':1, 
+                'scale': None, 'tie_point': None, 'nodata': None, 'proj': None}
+
+        header["height"] = tif.pages[0].shape[0]
+        header["width"] = tif.pages[0].shape[1]
+        if len(tif.pages[0].shape) > 2:
+            header["dim"] = tif.pages[0].shape[2] 
+        header["nodata"] = tif.pages[0].nodata
+
+        # tif.pages[0].geotiff_tags >>> 'ModelPixelScale': [0.0034900000000000005, 0.0034900000000000005, 0.0]
+        header["scale"] = tif.pages[0].geotiff_tags["ModelPixelScale"][0:2]
+
+        # tif.pages[0].geotiff_tags >>> 'ModelTiepoint': [0.0, 0.0, 0.0, 419509.89816000004, 3987344.8286, 0.0]
+        header["tie_point"] = tif.pages[0].geotiff_tags["ModelTiepoint"][3:5]
+
+        # pix4d:
+        #    tif.pages[0].geotiff_tags >>> 'GTCitationGeoKey': 'WGS 84 / UTM zone 54N'
+        if "GTCitationGeoKey" in tif.pages[0].geotiff_tags.keys():
+            proj_str = tif.pages[0].geotiff_tags["GTCitationGeoKey"]
+        # metashape:
+        #     tif.pages[0].geotiff_tags >>> 'PCSCitationGeoKey': 'WGS 84 / UTM zone 54N'
+        elif "PCSCitationGeoKey" in tif.pages[0].geotiff_tags.keys():
+            proj_str = tif.pages[0].geotiff_tags["PCSCitationGeoKey"]
+        else:
+            raise KeyError("Can not find key 'GTCitationGeoKey' or 'PCSCitationGeoKey' in Geotiff tages")
+
+        try:
+            proj = pyproj.CRS.from_string(proj_str)
+            header['proj'] = proj
+        except CRSError as e:
+            print(f'[io][geotiff][GeoCorrd] Generation failed, because [{e}], but you can manual specify it later by \n'
+                    '>>> import pyproj \n'
+                    '>>> proj = pyproj.CRS.from_epsg() # or from_string() or refer official documents:\n'
+                    'https://pyproj4.github.io/pyproj/dev/api/crs/coordinate_operation.html')
+            pass
+
+    return header
+
+
+def get_imarray(tif_path):
+    """Read full map data as numpy array (time and RAM costy, not recommended)
+
+    Parameters
+    ----------
+    geotiff_path
+    geo_head
+
+    Returns
+    -------
+
+    """
+    with tf.TiffFile(tif_path) as tif:
+        data = tif.pages[0].asarray()
+
+    return data
+
+
+def geo2pixel(points_hv, geo_head, return_index=False):
+    """convert point cloud xyz coordinate to geotiff pixel coordinate (horizontal, vertical)
+
+    Parameters
+    ----------
+    points_hv : numpy nx3 array
+        [horizontal, vertical] points
+    geo_head : dict
+        the geotiff head dictionary from get_header()
+
+    Returns
+    -------
+    The ndarray pixel position of these points (horizontal, vertical)
+        Please note: gis coordinate, horizontal is x axis, vertical is y axis, origin at left upper
+        To clip image ndarray, the first columns is vertical pixel (along height),
+            then second columns is horizontal pixel number (along width),
+            the third columns is 3 or 4 bands (RGB, alpha),
+            the x and y is reversed compared with gis coordinates.
+            This function has already do this reverse, so that you can use the output directly.
+
+    Examples
+    --------
+    >>> geo_head = easyric.io.geotiff.get_header('dom_path.tiff')
+    >>> gis_coord = np.asarray([(x1, y1), ..., (xn, yn)])  # x is horizonal, y is vertical
+    >>> photo_ndarray = skimage.io.imread('img_path.jpg')
+    (h, w, 4) ndarray  # please note the axes differences
+    >>> pixel_coord = geo2pixel(gis_coord, geo_head)
+    (horizontal, vertical) ndarray
+    # then you can used the outputs with reverse 0 and 1 axis
+    >>> region_of_interest = photo_ndarray[pixel_coord[:,1], pixel_coord[:,0], 0:3]
+    """
+    gis_xmin = geo_head['tie_point'][0]
+    gis_ymax = geo_head['tie_point'][1]
+
+    gis_ph = points_hv[:, 0]
+    gis_pv = points_hv[:, 1]
+
+
+    # get the pixel index (int)
+    if return_index:
+        # numpy_axis1 = x
+        np_ax_h = (gis_ph - gis_xmin) // geo_head['scale'][0]
+        # numpy_axis0 = y
+        np_ax_v = (gis_ymax - gis_pv) // geo_head['scale'][1]
+
+    # get float coordinate on pixels
+    else:
+        # numpy_axis1 = x
+        np_ax_h = (gis_ph - gis_xmin) / geo_head['scale'][0]
+        # numpy_axis0 = y
+        np_ax_v = (gis_ymax - gis_pv) / geo_head['scale'][1]
+
+    pixel = np.vstack([np_ax_h, np_ax_v]).T
+
+    return pixel
+
+
+def pixel2geo(points_hv, geo_head):
+    """convert geotiff pixel coordinate (horizontal, vertical) to point cloud xyz coordinate (x, y, z)
+
+    Parameters
+    ----------
+    points_hv : numpy nx2 array
+        [horizontal, vertical] points
+    geo_head : dict
+        the geotiff head dictionary from get_header()
+
+    Returns
+    -------
+    The ndarray pixel position of these points (horizontal, vertical)
+    """
+    gis_xmin = geo_head['tie_point'][0]
+    gis_ymax = geo_head['tie_point'][1]
+
+    # the px is numpy axis0 (vertical, h)
+    # py is numpy axis1 (horizontal, w)
+    if np.issubdtype(points_hv.dtype, np.integer):
+        # all integer possible means the pixel index 
+        #    rather than specific coordinates
+        # +0.5 to get the pixel center rather than edge
+        pix_ph = points_hv[:, 0] + 0.5  
+        pix_pv = points_hv[:, 1] + 0.5
+    elif np.issubdtype(points_hv.dtype, np.floating):
+        # all floats possible means it is the pixel coordinates
+        #    rather than pixel index
+        # no need to +0.5 as image center
+        pix_ph = points_hv[:, 0]
+        pix_pv = points_hv[:, 1]
+    else:
+        raise TypeError(f"The `points_hv` only accept numpy ndarray integer and float types")
+
+    gis_px = gis_xmin + pix_ph * geo_head['scale'][0]
+    gis_py = gis_ymax - pix_pv * geo_head['scale'][1]
+
+    gis_geo = np.vstack([gis_px, gis_py]).T
+
+    return gis_geo
+
+
+def tifffile_crop(page, top, left, h, w):  
+    """
+    Extract a crop from a TIFF image file directory (IFD).
+
+    Only the tiles englobing the crop area are loaded and not the whole page.
+    This is usefull for large Whole slide images that can't fit int RAM.
+
+    (0,0)
+      o--------------------------
+      |           ^
+      |           | top
+      |           v
+      | <-------> o=============o  ^
+      |   left    |<---- w ---->|  |
+      |           |             |  h
+      |           |             |  |
+      |           o=============o  v
+
+    Modified from: 
+    https://gist.github.com/rfezzani/b4b8852c5a48a901c1e94e09feb34743#file-get_crop-py-L60
+
+    Previous version: 
+    caas_lite.get_crop(page, i0, j0, h, w)
+    
+    Parameters
+    ----------
+    page : TiffPage
+        TIFF image file directory (IFD) from which the crop must be extracted.
+    top, left: int
+        Coordinates of the top left corner of the desired crop.
+        top = i0 = height_st
+        left = j0 = w_st
+    h: int
+        Desired crop height.
+    w: int
+        Desired crop width.
+        
+    Returns
+    -------
+    out : ndarray of shape (h, w, sampleperpixel)
+        Extracted crop.""
+    """
+    if page.is_tiled:
+        out = _get_tiled_crop(page, top, left, h, w)
+    else:
+        out = _get_untiled_crop(page, top, left, h, w)
+
+    return out
+
+
+def _get_tiled_crop(page, i0, j0, h, w):
+    """
+    The submodule of self.get_crop() for those tiled geotiff
+
+    Copied from: 
+    https://gist.github.com/rfezzani/b4b8852c5a48a901c1e94e09feb34743#file-get_crop-py-L60
+    """
+    if not page.is_tiled:
+        raise ValueError("Input page must be tiled")
+
+    im_width = page.imagewidth
+    im_height = page.imagelength
+
+    if h < 1 or w < 1:
+        raise ValueError("h and w must be strictly positive.")
+
+    i1, j1 = i0 + h, j0 + w
+    if i0 < 0 or j0 < 0 or i1 >= im_height or j1 >= im_width:
+        raise ValueError(f"Requested crop area is out of image bounds.{i0}_{i1}_{im_height}, {j0}_{j1}_{im_width}")
+
+    tile_width, tile_height = page.tilewidth, page.tilelength
+
+    tile_i0, tile_j0 = i0 // tile_height, j0 // tile_width
+    tile_i1, tile_j1 = np.ceil([i1 / tile_height, j1 / tile_width]).astype(int)
+
+    tile_per_line = int(np.ceil(im_width / tile_width))
+
+    # older version: (img_depth, h, w, dim)
+    out = np.empty((page.imagedepth,
+                    (tile_i1 - tile_i0) * tile_height,
+                    (tile_j1 - tile_j0) * tile_width,
+                    page.samplesperpixel), dtype=page.dtype)
+
+    fh = page.parent.filehandle
+
+    for i in range(tile_i0, tile_i1):
+        for j in range(tile_j0, tile_j1):
+            index = int(i * tile_per_line + j)
+
+            offset = page.dataoffsets[index]
+            bytecount = page.databytecounts[index]
+
+            fh.seek(offset)
+            data = fh.read(bytecount)
+            tile, indices, shape = page.decode(data, index)
+
+            im_i = (i - tile_i0) * tile_height
+            im_j = (j - tile_j0) * tile_width
+            out[:, im_i: im_i + tile_height, im_j: im_j + tile_width, :] = tile
+
+    im_i0 = i0 - tile_i0 * tile_height
+    im_j0 = j0 - tile_j0 * tile_width
+
+    # old version: out[:, im_i0: im_i0 + h, im_j0: im_j0 + w, :]
+    return out[0, im_i0: im_i0 + h, im_j0: im_j0 + w, :]
+
+def _get_untiled_crop(page, i0, j0, h, w):
+    """
+    The submodule of self.get_crop(), for those untiled geotiff
+
+    Copied from: 
+    https://gist.github.com/rfezzani/b4b8852c5a48a901c1e94e09feb34743#file-get_crop-py-L60
+    """
+    if page.is_tiled:
+        raise ValueError("Input page must not be tiled")
+
+    im_width = page.imagewidth
+    im_height = page.imagelength
+
+    if h < 1 or w < 1:
+        raise ValueError("h and w must be strictly positive.")
+
+    i1, j1 = i0 + h, j0 + w
+    if i0 < 0 or j0 < 0 or i1 >= im_height or j1 >= im_width:
+        raise ValueError(f"Requested crop area is out of image bounds.{i0}_{i1}_{im_height}, {j0}_{j1}_{im_width}")
+
+    fh = page.parent.filehandle
+
+    # for data/pix4d/maize_tanashi dom:
+    # dom shape: (722, 836)
+    # >>> page.dataoffsets
+    # (2554, 9242, 15930, 22618,  ...)  # 6688 interval
+    # >>> page.databytecounts
+    # (6688, 6688, 6688, 6688,  ...)
+    # >>> len(page.dataoffsets)
+    # 361  # = dom.h / 2 -> read 2 row once
+    row_step = im_height / len(page.dataoffsets)
+
+    # judge whether can be perfect divided
+    if row_step % 1 == 0.0:
+        row_step = int(row_step)
+    else:
+        raise InterruptedError(f"img.ht={page.imagelength} with offset number {len(page.dataoffsets)}, {row_step} row per read, not a integer")
+
+    # commonly, it read 1 row once,
+    # no need to do extra things
+    if row_step == 1: 
+        read_tile_idx = np.arange(i0, i1)
+
+    # sometime it not read once per row, it reads two rows once, etc...
+    # e.g. tile.shape = (1, 2, full_width, channel_num)
+    #      expected ->  (1, 1, full_width, channel_num)
+    # if still just picking the first row, will
+    # result in a horzontal zoom 50% result
+    else:
+        # recalculate the row id to read
+        # >>> idx = array([10, 11, 12, 13, 14])   # orignal line index
+        # >>> idx_dv = np.floor(idx / row_step).astype(int)
+        # array([5, 5, 6, 6, 7])
+        # >>> np.unique(idx_dv)
+        # array([5, 6, 7])   -> known which index to read
+        read_row_id = np.arange(i0, i1)
+        read_tile_idx = np.unique(np.floor(read_row_id / row_step).astype(int))
+
+        # Then decide the start line and end line
+        # >>> idx % 2
+        # array([0, 1, 0, 1, 0], dtype=int32)
+        ## get start id
+        st_line_id = read_row_id[0] % row_step
+        ## get reversed id
+        # line id
+        #  3  |  |  |  |...
+        #  -------------------  last read
+        #  0  |  |  |  |
+        #  1  |  |  |  |...
+        #  2  |  |  |  |
+        #  -> get the [:, 0], [:, -1] or [:, -2] index
+        #     as the `ed_line_id`
+        ed_line_id = read_row_id[-1] % row_step - (row_step-1)
+
+    # clip them vertically first, then clip
+    temp_out = np.empty((page.imagedepth, 0, w, page.samplesperpixel), dtype=page.dtype)
+    for index in read_tile_idx:
+        offset = page.dataoffsets[index]
+        bytecount = page.databytecounts[index]
+
+        fh.seek(offset)
+        data = fh.read(bytecount)
+
+        tile, indices, shape = page.decode(data, index)
+
+        # double check if is row_step per read
+        # shape -> (1, 2, full_width, channel_num)
+        if shape[1] != row_step:
+            raise LookupError(f"the calculated {row_step} row per read does not match {shape[1]} of tifffile decoded.")
+
+        temp_out = np.concatenate([temp_out, tile[:, :,j0:j1,:]], axis=1)
+
+    if row_step == 1:
+        return temp_out[0,:,:,:]
+    else:
+        if ed_line_id == 0:
+            return temp_out[0, st_line_id:, :, :]
+        else:
+            return temp_out[0, st_line_id:ed_line_id, :, :]