# Raster extraction / clipping

This notebook provides code to clip a raster or many rasters to a specified bounding box. The area can be specified in terms of lat/lon degrees (everything is assumed to be mastergrid EPSG:4326), or in terms of pixel coordinates relative to the input rasters.

Common uses include extracting a local cube from one of the global mastergrids cubes, or trimming one pixel off one side of a series of rasters when an ArcGIS operation has resulted in the output being one pixel bigger than you expected.

In [1]:
from osgeo import gdal
import numpy as np
import os
import glob

# Function definitions

In [2]:
from raster_utilities.utils.geotransform_calcs import *

In [3]:
from raster_utilities.io.TiffFile import SingleBandTiffFile
from raster_utilities.io.tiff_management import GetRasterProperties, ReadAOI_PixelLims, SaveLZWTiff
from raster_utilities.aggregation.aggregation_values import SnapTypes


In [4]:
from raster_utilities.utils.geotransform_calcs import SnapAndAlignGeoTransform

In [5]:
def ExtractAlignedSubImage(infile, outputDir, outputName, 
                           longitudeLims, latitudeLims,
                           maintainExtent = False, sanitiseResolution=True, 
                           snapToMastergridType = SnapTypes.NEAREST, 
                           outNDV = None):
    '''
    Clips a raster to a lat/lon bounding box and saves the result as a new tiff.

    infile: path of the raster to read.
    outputDir, outputName: passed to SaveLZWTiff to say where the output is written.
    longitudeLims, latitudeLims: the bounding box, in degrees. These are converted to 
        pixel limits against the input geotransform using CalculatePixelLims.
    maintainExtent: if True the output image has the same extent (size) as the 
        original, with the clipped-out area set to nodata. If False only the 
        clipped area is output.
    sanitiseResolution, snapToMastergridType: passed to SnapAndAlignGeoTransform, 
        which produces the geotransform that is written to the output.
    outNDV: nodata value to tag the output with. By default (None) the input's 
        nodata value is passed through. This doesn't change the data, just the 
        nodata tag, for cases where it is e.g. -9999 but not recorded as such.
    '''
    inGT, inProj, inNDV, inWidth, inHeight, _, _ = GetRasterProperties(infile)

    # Convert the degree limits to pixel limits = ((xmin, xmax), (ymin, ymax)).
    # This is done here, rather than reading by lat/lon limits directly, so that the 
    # pixel offsets are available for writing back into a full-extent image.
    pixelLims = CalculatePixelLims(inGT, longitudeLims, latitudeLims)
    xPixelLims = pixelLims[0]
    yPixelLims = pixelLims[1]
    arr, subsetGT, _, _ = ReadAOI_PixelLims(infile, xPixelLims, yPixelLims)

    ndv = inNDV if outNDV is None else outNDV

    if not maintainExtent:
        # output covers the clipped area only, so align the geotransform of the subset
        clippedGT = SnapAndAlignGeoTransform(subsetGT, sanitiseResolution, snapType=snapToMastergridType)
        SaveLZWTiff(arr, ndv, clippedGT, inProj, outputDir, outputName)
        return

    # output keeps the input's dimensions: align the full-image geotransform and 
    # place the clipped data at its original (row, column) offset
    fullGT = SnapAndAlignGeoTransform(inGT, sanitiseResolution, snapToMastergridType)
    SaveLZWTiff(arr, ndv, fullGT, inProj, outputDir, outputName,
                outShape=(inHeight, inWidth),
                outOffset=(yPixelLims[0], xPixelLims[0]))
    

In [5]:
def ExtractAlignedSubImageByPixels(infile, outputDir, outputName, 
                                  xLims, yLims, maintainExtent=False, sanitiseResolution=True,
                                   snapToMastergridType=SnapTypes.NEAREST, 
                                   outNDV=None):
    '''
    Clips a raster to a bounding box given in pixels and saves the result as a new tiff.

    infile: path of the raster to read.
    outputDir, outputName: passed to SaveLZWTiff to say where the output is written.
    xLims, yLims: the bounding box, as pixel offsets counted from an origin of (0,0) 
        at the top left corner of the image.
    maintainExtent: if True the output image has the same extent (size) as the 
        original, with the clipped-out area set to nodata. If False only the 
        clipped area is output.
    sanitiseResolution, snapToMastergridType: passed to SnapAndAlignGeoTransform, 
        which produces the geotransform that is written to the output.
    outNDV: nodata value to tag the output with. By default (None) the input's 
        nodata value is passed through. This doesn't change the data, just the 
        nodata tag, for cases where it is e.g. -9999 but not recorded as such.
    '''
    inGT, inProj, inNDV, inWidth, inHeight, _, _ = GetRasterProperties(infile)
    arr, subsetGT, _, _ = ReadAOI_PixelLims(infile, xLims, yLims)

    ndv = inNDV if outNDV is None else outNDV

    if not maintainExtent:
        # output covers the clipped area only, so align the geotransform of the subset
        clippedGT = SnapAndAlignGeoTransform(subsetGT, sanitiseResolution, snapType=snapToMastergridType)
        SaveLZWTiff(arr, ndv, clippedGT, inProj, outputDir, outputName)
        return

    # output keeps the input's dimensions: align the full-image geotransform and 
    # place the clipped data at its original (row, column) offset
    fullGT = SnapAndAlignGeoTransform(inGT, sanitiseResolution, snapToMastergridType)
    SaveLZWTiff(arr, ndv, fullGT, inProj, outputDir, outputName,
                outShape=(inHeight, inWidth),
                outOffset=(yLims[0], xLims[0]))
        

In [6]:
def AlignExistingImageInPlace(inFile, snapToMastergridType=SnapTypes.NEAREST):
    '''Modifies the geotransform of an existing image, adjusting its origin and/or 
    resolution such that it correctly lines up with the mastergrids templates. 
    No resampling is done, this is just a correction of erroneously-rounded co-ordinates.
    Note that even with SnapTypes.NONE, the resolution will still be corrected e.g. 
    from 0.00833333 to 0.008333333333333

    inFile: path of the image to modify; the file is opened for update and its 
        geotransform overwritten in place.
    snapToMastergridType: passed to SnapAndAlignGeoTransform as snapType.

    Raises IOError if the file cannot be opened for update.
    '''
    f = SingleBandTiffFile(inFile)
    inGT = f.GetGeoTransform()
    print("{} - previous geotransform was {}".format(inFile, inGT))
    snappedGT = SnapAndAlignGeoTransform(inGT, fixResolution=True, snapType=snapToMastergridType)
    # the SingleBandTiffFile object is currently readonly for properties, so the 
    # new geotransform is written through a separate gdal dataset opened for update
    d = gdal.Open(inFile, gdal.GA_Update)
    if d is None:
        # gdal.Open returns None on failure unless gdal exceptions are enabled; 
        # fail clearly here rather than with an AttributeError on the next line
        raise IOError("Could not open {} for update".format(inFile))
    try:
        d.SetGeoTransform(snappedGT)
        d.FlushCache()
    finally:
        # dropping the reference releases the dataset
        d = None

# Usage

Some examples of usage follow - you don't need to run all of these cells!

### Align an image, where the image is basically correct but doesn't correctly line up with the mastergrids
No resampling - we just correct the geotransform to what it should always have been (e.g. resolution= 1/120 rather than merely 0.0083333)

In [7]:
# Correct this one file's geotransform in place, using the NEAREST snap type
AlignExistingImageInPlace(
    r'E:\Data\Harry\Documents\dial-a-map\andre_pop\unadj\hrsl_zaf_pop_1k_arc_matched.tif',
    snapToMastergridType=SnapTypes.NEAREST)

E:\Data\Harry\Documents\dial-a-map\andre_pop\unadj\hrsl_zaf_pop_1k_arc_matched.tif - previous geotransform was (14.399999999999999, 0.008333333333333333, 0.0, -22.125, 0.0, -0.008333333333333333)
INFO    : | Cellsize already ok, not altering
INFO    : | Origin point was already correctly aligned at (14.399999999999999, -22.125) (x,y)


#### Clip a cube (folder) of images to lat limits varying by month, maintain input extent but replace with nodata

(we do this for the reflectance-based MODIS covariates as they're nonsense in high latitudes in winter (no daylight))

In [5]:
# The N-S latitude limits we want by month, as (north, south) in degrees, keyed by 
# zero-padded month number. Derived empirically.
# The southern limit is -60 in every month; only the northern limit varies.
EVI_NS_Lims = {
    mth: (northLim, -60)
    for mth, northLim in [
        ("01", 60), ("02", 68), ("03", 80), ("04", 80),
        ("05", 80), ("06", 80), ("07", 80), ("08", 80),
        ("09", 80), ("10", 68), ("11", 62), ("12", 60),
    ]
}

In [6]:
# folder containing the input rasters to be clipped
extractFromDir = r'C:/temp/dataprep/modis/TCB_5KM_Aggregations/'
# folder the clipped outputs are written to (a different folder, as the original filenames are reused)
extractToDir = r'C:/temp/dataprep/modis/TCB_5KM_Aggregations_Clipped'

In [7]:
# alternative pattern, to take every tiff in the input folder:
#inPattern = (os.path.join(extractFromDir,'*.tif'))
# take only the tiffs in the input folder whose filename starts with TCB
inPattern = (os.path.join(extractFromDir,'TCB*.tif'))
inFiles = glob.glob(inPattern)

In [8]:
# For every input file: clip in y to the limits for that file's month (from EVI_NS_Lims), 
# leave x unclipped (-180 to 180), and write to extractToDir under the original filename. 
# maintainExtent=True so the results keep the same dimensions but are set to nodata 
# outside the limits.
for infile in inFiles:
    fileName = os.path.basename(infile)
    # the month is the third dot-delimited token of the filename
    monthToken = fileName.split('.')[2]
    ExtractAlignedSubImage(infile, extractToDir,
                           outputName=fileName,
                           longitudeLims=(-180, 180),
                           latitudeLims=EVI_NS_Lims[monthToken],
                           maintainExtent=True)
    

#### Clip a folder of images to a fixed extent, output the new extent only

This is a common use case, e.g. when KB wants a set of rasters clipped for a CHAI project.

Specify the extents in degrees, in the use_X and use_Y variables

In [20]:

# Named extents in degrees: X is (west, east) longitude, Y is (north, south) latitude.
# All of these are defined (rather than commented out) so that any cell in this 
# notebook referring to one of them by name works on a fresh kernel.
e8_X = (10,42)
e8_Y = (-4,-36)
khm_X = (102,108)
khm_Y = (15,9)
moz_X = (30,41)
moz_Y = (-10,-27)

# the extent that the folder-clipping cells use
use_X = moz_X
use_Y = moz_Y

Specify the files to process in the list variable `inFiles`.

In [21]:
# input folder, and the folder the clipped outputs are written to
extractFromDir = r'\\map-fs1.ndph.ox.ac.uk\map_data\mastergrids\MODIS_Global\MOD11A2_v6_LST\Modelled_Air_Temp_Min\5km\Monthly'
extractToDir = r'C:\temp\test'
# (use the pattern '*.tif' instead to take every tiff in the folder)
inPattern = os.path.join(extractFromDir, '*.mean.*.tif')
# of the files matching the pattern, keep only those whose second dot-delimited 
# filename token is the year 2013
inFiles = [
    path for path in glob.glob(inPattern)
    if int(os.path.basename(path).split('.')[1]) == 2013
]

In [None]:
# display the list of files selected for processing, as a check before running the clip
inFiles

Process all the files in turn, skipping any whose output already exists.

In [None]:
# For every input file whose output does not already exist: clip to the x and y limits 
# chosen in use_X / use_Y and write to extractToDir under the original filename. 
# maintainExtent=False so the output images have the new (clipped) extent.
for infile in inFiles:
    fileName = os.path.basename(infile)
    if not os.path.exists(os.path.join(extractToDir, fileName)):
        ExtractAlignedSubImage(infile, extractToDir,
                               outputName=fileName,
                               longitudeLims=use_X, latitudeLims=use_Y,
                               maintainExtent=False)
    

#### Process just one file

In [10]:
# Clip a single file to the E8 extent, outputting the clipped extent only.
# e8_X / e8_Y are the named E8 extents; they must be defined before this cell is run.
ExtractAlignedSubImage(infile=r'G:/Supporting/CoastGlobal.tiff',
                       outputDir=r'C:\Temp\dataprep\E8\Supporting',
                       outputName="Coast_E8.tif",
                       longitudeLims=e8_X, latitudeLims=e8_Y,
                       maintainExtent=False)

In [10]:
# Clip a single file to an extent given directly in degrees, outputting the clipped extent only
ExtractAlignedSubImage(infile=r'C:\Temp\EVI.2016.Annual.mean.5km.Mean.copy.tif',
                       outputDir=r'C:\Temp',
                       outputName='EVI_MDG.2016.Annual.mean.5km.Mean.tif',
                       longitudeLims=(43.016042,50.866771),
                       latitudeLims=(-11.657728,-25.833370),
                       maintainExtent=False)

#### Clip an image to a fixed pixel extent, i.e. trim a number of pixels off one or more sides

It can be hard to get Arc to match your extent (usually because it thinks of raster cells being represented at their centre, so you end up with one extra across the extent). Do something like this to trim off the rogue extra row or column.

In [7]:
# Trim pixels off the edges of an image by reading from pixel index 1 in both x and y.
# Pixel offsets count from (0,0) at the top left corner, so starting at 1 drops the 
# left-most column and the top-most row.
# NOTE(review): this comment previously said "left and bottom-most pixels" of an image 
# "checked to be 1682*1742", which does not match the limits below - confirm the image 
# size, and whether the upper limits (8401, 3601) are inclusive, against ReadAOI_PixelLims.
ExtractAlignedSubImageByPixels(r'E:\Data\Harry\Documents\dataprep\CHAI\panama_v4_3arcsec\PAN_GUF_100m.tif',
                              r'E:\Data\Harry\Documents\dataprep\CHAI\panama_v4_3arcsec', 'PAN_GUF_100m_clip.tif',
                              xLims=(1,8401), yLims=(1,3601))

In [11]:
# Clip to an explicit pixel window and snap the output geotransform using the NEAREST snap type.
# ExtractAlignedSubImageByPixels has no 'snapExtent' parameter (passing it raises a 
# TypeError); snapping is requested through snapToMastergridType instead.
ExtractAlignedSubImageByPixels(r'E:\Data\Harry\Documents\dial-a-map\andre_pop\popadj.YYYY.MM.DATA.1km.sum.tif',
                              r'E:\Data\Harry\Documents\dial-a-map\andre_pop', 'popadj_1km_aligned.tif', 
                              xLims=(0,779), yLims=(0,1074), snapToMastergridType=SnapTypes.NEAREST)

cell size reset to 0.00833333333333


#### Clip a folder tree of images to a fixed pixel extent, i.e. trim a number of pixels off one or more sides

Use os.walk rather than glob to handle folder structures more than one level deep

As with all the functions here that do not work in place, this also creates compressed, tiled files, so there is no need to do that separately.

In [33]:
# root of the folder tree to read from, and the root the outputs will be mirrored under
inFolderRoot = r'top/Level/input/folder'
outFolderRoot = r'top/Level/output/folder'
# First print the width and height of every tiff in the tree, to check by eye that 
# they are all the same pixel size
for root, dirs, files in os.walk(inFolderRoot):
    for f in files:
        if not f.endswith('.tif'):
            continue
        props = GetRasterProperties(os.path.join(root, f))
        print (props.width, props.height)

(43201, 16801)
(43201, 16801)
(43201, 16801)
(43201, 16801)
(43201, 16801)


In [36]:
# Now run the clip for each tiff, mirroring the folder structure below the output location
for root, dirs, files in os.walk(inFolderRoot):
    # os.walk yields paths that start with inFolderRoot, so swap just that leading prefix. 
    # (str.replace would also replace any later occurrence of the same text in the path.)
    outdir = outFolderRoot + root[len(inFolderRoot):]
    for f in files:
        if f.endswith('.tif'):
            pathname = os.path.join(root, f)
            # the raster properties are not needed here, so the file is not opened 
            # an extra time just to read them
            # NOTE(review): the inputs were reported as 43201 x 16801 by the check above, 
            # so these limits presumably trim one column and one row - confirm whether 
            # the upper limits are inclusive against ReadAOI_PixelLims
            ExtractAlignedSubImageByPixels(pathname,
                              outdir, f,
                              xLims=(0,43200), yLims=(0,16800), snapToMastergridType=SnapTypes.NEAREST)

INFO    : | Cellsize in (x,y): 0.0083333333*-0.0083333333, sanitised to cellsize out: 0.00833333333333*-0.00833333333333
INFO    : | Snapped origin point from (-180.00416666665, 75.00416666665) (x,y) to (-180.0, 75.0) (x,y)
INFO    : | Cellsize in (x,y): 0.0083333333*-0.0083333333, sanitised to cellsize out: 0.00833333333333*-0.00833333333333
INFO    : | Snapped origin point from (-180.00416666665, 75.00416666665) (x,y) to (-180.0, 75.0) (x,y)
INFO    : | Cellsize in (x,y): 0.0083333333*-0.0083333333, sanitised to cellsize out: 0.00833333333333*-0.00833333333333
INFO    : | Snapped origin point from (-180.00416666665, 75.00416666665) (x,y) to (-180.0, 75.0) (x,y)
INFO    : | Cellsize in (x,y): 0.0083333333*-0.0083333333, sanitised to cellsize out: 0.00833333333333*-0.00833333333333
INFO    : | Snapped origin point from (-180.00416666665, 75.00416666665) (x,y) to (-180.0, 75.0) (x,y)
INFO    : | Cellsize in (x,y): 0.0083333333*-0.0083333333, sanitised to cellsize out: 0.00833333333333*-