In [27]:
import os, sys, glob, re, time, subprocess, string # os.getcwd(), os.path.join(), os.listdir(), os.remove(), time.ctime(), glob.glob(), string.zfill(), string.join()
from os.path import exists # exists()
from functools import reduce # reduce()

import geopandas as gpd # read_file(), GeoDataFrame(), sjoin_nearest(), to_crs(), to_file(), .crs, buffer(), dissolve()
import pandas as pd # .dtypes, Series(), concat(), DataFrame(), read_table(), merge(), to_csv(), .loc[], head(), sample(), astype(), unique(), rename(), between(), drop(), fillna(), idxmax(), isna(), isin(), apply(), info(), sort_values(), notna(), groupby(), value_counts(), duplicated(), drop_duplicates()
from shapely.geometry import Point, LineString, Polygon, shape, MultiPoint
from shapely.ops import cascaded_union
from shapely.validation import make_valid  # in apply(make_valid)
import shapely.wkt

import numpy as np # median(), mean(), tolist(), .inf
import fiona, rioxarray # fiona.open()
import rasterio # open(), write_band(), .name, .count, .width, .height. nodatavals, .meta, update(), copy(), write()
from rasterio.plot import show
from rasterio import features # features.rasterize()
from rasterio.features import shapes
from rasterio import mask # rasterio.mask.mask()
from rasterio.enums import Resampling # rasterio.enums.Resampling()
from osgeo import gdal, osr, ogr, gdal_array, gdalconst # Open(), SpatialReference, WarpOptions(), Warp(), GetDataTypeName(), GetRasterBand(), GetNoDataValue(), Translate(), GetProjection(), GetAttrValue()

In [2]:
def ListFromRange(r1, r2):
    """Return the integers from r1 through r2 as a list, both endpoints included."""
    upper_exclusive = r2 + 1  # range() stops one short of its upper bound
    return list(range(r1, upper_exclusive))

In [3]:
# From Stack Exchange @RutgerH
# https://gis.stackexchange.com/questions/163685/reclassify-a-raster-value-to-9999-and-set-it-to-the-nodata-value-using-python-a
def readRaster(filename):
    """Read band 1 of a raster together with its size and georeferencing.

    Parameters
    ----------
    filename : str
        Path of a raster that GDAL can open.

    Returns
    -------
    tuple
        ``(xsize, ysize, geotransform, geoproj, Z)``: the raster width and
        height, the result of ``GetGeoTransform()``, the result of
        ``GetProjection()``, and the values of band 1 as returned by
        ``ReadAsArray()``.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``filename``.
    """
    filehandle = gdal.Open(filename)
    # gdal.Open() hands back None instead of raising when GDAL exceptions are
    # not enabled; fail here with a readable message rather than with an
    # AttributeError on the next line.
    if filehandle is None:
        raise RuntimeError('GDAL could not open raster: %s' % filename)

    band1 = filehandle.GetRasterBand(1)
    geotransform = filehandle.GetGeoTransform()
    geoproj = filehandle.GetProjection()
    Z = band1.ReadAsArray()
    xsize = filehandle.RasterXSize
    ysize = filehandle.RasterYSize

    # Drop the band before the dataset so the file is released promptly.
    band1 = None
    filehandle = None
    return xsize,ysize,geotransform,geoproj,Z

In [4]:
# Default arguments can be changed here, or can be specified below when running the functions.
def writeRaster(filename,geotransform,geoprojection,data, NoDataVal=0, dst_datatype=gdal.GDT_UInt32):
    """Write a 2-D array to a new single-band GeoTIFF.

    Parameters
    ----------
    filename : str
        Path of the GeoTIFF to create.
    geotransform
        Geotransform to assign to the new file (as returned by readRaster).
    geoprojection
        Projection to assign to the new file (as returned by readRaster).
    data
        2-D array whose ``shape`` is (rows, columns).
    NoDataVal
        Value registered as NoData on band 1.
    dst_datatype
        GDAL data type of the output band. The default is an unsigned type;
        pass a signed type if negative values such as -9999 must be stored.

    Returns
    -------
    int
        Always 1.

    Raises
    ------
    RuntimeError
        If the GTiff driver cannot create ``filename``.
    """
    (rows, cols) = data.shape
    Dformat = "GTiff"
    driver = gdal.GetDriverByName(Dformat)
    # you can change the dataformat but be sure to be able to store negative values including -9999
    # Create() takes the width (columns) before the height (rows).
    dst_ds = driver.Create(filename,cols,rows,1,dst_datatype)
    if dst_ds is None:
        raise RuntimeError('GDAL could not create raster: %s' % filename)
    out_band = dst_ds.GetRasterBand(1)
    out_band.WriteArray(data)
    dst_ds.SetGeoTransform(geotransform)
    dst_ds.SetProjection(geoprojection)
    out_band.SetNoDataValue(NoDataVal)
    # Flush and release the dataset BEFORE returning; the original released it
    # on a line placed after the return statement, which never ran.
    dst_ds.FlushCache()
    out_band = None
    dst_ds = None
    return 1

In [5]:
# Based on Stack Exchange @Kurt Schwehr:
# https://stackoverflow.com/questions/10454316/how-to-project-and-resample-a-grid-to-match-another-grid-with-gdal-python
def resampleRaster(InRaster_Path, MatchRaster_Path, OutFile_Path, 
                   ResampType = gdal.GRA_Bilinear, NoDataVal = 0,
                   OutDataType = gdalconst.GDT_UInt32):
    """Reproject and resample a raster onto the grid of another raster.

    A new single-band GeoTIFF is created with the size, geotransform and
    projection of the match raster, and the input raster is reprojected
    into it with ``gdal.ReprojectImage``.

    Parameters
    ----------
    InRaster_Path : str
        Raster to resample.
    MatchRaster_Path : str
        Raster whose size, geotransform and projection the output copies.
    OutFile_Path : str
        Path of the GeoTIFF to create.
    ResampType
        GDAL resampling algorithm passed to ``gdal.ReprojectImage``.
    NoDataVal
        Value registered as NoData on band 1 of the output.
    OutDataType
        GDAL data type of the output band. Defaults to the previously
        hard-coded ``GDT_UInt32``.

    Returns
    -------
    int
        Always 1.

    Raises
    ------
    RuntimeError
        If the input raster cannot be opened or the output cannot be created.
    """
    print('Loading for %s. %s' % (InRaster_Path, time.ctime()))
    
    RasterObject = gdal.Open(InRaster_Path)
    # gdal.Open() returns None instead of raising when GDAL exceptions are off.
    if RasterObject is None:
        raise RuntimeError('GDAL could not open raster: %s' % InRaster_Path)
    In_proj = RasterObject.GetProjection()
    [Match_x, Match_y, Match_geo, Match_proj, Match_Z] = readRaster(MatchRaster_Path)
    Match_Z = None  # only the grid description is needed, not the cell values
    print('---Specs to match to: \n', 
      Match_proj, '\n', Match_geo, '\n', Match_x, '\n', Match_y, '\n')
        
    OutFile = gdal.GetDriverByName('GTiff').Create(OutFile_Path, Match_x, Match_y, 1, OutDataType)
    if OutFile is None:
        raise RuntimeError('GDAL could not create raster: %s' % OutFile_Path)
    OutFile.SetGeoTransform(Match_geo)
    OutFile.SetProjection(Match_proj)
    print('---Created raster file for upsampled version. %s' % time.ctime())
    
    gdal.ReprojectImage(RasterObject, OutFile, In_proj, Match_proj, ResampType)
    print('---Resampled flood values onto an empty raster matching the dimensions of the buildup layer. %s \n\n' % time.ctime())
    
    OutFile.GetRasterBand(1).SetNoDataValue(NoDataVal)
    
    # Release both datasets. The original assigned to a misspelled name
    # ("Outfile"), which left the real output handle untouched.
    OutFile.FlushCache()
    RasterObject = OutFile = None
    return 1

In [6]:
def calcShell(A, B, OutFile, Calculation, OutType = '', 
              C=None, D=None, E=None, F=None, G=None):
    """Raster math using gdal_calc.py.

    The OSgeo package for Python API does not make raster calculations
    easy outside of the shell. This function plugs up to 7 raster files
    (A through G) into a string which subprocess.call() then commits to
    the terminal.

        A : str
            File path to the first raster for the calculation.
        B : str
            File path to the second raster for the calculation.
        OutFile : str
            File path where to store the raster generated from the calculation.
        Calculation : str
            Algebra that uses A and B to create a new raster. Use double quotes.
        OutType : str
            Extra text inserted before --outfile (for example an output type
            option). Surrounding whitespace is ignored; empty by default.
        C, D, E, F, G : str, optional
            File paths to further rasters, passed as -C ... -G when given.

    Returns None. A non-zero exit code from the shell is reported with a
    printed warning.
    """
    print('Running for %s. %s' % (A, time.ctime()))
    parts = ['gdal_calc.py', '-A', A, '-B', B]
    for letter, raster in zip('CDEFG', (C, D, E, F, G)):
        if raster is not None:
            # Separate flag and path with a space, exactly like -A and -B.
            parts.extend(['-' + letter, raster])
    if OutType and OutType.strip():
        # Keep OutType as its own token; previously it was glued straight
        # onto the preceding raster path.
        parts.append(OutType.strip())
    parts.extend(['--outfile=' + OutFile, '--overwrite', '--calc=' + Calculation])
    cmd = ' '.join(parts)

    ExitCode = subprocess.call(cmd, shell=True)
    if ExitCode != 0:
        print('Warning: gdal_calc.py exited with code %s.' % ExitCode)
    print('Ran in shell. See OutFile folder to inspect results. %s' % time.ctime())

In [7]:
def mosaicShell(A, B, OutFile, Band = 1, 
                  C=None, D=None, E=None, F=None, G=None):
    """Merge two to seven rasters into one GeoTIFF with gdal_merge.py.

    A and B are required file paths; C through G are optional extras that
    are appended in that order when given. OutFile is the path handed to
    gdal_merge.py's -o option. Band is accepted but not used by the command.
    The command is run through the shell and nothing is returned.
    """
    print('Running for %s. %s' % (A, time.ctime()))

    # Required rasters first, then whichever optional ones were supplied.
    inputs = [A, B] + [extra for extra in (C, D, E, F, G) if extra is not None]
    cmd = 'gdal_merge.py -o ' + OutFile + ' -of gtiff ' + ' '.join(inputs)

    subprocess.call(cmd, shell=True)
    print('Ran in shell. See OutFile folder to inspect results. %s' % time.ctime())

In [120]:
def burnShell(FolderPath, TemplateRasterName=None, BurnRasterName='Burn.tif', Value=0):
    """Create a constant-valued raster in a folder with gdal_create.

    The new raster is created from a template raster in the same folder
    (passed to gdal_create as -if) and filled with ``Value`` (-burn).

    Parameters
    ----------
    FolderPath : str
        Folder that holds the template and receives the new raster.
    TemplateRasterName : str, optional
        File name of the template inside FolderPath. When omitted, the first
        '.tif' file in sorted order is used, skipping BurnRasterName so the
        output is never used as its own template.
    BurnRasterName : str
        File name of the raster to create inside FolderPath.
    Value
        Value written to the new raster.

    Raises
    ------
    FileNotFoundError
        If no template name is given and FolderPath holds no usable '.tif'.
    """
    print("Creating empty raster from a tif in %s. %s" % (FolderPath, time.ctime()))
    if TemplateRasterName is None:
        # Sorted so the choice does not depend on the order os.listdir() returns.
        Candidates = sorted(name for name in os.listdir(FolderPath)
                            if name.endswith('.tif') and name != BurnRasterName)
        if not Candidates:
            # Previously this fell through to os.path.join(FolderPath, None),
            # which fails with an unhelpful TypeError.
            raise FileNotFoundError('No template .tif found in %s' % FolderPath)
        TemplateRasterName = Candidates[0]
    TemplateRasterPath = os.path.join(FolderPath, TemplateRasterName)
    BurnRasterPath = os.path.join(FolderPath, BurnRasterName)
    
    cmd = 'gdal_create -if ' + TemplateRasterPath + ' -burn ' + str(Value) + ' ' + BurnRasterPath

    ExitCode = subprocess.call(cmd, shell=True)
    if ExitCode != 0:
        print("Warning: gdal_create exited with code %s." % ExitCode)
    print("Ran raster burn in shell. %s" % time.ctime())

In [123]:
def batchSumShell(FolderPath, BurnRasterName=None):
    """Sum every .tif in a folder with gdal_calc.py.

    The wildcard ``FolderPath/*.tif`` is passed as input A and the result of
    ``numpy.sum(A,axis=0)`` is written to ``FolderPath/BurnRasterName``
    ('Burn.tif' when no name is given). The command runs through the shell
    and nothing is returned.
    """
    print("Adding together the values of all .tif files in %s. %s" % (FolderPath, time.ctime()))
    OutName = 'Burn.tif' if BurnRasterName is None else BurnRasterName
    pieces = [
        'gdal_calc.py -A ', FolderPath, '/*.tif',
        ' --outfile=', FolderPath, '/', OutName,
        ' --calc="numpy.sum(A,axis=0)"',
    ]
    cmd = ''.join(pieces)

    subprocess.call(cmd, shell=True)
    print("Ran batch summation in shell. %s" % time.ctime())

In [8]:
def rioStats(InRasterPath, Band = 1):
    """Print basic metadata and summary statistics for one band of a raster.

    Parameters
    ----------
    InRasterPath : str
        Path of the raster to open with rasterio.
    Band : int
        Index of the band to read (passed to ``read``).

    The printed list holds a single dict with the dataset name, band count,
    data types, NoData values, width, height, and the min, mean, median and
    max of the band. The statistics are computed on the array exactly as
    read; nothing here masks out NoData cells. Returns None.
    """
    # The context manager closes the dataset even if reading or a statistic fails.
    with rasterio.open(InRasterPath) as out:
        band = out.read(Band)
        stats = [{
            'raster': out.name,
            'bands': out.count,
            'data type': out.dtypes,
            'no data value': out.nodatavals,
            'width': out.width,
            'height': out.height,
            'min': band.min(),
            'mean': band.mean(),
            'median': np.median(band),
            'max': band.max()}]
    print("\n", stats)

In [9]:
def SimplifyFileNames(FilesList, folder):
    """Rename the listed files in `folder` to Crop1.tif, Crop2.tif, ...

    Files are numbered from 1 in the order they appear in FilesList.
    Nothing is returned; an empty list renames nothing.
    """
    for position, filename in enumerate(FilesList, start=1):
        # foldername/filename, if .py file is outside folder
        os.rename(f"{folder}/{filename}", f"{folder}/Crop{position}.tif")

In [10]:
def MaskByZone(MaskPath, DestFolder, SourceFolder, SourceList = None, 
               MaskLayerName = None, dstSRS = 'ESRI:102022'):
    """
    Reduces the size of a raster's valid data cells to vector areas of interest.
    This is useful if the raster data needs to be vectorized later to save space.
    
    The script prepares the vector zones as a list of geometries in the desired
    spatial reference system, then warps each raster in the specified source
    folder to the same SRS. Masking in rasterio then reclassifies any raster cells
    falling outside of a mask polygon as NoData.

    Raster paths are built from the module-level ``ProjectFolder`` variable,
    which must be defined before this function is called.

    Parameters
    ----------
    MaskPath : str
        Path of the vector file holding the mask polygons.
    DestFolder : str
        Folder, joined onto ProjectFolder, that receives one 'Msk_<FileName>'
        raster per input (and, temporarily, 'Temp_<FileName>' warps).
    SourceFolder : str
        Folder, joined onto ProjectFolder, that holds the input rasters.
    SourceList : list of str, optional
        File names inside SourceFolder to process. When omitted, every entry
        of SourceFolder whose name ends with 'tif' is processed.
    MaskLayerName : str, optional
        Layer to read from MaskPath.
    dstSRS : str
        Spatial reference system that the mask and the rasters are brought to.

    Notes
    -----
    When the mask is not already in dstSRS a reprojected copy is written next
    to it ('<name>_temp<ext>', or layer '<MaskLayerName>_temp') and is left on
    disk afterwards.

    Raises
    ------
    RuntimeError
        If GDAL cannot open an input raster or cannot warp it.
    """
    
    ProjSRS = osr.SpatialReference()
    ProjSRS.SetFromUserInput(dstSRS)
    ProjWarp = gdal.WarpOptions(dstSRS = dstSRS)
    
    if SourceList is not None:
        SourceFiles = SourceList
    else:
        # List the same folder the rasters are opened from further down.
        SourceFiles = [i for i in os.listdir(os.path.join(ProjectFolder, SourceFolder)) if i.endswith('tif')]
        print(SourceFiles)
    
    
    ### 1. ASSIGN SPATIAL REFERENCE SYSTEM OF VECTOR MASK AND LOAD GEOMETRIES
    Vector = gpd.read_file(filename=MaskPath, layer=MaskLayerName)
    if Vector.crs != dstSRS:
        if MaskLayerName is None:
            # Put the suffix in front of the extension so the copy keeps a
            # recognisable file type ('mask_temp.shp', not 'mask.shp_temp').
            MaskRoot, MaskExt = os.path.splitext(MaskPath)
            MaskPath = MaskRoot + '_temp' + MaskExt
        else:
            MaskLayerName = MaskLayerName + '_temp'
        Vector.to_crs(dstSRS).to_file(filename=MaskPath, layer=MaskLayerName)
    Vector = None # We're reloading the geometries with fiona
    
    with fiona.open(MaskPath, mode="r", layer=MaskLayerName) as Vector:
        MaskGeom = [feature["geometry"] for feature in Vector] # Identify the bounding areas of the mask.
    
    
    ### 2. PREPARE DESTINATION FILES
    for FileName in SourceFiles:
        InputRasterPath = os.path.join(ProjectFolder, SourceFolder, FileName)
        
        TempOutputName = 'Temp_' + FileName
        TempOutputPath = os.path.join(ProjectFolder, DestFolder, TempOutputName)
        FinalOutputName = 'Msk_' + FileName
        FinalOutputPath = os.path.join(ProjectFolder, DestFolder, FinalOutputName)

    ### 3. ASSIGN SPATIAL REFERENCE SYSTEM OF RASTER(S)
        InputRasterObject = gdal.Open(InputRasterPath)
        # gdal.Open() returns None instead of raising when GDAL exceptions are off.
        if InputRasterObject is None:
            raise RuntimeError('GDAL could not open raster: %s' % InputRasterPath)
        SourceSRS = osr.SpatialReference(wkt=InputRasterObject.GetProjection())
        print('Source projection: ', SourceSRS.GetAttrValue('projcs'))
        print('Destination projection: ', ProjSRS.GetAttrValue('projcs'))

        # NOTE(review): only the 'projcs' names are compared, so two systems
        # without a projected CRS name are treated as identical - confirm that
        # this is acceptable for the rasters being processed.
        Warped = False
        if SourceSRS.GetAttrValue('projcs') != ProjSRS.GetAttrValue('projcs'):
            Warp = gdal.Warp(TempOutputPath, # Where to store the warped raster
                         InputRasterObject, # Which raster to warp
                         format='GTiff', 
                         options=ProjWarp) # Reproject to dstSRS
            if Warp is None:
                # Masking an un-reprojected raster with reprojected geometries
                # would be wrong, so stop instead of silently falling back.
                raise RuntimeError('gdal.Warp() failed for %s' % InputRasterPath)
            print('Finished gdal.Warp() for %s. %s \n' % (FileName, time.ctime()))

            Warp = None # Close the files
            Warped = True
        InputRasterObject = None
        
    ### 4. RECLASSIFY AS NODATA IF OUTSIDE OF ZONE.
        # Decide from what happened in this iteration, not from whether a
        # Temp_ file happens to exist (it could be left over from an earlier run).
        if Warped:
            NewInputPath = TempOutputPath 
            print("We warped the data, so we'll use that file for next step.")
        else:
            NewInputPath = InputRasterPath 
            print("We skipped the warp, so we continue to use the source file.")

        with rasterio.open(NewInputPath) as InputRasterObject:
            MaskedOutputRaster, OutTransform = rasterio.mask.mask(
                InputRasterObject, MaskGeom, crop=True) # Anything outside the mask is reclassed to the raster's NoData value.
            OutMetaData = InputRasterObject.meta.copy()
        print('Finished rasterio.mask.mask() for %s. %s \n' % (FileName, time.ctime()))

        OutMetaData.update({"driver": "GTiff",
                         "height": MaskedOutputRaster.shape[1],
                         "width": MaskedOutputRaster.shape[2],
                         "transform": OutTransform})

        with rasterio.open(FinalOutputPath, "w", **OutMetaData) as dest:
            dest.write(MaskedOutputRaster)
        print('Written to file. %s \n' % time.ctime())
        InputRasterObject = None

        if Warped:
            try:  # Finally, remove the intermediate file from disk
                os.remove(TempOutputPath)
            except OSError as RemoveError:
                # Best effort: keep going, but do not claim the file was removed.
                print('Could not remove intermediate file %s: %s \n' % (TempOutputPath, RemoveError))
            else:
                print('Removed intermediate file. %s \n' % time.ctime())


    print('\n \n Finished all items in list. %s' % time.ctime())

In [58]:
# Root folder of the project. MaskByZone() and the cells below build their
# paths from this global, so this cell has to run before any of them.
ProjectFolder = os.getcwd()
print(ProjectFolder)

Q:\GIS\povertyequity\agro_general


-------------

## 1. Prepare MapSPAM

##### Note: 
I sorted the SPAM files into separate folders (one for each final raster output) prior to using Python.

### 1.1 Combine SPAM crops' values

#### List of eligible folders

In [99]:
# Keep the sub-folders of ProjectFolder whose names contain a four-digit
# number wrapped in underscores (for example "_2010_").
SPAMfolders = list(filter(
    lambda name: os.path.isdir(os.path.join(ProjectFolder, name))
    and re.match(r'.*(_[0-9]{4}_)', name) is not None,
    os.listdir(ProjectFolder)))
SPAMfolders

['HarvArea_2000_allTech',
 'HarvArea_2000_Irrigated',
 'HarvArea_2005_allTech',
 'HarvArea_2005_Irrigated',
 'HarvArea_2010_allTech',
 'HarvArea_2010_Irrigated',
 'HarvArea_2017_allTech',
 'HarvArea_2017_Irrigated',
 'PhysArea_2000_allTech',
 'PhysArea_2000_Irrigated',
 'PhysArea_2005_allTech',
 'PhysArea_2005_Irrigated',
 'PhysArea_2010_allTech',
 'PhysArea_2010_Irrigated',
 'PhysArea_2017_allTech',
 'PhysArea_2017_Irrigated',
 'Val_2005_allTech',
 'Val_2005_Irrigated',
 'Val_2010_allTech',
 'Val_2010_Irrigated',
 'Val_2017_allTech',
 'Val_2017_Irrigated']

In [100]:
# Drop folders where data is already summed.
# Filtering instead of list.remove() keeps this cell safe to re-run:
# remove() raises ValueError once a name is no longer in the list.
SPAMfolders = [Folder for Folder in SPAMfolders
               if Folder not in ('Val_2005_allTech',
                                 'Val_2005_Irrigated',
                                 'Val_2010_allTech',
                                 'Val_2010_Irrigated')]
SPAMfolders

['HarvArea_2000_allTech',
 'HarvArea_2000_Irrigated',
 'HarvArea_2005_allTech',
 'HarvArea_2005_Irrigated',
 'HarvArea_2010_allTech',
 'HarvArea_2010_Irrigated',
 'HarvArea_2017_allTech',
 'HarvArea_2017_Irrigated',
 'PhysArea_2000_allTech',
 'PhysArea_2000_Irrigated',
 'PhysArea_2005_allTech',
 'PhysArea_2005_Irrigated',
 'PhysArea_2010_allTech',
 'PhysArea_2010_Irrigated',
 'PhysArea_2017_allTech',
 'PhysArea_2017_Irrigated',
 'Val_2017_allTech',
 'Val_2017_Irrigated']

#### Inspect alignment of rasters.

In [86]:
# Grid description of the previously read raster; None until one has been read.
Match_x2 = Match_y2 = Match_geo2 = Match_proj2 = None

# The first item will always print "Not aligned with previous" because there was no previous item.
for Folder in SPAMfolders:
    print('Folder: %s. %s' % (Folder, time.ctime()))
    SPAMrasters = [i for i in os.listdir(os.path.join(ProjectFolder, Folder)) if i.endswith('.tif')]
    for Raster in SPAMrasters:
        RasterPath = os.path.join(ProjectFolder, Folder, Raster)
        [Match_x, Match_y, Match_geo, Match_proj, Match_Z] = readRaster(RasterPath)
        Match_Z = None  # cell values play no part in alignment; release the array
        # Two rasters are aligned when size, geotransform and projection all match.
        # (The earlier test also compared Match_Z.all(), i.e. whether every cell is
        # non-zero, which depends on the data rather than on the grid.)
        if (Match_x, Match_y, Match_geo, Match_proj) != (Match_x2, Match_y2, Match_geo2, Match_proj2):
            print("Not aligned with previous.")
        else:
            print("Aligned with previous.")
        Match_x2, Match_y2, Match_geo2, Match_proj2 = Match_x, Match_y, Match_geo, Match_proj

Folder: HarvArea_2000_allTech. Wed Apr 19 16:03:22 2023
Not aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Folder: HarvArea_2000_Irrigated. Wed Apr 19 16:03:28 2023
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned 

Aligned with previous.
Folder: PhysArea_2005_allTech. Wed Apr 19 16:04:18 2023
Not aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previou

Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.
Aligned with previous.


#### Create an empty raster in each folder, then sum the folder's crop rasters into it.

In [124]:
# For every SPAM folder: create a zero-filled raster named after the folder,
# then write the sum of the folder's .tif files to that same file.
for Folder in SPAMfolders:
    Path = os.path.join(ProjectFolder, Folder)
    SumFile = '%s.tif' % Folder
    burnShell(Path, BurnRasterName=SumFile)
    batchSumShell(FolderPath=Path, BurnRasterName=SumFile)

Creating empty raster from a tif in Q:\GIS\povertyequity\agro_general\HarvArea_2000_allTech. Wed Apr 19 17:03:01 2023
Ran raster burn in shell. Wed Apr 19 17:03:01 2023
Adding together the values of all .tif files in Q:\GIS\povertyequity\agro_general\HarvArea_2000_allTech. Wed Apr 19 17:03:01 2023
Ran batch summation in shell. Wed Apr 19 17:03:11 2023
Creating empty raster from a tif in Q:\GIS\povertyequity\agro_general\HarvArea_2000_Irrigated. Wed Apr 19 17:03:11 2023
Ran raster burn in shell. Wed Apr 19 17:03:12 2023
Adding together the values of all .tif files in Q:\GIS\povertyequity\agro_general\HarvArea_2000_Irrigated. Wed Apr 19 17:03:12 2023
Ran batch summation in shell. Wed Apr 19 17:03:22 2023
Creating empty raster from a tif in Q:\GIS\povertyequity\agro_general\HarvArea_2005_allTech. Wed Apr 19 17:03:22 2023
Ran raster burn in shell. Wed Apr 19 17:03:22 2023
Adding together the values of all .tif files in Q:\GIS\povertyequity\agro_general\HarvArea_2005_allTech. Wed Apr 19 17:

### 1.2 Crop to country

#### Crop to country: the mask used for SPAM should be a slightly buffered country boundary.

In [None]:
# Mask the 2017 SPAM production-value raster with the buffered country boundary.
MaskByZone(
    MaskPath='ADM/MRT_ADM0_1kmBuffer.shp',
    SourceFolder='SPAM',
    SourceList=['SPAM_ProdVal_2017_v2.tif'],
    DestFolder='SPAM',
    dstSRS='ESRI:102022',
)