In [1]:
import os
# Add the system GDAL header directory to the C and C++ include search paths.
# NOTE(review): presumably needed so the later `pip install gdal` can compile
# its bindings against the apt-installed libgdal-dev headers - confirm.
os.environ['CPLUS_INCLUDE_PATH']="/usr/include/gdal"
os.environ['C_INCLUDE_PATH']="/usr/include/gdal"

In [None]:
!apt-get update

In [None]:
!apt-get --assume-yes install gdal-bin libgdal-dev python3-dev

In [5]:
!gdal-config --version

2.2.3


In [None]:
!pip install gdal==2.2.3

In [17]:
!python -c "from osgeo import gdal;print(gdal.__file__)"

/opt/conda/lib/python3.7/site-packages/osgeo/gdal.py


In [4]:
import sys, os

In [5]:
sys.path.append('/opt/conda/lib/python3.7/site-packages')

In [6]:
from osgeo import gdal

In [7]:
import re
import apache_beam as beam
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
import apache_beam.runners.interactive.interactive_beam as ib
from apache_beam.io.gcp.gcsio import GcsIO
import tempfile

In [8]:
from getpass import getpass

In [9]:
# Credentials handed to DownloadHdfsToBucket; the password is prompted for
# interactively so that it is never stored in the notebook.
username='harrygibson'
password=getpass()
# Year range and day-of-year window used to build the first list of requested dates.
# DOY_END is passed to generate_selected_dates, which treats it as exclusive.
YEAR_FROM = 2019
YEAR_TO = 2020
DOY_START = 20
DOY_END = 40
# NOTE(review): TILE is not referenced by any other cell visible in this notebook.
TILE = '*'
# The product listing page is <BASE_URL>/<platform>/<product>
BASE_URL = "http://e4ftl01.cr.usgs.gov"
platform = "MOLT"
product = "MOD11A2.006"
product_url = f"{BASE_URL}/{platform}/{product}"
# Bare expression: displays the assembled URL as the cell output
product_url

 ·········


'http://e4ftl01.cr.usgs.gov/MOLT/MOD11A2.006'

In [39]:
# Root location in the bucket for everything this notebook writes
BUCKET_PATH = "gs://hsg-dataflow-test/lst_download_dev"
# Sub-folder of BUCKET_PATH that the downloaded HDF files are written to
HDF_BUCKET_PATH = BUCKET_PATH + '/hdf' #NB not os.path.join as that breaks on windows with backslash paths


## Functions to parse dates from the MODIS DAAC pages

These are local functions: they don't need to run within the Beam pipeline, because there is only a single HTTP fetch followed by some local list processing. They are more or less taken from get_modis (https://github.com/jgomezdans/get_modis).

In [10]:
def generate_selected_dates(year_from=2000, year_to=2020, doy_start=1, doy_end=-1):
    """Build "YYYY.MM.DD" strings for a day-of-year window in each requested year.

    Parameters
    ----------
    year_from, year_to: int
        First and last year to cover (both inclusive).
    doy_start: int
        First day-of-year to include (1-based).
    doy_end: int
        Day-of-year to stop at; this day itself is NOT included. The default
        of -1 means "run to the end of the year", taking leap years into account.

    Returns
    -------
    A list of date strings, ordered by year and then by day-of-year.
    """
    import calendar, time

    def _stop_day(yr):
        # Exclusive upper bound of the day-of-year range for this year
        if doy_end != -1:
            return doy_end
        return 367 if calendar.isleap(yr) else 366

    selected = []
    for yr in range(year_from, year_to + 1):
        for doy in range(doy_start, _stop_day(yr)):
            parsed = time.strptime("%d/%d" % (doy, yr), "%j/%Y")
            selected.append(time.strftime("%Y.%m.%d", parsed))
    return selected

In [11]:
def get_existing_files(out_dir):
    """Return the names of the entries found in ``out_dir``.

    This is kept as its own function so that a different listing mechanism
    (e.g. for files held on a bucket) can be substituted later.
    """
    entries = os.listdir(out_dir)
    return entries

def load_page_text(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    If the local time is a Wednesday with the hour between 14 and 17
    (inclusive) the call first sleeps until 18:00, because the NASA data
    pools are unavailable for maintenance on Wednesday afternoons.

    Parameters
    ----------
    url: str
        The page to load.

    Returns
    -------
    The decoded text of the response.
    """
    import logging, time
    import requests
    now = time.localtime()
    # tm_wday counts from Monday == 0, so 2 is Wednesday
    if now.tm_wday == 2 and 14 <= now.tm_hour <= 17:
        hours_to_wait = 18 - now.tm_hour
        # No logger object is defined in this notebook, so get one here
        # rather than relying on an undefined global.
        logging.getLogger(__name__).info("Sleeping for %d hours... Yawn!", hours_to_wait)
        time.sleep(60 * 60 * hours_to_wait)
    resp = requests.get(url)
    return resp.text
    
def parse_modis_dates (product_url, requested_dates, product, out_dir, check_existing_dates=False ):
    """Parse returned MODIS dates.

    This function gets the dates listing for a given MODIS product, and
    extracts the dates for when data is available. Further, it crosses these
    dates with the required dates that the user has selected and returns the
    intersection. Additionally, if the `check_existing_dates` flag is set, we'll
    check for files that might already be present in `out_dir` and skip their
    dates. Note that if a file failed in downloading, it might still be around
    incomplete.

    Parameters
    ----------
    product_url: str
        A top level product URL such as "http://e4ftl01.cr.usgs.gov/MOTA/MCD45A1.005/"
    requested_dates: list
        A list of required dates in the format "YYYY.MM.DD"
    product: str
        The product name, e.g. MOD09GA.005. Currently unused: when existing
        files are checked the product name is taken from `product_url` instead.
    out_dir: str
        The output dir; only consulted when `check_existing_dates` is True
    check_existing_dates: bool
        Whether to check for present files
    Returns
    -------
    A (sorted) list with the dates that will be downloaded.
    """
    # fnmatch is not imported anywhere else in the notebook, so import it here
    import fnmatch, time

    already_here_dates = set()
    if check_existing_dates:
        product_no_version = product_url.strip('/').split('/')[-1].split(".")[0]
        already_here = fnmatch.filter(get_existing_files(out_dir),
                                      "%s*hdf" % product_no_version)
        for existing_name in already_here:
            name_parts = existing_name.split(".")
            # The acquisition date is the "AYYYYDDD" field, fifth from the end;
            # names with too few fields cannot carry it, so ignore them.
            if len(name_parts) >= 5:
                already_here_dates.add(name_parts[-5][1:])

    available_dates = set()
    for line in load_page_text(product_url).split('\n'):
        # Only lines that link to a directory are date folders
        if "href" not in line or "[DIR]" not in line:
            continue
        the_date = line.split('href="')[1].split('"')[0].strip("/")
        if check_existing_dates:
            try:
                # Convert to YYYYDDD so it can be compared with the file names
                modis_date = time.strftime("%Y%j",
                                           time.strptime(the_date, "%Y.%m.%d"))
            except ValueError:
                continue
            if modis_date in already_here_dates:
                continue
        available_dates.add(the_date)

    return sorted(set(requested_dates) & available_dates)

# Start the pipeline


### Generate a list of the dates for which data are required and available

This will be the input to our pipeline (possibly along with tiles). We will set up three test cases:
* four arbitrary dates
* all dates in one year (2019)
* all dates available

In each case we will generate a list of the string dates then make a pipeline to build them into a PCollection comprising the URLs of the page for the date

In [18]:
# Every calendar date inside the configured year range / day-of-year window
real_dates = generate_selected_dates(YEAR_FROM, YEAR_TO, DOY_START, DOY_END)
# Keep only those dates that the product listing page actually offers.
# The out_dir argument ("C:\\temp") is only read when check_existing_dates=True,
# which is not requested here.
dates_to_download = parse_modis_dates(product_url, real_dates, product, "C:\\temp")

In [20]:
# New interactive pipeline: seed it with the date strings, then map each date
# to the URL of its listing page (<product_url>/<date>).
four_date_page_urls = (beam.Pipeline(InteractiveRunner()) | 'relevant_dates' >> beam.Create(dates_to_download)
                   | 'date_page_urls' >> beam.Map(lambda d: product_url + '/' + d)
                 )

In [21]:
ib.show(four_date_page_urls)

In [30]:
# Days 1..365 of 2019 (the doy_end argument of 366 is exclusive)
all_2019_dates = generate_selected_dates(2019, 2019, 1, 366)
# NOTE(review): the fourth positional parameter of parse_modis_dates is out_dir, so
# `False` is passed as out_dir here, not as check_existing_dates. It has no effect
# because check_existing_dates keeps its default of False.
downloadable_for_2019 = parse_modis_dates(product_url, all_2019_dates, product, False)
# Separate interactive pipeline producing the date-page URL for each available 2019 date
whole_year_date_page_urls = (beam.Pipeline(InteractiveRunner()) | 'relevant_dates' >> beam.Create(downloadable_for_2019)
                 | 'date_page_urls' >> beam.Map(lambda d: product_url + '/' + d))

In [31]:
ib.show(whole_year_date_page_urls)

In [32]:
# Days 1..365 of every year from 2000 to 2020 (the doy_end argument of 366 is
# exclusive, so day 366 of leap years is not requested)
all_available_dates = generate_selected_dates(2000, 2020, 1, 366)
# NOTE(review): `False` lands in the out_dir parameter here, not check_existing_dates;
# it has no effect because check_existing_dates keeps its default of False.
downloadable_all_time = parse_modis_dates(product_url, all_available_dates, product, False)
# Separate interactive pipeline producing the date-page URL for every available date
all_date_page_urls = (beam.Pipeline(InteractiveRunner()) | 'relevant_dates' >> beam.Create(downloadable_all_time)
                 | 'date_page_urls' >> beam.Map(lambda d: product_url + '/' + d))

In [33]:
ib.show(all_date_page_urls)

We'll also set up three test cases for which tiles should be downloaded:

* A block of four tiles - we'll test this for all available dates
* All tiles in Africa - we'll test this for a whole year
* All tiles globally - we'll test this for four dates only

In [34]:
# A block of four tiles
four_tiles = ['h17v03', 'h18v03', 'h17v04', 'h18v04']

# Africa: horizontal tiles h16-h23 combined with vertical tiles v05-v12
import itertools
africa_tiles = [
    f'h{h:02}v{v:02}'
    for h, v in itertools.product(range(16, 24), range(5, 13))
]

# '*' is the wildcard meaning "every tile"
all_tiles = '*'

### Select which one we're doing

MODIFY THIS NEXT CELL

In [35]:
# Change the trailing index to choose a test case:
#   dates: 0 = four dates, 1 = all of 2019, 2 = all available dates
DATES_TO_TEST = [four_date_page_urls, whole_year_date_page_urls, all_date_page_urls][2]
#   tiles: 0 = block of four tiles, 1 = Africa, 2 = all tiles
TILES_TO_TEST = [four_tiles, africa_tiles, all_tiles][0]

## Get the HDF urls from the date pages

For each date, we need to load the page and parse the URLs to the HDF files. Define a PTransform to do this

In [37]:
class GetHdfUrlsForDate(beam.PTransform):
    """Turn a PCollection of date-page URLs into a PCollection of HDF file URLs.

    Each input URL is fetched, the page is split into lines and every line
    that links to a ``.hdf`` file yields ``<page url>/<linked file name>``.
    """

    def load_page_text_to_lines(self, url):
        """Fetch ``url`` and return one ``(line, url)`` tuple per line of the page."""
        import requests
        page = requests.get(url)
        return [(text_line, url) for text_line in page.text.split('\n')]

    def parse_hdf_from_line(self, textline, baseurl):
        """Return the full URL of the HDF linked from ``textline``, or None if there is none."""
        if '.hdf"' not in textline:
            return None
        linked_name = textline.split('<a href="')[1].split('">')[0]
        return baseurl + '/' + linked_name

    def expand(self, pcoll):
        lines_with_url = pcoll | "Load_page_lines" >> beam.FlatMap(self.load_page_text_to_lines)
        maybe_hdf_urls = (lines_with_url
                          | "discover_hdf_urls" >> beam.MapTuple(
                              lambda line, url: self.parse_hdf_from_line(line, url)))
        # Lines without an HDF link produced None above; drop them
        return maybe_hdf_urls | "remove_non_matching" >> beam.Filter(lambda l: l is not None)
    

## Filter to only the required HDF URLs

Only keep those which are for a required tile and which we have not already downloaded to the given bucket path.

Ideally this should be developed to take account of the version part of their filename.

Define a PTransform for this which takes the bucket path and the required tiles as constructor arguments (they are plain Python values, not Beam side inputs).

In [40]:
class check_existing_files(beam.PTransform):
    """Filter HDF URLs down to those that still need to be downloaded.

    A URL is kept when no object with the same file name exists under the
    bucket path and (unless all tiles are wanted) its tile is a required one.

    Parameters
    ----------
    hdf_bucket_path: str
        Bucket prefix to list. The listing is taken once, when the transform
        is constructed, not when the pipeline runs.
    req_tile_list: list of str, or "*"
        Tile ids (e.g. 'h17v03') to keep; "*" keeps every tile.
    """

    def __init__(self, hdf_bucket_path, req_tile_list="*"):
        super().__init__()
        gcs = GcsIO()
        # A set, because membership is tested once per candidate URL
        self._existing = {os.path.basename(p) for p in gcs.list_prefix(hdf_bucket_path)}
        self._required_tiles = req_tile_list

    def checktile(self, url):
        """Return True if the file at ``url`` is for a required tile."""
        # The tile id is the third dot-separated field of the file name
        thistile = os.path.basename(url).split('.')[2]
        return self._required_tiles == "*" or thistile in self._required_tiles

    def expand(self, pcoll):
        stripped_existing = (pcoll | "remove_existing" >> beam.Filter(
            lambda l: os.path.basename(l) not in self._existing))
        if self._required_tiles == "*":
            return stripped_existing
        return stripped_existing | "remove_unrequired" >> beam.Filter(lambda l: self.checktile(l))
        

# Download the files

Now we can actually download those files. Define a PTransform which does this. We don't return the actual downloaded data in the pipeline, we save it to the bucket as a "side effect" and return the bucket path.

The PTransform class keeps an authenticated session object as a member variable, as I guess this is better than logging in again for every tile.

In [42]:
class DownloadHdfsToBucket(beam.PTransform):
    """Download each HDF URL in the input and store it in the bucket.

    The downloaded bytes are not passed along the pipeline; each file is
    written to the bucket as a side effect and the output PCollection holds
    the bucket paths that were written.

    Parameters
    ----------
    user, pw: str
        Credentials set as HTTP auth on the shared requests session.
    hdf_bucket_path: str
        Bucket folder below which files are stored as
        ``<product>/<date>/<file name>``.
    """

    def __init__(self, user, pw, hdf_bucket_path):
        import requests
        super().__init__()
        self._session = requests.Session()
        self._session.auth = (user, pw)
        self._hdf_bucket_path = hdf_bucket_path
        self._chunk_size = 8 * 1024 * 1024

    def download_file(self, url):
        """Stream the file at ``url`` into the bucket and return its bucket path.

        Raises ``requests.HTTPError`` if the server answers with an error
        status, so that an error page is never stored as if it were an HDF
        (which would later make the file look already downloaded).
        """
        # NOTE(review): the file is requested twice - once to obtain the final
        # (post-redirect) URL and once to stream the content from it. Presumably
        # this is needed for the login redirect of the data server - confirm.
        first = self._session.request('get', url)
        product, datestr, fname = url.split('/')[-3:]
        bucketfilename = '/'.join([self._hdf_bucket_path, product, datestr, fname])
        with self._session.get(first.url, stream=True) as resp:
            resp.raise_for_status()
            gcs = GcsIO()
            with gcs.open(bucketfilename, 'w') as fp:
                for chunk in resp.iter_content(chunk_size=self._chunk_size):
                    if chunk:
                        fp.write(chunk)
                fp.flush()
        return bucketfilename

    def expand(self, pcoll):
        return(pcoll | beam.Map(self.download_file))

## (Virtually) mosaic the files

Build a GDAL vrt for each day's files to mosaic them together. This requires knowledge of which layer needs to be extracted from the HDF. We will have to modify this (or rather, pass in a string template for formatting) to make it more flexible.

This transform needs to be called once for every day (not once for every HDF). At the minute we hack the date back out of the bucket path. It might be better to modify the download function to return tuples of (date, list_of_tiles) then it can just pass through into this in a single pipeline.

Although GDAL can in theory read inputs straight from bucket storage (/vsigs/) this isn't necessarily built in and needs a different authentication flow so would be a lot more complex. Instead we copy the files back to worker's local storage and work on them there.

In [25]:
class CreateVrtsForDays(beam.PTransform):
    """Build a day and a night LST mosaic (GDAL .vrt) for each day in the input.

    The input elements are day strings that match the date folder of the
    files in the bucket. For each day the HDF files are copied to local temp
    storage and two VRTs are built over them. The output PCollection is the
    flat list of VRT file paths.

    Parameters
    ----------
    bucketpath: str
        Bucket prefix holding the HDF files. It is listed once, when the
        transform is constructed.
    """

    def __init__(self, bucketpath):
        super().__init__()
        self._hdfpath = bucketpath
        gcs = GcsIO()
        # List the prefix we were given (not an unrelated global name)
        self._existing = list(gcs.list_prefix(bucketpath).keys())

    def get_tilenames_for_day(self, day):
        """Return the bucket paths whose parent folder name equals ``day``."""
        return [f for f in self._existing if f.split('/')[-2] == day]

    def get_tmp_folder_for_day(self, day):
        """Return the local working folder for ``day`` (not created here)."""
        tmpfolder = tempfile.gettempdir()
        workfolder = os.path.join(tmpfolder, day)
        return workfolder

    def localise_day_files(self, day):
        """Copy the day's files from the bucket to local storage.

        Returns a ``(day, list_of_local_paths)`` tuple.
        """
        files = self.get_tilenames_for_day(day)
        tempfolder = self.get_tmp_folder_for_day(day)
        localpaths = []
        gcs = GcsIO()
        if not os.path.isdir(tempfolder):
            os.makedirs(tempfolder)
        for f in files:
            localname = os.path.join(tempfolder, os.path.basename(f))
            with gcs.open(f) as gcsfile, open(localname, 'wb') as localfile:
                localfile.write(gcsfile.read())
                localpaths.append(localname)
        return (day, localpaths)

    def build_lst_vrt_files(self,  day, paths):
        """Build the day and night VRTs over ``paths`` and return their two file names.

        The VRTs are written next to the first input file.
        """
        # GDAL subdataset names selecting the day / night LST layer of each HDF
        daytemplate = 'HDF4_EOS:EOS_GRID:"{}":MODIS_Grid_8Day_1km_LST:LST_Day_1km'
        nighttemplate = 'HDF4_EOS:EOS_GRID:"{}":MODIS_Grid_8Day_1km_LST:LST_Night_1km'
        daypaths = [daytemplate.format(f) for f in paths]
        nightpaths = [nighttemplate.format(f) for f in paths]
        thisfolder = os.path.dirname(paths[0])
        dayvrtfile = os.path.join(thisfolder, "LST_Day.{}.vrt".format(day))
        nightvrtfile = os.path.join(thisfolder, "LST_Night.{}.vrt".format(day))
        dayvrt = gdal.BuildVRT(dayvrtfile, daypaths)
        dayvrt.FlushCache()
        # Dropping the reference closes the dataset so the file is fully written
        dayvrt = None
        nightvrt = gdal.BuildVRT(nightvrtfile, nightpaths)
        nightvrt.FlushCache()
        nightvrt = None
        return (dayvrtfile, nightvrtfile)

    def expand(self, pcoll):
        return (pcoll |  beam.Map(self.localise_day_files)
             | "build_day_night_vrts" >> beam.MapTuple(lambda d, p: self.build_lst_vrt_files(d,p))
             | "flat_list_of_vrts" >> beam.FlatMap(lambda t: t)
            )
         # groups by day

### Define a function to run a given calculation against a GDAL raster

This is based loosely on gdal_calc.py, it runs in blocks and saves to a sparse output raster. It uses numexpr to allow multithreaded computation of the actual calculation step and it also does multithreaded writing of the output. 

The calculation should be provided as a string with the input data being called 'band_data' e.g. for MODIS LST use "band_data * 0.02 + (-273.15)"
e.g. `run_singleband_calculation("/tmp/2020.02.02/LST_Day.2020.02.02.vrt", "/tmp/2020.02.02/test_celsius_output.tif", "band_data * 0.02 + (-273.15)")`

In [34]:
import numexpr as ne
def run_singleband_calculation(input_singleband_file, out_file, calc, out_type='Float32'):
    '''Apply a calculation to band 1 of a raster and write the result to a new GeoTIFF.

    The input is processed in blocks. Cells equal to the input's NoData value
    are written as the default NoData value for ``out_type``.

    Parameters
    ----------
    input_singleband_file: str
        Path of any GDAL-readable raster; only band 1 is read.
    out_file: str
        Path of the GeoTIFF to create. An existing file is removed first.
    calc: str
        The calculation to apply, given as a numexpr expression in which the
        input data is available as the variable band_data. i.e. to specify
        doubling the data then subtracting three, provide
        calc="(band_data * 2.0) - 3.0". Only band_data is exposed to the expression.
    out_type: str
        GDAL data type name of the output; must be a key of the NoData lookup below.

    Returns
    -------
    out_file

    Raises
    ------
    IOError if the input file cannot be opened.
    '''
    ds = gdal.Open(input_singleband_file, gdal.GA_ReadOnly)
    if not ds:
        raise IOError("Error opening input file {}".format(input_singleband_file))
    in_band = ds.GetRasterBand(1)
    inputNDV = in_band.GetNoDataValue()
    x_size, y_size = ds.RasterXSize, ds.RasterYSize

    if os.path.isfile(out_file):
        os.remove(out_file)
    # gdal_calc derives the output type from the input types but that isn't valid
    # as two int datasets can result in a float, hence the explicit out_type.

    #create the output file
    outDriver = gdal.GetDriverByName("GTiff")
    cOpts =  ["TILED=YES", "SPARSE_OK=TRUE", "BLOCKXSIZE=1024", "BLOCKYSIZE=1024", "BIGTIFF=YES", "COMPRESS=LZW", "NUM_THREADS=ALL_CPUS"]
    outDS = outDriver.Create(out_file, x_size, y_size, 1, gdal.GetDataTypeByName(out_type), cOpts)
    outDS.SetGeoTransform(ds.GetGeoTransform())
    outDS.SetProjection(ds.GetProjection())
    DefaultNDVLookup = {'Byte': 255, 'UInt16': 65535, 'Int16': -32767, 'UInt32': 4294967293, 'Int32': -2147483647, 'Float32': 3.402823466E+38, 'Float64': 1.7976931348623158E+308}
    outNDV = DefaultNDVLookup[out_type]
    outBand = outDS.GetRasterBand(1)
    outBand.SetNoDataValue(outNDV)

    # vrt file reports a block size of 128*128 but the underlying hdf block size is 1200*100
    # so hard code this or some clean multiple : this minimises disk access
    block_x, block_y = 4800, 4800

    for myX in range(0, x_size, block_x):
        # The last block in each direction may be narrower than the block size
        nXValid = min(block_x, x_size - myX)
        for myY in range(0, y_size, block_y):
            nYValid = min(block_y, y_size - myY)
            band_data = in_band.ReadAsArray(xoff=myX, yoff=myY,
                                            win_xsize=nXValid, win_ysize=nYValid)
            nodata_locs = band_data == inputNDV

            # Hand the data to numexpr explicitly rather than letting it
            # pick up this function's local variables by name.
            result = ne.evaluate(calc, local_dict={'band_data': band_data})

            # apply ndv (set nodata cells to zero then add nodata value to these cells)
            result = ((1 * (nodata_locs==0))*result + (outNDV * nodata_locs))

            outBand.WriteArray(result, xoff=myX, yoff=myY)

    # Flush and release the output so it is completely on disk before the
    # caller reads it back.
    outBand.FlushCache()
    outBand = None
    outDS.FlushCache()
    outDS = None
    return out_file

## Create the calculated output data

Define a PTransform to run that calculation (in fact we will move the function into the PTransform)

This will output a file to the worker's local storage, which will still be in the original sinusoidal projection and because it's only going to be read once in the next step, we use the sparse option (incompatible with some reader software) to help keep file size down


In [35]:
class TranslateVrtToLstTiff(beam.PTransform):
    """Convert each VRT path in the input into a calculated LST GeoTIFF.

    Every input raster is run through ``run_singleband_calculation`` with the
    expression ``band_data * 0.02 + (-273.15)``; the output PCollection holds
    the paths of the GeoTIFFs written.
    """

    def get_out_name(self, vrtname):
        """Return the output file name for ``vrtname`` ('.vrt' becomes '.sinusoidal.tif')."""
        tif_name = vrtname.replace('.vrt', '.sinusoidal.tif')
        return tif_name

    def expand(self, pColl):
        lst_calc = "band_data * 0.02 + (-273.15)"
        convert = lambda vrt_path: run_singleband_calculation(
            vrt_path, self.get_out_name(vrt_path), lst_calc)
        return pColl | beam.Map(convert)
        

## Reproject the calculated raster

In [75]:
class create_projected_tiffs(beam.PTransform):
    """Warp each input raster to EPSG:4326 at a resolution of 1/120 degree.

    Parameters
    ----------
    ForceGlobalExtent: bool
        If True the output always covers -180,-90 to 180,90. Otherwise no
        bounds are given and the output pixels are aligned to the target
        resolution instead.
    """

    def __init__(self, ForceGlobalExtent = False):
        self._forceglobal = ForceGlobalExtent

    def warpfile(self, sinusFile):
        """Warp ``sinusFile`` and return the name of the file written.

        The output name is the input name with '.sinusoidal' removed.
        """
        # Settings shared by both variants
        warp_kwargs = dict(
            format='GTiff',
            xRes=1/120.0, yRes=-1/120.0, dstSRS='EPSG:4326',
            creationOptions=["TILED=YES", "BIGTIFF=YES", "COMPRESS=LZW", "NUM_THREADS=ALL_CPUS"],
            multithread=True, dstNodata=-9999, warpMemoryLimit=2048)
        if self._forceglobal:
            warp_kwargs['outputBounds'] = [-180, -90, 180, 90]
        else:
            warp_kwargs['targetAlignedPixels'] = "YES"
        wo = gdal.WarpOptions(**warp_kwargs)

        outname = sinusFile.replace('.sinusoidal', '')
        gdal.Warp(outname, sinusFile, options=wo)
        return outname

    def expand(self, pColl):
        return pColl | beam.Map(self.warpfile)


In [81]:
# Bucket folders that receive the final day / night LST GeoTIFFs.
# Built from the BUCKET_PATH constant defined with the other settings.
day_bucket_path = BUCKET_PATH + '/' + "output/lst_day"
night_bucket_path = BUCKET_PATH + '/' + "output/lst_night"

Finally make a PTransform that will upload the output back to the bucket with a mastergrids-formatted filename, then remove the temp files from the worker

In [106]:
class cleanup(beam.DoFn):
    """Upload a finished GeoTIFF to the bucket, then delete the local copy.

    The bucket folder is chosen from the first dot-separated field of the
    file name (day or night). Files that are neither are left alone and
    produce no output.
    """

    def process(self, finaltif):
        name_parts = os.path.basename(finaltif).split('.')
        variable = name_parts[0]
        # Output name is built from the first four dot-separated fields
        outname = (variable + "_Unfilled." + name_parts[1] + "."
                   + name_parts[2] + name_parts[3] + ".Data.1km.Data.tif")
        if variable.find("Day")>0:
            gsPath = day_bucket_path + "/" + outname
        elif variable.find("Night")>0:
            gsPath = night_bucket_path + "/" + outname
        else:
            return
        storage = GcsIO()
        with storage.open(gsPath, 'w') as remote, open(finaltif, 'rb') as local:
            remote.write(local.read())
        os.remove(finaltif)
        yield gsPath
        

Note that to actually run this on a distributed runner we will need to move the vrt, calculate, reproject and upload steps into a single PTransform. Otherwise they might (will) get run on separate workers with non-shared local storage, and each step will fail to find the files written by the previous one.

For now, with interactive running, here's a pipeline to put it all together.

In [None]:

# Discover the HDF URLs listed on each selected date page
hdf_urls = DATES_TO_TEST | GetHdfUrlsForDate()
ib.show(hdf_urls)

# Drop files that are already in the bucket or that are for tiles we don't need
required_for_download = hdf_urls | check_existing_files(HDF_BUCKET_PATH, TILES_TO_TEST)

download_results = required_for_download | DownloadHdfsToBucket(username, password, HDF_BUCKET_PATH)

# need to wait here: CreateVrtsForDays lists the bucket when it is constructed,
# so the downloads must have finished before the next lines are run

# DATES_TO_TEST holds date-page URLs, but CreateVrtsForDays matches on the day
# string alone (the last URL component), so extract that first
days = DATES_TO_TEST | "date_from_page_url" >> beam.Map(lambda page_url: page_url.rstrip('/').split('/')[-1])
vrts = days | CreateVrtsForDays(HDF_BUCKET_PATH)
uploaded_tiffs = vrts | TranslateVrtToLstTiff() | create_projected_tiffs() | beam.ParDo(cleanup())

### TBD - multiband

In [79]:
def run_multiband_calculation(input_singleband_files, outfile, calc):
    '''Apply a calculation to a set of single-band rasters and write a Float32 GeoTIFF.

    Work in progress (TBD): all inputs are opened and checked to have the
    same dimensions, but at present only the first input's data is made
    available to the expression, as the variable band_data.

    Parameters
    ----------
    input_singleband_files: list of str
        Paths of GDAL-readable rasters; band 1 of each is used.
    outfile: str
        Path of the GeoTIFF to create. An existing file is removed first.
    calc: str
        A numexpr expression written in terms of band_data.

    Returns
    -------
    outfile

    Raises
    ------
    IOError if an input cannot be opened; Exception if the inputs differ in size.
    '''
    input_datasets = []
    myNDV = []
    DimensionsCheck = None

    for input_file in input_singleband_files:
        ds = gdal.Open(input_file, gdal.GA_ReadOnly)
        if not ds:
            raise IOError("Error opening input file {}".format(input_file))
        input_datasets.append(ds)
        myNDV.append(ds.GetRasterBand(1).GetNoDataValue())
        if DimensionsCheck:
            if DimensionsCheck != [ds.RasterXSize, ds.RasterYSize]:
                raise Exception("Error! Dimensions of file %s (%i, %i) are different from other files (%i, %i).  Cannot proceed" %
                                    (input_file, ds.RasterXSize, ds.RasterYSize, DimensionsCheck[0], DimensionsCheck[1]))
        else:
            DimensionsCheck = [ds.RasterXSize, ds.RasterYSize]

    if os.path.isfile(outfile):
        os.remove(outfile)
    # The output is always Float32, so its NoData value must be the Float32 one
    # rather than one looked up from the input data types.
    outType = 'Float32'
    outDriver = gdal.GetDriverByName("GTiff")
    # Each creation option must be its own list element
    cOpts = ["TILED=YES", "SPARSE_OK=TRUE", "BLOCKXSIZE=1024", "BLOCKYSIZE=1024"]
    outDS = outDriver.Create(outfile, DimensionsCheck[0], DimensionsCheck[1], 1, gdal.GetDataTypeByName(outType), cOpts)
    outDS.SetGeoTransform(input_datasets[0].GetGeoTransform())
    outDS.SetProjection(input_datasets[0].GetProjection())
    DefaultNDVLookup = {'Byte': 255, 'UInt16': 65535, 'Int16': -32767, 'UInt32': 4294967293, 'Int32': -2147483647, 'Float32': 3.402823466E+38, 'Float64': 1.7976931348623158E+308}
    outNDV = DefaultNDVLookup[outType]
    outBand = outDS.GetRasterBand(1)
    outBand.SetNoDataValue(outNDV)

    # vrt file reports a block size of 128*128 but the underlying hdf block size is 1200*100
    # so hard code this or some clean multiple : this minimises disk access
    block_x, block_y = 4800, 4800
    first_band = input_datasets[0].GetRasterBand(1)

    for myX in range(0, DimensionsCheck[0], block_x):
        # The last block in each direction may be narrower than the block size
        nXValid = min(block_x, DimensionsCheck[0] - myX)
        for myY in range(0, DimensionsCheck[1], block_y):
            nYValid = min(block_y, DimensionsCheck[1] - myY)
            band_data = first_band.ReadAsArray(xoff=myX, yoff=myY,
                                               win_xsize=nXValid, win_ysize=nYValid)
            nodata_locs = band_data == myNDV[0]

            # Hand the data to numexpr explicitly rather than letting it
            # pick up this function's local variables by name.
            result = ne.evaluate(calc, local_dict={'band_data': band_data})

            # apply ndv (set nodata cells to zero then add nodata value to these cells)
            result = ((1 * (nodata_locs==0))*result + (outNDV * nodata_locs))

            outBand.WriteArray(result, xoff=myX, yoff=myY)

    # Flush and release the output so it is completely on disk before use
    outBand.FlushCache()
    outBand = None
    outDS.FlushCache()
    outDS = None
    return outfile