# Population Reallocator

Contains code to match population or similar "count" datasets to a fixed coastline. With data of this type we cannot simply discard any input data values that do not fall within the land template file that we are using, in case of mismatch. Those values represent population, who must be reallocated to another "land" pixel to maintain totals.

In [1]:
from osgeo import gdal
import numpy as np
import rasterio
import glob
import os


In [2]:
%load_ext cython

In [8]:
import pyximport
pyximport.install()


(None, None)

ImportError: No module named CoastlineMatching

In [3]:
# file in mastergrids coords (resolution = 0.00833333)
lsTemplateFile = r'C:\Users\zool1301\Documents\MODIS\Supporting_Data\ls1k'

# files in true (accurate) coords (resolution = 0.008333333333333 or multiple thereof)
ls_Accurate_1kFile = r'G:\Supporting\CoastGlobal.tiff'
ls_Accurate_5kFile = r'G:\Supporting\CoastGlobal_5k.tif'
ls_Accurate_10kFile = r'G:\Supporting\CoastGlobal_10k.tif'

In [4]:
#popDir = r'C:\Users\zool1301\Documents\Other_Data\Population\Worldpop_GPWv4_Comb_201507'
#popDir = r'C:\Users\zool1301\Documents\Other_Data\Population\GPWv4'

# file to be matched
inPopFN = 'worldpop_gpwv4_mosaic_export_5k.tif'
# output file
outPopFN = 'worldpop_gpwv4_mosaic_export_5k_MG_Reallocated.tif'
# output file to hold data that could not be allocated to a land pixel within the 
# given search radius
outFailFN = 'worldpop_gpwv4_mosaic_export_5k_MG_ReallocationFailures.tif'

inPopFile = os.path.join(popDir, inPopFN)
outPopFile = os.path.join(popDir, outPopFN)
outFailFile = os.path.join(popDir, outFailFN)


### Match multiple files to mastergrids (GPWv4, GPWv3, GRUMP, Worldpop series)

In [8]:
#popDir = r'C:\Users\zool1301\Documents\Other_Data\Population\WorldPop\WholeContinentPop2010\WorldPop-Africa'
popDir = r'G:\DataPrep\population\GRUMP\tif'
#r'C:\Users\zool1301\Documents\Other_Data\Population\WorldPop\AgeStructuresAsia'
popFiles = glob.glob(os.path.join(popDir, "*.tif"))
for inFN in popFiles:
    outFN = os.path.join(popDir, 
                         #"MG_Matched", 
                         os.path.basename(os.path.splitext(inFN)[0])
                             +"_MG_Matched.tif"
                         )
    outFailFN = outFN.replace("_MG_Matched.tif", "_MG_Failures.tif")
    if os.path.exists(outFN):
        print "Already done "+inFN
        continue
    print inFN
    popDS = gdal.Open(inFN)
    b = popDS.GetRasterBand(1)
    ndvPop = b.GetNoDataValue()
    gtPop = popDS.GetGeoTransform()
    projPop = popDS.GetProjection()
    
    popData = b.ReadAsArray()
    popDS = None
    
    landDS = gdal.Open(ls_Accurate_1kFile)
    b = landDS.GetRasterBand(1)
    ndvMask = b.GetNoDataValue()
    gtLand = landDS.GetGeoTransform()
    
    # Check that the resolutions are the same
    # (If the rounding issue has not first been corrected then the assertion will
    # fail: in this case comment them out and satisfy yourself first that the pixel 
    # coordinates do match i.e. that the rounding doesn't lead to > 0.5 cell error)
    assert round(gtPop[1], 15) == round(gtLand[1], 15) 
    assert round(gtPop[5], 15) == round(gtLand[5], 15)
    
    # the population dataset is not global; where does it sit in the global image?
    landOffsetW = int(round((gtPop[0] - gtLand[0]) / gtLand[1]))
    landOffsetN = int(round((gtPop[3]-gtLand[3]) / gtLand[5]))
    
    #print (landOffsetN, landOffsetW)
    landData= b.ReadAsArray(landOffsetW, landOffsetN, popData.shape[1], popData.shape[0])
    
    failedLocs = reallocateToUnmasked(popData, landData, ndvPop)
    
    writeTiffFile(popData, outFN, gtPop, projPop, ndvPop)
    writeTiffFile(np.asarray(failedLocs), outFailFN, gtPop, projPop, None, gdal.GDT_Byte)
    

G:\DataPrep\population\GRUMP\tif\glup00ag.tif
Reallocated 178058496.0 total pop from 1078095 cells to nearby land cell
Clipped (set to nodata) 518919561 zero-value cells in the sea
Failed to reallocate 1784.53796387 total pop from 336 cells to nearby land cell
G:\DataPrep\population\GRUMP\tif\glup00g.tif
Reallocated 175044608.0 total pop from 1078182 cells to nearby land cell
Clipped (set to nodata) 518919474 zero-value cells in the sea
Failed to reallocate 1765.0012207 total pop from 336 cells to nearby land cell
G:\DataPrep\population\GRUMP\tif\glup90ag.tif
Reallocated 155500400.0 total pop from 1114555 cells to nearby land cell
Clipped (set to nodata) 518883101 zero-value cells in the sea
Failed to reallocate 1650.81408691 total pop from 336 cells to nearby land cell
G:\DataPrep\population\GRUMP\tif\glup90g.tif
Reallocated 153110864.0 total pop from 1129507 cells to nearby land cell
Clipped (set to nodata) 518868149 zero-value cells in the sea
Failed to reallocate 1639.00061035 tota

In [21]:
inDirStack = r'C:\Users\zool1301\Documents\Other_Data\Population\WorldPop\AgeStructures\AgeStructuresAfrica\MG_Matched'
fnFormat = 'ap{0}v4_A*_MG_Matched.tif'
for yr in ['00','05','10','15']:
    inPattern = fnFormat.format(yr)
    inStack = glob.glob(os.path.join(inDirStack, inPattern))
    first = True
    for inFN in inStack:
        ds = gdal.Open(inFN)
        b = ds.GetRasterBand(1)
        arr = b.ReadAsArray()
        if first:
            first = False
            ndvPop = b.GetNoDataValue()
            gtPop = ds.GetGeoTransform()
            projPop = ds.GetProjection()
            sumArr = arr
        else:
            assert ndvPop == b.GetNoDataValue()
            assert gtPop == ds.GetGeoTransform()
            assert projPop == ds.GetProjection()
            assert arr.shape == sumArr.shape
            sumArr[arr != ndvPop] += arr[arr != ndvPop]
    outFN = "Africa{0}_ManualTotal.tif".format(yr)
    writeTiffFile(sumArr, os.path.join(inDirStack,outFN), gtPop, projPop, ndvPop)
    

### Or do a single file manually

In [None]:
popDS = gdal.Open(inPopFile)
b = popDS.GetRasterBand(1)
ndvPop = b.GetNoDataValue()
gtPop = popDS.GetGeoTransform()
projPop = popDS.GetProjection()

#popData = b.ReadAsArray()
popOffsetN = int((50 - gtPop[3]) / gtLand[5])
popHeight = int((50 - -60) / gtPop[1])
popData = b.ReadAsArray(0, popOffsetN, 8640, popHeight)

In [29]:
gtPop

(-17.533398320000007, 0.00833333, 0.0, 37.54996098, 0.0, -0.00833333)

In [27]:
landDS = gdal.Open(ls_Accurate_5kFile)
b = landDS.GetRasterBand(1)
ndvMask = b.GetNoDataValue()
gtLand = landDS.GetGeoTransform()
gtLand

(-180.0, 0.041666666666665, 0.0, 90.0, 0.0, -0.041666666666665)

In [None]:
# Are the resolutions the same?
assert gtPop[1] == gtLand[1]
assert gtPop[5] == gtLand[5]

In [30]:
# the population dataset is not global; where does it sit in the global image?
landOffsetW = int(round((gtPop[0] - gtLand[0]) / gtLand[1]))
#landOffsetN = int((gtPop[3]-gtLand[3]) / gtLand[5])
landOffsetN = int(round((50-gtLand[3]) / gtLand[5]))
landOffsetN, landOffsetW

(960, 3899)

In [None]:
landHeight = (50 - -60) / gtLand[1]

In [None]:
landHeight

In [None]:
# read the required portion of the land data
landData= b.ReadAsArray(landOffsetW, landOffsetN, popData.shape[1], popData.shape[0])
#landData= b.ReadAsArray(landOffsetW, landOffsetN, 8640, 2640)

In [None]:
# check the totals match - do this before and after
np.logical_and(np.not_equal(popData,ndvPop), np.not_equal(popData,0)).sum()

In [None]:
# perform the reallocation
failedLocs = reallocateToUnmasked(popData, landData, ndvPop)

In [None]:
# check the totals match - do this before and after
np.logical_and(np.not_equal(popData,ndvPop), np.not_equal(popData,0)).sum()

In [None]:
# write the outputs
writeTiffFile(popData, outPopFile, gtPop, projPop, ndvPop)
writeTiffFile(failedLocs, outFailFile, gtPop, projPop, None, gdal.GDT_Byte)

### Tiff writer utility function

In [7]:
def writeTiffFile(dataArray, fileName, geoTransform, projection, 
                 noDataValue=None, dataType=gdal.GDT_Float32):
    outDrv = gdal.GetDriverByName("GTiff")
    outRaster = outDrv.Create(fileName, dataArray.shape[1], dataArray.shape[0], 1,
                              dataType,
                              ["COMPRESS=LZW", "TILED=YES", "SPARSE_OK=TRUE", "BIGTIFF=YES"])
    outRaster.SetGeoTransform(geoTransform)
    outRaster.SetProjection(projection)
    band = outRaster.GetRasterBand(1)
    assert band is not None
    if noDataValue is not None:
        band.SetNoDataValue(noDataValue)
    band.WriteArray(dataArray)
    band = None
    outRaster = None

## Cython function to perform the reallocation

In [2]:
%%cython --compile-args=/openmp --link-args=/openmp --force --annotate
cimport cython
import numpy as np
cimport openmp
from cython.parallel import parallel, prange
from libc.math cimport sqrt

cpdef reallocateToUnmasked(float[:,::1] data, char[:,::1] lsMask, float _NDV = np.inf):
    ''' 
    Reallocates data falling in masked area to nearest non-masked pixel
    
    For use in preparing population datasets for MAP use with standardised land-sea template.
    
    Rather than just clipping population datasets to MAP's One True Coastline (TM), any population
    falling in pixels that are "sea" according to MAP's One True Coastline (TM)
    must be forcibly relocated Bikini-Atoll-style to the nearest "land" pixel according to MAP's
    One True Coastline (TM), in order to maintain population counts.
    
    Input data must be a float array. Input mask must be a byte array of the same shape as 
    the data array, with a value of 1 on "land" (unmasked areas), and any other value 
    elsewhere. 
    
    The input data array is modified in-place. The returned object is a new array flagging 
    (with a value of 1) locations where the reallocation failed because there was no
    unmasked (land) pixel within the search radius. At these locations, the data will be 
    unmodified despite being in the sea.
    '''
    
    cdef:
        Py_ssize_t xShapeIn, yShapeIn
        Py_ssize_t xIn, yIn, xNbr, yNbr, nbrIndex_prv
        float localValue
        int[:,::1] nbrIntCoords
        char[:,::1] failedLocs
        int reallocatedCells, failedReallocations, clippedZeros 
        float failedReallocationPop,  reallocatedTotalPop
        char reallocatedOK
        int _MAX_NEIGHBOURS_TO_CHECK = 31400
        
    yShapeIn = data.shape[0]
    xShapeIn = data.shape[1]
    assert lsMask.shape[0] == yShapeIn
    assert lsMask.shape[1] == xShapeIn
    
     # Generate the neighbour spiral search table out to "a bit" further than needed
    _SEARCH_RADIUS = 100 # <int> ((sqrt(_MAX_NEIGHBOURS_TO_CHECK / 3.14)) + 5)
    diam = _SEARCH_RADIUS * 2 + 1
    inds = np.indices([diam,diam]) - _SEARCH_RADIUS
    distTmp = np.sqrt((inds ** 2).sum(0))
    npTmpTable = ((inds.T).reshape(diam**2, 2))
    npTmpTable = np.append(npTmpTable, distTmp.ravel()[:,None],axis=1)
    # sort the table by distance then x then y (the arguments are last-sort-first)
    order = np.lexsort((npTmpTable[:,1],npTmpTable[:,0],npTmpTable[:,2]))
    npTmpTable = np.take(npTmpTable,order,axis=0)
    # transfer to a C-side object transposed to have three rows and many columns and in 
    # C-contiguous layout, so that cython can access individual nbr coord sets more quickly
    nbrTable = np.copy((npTmpTable[npTmpTable[:,2] <= _SEARCH_RADIUS]).T,order='c')
    # cast the columns that will be used as array indices to int type once here, rather 
    # than casting repeatedly inside the inner loop
    nbrIntCoords = np.asarray(nbrTable[0:2,:]).astype(np.int32)
    
    reallocatedCells = 0
    clippedZeros = 0
    reallocatedTotalPop = 0
    failedReallocations = 0
    failedReallocationPop = 0
    
    failedLocs = np.zeros_like(lsMask)
    
    for yIn in range (yShapeIn):
        for xIn in range (xShapeIn):
            if lsMask[yIn, xIn] == 1:
                # we are on land so no need to do anything
                # TODO maybe we want to see if we need to set the data to e.g. 0
                # if it is nodata, here
                continue
            if data[yIn, xIn] == _NDV:
                # there is nothing to reallocate
                continue
            if data[yIn, xIn] == 0:
                # we are in the sea but there is nothing to reallocate
                # set it to nodata instead of zero for clarity, though
                data[yIn, xIn] = _NDV
                clippedZeros += 1
                continue
            # otherwise we are in sea but have nonzero data. 
            # Find nearest land to reallocate it
            reallocatedOK = 0
            for nbrIndex_prv in range(1, _MAX_NEIGHBOURS_TO_CHECK):
                # use int-type coords array to avoid cast op in tight loop
                xNbr = xIn + nbrIntCoords[0, nbrIndex_prv]
                yNbr = yIn + nbrIntCoords[1, nbrIndex_prv]
                if (xNbr >= 0 and xNbr < xShapeIn and 
                    yNbr >= 0 and yNbr < yShapeIn and
                    lsMask[yNbr, xNbr] == 1):
                    if data[yNbr, xNbr] == _NDV or data[yNbr, xNbr] < 0:
                        data[yNbr, xNbr] = data[yIn, xIn]
                    else:
                        data[yNbr, xNbr] += data[yIn, xIn]
                    data[yIn, xIn] = _NDV
                    reallocatedOK = 1
                    reallocatedCells += 1
                    reallocatedTotalPop += data[yNbr, xNbr]
                    break
            if reallocatedOK == 0:
                failedReallocations += 1
                failedReallocationPop += data[yIn, xIn]
                failedLocs[yIn, xIn] = 1
    print ("Reallocated {0!s} total pop from {1!s} cells to nearby land cell".format(
        reallocatedTotalPop,reallocatedCells))
    print ("Clipped (set to nodata) {0!s} zero-value cells in the sea".format(clippedZeros))
    print ("Failed to reallocate {0!s} total pop from {1!s} cells to nearby land cell".format(
        failedReallocationPop, failedReallocations))
    return np.asarray(failedLocs)
    

#### Linear interpolate 5-yearly grids to annual

In [None]:
pop2005File = r'C:\Users\zool1301\Documents\Other_Data\Population\GPWv3\Futures\GPWv3_FE_2005_MGMatched.tif'
pop2010File = r'C:\Users\zool1301\Documents\Other_Data\Population\GPWv3\Futures\GPWv3_FE_2010_MGMatched.tif'
pop2015File = r'C:\Users\zool1301\Documents\Other_Data\Population\GPWv3\Futures\GPWv3_FE_2015_MGMatched.tif'
d = gdal.Open(pop2005File)
pop2005 = d.GetRasterBand(1).ReadAsArray()
d = gdal.Open(pop2010File)
pop2010 = d.GetRasterBand(1).ReadAsArray()
d = gdal.Open(pop2015File)
pop2015 = d.GetRasterBand(1).ReadAsArray()


In [4]:
popDir = r'\\map-fs1.ndph.ox.ac.uk\map_data\mastergrids\Other_Global_Covariates\Population\Worldpop_GPWv4_Hybrid_201601'

pop2000File = os.path.join(popDir, 'Global_Pop_1km_Adj_MGMatched_2000_Hybrid.tif')
pop2005File = os.path.join(popDir, 'Global_Pop_1km_Adj_MGMatched_2005_Hybrid.tif')
pop2010File = os.path.join(popDir, 'Global_Pop_1km_Adj_MGMatched_2010_Hybrid.tif')
pop2015File = os.path.join(popDir, 'Global_Pop_1km_Adj_MGMatched_2015_Hybrid.tif')

d = gdal.Open(pop2000File)
pop2000 = d.GetRasterBand(1).ReadAsArray()
d = gdal.Open(pop2005File)
pop2005 = d.GetRasterBand(1).ReadAsArray()
d = gdal.Open(pop2010File)
pop2010 = d.GetRasterBand(1).ReadAsArray()
d = gdal.Open(pop2015File)
pop2015 = d.GetRasterBand(1).ReadAsArray()


In [18]:
globalGT = d.GetGeoTransform()
globalProj = d.GetProjection()
ndv = d.GetRasterBand(1).GetNoDataValue()

In [5]:
pop2015.shape

(17400L, 43200L)

In [8]:
stack = np.empty(shape=(16,17400,43200), dtype=np.float32)

MemoryError: 

In [9]:
del(stack)

In [7]:
stack[0] = np.copy(pop2000)
stack[5] = np.copy(pop2005)
stack[10] = np.copy(pop2010)
stack[15] = np.copy(pop2015)
gotYrs = [2000,2005,2010,2015]
baseYr = 2000
popDiff = stack[5] - stack[0]
for i in range(1,16):
    #print i
    yr = i + baseYr
    if yr in gotYrs:
        print yr
        prevYr = yr
        nextYr = gotYrs[gotYrs.index(yr)+1]
        popDiff = stack
    else:
        thisYrOffset = yr-prevYr
        

ValueError: could not broadcast input array from shape (17400,43200) into shape (3480,8640)

In [10]:
outDir = r'E:\Temp\pop'
fnTemplate = 'Global_Pop_1km_Adj_MGMatched_{0!s}-Interp_Hybrid.tif'

In [None]:
os.path.j

In [27]:
end = pop2005
start = pop2000
base = 2000

popDiffPerYr = (end - start) / 5.0
popShape = popDiffPerYr.shape
grubby = np.logical_or(start==ndv, end==ndv)

for i in range (base+1,base+5):
    offset = i - base
    print i
    thisYr = (offset*popDiffPerYr) + start
    thisYr[grubby] = ndv
    outDrv = gdal.GetDriverByName('GTiff')
    outPopFile = os.path.join(outDir, fnTemplate.format(i) )
    dataRaster = outDrv.Create(outPopFile, popShape[1], popShape[0], 1, gdal.GDT_Float32,
                                           ["COMPRESS=LZW", "TILED=YES", "SPARSE_OK=TRUE", "BIGTIFF=YES"])
   # failRaster = outDrv.Create(outFailFN, popShape[1], popShape[0], 1, gdal.GDT_Byte,
   #                                        ["COMPRESS=LZW", "TILED=YES", "SPARSE_OK=TRUE", "BIGTIFF=YES"])

    dataRaster.SetGeoTransform(globalGT)
    dataRaster.SetProjection(globalProj)
    #failRaster.SetGeoTransform(globalGT)
    #failRaster.SetProjection(globalProj)

    bnd = dataRaster.GetRasterBand(1)
    assert bnd is not None
    bnd.SetNoDataValue(ndv)
    bnd.WriteArray(thisYr)
    bnd = None
    dataRaster = None

2001
2002
2003
2004


### Worldpop / GRUMP combination work

Aggregate worldpop age-structures data for a year into three age bins

In [None]:
ap2kFiles = glob.glob(r'\\map-fs1.ndph.ox.ac.uk\map_data\mastergrids\Other_Global_Covariates\Population\WorldPop\AgeStructures\Africa\2000\*.tif')
outDir = r'C:\Users\zool1301.NDPH\Documents\Dial-A-Map\pop-1990-invention'

import re
# we'll be creating three age-bin datasets for this one worldpop year
ap0005 = None
ap0515 = None
ap1599 = None
everData = None
for f in ap2kFiles:
    fnParts = os.path.basename(f).split('_')
    maybeAge = fnParts[1]
    if re.match('A\d', maybeAge):
        print maybeAge
        d = gdal.Open(f)
        b = d.GetRasterBand(1)
        arr = b.ReadAsArray()
        ndv = b.GetNoDataValue()
        startage = maybeAge[1:3]
        if everData is None:
            everData = np.zeros(arr.shape,np.bool)
            gt = d.GetGeoTransform()
            proj = d.GetProjection()
        everData = np.logical_or(everData, arr != ndv)
        
        if startage == '00':
            if ap0005 is None:
                ap0005 = np.zeros_like(arr)
            ap0005[arr != ndv] += arr[arr != ndv]
        if startage == '05':
            if ap0515 is None:
                ap0515 = np.zeros_like(arr)
            ap0515[arr != ndv] += arr[arr != ndv]
        else:
            if ap1599 is None:
                ap1599 = np.zeros_like(arr)
            ap1599[arr != ndv] += arr[arr != ndv]

# calculate the proportions            
apTot = ap0005+ap0515+ap1599
# will give runtime warning due to divide by zero if there's permanent nodata anywhere
ap0005_prop = ap0005 / apTot
ap0515_prop = ap0515 / apTot
ap1599_prop = ap1599 / apTot
# make sure those places are set to nodata 
ap0005_prop[everData == False] = ndv
ap0515_prop[everData == False] = ndv
ap1599_prop[everData == False] = ndv

writeTiffFile(ap0005_prop, os.path.join(outDir, 'ap2000_0005_prop.tif'), gt, proj, ndv)
writeTiffFile(ap0515_prop, os.path.join(outDir, 'ap2000_0515_prop.tif'), gt, proj, ndv)
writeTiffFile(ap1599_prop, os.path.join(outDir, 'ap2000_1599_prop.tif'), gt, proj, ndv)

writeTiffFile(ap0005, os.path.join(outDir, r'ap2000_0005.tif') , gt, proj, ndv)
writeTiffFile(ap0515, os.path.join(outDir, r'ap2000_0515.tif'), gt, proj, ndv)
writeTiffFile(ap1599, os.path.join(outDir, r'ap2000_1599.tif'), gt, proj, ndv)

writeTiffFile(apTot, os.path.join(outDir, r'ap2000_Tot.tif'), gt, proj, ndv)

Calculate just the proportions based on Dan's aggregations of the same

In [57]:
dan0005_fn = r'\\map-fs1.ndph.ox.ac.uk\map_data\cubes\5km\AfriPop\population_surfaces_for_Pf_incidence\00-05\2000_00-05.tif'
dan0515_fn = r'\\map-fs1.ndph.ox.ac.uk\map_data\cubes\5km\AfriPop\population_surfaces_for_Pf_incidence\05-15\2000_05-15.tif'
dan15pl_fn = r'\\map-fs1.ndph.ox.ac.uk\map_data\cubes\5km\AfriPop\population_surfaces_for_Pf_incidence\15+\2000_15+.tif'

d = gdal.Open(dan0005_fn)
b = d.GetRasterBand(1)
gt = d.GetGeoTransform()
proj = d.GetProjection()
ndv = b.GetNoDataValue()

dan0005 = b.ReadAsArray()

d = gdal.Open(dan0515_fn)
b = d.GetRasterBand(1)
dan0515 = b.ReadAsArray()

d = gdal.Open(dan15pl_fn)
b = d.GetRasterBand(1)
dan15pl = b.ReadAsArray()

danTot = dan0005+dan0515+dan15pl
dan0005_prop = dan0005 / danTot
dan0515_prop = dan0515 / danTot
dan15pl_prop = dan15pl / danTot

writeTiffFile(dan0005_prop, r'C:\Users\zool1301.NDPH\Documents\Dial-A-Map\pop-1990-invention\dan2000_0005_prop.tif', gt, proj, ndv)
writeTiffFile(dan0515_prop, r'C:\Users\zool1301.NDPH\Documents\Dial-A-Map\pop-1990-invention\dan2000_0515_prop.tif', gt, proj, ndv)
writeTiffFile(dan15pl_prop, r'C:\Users\zool1301.NDPH\Documents\Dial-A-Map\pop-1990-invention\dan2000_15pl_prop.tif', gt, proj, ndv)

writeTiffFile(danTot, r'C:\Users\zool1301.NDPH\Documents\Dial-A-Map\pop-1990-invention\dan2000_total.tif', gt, proj, ndv)