---

## 0. PSEUDOCODE / OVERVIEW

##### Before Python:
Projected most data to same equal area projection.
<br>Exported version of GRID3 that exclude cities (built-up area).

##### Prep data
Merge: GRID3_BFA, GRID3_MLI, GRID3_NER, GRID3_TCD
<br> Export GRID3 without Built-Up Area class (projection = Africa Albers) = GRID3_noBuiltUp.shp

<br> Reproject ADM3.shp to Africa Albers
<br> Reproject LZ.shp to Africa Albers
<br> Field calculator: create numeric classes for OECD_ZONE (new field: OECD_ZNUM)

##### Rasterize
Rasterize ADM3.shp. (Value = OBJECTID) = ADM3.tif
<br> Rasterize LZ.shp. (Value = OECD_ZNUM) = LZ.tif

##### Concatenation
Raster math: 
<br> LZ.tif + (ADM3.tif * 10) = LZ_ADM3.tif

##### Vector joins
Polygonize LZ_ADM3.tif
<br> Result: LZ_ADM3_ply.shp

<br> Spatial join: (one to one)
<br> Target: LZ_ADM3_ply.shp
<br> Join: GRID3_noBuiltUp.shp
<br> Statistics: popUN --> sum
<br> Result: LZ_ADM3_G3.shp

##### Group by largest population
Calculate field:
<br> LZ_ADM3_G3['gridstring'] = LZ_ADM3_G3['gridcode'].astype(str).str.zfill(length_of_maximum_value)
<br> LZ_ADM3_G3['ADM_ID'] = LZ_ADM3_G3['gridstring'].str[:-len_ADM].astype(int)
<br> LZ_ADM3_G3['LZ_NUM'] = LZ_ADM3_G3['gridstring'].str[-len_ADM:].astype(int)

<br> Largest = LZ_ADM3_G3.loc[LZ_ADM3_G3.groupby(["ADM3"])["popUN"].idxmax()]

##### Finalize
Spatial join: (one to one)
<br> Target: ADM3_EqAarea.shp
<br> Join: Largest.shp
<br> Result: LZ_ADM3.shp

## 1. PREPARE WORKSPACE

### 1.1 Load all packages.

In [1]:
# Built-in:
# dir(), print(), range(), format(), int(), len(), list(), max(), min(), zip(), sorted(), sum(), open(), del, = None, try except, with as, for in, if elif else
# Also: list.append(), list.insert(), list.remove(), count(), startswith(), endswith(), contains(), replace()

import os, sys, glob, re, time, subprocess, string # os.getcwd(), os.path.join(), os.listdir(), os.remove(), time.ctime(), glob.glob(), string.zfill(), string.join()
from os.path import exists # exists()
from functools import reduce # reduce()

import geopandas as gpd # read_file(), GeoDataFrame(), sjoin_nearest(), to_crs(), to_file(), .crs, buffer(), dissolve()
import pandas as pd # .dtypes, Series(), concat(), DataFrame(), read_table(), merge(), to_csv(), .loc[], head(), sample(), astype(), unique(), rename(), between(), drop(), fillna(), idxmax(), isna(), isin(), apply(), info(), sort_values(), notna(), groupby(), value_counts(), duplicated(), drop_duplicates()
from shapely.geometry import Point, LineString, Polygon, shape, MultiPoint
from shapely.ops import cascaded_union
from shapely.validation import make_valid  # in apply(make_valid)
import shapely.wkt

import numpy as np # median(), mean(), tolist(), .inf
import fiona, rioxarray # fiona.open()
import rasterio # open(), write_band(), .name, .count, .width, .height. nodatavals, .meta, update(), copy(), write()
from rasterio.plot import show
from rasterio import features # features.rasterize()
from rasterio.features import shapes
from rasterio import mask # rasterio.mask.mask()
from rasterio.enums import Resampling # rasterio.enums.Resampling()
from osgeo import gdal, osr, ogr, gdal_array, gdalconst # Open(), SpatialReference, WarpOptions(), Warp(), GetDataTypeName(), GetRasterBand(), GetNoDataValue(), Translate(), GetProjection(), GetAttrValue()

In [2]:
ProjectFolder = os.getcwd()
print(ProjectFolder)

Q:\GIS\povertyequity\PTI_Sahel\LivelihoodZones_forPython


### 1.2 User-defined functions.

In [3]:
def ListFromRange(r1, r2):
    return [item for item in range(r1, r2+1)]

In [4]:
# From Stack Exchange @RutgerH
# https://gis.stackexchange.com/questions/163685/reclassify-a-raster-value-to-9999-and-set-it-to-the-nodata-value-using-python-a
def readRaster(filename):
    filehandle = gdal.Open(filename)
    band1 = filehandle.GetRasterBand(1)
    geotransform = filehandle.GetGeoTransform()
    geoproj = filehandle.GetProjection()
    Z = band1.ReadAsArray()
    xsize = filehandle.RasterXSize
    ysize = filehandle.RasterYSize
    return xsize,ysize,geotransform,geoproj,Z

In [5]:
# Default arguments can be changed here, or can be specified below when running the functions.
def writeRaster(filename,geotransform,geoprojection,data, NoDataVal=0, dst_datatype=gdal.GDT_UInt32):
    (x,y) = data.shape
    Dformat = "GTiff"
    driver = gdal.GetDriverByName(Dformat)
    # you can change the dataformat but be sure to be able to store negative values including -9999
    dst_ds = driver.Create(filename,y,x,1,dst_datatype)
    dst_ds.GetRasterBand(1).WriteArray(data)
    dst_ds.SetGeoTransform(geotransform)
    dst_ds.SetProjection(geoprojection)
    dst_ds.GetRasterBand(1).SetNoDataValue(NoDataVal)
    return 1
    dst_ds = None

In [6]:
def calcShell(A, OutFile, Calculation, OutType = '', 
              B=None, C=None, D=None, E=None, F=None, G=None):
    """Raster math using gdal_calc.py.

    The OSgeo package for Python API does not make raster calculations
    easy outside of the shell. This function plugs up to 6 raster files
    into a string which subprocess.call() then commits to the terminal.

        A : str
            File path to the first raster for the calculation.
        B : str
            File path to the second raster for the calculation.
        OutFile : str
            File path where to store the raster generated from the calculation.
        Calculation : str
            Algebra that uses A and B to create a new raster. Use double quotes.
    """
    print('Running for %s. %s' % (A, time.ctime()))
    cmd = 'gdal_calc.py -A ' + A
    if B is not None:
        cmd = cmd + ' -B ' + B 
    if C is not None:
        cmd = cmd + ' -C ' + C 
    if D is not None:
        cmd = cmd + ' -D ' + D
    if E is not None:
        cmd = cmd + ' -E ' + E
    if F is not None:
        cmd = cmd + ' -F ' + F
    if G is not None:
        cmd = cmd + ' -G ' + G
    cmd = cmd + OutType + ' --outfile=' + OutFile + ' --overwrite --calc=' + Calculation
    subprocess.call(cmd, shell=True)
    cmd = A = B = C = D = E = F = G = None
    print('Ran in shell. See OutFile folder to inspect results. %s' % time.ctime())

In [7]:
def mosaicShell(A, B, OutFile, Band = 1, OutType = '',
                  C=None, D=None, E=None, F=None, G=None):
    print('Running for %s. %s' % (A, time.ctime()))
    
    StringFiles = ' '.join([A,B])
    
    for RasterName in [C,D,E,F,G]:
        if RasterName is not None:
            StringFiles = ' '.join([StringFiles, RasterName])
        else:
            pass
        
    cmd = 'gdal_merge.py -o ' + OutFile + OutType + ' -of gtiff ' + StringFiles
    
    subprocess.call(cmd, shell=True)
    print('Ran in shell. See OutFile folder to inspect results. %s' % time.ctime())

In [8]:
def RasterToShapefile(InRasterPath, OutFilePath = 'RastToShp.shp', Band=1, 
                      OutName='RastToShp', VariableName='value', Driver = 'ESRI Shapefile'):
    """Raster tiff to vector polygon shapefile.
    Can also be used for other file types like geopackage, but note that this code
    currently does not account for writing into an existing file. It will write over
    the file if specified as the file path.
    
    """
    Raster = gdal.Open(InRasterPath)
    RasterBand = Raster.GetRasterBand(Band)
    
    OutDriver = ogr.GetDriverByName(Driver)
    InProj = Raster.GetProjectionRef()
    SpatRef = osr.SpatialReference()
    SpatRef.ImportFromWkt(InProj)
    print(InProj, '\n\n', SpatRef)
    
    if exists(OutFilePath):
        OutFile = ogr.Open(OutFilePath)
    else:
        OutFile = OutDriver.CreateDataSource(OutFilePath)
    OutLayer = OutFile.CreateLayer(OutName, srs = SpatRef, geom_type = ogr.wkbPolygon)
    OutField = ogr.FieldDefn(VariableName, ogr.OFTInteger)
    OutLayer.CreateField(OutField)
    OutField = OutLayer.GetLayerDefn().GetFieldIndex(VariableName)
    print('\n', OutFile, '\n', OutLayer, '\n', OutField)
    
    print('Vectorizing. Input: %s. %s' % (InRasterPath, time.ctime()))
    gdal.Polygonize(RasterBand, None, OutLayer, 0, [], callback=None)
    print('Completed polygons. Stored as: %s. %s' % (OutFilePath, time.ctime()))

    del Raster, RasterBand, OutFile, OutLayer

In [9]:
def rioStats(InRasterPath, Band = 1):
    out = rasterio.open(InRasterPath)
    stats = []
    band = out.read(Band)
    stats.append({
        'raster': out.name,
        'bands': out.count,
        'data type': out.dtypes,
        'no data value': out.nodatavals,
        'width': out.width,
        'height': out.height,
        'min': band.min(),
        'mean': band.mean(),
        'median': np.median(band),
        'max': band.max()})
    print("\n", stats)
    
    out = band = None

In [10]:
def ShapeToRaster(Shapefile, ValueVar, MetaRasterPath, OutFilePath = 'ShpToRast.tif', Band=1, NewDType=None):
    """
    Polygon spatial object to raster tiff.
    """
    # Copy and update the metadata from another raster for the output
    MetaRaster = rasterio.open(MetaRasterPath)
    meta = MetaRaster.meta.copy()
    meta.update(compress='lzw')
    if NewDType is not None:
        meta.update(dtype=NewDType)
    MetaRaster.meta

    print("Rasterizing dataset. %s" % time.ctime())
    with rasterio.open(OutFilePath, 'w+', **meta) as out:
        out_arr = out.read(Band)

        # this is where we create a generator of geom, value pairs to use in rasterizing
        shapes = ((geom,value) for geom, value in zip(Shapefile.geometry, Shapefile[ValueVar]))

        burned = features.rasterize(shapes=shapes, fill=0, out=out_arr, transform=out.transform)
        out.write_band(1, burned)
    out = burned = shapes = None
    
    print("Finished rasterizing. Checking contents. %s" % time.ctime())
    rioStats(OutFilePath)

In [11]:
def MaskByZone(MaskPath, SourceFolder, DestFolder, SourceList = None,
               MaskLayerName = None, dstSRS = 'ESRI:102022'):
    """
    Reduces the size of a raster's valid data cells to vector areas of interest.
    This is useful if the raster data needs to be vectorized later to save space.
    
    The script prepares the vector zones as a list of geometries in the desired
    spatial reference system, then warps each raster in the specified source
    folder to the same SRS. Masking in rasterio then reclassifies any raster cells
    falling outside of a mask polygon as NoData.
    """
    
    ProjSRS = osr.SpatialReference()
    ProjSRS.SetFromUserInput(dstSRS)
    ProjWarp = gdal.WarpOptions(dstSRS = dstSRS)
    
    if SourceList is not None:
        SourceFiles = SourceList
    else:
        SourceFiles = []
        SourceFiles = SourceFiles + [i for i in os.listdir(''.join([SourceFolder, r'/'])) if i.endswith('tif')]
        print(SourceFiles)

    
    ### 1. ASSIGN SPATIAL REFERENCE SYSTEM OF VECTOR MASK AND LOAD GEOMETRIES
    Vector = gpd.read_file(filename=MaskPath, layer=MaskLayerName)
    if Vector.crs != dstSRS:
        if MaskLayerName == None:
            MaskPath = MaskPath + '_temp'
        else:
            MaskLayerName = MaskLayerName + '_temp'
        Vector.to_crs(dstSRS).to_file(filename=MaskPath, layer=MaskLayerName)
    Vector = None # We're reloading the geometries with fiona
    
    with fiona.open(MaskPath, mode="r", layer=MaskLayerName) as Vector:
        MaskGeom = [feature["geometry"] for feature in Vector] # Identify the bounding areas of the mask.
    
    
    ### 2. PREPARE DESTINATION FILES
    for FileName in SourceFiles:
    
        InputRasterPath = os.path.join(ProjectFolder, SourceFolder, FileName)
        
        Sensor = re.search('[A-Z]+_', FileName)
        if Sensor is None:
            Sensor = ''
        else:
            Sensor = Sensor.group(0)

        Year = re.search('\d{4}', FileName)
        if Year is None:
            Year = ''
        else:
            Year = Year.group(0)

        if FileName.endswith('avg.tif') == True:
            IndicType = '_avg'
        elif FileName.endswith('cfc.tif') == True:
            IndicType = '_cfc'
        else:
            IndicType = ''

        TempOutputName = 'Temp_' + Sensor + Year + IndicType + '.tif'
        TempOutputPath = os.path.join(ProjectFolder, DestFolder, TempOutputName)
        FinalOutputName = 'Msk_' + Sensor + Year + IndicType + '.tif'
        FinalOutputPath = os.path.join(ProjectFolder, DestFolder, FinalOutputName)

    ### 3. ASSIGN SPATIAL REFERENCE SYSTEM OF RASTER(S)
        InputRasterObject = gdal.Open(InputRasterPath)
        SourceSRS = osr.SpatialReference(wkt=InputRasterObject.GetProjection())
        print('Source projection: ', SourceSRS.GetAttrValue('projcs'))
        print('Destination projection: ', ProjSRS.GetAttrValue('projcs'))

        if SourceSRS.GetAttrValue('projcs') != ProjSRS.GetAttrValue('projcs'):
            Warp = gdal.Warp(TempOutputPath, # Where to store the warped raster
                         InputRasterObject, # Which raster to warp
                         format='GTiff', 
                         options=ProjWarp) # Reproject to Africa Albers Equal Area Conic
            print('Finished gdal.Warp() for %s. %s \n' % (FileName, time.ctime()))

            Warp = None # Close the files
        else:
            pass
        InputRasterObject = None
        
    ### 4. RECLASSIFY AS NODATA IF OUTSIDE OF SETTLEMENT BUFFER ZONE.
        if exists(TempOutputPath):
            NewInputPath = TempOutputPath 
            print("We warped the data, so we'll use that file for next step.")
        else:
            NewInputPath = InputRasterPath 
            print("We skipped the warp, so we continue to use the source file.")

        with rasterio.open(NewInputPath) as InputRasterObject:
            MaskedOutputRaster, OutTransform = rasterio.mask.mask(
                InputRasterObject, MaskGeom, crop=True) # Anything outside the mask is reclassed to the raster's NoData value.
            OutMetaData = InputRasterObject.meta.copy()
        print('Finished rasterio.mask.mask() for %s. %s \n' % (FileName, time.ctime()))

        OutMetaData.update({"driver": "GTiff",
                         "height": MaskedOutputRaster.shape[1],
                         "width": MaskedOutputRaster.shape[2],
                         "transform": OutTransform})

        with rasterio.open(FinalOutputPath, "w", **OutMetaData) as dest:
            dest.write(MaskedOutputRaster)
        print('Written to file. %s \n' % time.ctime())
        InputRasterObject = None

        if exists(TempOutputPath):
            try:  # Finally, remove the intermediate file from disk
                os.remove(TempOutputPath)
            except OSError:
                pass
            print('Removed intermediate file. %s \n' % time.ctime())
        else:
            pass


    print('\n \n Finished all years in list. %s' % time.ctime())

---

## 2. PREPARE DATA
All datasets are already projected to: Africa Albers Equal Area Conic

### 2.1 Merge and subset GRID3 Sahel datasets

In [16]:
fiona.listlayers('LZ.gpkg')

['GRID3_NER_rural',
 'GRID3_BFA_rural',
 'GRID3_MLI_rural',
 'GRID3_TCD_rural',
 'LZ_harmonized',
 'ADM3',
 'ADM2',
 'ADM1']

In [13]:
G3B, G3M, G3N, G3T = gpd.read_file('LZ.gpkg', layer=1), gpd.read_file('LZ.gpkg', layer=2), gpd.read_file('LZ.gpkg', layer=0), gpd.read_file('LZ.gpkg', layer=3)
G3B.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 614846 entries, 0 to 614845
Data columns (total 5 columns):
 #   Column      Non-Null Count   Dtype   
---  ------      --------------   -----   
 0   OBJECTID    614846 non-null  int64   
 1   type        614846 non-null  object  
 2   population  614846 non-null  float64 
 3   pop_un_adj  614846 non-null  float64 
 4   geometry    614846 non-null  geometry
dtypes: float64(2), geometry(1), int64(1), object(1)
memory usage: 23.5+ MB


In [14]:
G3N.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 320968 entries, 0 to 320967
Data columns (total 5 columns):
 #   Column      Non-Null Count   Dtype   
---  ------      --------------   -----   
 0   OBJECTID    320968 non-null  int64   
 1   type        320968 non-null  object  
 2   population  320968 non-null  float64 
 3   pop_un_adj  320968 non-null  float64 
 4   geometry    320968 non-null  geometry
dtypes: float64(2), geometry(1), int64(1), object(1)
memory usage: 12.2+ MB


In [15]:
G3list = [G3B, G3M, G3N, G3T]
G3 = pd.concat(G3list)
print(G3.info(), G3.head(10))

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1709337 entries, 0 to 353241
Data columns (total 5 columns):
 #   Column      Dtype   
---  ------      -----   
 0   OBJECTID    int64   
 1   type        object  
 2   population  float64 
 3   pop_un_adj  float64 
 4   geometry    geometry
dtypes: float64(2), geometry(1), int64(1), object(1)
memory usage: 78.2+ MB
None    OBJECTID                   type   population   pop_un_adj  \
0        17  Small Settlement Area    69.158031    63.314946   
1        18  Small Settlement Area  1457.134585  1334.022913   
2        19  Small Settlement Area   132.393655   121.207863   
3        20  Small Settlement Area   151.230414   138.453126   
4        21  Small Settlement Area    71.164482    65.151876   
5        22  Small Settlement Area   229.821181   210.403846   
6        23  Small Settlement Area   170.399199   156.002360   
7        24  Small Settlement Area  1171.680225  1072.686279   
8        25  Small Settlement Area   369.2

In [17]:
G3[G3['type'].str.startswith("B")] # Double-checking that Built Up Area was removed from all files before the merge.

Unnamed: 0,OBJECTID,type,population,pop_un_adj,geometry


In [18]:
G3.to_file(driver='GPKG', filename='LZ.gpkg', layer='GRID3_Sahel_rural')

### 2.2 Merge and empty out WorldPop Sahel datasets for raster snapping

In [38]:
mosaicShell(A='bfa_ppp_2020_constrained.tif', B='mli_ppp_2020_UNadj_constrained.tif', 
           C='ner_ppp_2020_UNadj_constrained.tif', D='tcd_ppp_2020_UNadj_constrained.tif',
           OutFile='sahel_ppp_2020_UNadj_constrained.tif')

Running for bfa_ppp_2020_constrained.tif. Sun Apr 30 16:03:34 2023
Ran in shell. See OutFile folder to inspect results. Sun Apr 30 16:05:26 2023


In [54]:
rioStats('sahel_ppp_2020_UNadj_constrained.tif')


 [{'raster': 'sahel_ppp_2020_UNadj_constrained.tif', 'bands': 1, 'data type': ('float32',), 'no data value': (None,), 'width': 43495, 'height': 21070, 'min': 0.0, 'mean': 0.09134865, 'median': 0.0, 'max': 1414.3038}]


In [62]:
xsize,ysize,geotransform,geoproj,Z = readRaster('sahel_ppp_2020_UNadj_constrained.tif')

In [63]:
Z[Z!=0] = 0 # Not using the population data, so let's reclassify all values to the NoData value.

In [64]:
writeRaster('Empty_Sahel_100m.tif',geotransform,geoproj,Z, NoDataVal=0, dst_datatype=gdal.GDT_UInt32)

1

In [65]:
inRaster = gdal.Open('Empty_Sahel_100m.tif')
outPath = 'Empty_Sahel_100m_EqArea.tif'
warp = gdal.Warp(outPath,inRaster,dstSRS='ESRI:102022')
warp = None # Closes the files

### 2.3 Create numeric classes for livelihood zones

In [34]:
LZ = gpd.read_file('LZ.gpkg', layer='LZ_harmonized')
LZ.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   OBJECTID   63 non-null     int64   
 1   LZNAMEEN   63 non-null     object  
 2   LZNAMEFR   63 non-null     object  
 3   OECD_ZONE  63 non-null     object  
 4   geometry   63 non-null     geometry
dtypes: geometry(1), int64(1), object(3)
memory usage: 2.6+ KB


In [35]:
LZ['OECD_ZNUM'] = LZ.groupby('OECD_ZONE').grouper.group_info[0]+1
LZ.sample(20)

Unnamed: 0,OBJECTID,LZNAMEEN,LZNAMEFR,OECD_ZONE,geometry,OECD_ZNUM
50,14,Center-eastern Millet and Livestock,Centre-est mil et élevage,Agro-Pastoral,"MULTIPOLYGON (((-2940358.653 1787933.704, -294...",1
42,6,"Western Groundnut, Sorghum, and Maize","Ouest arachide, sorgho et maïs",Rainfed Agriculture,"MULTIPOLYGON (((-3721867.182 1657723.291, -372...",7
55,0,Eastern Rainfed Cereals and Market Gardening,Zone est: céréales pluviales et maraïchage,Rainfed Agriculture,"MULTIPOLYGON (((-450402.789 1403340.854, -4241...",7
7,50,"Southwest fruits, cotton, and cereals","Sud-ouest fruits, coton et céréales",Cash Crops and Rice,"MULTIPOLYGON (((-3156819.184 1344660.029, -315...",2
11,345,Rainfed Sorghum Belt,,Rainfed Agriculture,"MULTIPOLYGON (((-268959.162 1469573.378, -2689...",7
30,0,"North-west sorghum, maize, soybeans, and rice",,Irrigated and River Valley Agriculture,"MULTIPOLYGON (((-2222630.398 1373293.900, -222...",5
20,577,Not Zoned (Desert),Pas de Zone (Désert)\r\n),Desert,"MULTIPOLYGON (((-2170700.285 2128289.259, -217...",3
56,0,Western Agropastoral and Fishing,Zone ouest agropastorale et de pêche,Irrigated and River Valley Agriculture,"MULTIPOLYGON (((-1176415.584 1696549.161, -117...",5
46,10,Central Sorghum and Millet,Centre sorgho et mil,Rainfed Agriculture,"MULTIPOLYGON (((-3493051.086 1650491.189, -349...",7
3,46,North transhumant pastoralism and millet,Nord élevage transhumant et mil,Agro-Pastoral,"MULTIPOLYGON (((-2790099.917 1681066.838, -279...",1


## 3. RASTERIZE

In [70]:
ADM3, ADM2, ADM1 = gpd.read_file('LZ.gpkg', layer='ADM3'), gpd.read_file('LZ.gpkg', layer='ADM2'), gpd.read_file('LZ.gpkg', layer='ADM1')
ADM3['FID'], ADM2['FID'], ADM1['FID'] = ADM3.index, ADM2.index, ADM1.index
ADM3.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1433 entries, 0 to 1432
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   admin3Name  1433 non-null   object  
 1   admin3Pcod  1433 non-null   object  
 2   admin2Pcod  1433 non-null   object  
 3   admin1Pcod  1433 non-null   object  
 4   admin0Pcod  1433 non-null   object  
 5   ADM3_CODE   1433 non-null   object  
 6   ADM2_CODE   1433 non-null   object  
 7   ADM1_CODE   1433 non-null   object  
 8   geometry    1433 non-null   geometry
 9   FID         1433 non-null   int64   
dtypes: geometry(1), int64(1), object(8)
memory usage: 112.1+ KB


In [72]:
ShapeToRaster(ADM3, ValueVar='FID', MetaRasterPath='Empty_Sahel_100m_EqArea.tif', 
              OutFilePath='ADM3.tif', Band=1, NewDType='int32')

Rasterizing dataset. Sun Apr 30 17:41:15 2023
Finished rasterizing. Checking contents. Sun Apr 30 17:41:41 2023

 [{'raster': 'ADM3.tif', 'bands': 1, 'data type': ('int32',), 'no data value': (0.0,), 'width': 43003, 'height': 22754, 'min': 0, 'mean': 531.3980212416258, 'median': 204.0, 'max': 1432}]

 [{'raster': 'ADM3.tif', 'bands': 1, 'data type': ('int32',), 'no data value': (0.0,), 'width': 43003, 'height': 22754, 'min': 0, 'mean': 531.3980212416258, 'median': 204.0, 'max': 1432}]


In [73]:
ShapeToRaster(ADM2, ValueVar='FID', MetaRasterPath='Empty_Sahel_100m_EqArea.tif', 
              OutFilePath='ADM2.tif', Band=1, NewDType='int32')

Rasterizing dataset. Sun Apr 30 17:41:57 2023
Finished rasterizing. Checking contents. Sun Apr 30 17:42:20 2023

 [{'raster': 'ADM2.tif', 'bands': 1, 'data type': ('int32',), 'no data value': (0.0,), 'width': 43003, 'height': 22754, 'min': 0, 'mean': 66.34791939605425, 'median': 25.0, 'max': 274}]

 [{'raster': 'ADM2.tif', 'bands': 1, 'data type': ('int32',), 'no data value': (0.0,), 'width': 43003, 'height': 22754, 'min': 0, 'mean': 66.34791939605425, 'median': 25.0, 'max': 274}]


In [74]:
ShapeToRaster(ADM1, ValueVar='FID', MetaRasterPath='Empty_Sahel_100m_EqArea.tif', 
              OutFilePath='ADM1.tif', Band=1, NewDType='int32')

Rasterizing dataset. Sun Apr 30 17:42:37 2023
Finished rasterizing. Checking contents. Sun Apr 30 17:42:59 2023

 [{'raster': 'ADM1.tif', 'bands': 1, 'data type': ('int32',), 'no data value': (0.0,), 'width': 43003, 'height': 22754, 'min': 0, 'mean': 13.607009924438062, 'median': 7.0, 'max': 52}]

 [{'raster': 'ADM1.tif', 'bands': 1, 'data type': ('int32',), 'no data value': (0.0,), 'width': 43003, 'height': 22754, 'min': 0, 'mean': 13.607009924438062, 'median': 7.0, 'max': 52}]


In [75]:
ShapeToRaster(LZ, ValueVar='OECD_ZNUM', MetaRasterPath='Empty_Sahel_100m_EqArea.tif', 
              OutFilePath='LZ.tif', Band=1, NewDType='int32')

Rasterizing dataset. Sun Apr 30 17:43:42 2023
Finished rasterizing. Checking contents. Sun Apr 30 17:44:02 2023

 [{'raster': 'LZ.tif', 'bands': 1, 'data type': ('int32',), 'no data value': (0.0,), 'width': 43003, 'height': 22754, 'min': 0, 'mean': 1.883359035411637, 'median': 1.0, 'max': 8}]

 [{'raster': 'LZ.tif', 'bands': 1, 'data type': ('int32',), 'no data value': (0.0,), 'width': 43003, 'height': 22754, 'min': 0, 'mean': 1.883359035411637, 'median': 1.0, 'max': 8}]


## 4. CONCATENATION

In [76]:
# Calculate number of digits for each dataset.
LZ_rio = rasterio.open('LZ.tif').read(1)
ADM1_rio = rasterio.open('ADM1.tif').read(1)
ADM2_rio = rasterio.open('ADM2.tif').read(1)
ADM3_rio = rasterio.open('ADM3.tif').read(1)

len_LZ = len(str(LZ_rio.max()))
len_ADM1 = len(str(ADM1_rio.max()))
len_ADM2 = len(str(ADM2_rio.max()))
len_ADM3 = len(str(ADM3_rio.max()))
print(len_LZ, len_ADM1, len_ADM2, len_ADM3)

LZ_rio = ADM1_rio = ADM2_rio = ADM3_rio = None

1 2 3 4


In [79]:
# Calculations
# The number of digits in the largest index value of Variable B becomes
# the number of zeroes we tack onto Variable A in the serial.

Calc = "(A*" + str(10**len_LZ) + ")+B" 

calcShell(A='ADM1.tif', B='LZ.tif', OutFile='ADM1_LZ.tif', Calculation=Calc)
calcShell(A='ADM2.tif', B='LZ.tif', OutFile='ADM2_LZ.tif', Calculation=Calc)
calcShell(A='ADM3.tif', B='LZ.tif', OutFile='ADM3_LZ.tif', Calculation=Calc)

Running for ADM1.tif. Sun Apr 30 18:25:06 2023
Ran in shell. See OutFile folder to inspect results. Sun Apr 30 18:26:40 2023
Running for ADM2.tif. Sun Apr 30 18:26:40 2023
Ran in shell. See OutFile folder to inspect results. Sun Apr 30 18:28:00 2023
Running for ADM3.tif. Sun Apr 30 18:28:00 2023
Ran in shell. See OutFile folder to inspect results. Sun Apr 30 18:29:40 2023


## 5. VECTOR JOINS

### 5.1 Polygonize admin and livelihood combos.

In [None]:
RasterToShapefile('ADM1_LZ.tif', OutFilePath='ADM1_LZ.shp', Driver='ESRI Shapefile')
RasterToShapefile('ADM2_LZ.tif', OutFilePath='ADM2_LZ.shp', Driver='ESRI Shapefile')
RasterToShapefile('ADM3_LZ.tif', OutFilePath='ADM3_LZ.shp', Driver='ESRI Shapefile')

In [12]:
ADM1_LZ = gpd.read_file('ADM1_LZ.shp')
ADM2_LZ = gpd.read_file('ADM2_LZ.shp')
ADM3_LZ = gpd.read_file('ADM3_LZ.shp')

print(ADM1_LZ.info(), 
      ADM2_LZ.info(), 
      ADM3_LZ.info())

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3958 entries, 0 to 3957
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   value     3958 non-null   int64   
 1   geometry  3958 non-null   geometry
dtypes: geometry(1), int64(1)
memory usage: 62.0 KB
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 6349 entries, 0 to 6348
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   value     6349 non-null   int64   
 1   geometry  6349 non-null   geometry
dtypes: geometry(1), int64(1)
memory usage: 99.3 KB
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 9980 entries, 0 to 9979
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   value     9980 non-null   int64   
 1   geometry  9980 non-null   geometry
dtypes: geometry(1), int64(1)
memory usage: 156.1 KB
None None None


### 5.2 Split concatenated IDs

In [13]:
# # If starting from here...
# len_LZ = 1
# len_ADM1 = 2
# len_ADM2 = 3
# len_ADM3 = 4

In [14]:
ADM1_LZ['valstring'] = ADM1_LZ['value'].astype(str).str.zfill(3)
ADM1_LZ['ADM1_CODE'] = ADM1_LZ['valstring'].str[:-len_LZ].astype(int)
ADM1_LZ['LZ'] = ADM1_LZ['valstring'].str[-len_LZ:].astype(int)

ADM2_LZ['valstring'] = ADM2_LZ['value'].astype(str).str.zfill(4)
ADM2_LZ['ADM2_CODE'] = ADM2_LZ['valstring'].str[:-len_LZ].astype(int)
ADM2_LZ['LZ'] = ADM2_LZ['valstring'].str[-len_LZ:].astype(int)

ADM3_LZ['valstring'] = ADM3_LZ['value'].astype(str).str.zfill(5)
ADM3_LZ['ADM3_CODE'] = ADM3_LZ['valstring'].str[:-len_LZ].astype(int)
ADM3_LZ['LZ'] = ADM3_LZ['valstring'].str[-len_LZ:].astype(int)

print(ADM1_LZ.sample(5), ADM2_LZ.sample(5), ADM3_LZ.sample(5))

           value                                           geometry  \
1486         286  POLYGON ((-1429221.977 1864453.146, -1427542.0...   
1693         117  POLYGON ((-2792847.044 1598412.309, -2792670.2...   
1068 -2147483647  POLYGON ((-1162031.745 1757382.540, -1161943.3...   
1305         188  POLYGON ((-3037226.198 1683732.817, -3036960.9...   
1503         117  POLYGON ((-2812563.597 1627677.686, -2812386.7...   

        valstring  ADM1_CODE  LZ  
1486          286         28   6  
1693          117         11   7  
1068  -2147483647 -214748364   7  
1305          188         18   8  
1503          117         11   7              value                                           geometry  \
4699         452  POLYGON ((-3449063.425 1452350.674, -3448975.0...   
2505         537  POLYGON ((-3749320.880 1614769.091, -3749232.4...   
2724         127  POLYGON ((-2790990.328 1596113.519, -2790725.0...   
3152 -2147483647  POLYGON ((-2494181.060 1565610.332, -2494092.6...   
269     

### 5.3 Spatial join with GRID3 (rural only)

In [16]:
# # If starting from here...
# G3 = gpd.read_file(filename='LZ.gpkg', layer='GRID3_Sahel_rural')

In [18]:
G3_ADM = gpd.sjoin(G3, ADM1_LZ[['ADM1_CODE', 'LZ', 'geometry']], how='left')
print('Done with ADM1.')
G3_ADM.info()

Done with ADM1.
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1718863 entries, 0 to 1709336
Data columns (total 8 columns):
 #   Column       Dtype   
---  ------       -----   
 0   OBJECTID     int64   
 1   type         object  
 2   population   float64 
 3   pop_un_adj   float64 
 4   geometry     geometry
 5   index_right  int64   
 6   ADM1_CODE    int32   
 7   LZ           int32   
dtypes: float64(2), geometry(1), int32(2), int64(2), object(1)
memory usage: 104.9+ MB


In [19]:
G3_ADM.sample(20)

Unnamed: 0,OBJECTID,type,population,pop_un_adj,geometry,index_right,ADM1_CODE,LZ
813275,198611,Hamlet,2.441029,2.124485,"MULTIPOLYGON (((-3144295.559 1430155.467, -314...",3377,20,7
412843,413141,Hamlet,13.088088,11.98229,"MULTIPOLYGON (((-2789892.759 1499474.903, -278...",2892,9,7
574465,574830,Hamlet,13.84599,12.676158,"MULTIPOLYGON (((-2505768.921 1540631.298, -250...",2478,11,1
27127,27144,Hamlet,38.429897,35.182998,"MULTIPOLYGON (((-3043716.365 1248291.755, -304...",3940,2,2
1196631,161732,Hamlet,17.597507,17.768718,"MULTIPOLYGON (((-2531281.046 1684283.321, -253...",1681,27,1
333996,334233,Hamlet,10.873146,9.954486,"MULTIPOLYGON (((-2535768.293 1384897.771, -253...",3834,7,7
1426655,70714,Hamlet,8.13925,8.053419,"MULTIPOLYGON (((-819330.303 1291222.122, -8193...",3917,33,1
1083897,48821,Small Settlement Area,188.512478,190.34656,"MULTIPOLYGON (((-2478631.938 1569923.537, -247...",3289,27,7
108300,108398,Hamlet,7.509912,6.875408,"MULTIPOLYGON (((-2985693.089 1337880.288, -298...",3848,8,2
1412447,56495,Small Settlement Area,219.26664,216.954413,"MULTIPOLYGON (((-1042841.321 1169726.060, -104...",3945,40,2


In [17]:
G3_ADM = gpd.sjoin(G3_ADM, ADM2_LZ[['ADM2_CODE', 'LZ', 'geometry']], how='left')
print('Done with ADM2.')
G3_ADM = pd.DataFrame(gpd.sjoin(G3_ADM, ADM3_LZ[['ADM3_CODE', 'LZ', 'geometry']], how='left')).drop(columns='geometry')
G3_ADM.info()

Done with ADM1.


ValueError: 'index_left' and 'index_right' cannot be names in the frames being joined

In [None]:
Largest = LZ_ADM3_G3.loc[LZ_ADM3_G3.groupby(["ADM3"])["popUN"].idxmax()]