# Raster Match

By Cascade Tuholske, June 2020
This notebook reprojects the gridded population (popgrid) rasters onto the same CRS, extent, and resolution so that they stack. <br>

Updated by CPT, Sep 2020: all rasters are now matched to the GPWv4 metadata.

In [2]:
import os
from glob import glob

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import rioxarray as rio
import xarray as xr

In [3]:
# Input / output directories (both end with a path separator so names can be appended directly)
data_in = '/Users/cascade/Github/PopGridCompare/data/raw/'
data_out = '/Users/cascade/Github/PopGridCompare/data/interim/'

# Raw population rasters, all located under data_in
wp_path = f'{data_in}WorldPop16/ppp_2016_1km_Aggregated.tif'
ls_path = f'{data_in}LandScan-Global-2015/lspop2015/w001001.adf'
esri_path = f'{data_in}ESRI_WPE_2016_Pop/WPE_1KM_2016_Pop.tif'
ghs_path = f'{data_in}GHS-Pop/GHS_POP_E2015_GLOBE_R2019A_4326_30ss_V1_0.tif'
gpw_path = f'{data_in}gpw_v4/gpw-v4-population-count-rev11_2015_30_sec_tif/gpw_v4_population_count_rev11_2015_30_sec.tif'

# Urban/rural layer, read from the interim folder (data_out)
modis_path = f'{data_out}MODIS2015_LCType2_1km-urban.tif'
# smod_path = data_out+'smod15-rural-urban.tif'

## Open files and check meta data

In [None]:
# SMOD input is currently disabled: every line in this cell is commented out.
# The "Urban / Rural SMOD" cells further down use `smod_match`, which only exists if these
# lines (and the other commented-out smod lines in this notebook) are re-enabled.
# smod15-rural-urban.tif is the GHSL-SMOD 2015 reclassified to urban/rural 
# smod : width = 36082 height = 18000 epsg = 4326
# smod = rio.open_rasterio(smod_path)
# smod

In [5]:
# Open the MODIS urban/rural layer (displayed as a uint8 DataArray).
# MODIS2015_LCType2_1km-urban.tif is 2015 MODIS from GEE reclassified to urban/rural 
# modis : height: 17590, width: 40031, MODIS Sinusoidal (x/y coordinates are in metres, not degrees)
modis = rio.open_rasterio(modis_path)
modis

<xarray.DataArray (band: 1, y: 17590, x: 40031)>
[704145290 values with dtype=uint8]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 9.672e+06 9.671e+06 ... -7.916e+06 -7.917e+06
  * x            (x) float64 -2.001e+07 -2.001e+07 ... 2.001e+07 2.002e+07
    spatial_ref  int64 0
Attributes:
    transform:     (1000.0000000010791, 0.0, -20015109.354, 0.0, -999.9999999...
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref

In [6]:
# GPW v4 : width = 43200 height = 21600 epsg = 4326
# This raster is the reference grid: the other rasters are reprojected to match it further down.
gpw = rio.open_rasterio(gpw_path)
gpw

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
[933120000 values with dtype=float32]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    RepresentationType:  ATHEMATIC
    transform:           (0.00833333333333333, 0.0, -180.0, 0.0, -0.008333333...
    _FillValue:          -3.4028230607370965e+38
    scale_factor:        1.0
    add_offset:          0.0
    grid_mapping:        spatial_ref

In [7]:
# World Pop 2016 (?_): width = 43200 height = 18720 epsg = 4326
# NOTE(review): the "(?_)" above is the original author's marker — presumably the year is unconfirmed; verify.
# Covers fewer rows than GPW (18720 vs 21600), so it does not span the full latitude range.
wp = rio.open_rasterio(wp_path)
wp

<xarray.DataArray (band: 1, y: 18720, x: 43200)>
[808704000 values with dtype=float32]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 84.0 83.99 83.98 83.97 ... -71.98 -71.99 -72.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    transform:     (0.0083333333, 0.0, -180.001249265, 0.0, -0.0083333333, 83...
    _FillValue:    -3.4028234663852886e+38
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref

In [8]:
# ESRI 2016 : width = 40074 height = 14285 epsg = 4326
# Its grid size differs from GPW (43200 x 21600), so it has to be resampled when matched.
esri = rio.open_rasterio(esri_path)
esri

<xarray.DataArray (band: 1, y: 14285, x: 40074)>
[572457090 values with dtype=int32]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 73.32 73.31 73.3 73.29 ... -54.98 -54.99 -55.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    STATISTICS_MAXIMUM:      336720
    STATISTICS_MEAN:         401.47917136019
    STATISTICS_MINIMUM:      1
    STATISTICS_SKIPFACTORX:  1
    STATISTICS_SKIPFACTORY:  1
    STATISTICS_STDDEV:       1594.7670745165
    transform:               (0.008983196, 0.0, -179.99191516916386, 0.0, -0....
    _FillValue:              -2147483647.0
    scale_factor:            1.0
    add_offset:              0.0
    grid_mapping:            spatial_ref

In [9]:
# LS 2015 (LandScan, read from the w001001.adf grid file) : width = 43200 height = 21600 epsg = 4326
ls = rio.open_rasterio(ls_path)
ls

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
[933120000 values with dtype=int32]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    STATISTICS_MAXIMUM:  38537
    STATISTICS_MEAN:     16.167777587702
    STATISTICS_MINIMUM:  0
    STATISTICS_STDDEV:   233.963508769
    transform:           (0.0083333333333333, 0.0, -180.0, 0.0, -0.0083333333...
    _FillValue:          -2147483647.0
    scale_factor:        1.0
    add_offset:          0.0
    grid_mapping:        spatial_ref

In [10]:
# GHS 2015 (GHS-Pop) : width = 43200 height = 21600 epsg = 4326
ghs = rio.open_rasterio(ghs_path)
ghs

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
[933120000 values with dtype=float64]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    STATISTICS_COVARIANCES:  268645.1471199603
    STATISTICS_MAXIMUM:      459434.61914063
    STATISTICS_MEAN:         32.932334417347
    STATISTICS_MINIMUM:      0
    STATISTICS_SKIPFACTORX:  1
    STATISTICS_SKIPFACTORY:  1
    STATISTICS_STDDEV:       518.30989486982
    transform:               (0.008333333333333333, 0.0, -180.0, 0.0, -0.0083...
    _FillValue:              -200.0
    scale_factor:            1.0
    add_offset:              0.0
    grid_mapping:            spatial_ref

# Reset zero values

In [11]:
# Print the top-left pixel of every raster; it is used here as a stand-in for the NA value
#print('smod na value is', smod.data[0][0][0])
for label, rst in [('modis', modis), ('GPW', gpw), ('World Pop', wp),
                   ('LandScan', ls), ('ESRI', esri), ('GHS', ghs)]:
    print(label + ' na value is', rst.data[0][0][0])

modis na value is 0
GPW na value is -3.402823e+38
World Pop na value is -3.4028235e+38
LandScan na value is -2147483647
ESRI na value is -2147483647
GHS na value is -200.0


In [13]:
#### Set all NAs and 0s to zero: every value below 1 is replaced by 0
# NOTE(review): for the float rasters this also zeroes fractional values in (0, 1) — confirm that is intended.
# smod.data = np.where(smod.data < 1, 0, smod.data)
for rst in (modis, gpw, wp, ls, esri, ghs):
    rst.data = np.where(rst.data < 1, 0, rst.data)


In [14]:
# Re-print the top-left pixel of every raster to confirm it is now 0
#print('smod na value is', smod.data[0][0][0])
for label, rst in [('modis', modis), ('GPW', gpw), ('World Pop', wp),
                   ('LandScan', ls), ('ESRI', esri), ('GHS', ghs)]:
    print(label + ' na value is', rst.data[0][0][0])

modis na value is 0
GPW na value is 0.0
World Pop na value is 0.0
LandScan na value is 0
ESRI na value is 0
GHS na value is 0.0


## Test Matching and Reprojecting

Following the rioxarray `reproject_match` example: https://corteva.github.io/rioxarray/html/examples/reproject_match.html

In [30]:
# modis doesn't have an assigned fill value (no _FillValue in its attributes when opened) so we make one;
# the next cell reads attrs['_FillValue'] on every raster and would fail for modis without it
modis.attrs['_FillValue'] = 0

In [31]:
# Show the _FillValue attribute currently stored on each raster
# print(smod.attrs['_FillValue'])
for rst in (modis, gpw, esri, ls, wp, ghs):
    print(rst.attrs['_FillValue'])

0
-3.4028230607370965e+38
-2147483647.0
-2147483647.0
-3.4028234663852886e+38
-200.0


In [32]:
# Use one common NA value (_FillValue attribute) for every raster
na_val = 0
# smod.attrs['_FillValue'] = na_val
for rst in (modis, gpw, esri, ls, wp, ghs):
    rst.attrs['_FillValue'] = na_val

In [33]:
# Confirm that every raster now reports the common _FillValue
#print(smod.attrs['_FillValue'])
for rst in (modis, gpw, esri, ls, wp, ghs):
    print(rst.attrs['_FillValue'])

0
0
0
0
0
0


In [34]:
# Reproject every dataset onto the GPWv4 grid (gpw itself is the reference and is left as is).
# No resampling argument is passed, so reproject_match uses its default resampling method.
#smod_match = smod.rio.reproject_match(gpw)
modis_match, ls_match, esri_match, wp_match, ghs_match = (
    rst.rio.reproject_match(gpw) for rst in (modis, ls, esri, wp, ghs)
)

In [35]:
# Check NA values (_FillValue attribute) after reprojection.
# gpw is the reference grid and was not reprojected, so it is checked directly;
# every other raster is checked through its *_match version.
#print(smod_match.attrs['_FillValue'])
print(modis_match.attrs['_FillValue'])
print(gpw.attrs['_FillValue'])
print(esri_match.attrs['_FillValue'])
print(ls_match.attrs['_FillValue'])
print(wp_match.attrs['_FillValue'])  # was wp.attrs: the reprojected WorldPop raster was never checked
print(ghs_match.attrs['_FillValue'])

0
0
0
0
0
0


In [36]:
# Check them: display each matched raster in turn (one per cell), starting with MODIS.
# After matching, the dimensions and transform should be the same as gpw.
#smod_match
modis_match

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
array([[[0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0],
        ...,
        [0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0]]], dtype=uint8)
Coordinates:
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * band         (band) int64 1
    spatial_ref  int64 0
Attributes:
    transform:     (0.00833333333333333, 0.0, -180.0, 0.0, -0.008333333333333...
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref
    _FillValue:    0

In [37]:
# Display the GPWv4 reference raster (not reprojected; its data and _FillValue were reset to 0 above)
gpw

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]], dtype=float32)
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    RepresentationType:  ATHEMATIC
    transform:           (0.00833333333333333, 0.0, -180.0, 0.0, -0.008333333...
    _FillValue:          0
    scale_factor:        1.0
    add_offset:          0.0
    grid_mapping:        spatial_ref

In [38]:
# Display LandScan after matching to the GPWv4 grid
ls_match

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
array([[[0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0],
        ...,
        [0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0]]], dtype=int32)
Coordinates:
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * band         (band) int64 1
    spatial_ref  int64 0
Attributes:
    STATISTICS_MAXIMUM:  38537
    STATISTICS_MEAN:     16.167777587702
    STATISTICS_MINIMUM:  0
    STATISTICS_STDDEV:   233.963508769
    transform:           (0.00833333333333333, 0.0, -180.0, 0.0, -0.008333333...
    scale_factor:        1.0
    add_offset:          0.0
    grid_mapping:        spatial_ref
    _FillValue:          0

In [39]:
# Display ESRI after matching to the GPWv4 grid
esri_match

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
array([[[0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0],
        ...,
        [0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0]]], dtype=int32)
Coordinates:
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * band         (band) int64 1
    spatial_ref  int64 0
Attributes:
    STATISTICS_MAXIMUM:      336720
    STATISTICS_MEAN:         401.47917136019
    STATISTICS_MINIMUM:      1
    STATISTICS_SKIPFACTORX:  1
    STATISTICS_SKIPFACTORY:  1
    STATISTICS_STDDEV:       1594.7670745165
    transform:               (0.00833333333333333, 0.0, -180.0, 0.0, -0.00833...
    scale_factor:            1.0
    add_offset:              0.0
    grid_mapping:            spatial_ref
    _FillValue:              0

In [40]:
# Display WorldPop after matching to the GPWv4 grid
wp_match

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]], dtype=float32)
Coordinates:
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * band         (band) int64 1
    spatial_ref  int64 0
Attributes:
    transform:     (0.00833333333333333, 0.0, -180.0, 0.0, -0.008333333333333...
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref
    _FillValue:    0

In [41]:
# Display GHS-Pop after matching to the GPWv4 grid
ghs_match

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]])
Coordinates:
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * band         (band) int64 1
    spatial_ref  int64 0
Attributes:
    STATISTICS_COVARIANCES:  268645.1471199603
    STATISTICS_MAXIMUM:      459434.61914063
    STATISTICS_MEAN:         32.932334417347
    STATISTICS_MINIMUM:      0
    STATISTICS_SKIPFACTORX:  1
    STATISTICS_SKIPFACTORY:  1
    STATISTICS_STDDEV:       518.30989486982
    transform:               (0.00833333333333333, 0.0, -180.0, 0.0, -0.00833...
    scale_factor:            1.0
    add_offset:              0.0
    grid_mapping:            spatial_ref
    _FillValue:              0

In [43]:
# Write the matched rasters to data_out so they can be checked and reused later.
# MODIS must be written from modis_match: the un-reprojected `modis` is still on its original
# sinusoidal grid and would not line up with the other "*matched" files.
modis_match.rio.to_raster(data_out+'modis15-rural-urban-matched.tif') # named differently for ease of use later
# gpw is the reference grid, so it is written as is
gpw.rio.to_raster(data_out+'GPWv4_matched.tif')
esri_match.rio.to_raster(data_out+'ESRI16_matched.tif')
ls_match.rio.to_raster(data_out+'LS15_matched.tif')
wp_match.rio.to_raster(data_out+'WP16_matched.tif')
ghs_match.rio.to_raster(data_out+'GHS15_matched.tif')

# Urban / Rural SMOD
ocean = 10, rural = 111, urban = 222 <br>
Mask urban and rural populations from gridded pop rasters

In [None]:
# Make a rural raster: 1 where SMOD equals the rural class (111), 0 everywhere else.
# NOTE(review): smod_match is only defined when the commented-out smod lines earlier in this
# notebook are re-enabled; on a fresh kernel this cell raises NameError as the notebook stands.
value = 111
urban_rural = smod_match.copy()
# Single-pass mask in the raster's own dtype. Only cells equal to `value` become 1; the previous
# two-step np.where also kept any cell that already held the value 1.
urban_rural.data = (urban_rural.data == value).astype(urban_rural.dtype)
urban_rural.rio.to_raster(data_out+'smod15-rural-matched.tif')

In [None]:
# Make an urban raster: 1 where SMOD equals the urban class (222), 0 everywhere else.
# NOTE(review): smod_match is only defined when the commented-out smod lines earlier in this
# notebook are re-enabled; on a fresh kernel this cell raises NameError as the notebook stands.
value = 222
urban_rural = smod_match.copy()
# Single-pass mask in the raster's own dtype. Only cells equal to `value` become 1; the previous
# two-step np.where also kept any cell that already held the value 1.
urban_rural.data = (urban_rural.data == value).astype(urban_rural.dtype)
urban_rural.rio.to_raster(data_out+'smod15-urban-matched.tif')

In [None]:
# Get matched tif files from the interim folder.
# data_out is reused instead of repeating the hard-coded directory, and the list is sorted
# because glob returns files in arbitrary order.
rst_fns = sorted(glob(data_out+'*_matched.tif'))
rst_fns

In [None]:
def raster_mask(rst_fn, urban_arr, rural_arr, data_out):
    """ Writes out masked rural and urban populations from a pop raster.

    Band 1 of the pop raster is multiplied element-wise by each mask array and the
    two products are written to <data_out><raster name>_urban.tif and
    <data_out><raster name>_rural.tif, reusing the source raster's meta data
    (only 'dtype' is replaced by the dtype of the product).

    Args:
        rst_fn = pop raster file name + path
        urban_arr = urban mask array, multiplied with the pop raster
        rural_arr = rural mask array, multiplied with the pop raster
        data_out = path to write out new rsts (used as a string prefix, so it
                   should end with a path separator)
    """

    # name for the outputs: input file name without directory or extension
    # (works for any input folder, not only paths containing 'interim/')
    rst_nm = os.path.splitext(os.path.basename(rst_fn))[0]
    print(rst_nm)

    # open the pop raster once, grab band 1 and the meta data, then close the file
    with rasterio.open(rst_fn) as src:
        arr = src.read(1)
        meta = src.meta.copy()

    # multiply
    masked = (
        ('_urban.tif', arr * urban_arr),
        ('_rural.tif', arr * rural_arr),
    )

    # write out urban, then rural
    for suffix, arr_masked in masked:
        meta['dtype'] = arr_masked.dtype
        out_fn = data_out + rst_nm + suffix
        with rasterio.open(out_fn, 'w', **meta) as out:
            out.write_band(1, arr_masked)
    print('done \n')

In [None]:
# Get smod urban and rural data

smod_urban_fn = '/Users/cascade/Github/PopGridCompare/data/interim/smod15-urban-matched.tif'
smod_rural_fn = '/Users/cascade/Github/PopGridCompare/data/interim/smod15-rural-matched.tif'

# Read band 1 of each mask inside a `with` block so the file handles are closed
# as soon as the arrays are in memory
with rasterio.open(smod_urban_fn) as src:
    smod_urban = src.read(1)
with rasterio.open(smod_rural_fn) as src:
    smod_rural = src.read(1)

In [None]:
# Run it: write an urban and a rural version of every matched pop raster into data_out
# (data_out is set again here to the same interim folder used at the top of the notebook)
data_out = '/Users/cascade/Github/PopGridCompare/data/interim/'
for rst_fn in rst_fns:
    raster_mask(rst_fn, smod_urban, smod_rural, data_out)