# Raster Match

By Cascade Tuholske, June 2020 <br>
1. Take the MODIS 2015 [MGUP](https://doi.org/10.1016/j.jag.2020.102255) urban extent data at 250m and resample it to 1-km 
2. Reproject the popgrid rasters into the same CRS, size and projection so that they stack.
3. Make urban and rural rasters for each gridded product

Updated by CPT, Sep 2020, to set all metadata to match GPWv4 <br>
Replaces Raster Match NB Dec 2020<br>
For SMOD check Raster Match in old dir <br>

In [1]:
import os
from glob import glob

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import rioxarray as rio
import xarray as xr
from rasterio.enums import Resampling

In [2]:
# file paths (hard-coded absolute paths; edit both to run on another machine)
# data_in  : directory the raw input rasters are read from
# data_out : directory the resampled, matched and masked rasters are written to
data_in = '/Users/cascade/Github/PopGridCompare/data/raw/'
data_out = '/Users/cascade/Github/PopGridCompare/data/interim/'

# Step 1

In [3]:
def resample(fn_in, fn_out, scale_factor, method):
    """ Resamples a raster and saves it out as a GeoTIFF
    Args:
        fn_in = file path and name of tif input as str
        fn_out = file path and name of tif output as str 
        scale_factor = factor applied to the pixel counts as float; the output has
                       int(height * scale_factor) rows and int(width * scale_factor)
                       columns, so 0.5 halves both dimensions
        method = method to resample (rasterio.enums.Resampling member), see rasterio documentation
    Raises:
        ValueError if scale_factor is not positive
    """

    # fail early with a clear message instead of asking GDAL for an empty array
    if scale_factor <= 0:
        raise ValueError("scale_factor must be positive, got {}".format(scale_factor))

    with rasterio.open(fn_in) as dataset:

        # target shape: band count is unchanged, rows/cols are scaled
        new_height = int(dataset.height * scale_factor)
        new_width = int(dataset.width * scale_factor)

        # resample data to target shape
        data = dataset.read(
            out_shape=(dataset.count, new_height, new_width),
            resampling=method
        )

        # scale image transform so the resampled pixels cover the same extent
        transform = dataset.transform * dataset.transform.scale(
            (dataset.width / data.shape[-1]),
            (dataset.height / data.shape[-2])
        )

        # meta data to write out; read while the dataset is still open rather than
        # from the closed handle
        out_meta = dataset.meta.copy()

    # Update meta data to describe the resampled array
    out_meta.update({"driver": "GTiff",
             "height": data.shape[1],
             "width": data.shape[2],
             "transform": transform})

    # write image 
    with rasterio.open(fn_out, "w", **out_meta) as dest:
        dest.write(data)

In [4]:
# File names: the MGUP 2015 input raster and the resampled raster written by resample()
modis_in = data_in+'MGUP_annual_2001_2018/MGUP_2015.tif'
modis_out = data_out+'MGUP_2015-1km.tif'

In [5]:
# Resample MGUP with the modal (most frequent) value of the contributing pixels.
# NOTE(review): a scale factor of 0.5 halves the row/column counts, i.e. 250 m -> 500 m
# if the input really is 250 m; a 1-km output would need 0.25 — confirm.
resample(modis_in, modis_out, 0.5, Resampling.mode)

# Step 2

In [6]:
# File names of the gridded population products (read from data_in) and of the
# resampled MGUP raster written in Step 1 (read from data_out)
wp_path = data_in+'WorldPop16/ppp_2016_1km_Aggregated.tif'
ls_path = data_in+'LandScan-Global-2015/lspop2015/w001001.adf'
esri_path = data_in+'ESRI_WPE_2016_Pop/WPE_1KM_2016_Pop.tif'
ghs_path = data_in+'GHS-Pop/GHS_POP_E2015_GLOBE_R2019A_4326_30ss_V1_0.tif'
gpw_path = data_in+'gpw_v4/gpw-v4-population-count-rev11_2015_30_sec_tif/gpw_v4_population_count_rev11_2015_30_sec.tif'
modis_path = data_out+'MGUP_2015-1km.tif'
# SMOD is no longer used in this notebook; path kept for reference
# smod_path = data_out+'smod15-rural-urban.tif'

## Open files and check meta data

In [7]:
# smod15-rural-urban.tif is the GHSL-SMOD 2015 reclassified to urban/rural 
# smod : width = 36082 height = 18000 epsg = 4326
# smod = rio.open_rasterio(smod_path)
# smod

In [8]:
# Opens the resampled MGUP 2015 raster written in Step 1 (modis_path) and displays it.
# NOTE(review): the previous comment named MODIS2015_LCType2_1km-urban.tif (2015 MODIS
# from GEE reclassified to urban/rural), which is not the file opened here — confirm.
# modis : recorded size 31982 / 80148 (height / width presumably), MODIS Sinusoidal — TODO confirm
modis = rio.open_rasterio(modis_path)
modis

In [9]:
# GPW v4 : width = 43200 height = 21600 epsg = 4326
# Open the raster and display its summary; gpw is the grid the other rasters are matched to below
gpw = rio.open_rasterio(gpw_path)
gpw

In [10]:
# World Pop 2016 : width = 43200 height = 18720 epsg = 4326
# Open the raster and display its summary
wp = rio.open_rasterio(wp_path)
wp

In [11]:
# ESRI 2016 : width = 40074 height = 14285 epsg = 4326
# Open the raster and display its summary
esri = rio.open_rasterio(esri_path)
esri

In [12]:
# LS (LandScan) 2015 : width = 43200 height = 21600 epsg = 4326
# Open the raster and display its summary
ls = rio.open_rasterio(ls_path)
ls

In [13]:
# GHS 2015 : width = 43200 height = 21600 epsg = 4326
# Open the raster and display its summary
ghs = rio.open_rasterio(ghs_path)
ghs

# Reset NA values to zero

In [14]:
# Print the first pixel (band 0, row 0, column 0) of each raster as a quick look at
# its NA value — assumes that corner pixel is nodata; TODO confirm per product
#print('smod na value is', smod.data[0][0][0])
print('modis na value is', modis.data[0][0][0])
print('GPW na value is', gpw.data[0][0][0])
print('World Pop na value is', wp.data[0][0][0])
print('LandScan na value is', ls.data[0][0][0])
print('ESRI na value is', esri.data[0][0][0])
print('GHS na value is', ghs.data[0][0][0])

modis na value is 0
GPW na value is -3.402823e+38
World Pop na value is -3.4028235e+38
LandScan na value is -2147483647
ESRI na value is -2147483647
GHS na value is -200.0


In [15]:
#### Set all NA (nodata) pixels to zero
# Every nodata value printed above is negative (modis is already 0), so only negative
# pixels are reset. A `< 1` threshold would also wipe fractional population counts
# (0 < x < 1) in the float rasters (GPW, World Pop, GHS).
# smod.data = np.where(smod.data < 0, 0, smod.data)
modis.data = np.where(modis.data < 0, 0, modis.data)
gpw.data = np.where(gpw.data < 0, 0, gpw.data)
wp.data = np.where(wp.data < 0, 0, wp.data)
ls.data = np.where(ls.data < 0, 0, ls.data)
esri.data = np.where(esri.data < 0, 0, esri.data)
ghs.data = np.where(ghs.data < 0, 0, ghs.data)


In [16]:
# Re-print the first pixel of each raster to confirm the NA values were reset to 0
#print('smod na value is', smod.data[0][0][0])
print('modis na value is', modis.data[0][0][0])
print('GPW na value is', gpw.data[0][0][0])
print('World Pop na value is', wp.data[0][0][0])
print('LandScan na value is', ls.data[0][0][0])
print('ESRI na value is', esri.data[0][0][0])
print('GHS na value is', ghs.data[0][0][0])

modis na value is 0
GPW na value is 0.0
World Pop na value is 0.0
LandScan na value is 0
ESRI na value is 0
GHS na value is 0.0


## Matching and Reprojecting

Following the rioxarray `reproject_match` example: https://corteva.github.io/rioxarray/html/examples/reproject_match.html

In [17]:
# modis doesn't have an assigned fill value so we make one
# (the cells below read attrs['_FillValue'] for every raster, so the key must exist)
modis.attrs['_FillValue'] = 0

In [18]:
# Check NA values: one _FillValue per line, in the order
# modis, gpw, esri, ls, wp, ghs (smod is no longer used)
print(
    *(rst.attrs['_FillValue'] for rst in (modis, gpw, esri, ls, wp, ghs)),
    sep='\n'
)

0
-3.4028230607370965e+38
-2147483647.0
-2147483647.0
-3.4028234663852886e+38
-200.0


In [19]:
# Change NA values: give every raster the same fill value
# (smod is no longer used, so it is left out)
na_val = 0
for rst in (modis, gpw, esri, ls, wp, ghs):
    rst.attrs['_FillValue'] = na_val

In [20]:
# Check NA values again after the reset: one _FillValue per line, in the order
# modis, gpw, esri, ls, wp, ghs (smod is no longer used)
print(
    *(rst.attrs['_FillValue'] for rst in (modis, gpw, esri, ls, wp, ghs)),
    sep='\n'
)

0
0
0
0
0
0


In [21]:
# Reproject all datasets to GPWv4 (gpw itself is the reference and is left untouched)
# NOTE(review): no `resampling` argument is passed, so rioxarray's default is used
# (nearest neighbour per its documentation) — confirm this is intended for the
# population count rasters as well as for the categorical modis raster.
#smod_match = smod.rio.reproject_match(gpw)
modis_match = modis.rio.reproject_match(gpw)
ls_match = ls.rio.reproject_match(gpw)
esri_match = esri.rio.reproject_match(gpw)
wp_match = wp.rio.reproject_match(gpw)
ghs_match = ghs.rio.reproject_match(gpw)

In [22]:
# Check NA values of the matched rasters (gpw is the reference grid, so it has no *_match copy)
#print(smod_match.attrs['_FillValue'])
print(modis_match.attrs['_FillValue'])
print(gpw.attrs['_FillValue'])
print(esri_match.attrs['_FillValue'])
print(ls_match.attrs['_FillValue'])
# check the reprojected World Pop raster, like the other products, not the input
print(wp_match.attrs['_FillValue'])
print(ghs_match.attrs['_FillValue'])

0
0
0
0
0
0.0


In [23]:
# Check em: display each matched raster to compare its shape and coordinates with gpw
#smod_match
modis_match

In [24]:
# gpw is the reference grid the other rasters were matched to
gpw

In [25]:
# LandScan after reproject_match(gpw)
ls_match

In [26]:
# ESRI after reproject_match(gpw)
esri_match

In [27]:
# World Pop after reproject_match(gpw)
wp_match

In [28]:
# GHS after reproject_match(gpw)
ghs_match

In [29]:
# Write the matched rasters to data_out so they can be checked and reused in Step 3.
# The population products end in '_matched.tif'; the MGUP raster ends in '-matched.tif'.
modis_match.rio.to_raster(data_out+'MGUP15_2015-km-matched.tif') # named differently for ease of use later
gpw.rio.to_raster(data_out+'GPWv4_matched.tif')
esri_match.rio.to_raster(data_out+'ESRI16_matched.tif')
ls_match.rio.to_raster(data_out+'LS15_matched.tif')
wp_match.rio.to_raster(data_out+'WP16_matched.tif')
ghs_match.rio.to_raster(data_out+'GHS15_matched.tif')

# Step 3

In [30]:
def raster_mask(rst_fn, urban_arr, rural_arr, data_out):
    """ Writes out masked rural and urban populations from a pop raster

    Band 1 of rst_fn is multiplied element-wise by each mask array and the two
    products are written to data_out as <name>_urban.tif and <name>_rural.tif,
    where <name> is the file name of rst_fn without its extension.

    Args:
        rst_fn = pop raster file name + path as str
        urban_arr = array multiplied with the pop band to keep urban pixels
        rural_arr = array multiplied with the pop band to keep rural pixels
        data_out = path to write out new rsts (joined by plain concatenation,
                   so it must end with a path separator)
    """

    # split for naming; taken from the file name itself so the input does not
    # have to live in a directory called 'interim/'
    rst_nm = os.path.splitext(os.path.basename(rst_fn))[0]
    print(rst_nm)

    # open pop raster once, get array + meta data, and close the handle
    with rasterio.open(rst_fn) as src:
        arr = src.read(1)
        meta = src.meta.copy()

    # multiply by each mask and write out; the product may have a different
    # dtype than the input band, so dtype is refreshed per output
    for suffix, mask_arr in (('_urban', urban_arr), ('_rural', rural_arr)):
        arr_masked = arr * mask_arr
        meta['dtype'] = arr_masked.dtype
        out_fn = data_out+rst_nm+suffix+'.tif'
        with rasterio.open(out_fn, 'w', **meta) as out:
            out.write(arr_masked, 1)
    print('done \n')

In [32]:
# Make a rural raster (1 where MGUP value == 0, 0 everywhere else)
# A single comparison is used on purpose: relabelling 0 -> 1 first and then zeroing
# everything that is not 1 would leave the pixels that already held 1 (urban) marked
# as rural too. astype keeps the raster's original dtype for writing.
value = 0
urban_rural = modis_match.copy()
urban_rural.data = (urban_rural.data == value).astype(urban_rural.data.dtype)
urban_rural.rio.to_raster(data_out+'MGUP15-rural-matched.tif')

In [33]:
# Make an urban raster (1 where MGUP value == 1, 0 everywhere else)
# The comparison gives the mask directly; astype keeps the raster's original dtype.
value = 1
urban_rural = modis_match.copy()
urban_rural.data = (urban_rural.data == value).astype(urban_rural.data.dtype)
urban_rural.rio.to_raster(data_out+'MGUP15-urban-matched.tif')

In [40]:
# Get matched tif files from data_out; the '*_matched.tif' pattern drops the MGUP
# rasters, whose names end in '-matched.tif'
rst_fns = glob(data_out+'*_matched.tif')
rst_fns

['/Users/cascade/Github/PopGridCompare/data/interim/GHS15_matched.tif',
 '/Users/cascade/Github/PopGridCompare/data/interim/LS15_matched.tif',
 '/Users/cascade/Github/PopGridCompare/data/interim/GPWv4_matched.tif',
 '/Users/cascade/Github/PopGridCompare/data/interim/ESRI16_matched.tif',
 '/Users/cascade/Github/PopGridCompare/data/interim/WP16_matched.tif']

In [41]:
# Get modis urban and rural data (the mask rasters written to data_out above)

modis_urban_fn = data_out+'MGUP15-urban-matched.tif'
modis_rural_fn = data_out+'MGUP15-rural-matched.tif'

# read band 1 of each mask; the context managers close the file handles afterwards
with rasterio.open(modis_urban_fn) as src:
    modis_urban = src.read(1)
with rasterio.open(modis_rural_fn) as src:
    modis_rural = src.read(1)

In [42]:
# Run it: write <name>_urban.tif and <name>_rural.tif for every matched pop raster
# data_out is re-assigned here to the same value it was given at the top of the notebook
data_out = '/Users/cascade/Github/PopGridCompare/data/interim/'
for rst_fn in rst_fns:
    raster_mask(rst_fn, modis_urban, modis_rural, data_out)

GHS15_matched
done 

LS15_matched
done 

GPWv4_matched
done 

ESRI16_matched
done 

WP16_matched
done 

