# Pop Grid Compare

The goal of this notebook is to load all available popgrid data products with the same spatial resolution, projection, and CRS, stack them, and identify areas of geographic variance.

In [1]:
import os

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import rioxarray as rio
import xarray as xr

In [2]:
# File paths and file names.
# The data directories default to the original local checkout, but can be
# redirected without editing the notebook by setting POPGRID_DATA_IN /
# POPGRID_DATA_OUT before the kernel starts.
# os.path.join(<dir>, '') guarantees a trailing separator; the cells below
# build file names by plain string concatenation (data_out + '<name>.tif').
data_in = os.path.join(
    os.environ.get('POPGRID_DATA_IN', '/Users/cascade/Github/PopGridCompare/data/raw/'), '')
data_out = os.path.join(
    os.environ.get('POPGRID_DATA_OUT', '/Users/cascade/Github/PopGridCompare/data/interim/'), '')

# One input raster per population product
wp_path = data_in + 'WorldPop16/ppp_2016_1km_Aggregated.tif'
ls_path = data_in + 'LandScan-Global-2015/lspop2015/w001001.adf'
esri_path = data_in + 'ESRI_WPE_2016_Pop/WPE_1KM_2016_Pop.tif'
ghs_path = data_in + 'GHS-Pop/GHS_POP_E2015_GLOBE_R2019A_4326_30ss_V1_0.tif'

## Open files and check metadata

In [3]:
# World Pop 2016 (?_): width = 43200 height = 18720 epsg = 4326
# NOTE(review): the "(?_)" above looks like an unresolved question about this product — confirm.
# Open the raster with rioxarray and display it so its dimensions, coordinates,
# dtype and attributes (transform, _FillValue) can be checked.
wp = rio.open_rasterio(wp_path)
wp

<xarray.DataArray (band: 1, y: 18720, x: 43200)>
[808704000 values with dtype=float32]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 84.0 83.99 83.98 83.97 ... -71.98 -71.99 -72.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    transform:     (0.0083333333, 0.0, -180.001249265, 0.0, -0.0083333333, 83...
    _FillValue:    -3.4028234663852886e+38
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref

In [4]:
# ESRI 2016 : width = 40074 height = 14285 epsg = 4326
# Open the raster with rioxarray and display it so its dimensions, coordinates,
# dtype and attributes (transform, _FillValue) can be checked.
esri = rio.open_rasterio(esri_path)
esri

<xarray.DataArray (band: 1, y: 14285, x: 40074)>
[572457090 values with dtype=int32]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 73.32 73.31 73.3 73.29 ... -54.98 -54.99 -55.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    STATISTICS_MAXIMUM:      336720
    STATISTICS_MEAN:         401.47917136019
    STATISTICS_MINIMUM:      1
    STATISTICS_SKIPFACTORX:  1
    STATISTICS_SKIPFACTORY:  1
    STATISTICS_STDDEV:       1594.7670745165
    transform:               (0.008983196, 0.0, -179.99191516916386, 0.0, -0....
    _FillValue:              -2147483647.0
    scale_factor:            1.0
    add_offset:              0.0
    grid_mapping:            spatial_ref

In [5]:
# LS 2015 : width = 43200 height = 21600 epsg = 4326
# Open the LandScan .adf raster with rioxarray and display it so its dimensions,
# coordinates, dtype and attributes (transform, _FillValue) can be checked.
ls = rio.open_rasterio(ls_path)
ls


<xarray.DataArray (band: 1, y: 21600, x: 43200)>
[933120000 values with dtype=int32]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    STATISTICS_MAXIMUM:  38537
    STATISTICS_MEAN:     16.167777587702
    STATISTICS_MINIMUM:  0
    STATISTICS_STDDEV:   233.963508769
    transform:           (0.0083333333333333, 0.0, -180.0, 0.0, -0.0083333333...
    _FillValue:          -2147483647.0
    scale_factor:        1.0
    add_offset:          0.0
    grid_mapping:        spatial_ref

In [6]:
# GHS 2015 : width = 43200 height = 21600 epsg = 4326
# Open the raster with rioxarray and display it so its dimensions, coordinates,
# dtype and attributes (transform, _FillValue) can be checked.
ghs = rio.open_rasterio(ghs_path)
ghs

<xarray.DataArray (band: 1, y: 21600, x: 43200)>
[933120000 values with dtype=float64]
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 90.0 89.99 89.98 89.97 ... -89.98 -89.99 -90.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    STATISTICS_COVARIANCES:  268645.1471199603
    STATISTICS_MAXIMUM:      459434.61914063
    STATISTICS_MEAN:         32.932334417347
    STATISTICS_MINIMUM:      0
    STATISTICS_SKIPFACTORX:  1
    STATISTICS_SKIPFACTORY:  1
    STATISTICS_STDDEV:       518.30989486982
    transform:               (0.008333333333333333, 0.0, -180.0, 0.0, -0.0083...
    _FillValue:              -200.0
    scale_factor:            1.0
    add_offset:              0.0
    grid_mapping:            spatial_ref

# Reset NA values to zero

In [7]:
# Print the top-left pixel (band 0, row 0, col 0) of each raster as a quick
# look at the value stored where there is no data.
for message, raster in (
    ('World Pop na value is', wp),
    ('LandScan na value is', ls),
    ('ESRI na value is', esri),
    ('GHS na value is', ghs),
):
    print(message, raster.data[0, 0, 0])

World Pop na value is -3.4028235e+38
LandScan na value is -2147483647
ESRI na value is -2147483647
GHS na value is -200.0


In [8]:
#### Set all NAs to zero (cells that are already 0 stay 0)
# Every product marks "no data" with a negative sentinel (-3.4e38, -2147483647
# or -200), whereas real population counts are never negative. Testing for
# `< 0` therefore removes only the sentinels. The previous `< 1` test also
# wiped out every fractional population value in the float rasters (WorldPop,
# GHS), which removes real people from sparsely populated cells and biases the
# comparison against the integer products.
for raster in (wp, ls, esri, ghs):
    raster.data = np.where(raster.data < 0, 0, raster.data)


In [9]:
# Re-print the top-left pixel of each raster to confirm the NA sentinel was
# replaced by zero.
for message, raster in (
    ('World Pop na value is', wp),
    ('LandScan na value is', ls),
    ('ESRI na value is', esri),
    ('GHS na value is', ghs),
):
    print(message, raster.data[0, 0, 0])

World Pop na value is 0.0
LandScan na value is 0
ESRI na value is 0
GHS na value is 0.0


## Test Matching and Reprojecting

trying: https://corteva.github.io/rioxarray/html/examples/reproject_match.html

In [10]:
# Check NA values: show the nodata value each raster declares in its attributes
for raster in (esri, ls, wp, ghs):
    print(raster.attrs['_FillValue'])

-2147483647.0
-2147483647.0
-3.4028234663852886e+38
-200.0


In [11]:
# Change NA values: declare 0 as the nodata value on every raster by
# overwriting its _FillValue attribute
na_val = 0
for raster in (esri, ls, wp, ghs):
    raster.attrs['_FillValue'] = na_val

In [12]:
# Check NA values again: every raster should now report the new _FillValue
for raster in (esri, ls, wp, ghs):
    print(raster.attrs['_FillValue'])

0
0
0
0


In [14]:
# Match LandScan and ESRI to the WorldPop grid (the results are written to disk in a later cell).
# reproject_match is called with its default options here.
# NOTE(review): the default resampling is presumably nearest-neighbour, which does not
# conserve population counts when cell sizes differ (ESRI's transform shows a
# 0.008983 cell size vs 0.0083333 for WorldPop) — confirm this is acceptable.

ls_match = ls.rio.reproject_match(wp)
esri_match = esri.rio.reproject_match(wp)


In [24]:
# Match GHS to the WorldPop grid, using reproject_match with its default options
ghs_match = ghs.rio.reproject_match(wp)

In [26]:
# Check NA values on the matched rasters (wp is the unchanged reference grid)
for raster in (esri_match, ls_match, wp, ghs_match):
    print(raster.attrs['_FillValue'])

0
0
0
0


In [27]:
# Check them: display LandScan after matching; its y/x dimensions and
# coordinates should now be the same as WorldPop's
ls_match

<xarray.DataArray (band: 1, y: 18720, x: 43200)>
array([[[0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0],
        ...,
        [0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0]]], dtype=int32)
Coordinates:
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * y            (y) float64 84.0 83.99 83.98 83.97 ... -71.98 -71.99 -72.0
  * band         (band) int64 1
    spatial_ref  int64 0
Attributes:
    STATISTICS_MAXIMUM:  38537
    STATISTICS_MEAN:     16.167777587702
    STATISTICS_MINIMUM:  0
    STATISTICS_STDDEV:   233.963508769
    transform:           (0.0083333333, 0.0, -180.001249265, 0.0, -0.00833333...
    scale_factor:        1.0
    add_offset:          0.0
    grid_mapping:        spatial_ref
    _FillValue:          0

In [28]:
# Display ESRI after matching it to the WorldPop grid
esri_match

<xarray.DataArray (band: 1, y: 18720, x: 43200)>
array([[[0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0],
        ...,
        [0, 0, ..., 0, 0],
        [0, 0, ..., 0, 0]]], dtype=int32)
Coordinates:
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * y            (y) float64 84.0 83.99 83.98 83.97 ... -71.98 -71.99 -72.0
  * band         (band) int64 1
    spatial_ref  int64 0
Attributes:
    STATISTICS_MAXIMUM:      336720
    STATISTICS_MEAN:         401.47917136019
    STATISTICS_MINIMUM:      1
    STATISTICS_SKIPFACTORX:  1
    STATISTICS_SKIPFACTORY:  1
    STATISTICS_STDDEV:       1594.7670745165
    transform:               (0.0083333333, 0.0, -180.001249265, 0.0, -0.0083...
    scale_factor:            1.0
    add_offset:              0.0
    grid_mapping:            spatial_ref
    _FillValue:              0

In [29]:
# Display WorldPop, the reference grid the other products were matched to
wp

<xarray.DataArray (band: 1, y: 18720, x: 43200)>
array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]], dtype=float32)
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 84.0 83.99 83.98 83.97 ... -71.98 -71.99 -72.0
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
    spatial_ref  int64 0
Attributes:
    transform:     (0.0083333333, 0.0, -180.001249265, 0.0, -0.0083333333, 83...
    _FillValue:    0
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref

In [30]:
# Display GHS after matching it to the WorldPop grid
ghs_match

<xarray.DataArray (band: 1, y: 18720, x: 43200)>
array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.],
        ...,
        [0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]])
Coordinates:
  * x            (x) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0
  * y            (y) float64 84.0 83.99 83.98 83.97 ... -71.98 -71.99 -72.0
  * band         (band) int64 1
    spatial_ref  int64 0
Attributes:
    STATISTICS_COVARIANCES:  268645.1471199603
    STATISTICS_MAXIMUM:      459434.61914063
    STATISTICS_MEAN:         32.932334417347
    STATISTICS_MINIMUM:      0
    STATISTICS_SKIPFACTORX:  1
    STATISTICS_SKIPFACTORY:  1
    STATISTICS_STDDEV:       518.30989486982
    transform:               (0.0083333333, 0.0, -180.001249265, 0.0, -0.0083...
    scale_factor:            1.0
    add_offset:              0.0
    grid_mapping:            spatial_ref
    _FillValue:              0

In [31]:
# Write the matched ESRI and LandScan rasters to the interim folder so they
# can be checked afterwards
for raster, fname in (
    (esri_match, 'ESRI16_matched.tif'),
    (ls_match, 'LS15_matched.tif'),
):
    raster.rio.to_raster(data_out + fname)



In [32]:
# Write WorldPop as well. It is the reference grid and was never reprojected,
# but it is saved under the same '_matched' naming as the other products.
wp.rio.to_raster(data_out+'WP16_matched.tif')

In [33]:
# Write the matched GHS raster.
# NOTE(review): this file name ends in '_match' while the other outputs end in
# '_matched' — confirm which name downstream steps expect before renaming.
ghs_match.rio.to_raster(data_out+'GHS15_match.tif')

## Subtract 'EM

In [None]:
# Pairwise differences between the products on the shared WorldPop grid.
#
# xarray aligns operands on their coordinate labels before doing arithmetic.
# The x/y coordinates returned by reproject_match can differ from wp's by
# floating-point noise; cells then fail to line up and are silently dropped.
# That is a likely cause of the "strange results" previously seen here.
# The rioxarray reproject_match example recommends overwriting the coordinates
# with the target's. This is safe because the matched rasters have wp's shape.
ls_aligned = ls_match.assign_coords({'x': wp.x, 'y': wp.y})
esri_aligned = esri_match.assign_coords({'x': wp.x, 'y': wp.y})

wp_ls = wp - ls_aligned
wp_esri = wp - esri_aligned
ls_esri = ls_aligned - esri_aligned

In [None]:
# Write each difference raster to the interim folder
for diff, fname in (
    (wp_ls, 'wp_ls.tif'),
    (wp_esri, 'wp_esri.tif'),
    (ls_esri, 'ls_esri.tif'),
):
    diff.rio.to_raster(data_out + fname)

In [None]:
# Stack WorldPop, ESRI and LandScan along the band dimension (one band each).
# join='override' reuses wp's x/y indexes for all inputs. With the default
# outer join, coordinates that differ only by floating-point noise after
# reproject_match would be treated as different cells, producing a larger,
# NaN-padded grid instead of three aligned bands.
rgb_array = xr.concat([wp, esri_match, ls_match], dim='band', join='override')

In [None]:
# Disabled: would write the stacked three-band raster (rgb_array) to the interim folder.
# NOTE(review): the reason this is commented out isn't recorded — confirm before re-enabling.
#rgb_array.rio.to_raster(data_out+'all_rgb.tif')