# Other
Notebook to dig up summary statistics for the manuscript (MS) <br>
Cascade Tuholske, Feb 2021

In [70]:
# Dependencies
import geopandas as gpd
import pandas as np
import numpy as np
import glob
import rasterio
import matplotlib.pyplot as plt

### Admin Units

In [4]:
# Count the features (one per admin unit) in the ZWE boundary shapefile.
print('How many admin units are there?')
df_in = '/Users/cascade/Github/PopGridCompare/data/raw/GPWv4-boundaries/gwpv4_zwe_admin2.shp'
df = gpd.read_file(df_in)  # loads the shapefile as a GeoDataFrame, one row per feature
print('For ZWE', df.shape[0])

How many admin units are there?
For ZWE 91


In [5]:
# Count the features (one per admin unit) in the NPL boundary shapefile.
print('How many admin units are there?')
df_in = '/Users/cascade/Github/PopGridCompare/data/raw/GPWv4-boundaries/gwpv4_npl_admin4.shp'
df = gpd.read_file(df_in)  # loads the shapefile as a GeoDataFrame, one row per feature
print('For NPL', df.shape[0])

How many admin units are there?
For NPL 3990


In [6]:
# Count the features (one per admin unit) in the mwi boundary shapefile.
print('How many admin units are there?')
df_in = '/Users/cascade/Github/PopGridCompare/data/raw/GPWv4-boundaries/gwpv4_mwi_admin3.shp'
df = gpd.read_file(df_in)  # loads the shapefile as a GeoDataFrame, one row per feature
print('For mwi', df.shape[0])

How many admin units are there?
For mwi 12647


In [8]:
# Count the features (one per admin unit) in the moz boundary shapefile.
print('How many admin units are there?')
df_in = '/Users/cascade/Github/PopGridCompare/data/raw/GPWv4-boundaries/gwpv4_moz_admin3.shp'
df = gpd.read_file(df_in)  # loads the shapefile as a GeoDataFrame, one row per feature
print('For moz', df.shape[0])

How many admin units are there?
For moz 413


In [9]:
# Count the features (one per admin unit) in the ecu boundary shapefile.
print('How many admin units are there?')
df_in = '/Users/cascade/Github/PopGridCompare/data/raw/GPWv4-boundaries/gwpv4_ecu_admin3.shp'
df = gpd.read_file(df_in)  # loads the shapefile as a GeoDataFrame, one row per feature
print('For ecu', df.shape[0])

How many admin units are there?
For ecu 1047


## Counts

In [102]:
# Directory that is searched for the 'agg_*.tif' rasters in the following cell.
# NOTE(review): absolute, machine-specific path -- must be edited to run elsewhere.
PATH = '/Users/cascade/Github/PopGridCompare/data/interim/'

In [105]:
# glob.glob returns matches in arbitrary (filesystem-dependent) order; sorting makes
# the per-raster report printed from this list come out in the same order on every run.
rst_list = sorted(glob.glob(PATH+'agg_*.tif'))

In [129]:
# How many pixels are occupied
# For every aggregated raster: print the file name, the total pixel count, the
# per-class pixel tally, and the share of pixels in the two classes summed as "agree".
for fn in rst_list:
    print(fn.split('interim/')[1])

    # Read band 1 and release the file handle straight away.
    with rasterio.open(fn) as src:
        data = src.read(1)

    tot = data.size  # number of pixels in the band (no flattened copy needed)
    print('tot', tot)

    # Tally the classes once and reuse the result for both the print and the sum.
    counts = np.unique(data, return_counts=True)
    print('counts', counts)
    count = counts[1]

    # NOTE(review): count[...] is indexed by POSITION in the sorted unique values, not
    # by class value, so it silently shifts if a class is absent from a raster. The
    # printed tallies show six classes (0..5), which makes count[4] class 4 rather
    # than the highest class -- confirm that 0 + 4 (not 0 + 5) is the intended sum.
    agree = count[0] + count[4]
    print('agree',agree)
    pct = agree / tot * 100
    print('Pct agree', pct, '\n')

agg_MMZ.tif
tot 3937046
counts (array([0, 1, 2, 3, 4, 5], dtype=int32), array([2418131,   67470,  148693,  984142,  247488,   71122]))
agree 2665619
Pct agree 67.7060669344478 

agg_NPL.tif
tot 482154
counts (array([0, 1, 2, 3, 4, 5], dtype=int32), array([300927,   7734,  14501,  27775,  64255,  66962]))
agree 365182
Pct agree 75.73970142319673 

agg_ECU.tif
tot 550046
counts (array([0, 1, 2, 3, 4, 5], dtype=int32), array([295855,  20371,  43893, 132826,  46933,  10168]))
agree 342788
Pct agree 62.3198787010541 



## Range Outliers

In [131]:
def open_rst(rst_fn):
    """Read band 1 of a raster file and wrap it in an xarray DataArray.

    Args:
        rst_fn: path of the raster file to open with rasterio.

    Returns:
        An xr.DataArray holding band 1, with dimensions labelled 'y' and 'x'.
    """
    # NOTE(review): `xr` is not imported in the notebook's dependency cell as shown;
    # presumably `import xarray as xr` is needed before this can be called -- confirm.

    # Use a context manager so the dataset handle is closed once the band is read.
    with rasterio.open(rst_fn) as src:
        band = src.read(1)

    rst = xr.DataArray(band, dims = ['y', 'x']) # y and x are our 2-d labels

    return rst

In [132]:
def rst_range(rst_fns, fn_out = None, save = False):
    """Compute the pixel-level range across a set of rasters and optionally write it out.

    Band 1 of every raster is read and the bands are stacked along a new leading
    axis. Cells equal to -999 are set to 0, then the per-pixel minimum, maximum and
    range (max - min) are taken across the rasters.

    Args:
        rst_fns: list of raster file paths; band 1 of each must have the same shape.
        fn_out: file name and path of the range raster to write (needed if save is True).
        save: if True, write the range raster to fn_out; default False.

    Returns:
        Tuple (arr_range, arr_max, arr_min) of 2-D arrays.

    Raises:
        ValueError: if rst_fns is empty, or if save is True but fn_out is None.
    """
    if len(rst_fns) == 0:
        raise ValueError('rst_fns must contain at least one raster path')
    if save and fn_out is None:
        raise ValueError('fn_out must be provided when save is True')

    # Read band 1 of each raster, closing every dataset as soon as it has been read.
    arr_list = []
    for rst_fn in rst_fns:
        with rasterio.open(rst_fn) as src:
            arr_list.append(src.read(1))

    # Stack to shape (n_rasters, rows, cols) so that axis 0 runs across the rasters.
    arr3d = np.stack(arr_list, axis = 0)

    # Replace the -999 sentinel with 0 (the original comment describes -999 as NAN,
    # i.e. presumably the nodata value -- confirm against the raster metadata).
    arr3d[arr3d == -999] = 0

    # Per-pixel statistics across the rasters.
    arr_min = np.amin(arr3d, axis = 0)
    arr_max = np.amax(arr3d, axis = 0)
    arr_range = arr_max - arr_min

    if save:
        # Reuse the first raster's metadata for the output, stored as float64.
        with rasterio.open(rst_fns[0]) as src:
            out_meta = src.meta.copy()
        out_meta.update({'dtype' : 'float64'})

        # Write the ndarray itself (ndarray.data is a raw memoryview, not an array),
        # cast to the dtype declared in the output metadata.
        with rasterio.open(fn_out, "w", **out_meta) as dest:
            dest.write(arr_range.astype('float64'), 1)

    return arr_range, arr_max, arr_min

### NPL

In [133]:
# Paths and file names --- update `country` by geography (urban/rural) for MMZ, ECU, NPL
DATA_PATH = '/Users/cascade/Github/PopGridCompare/data/'  # not referenced again in this cell

# geography + country tag used to select the rasters
country = 'rural_NPL'

# every interim raster whose file name ends with the tag
rst_fns = glob.glob('/Users/cascade/Github/PopGridCompare/data/interim/*'+country+'.tif')

# print the matches as a sanity check
for rst in rst_fns:
    print(rst)

# per-pixel range / max / min across the matched rasters; defaults mean nothing is written
data_range, data_max, data_min = rst_range(rst_fns)

/Users/cascade/Github/PopGridCompare/data/interim/WP16_rural_NPL.tif
/Users/cascade/Github/PopGridCompare/data/interim/ESRI16_rural_NPL.tif
/Users/cascade/Github/PopGridCompare/data/interim/GHS15_rural_NPL.tif
/Users/cascade/Github/PopGridCompare/data/interim/LS15_rural_NPL.tif
/Users/cascade/Github/PopGridCompare/data/interim/GPWv4_rural_NPL.tif


In [148]:
# data range: swap zero-range pixels for -999 so they can never pass the threshold
range_zeros = np.where(data_range == 0, -999, data_range)

# 1 where the range across rasters exceeds 50000, else 0
range_zeros50K = (range_zeros > 50000).astype(int)

# 1 where the per-pixel minimum lies in (0, 1000], else 0
min1000 = ((data_min > 0) & (data_min <= 1000)).astype(int)

print(country)

# the product is 1 only where both flags are set; tally the 0s and 1s
print('How many min 1000 have range over 50000?', np.unique(min1000 * range_zeros50K, return_counts = True))

rural_MMZ
How many min 1000 have range over 5000? (array([0, 1]), array([3937019,      27]))


### MMZ

In [149]:
# Paths and file names --- update `country` by geography (urban/rural) for MMZ, ECU, NPL
DATA_PATH = '/Users/cascade/Github/PopGridCompare/data/'  # not referenced again in this cell

# geography + country tag used to select the rasters
country = 'rural_MMZ'

# every interim raster whose file name ends with the tag
rst_fns = glob.glob('/Users/cascade/Github/PopGridCompare/data/interim/*'+country+'.tif')

# print the matches as a sanity check
for rst in rst_fns:
    print(rst)

# per-pixel range / max / min across the matched rasters; defaults mean nothing is written
data_range, data_max, data_min = rst_range(rst_fns)

# data range: swap zero-range pixels for -999 so they can never pass the threshold
range_zeros = np.where(data_range == 0, -999, data_range)

# 1 where the range across rasters exceeds 50000, else 0
range_zeros50K = (range_zeros > 50000).astype(int)

# 1 where the per-pixel minimum lies in (0, 1000], else 0
min1000 = ((data_min > 0) & (data_min <= 1000)).astype(int)

print(country)

# the product is 1 only where both flags are set; tally the 0s and 1s
print('How many min 1000 have range over 50000?', np.unique(min1000 * range_zeros50K, return_counts = True))

/Users/cascade/Github/PopGridCompare/data/interim/LS15_rural_MMZ.tif
/Users/cascade/Github/PopGridCompare/data/interim/GPWv4_rural_MMZ.tif
/Users/cascade/Github/PopGridCompare/data/interim/ESRI16_rural_MMZ.tif
/Users/cascade/Github/PopGridCompare/data/interim/WP16_rural_MMZ.tif
/Users/cascade/Github/PopGridCompare/data/interim/GHS15_rural_MMZ.tif
rural_MMZ
How many min 1000 have range over 50000? (array([0, 1]), array([3937019,      27]))


### ECU

In [151]:
# Paths and file names --- update `country` by geography (urban/rural) for MMZ, ECU, NPL
DATA_PATH = '/Users/cascade/Github/PopGridCompare/data/'  # not referenced again in this cell

# geography + country tag used to select the rasters
country = 'rural_ECU'

# every interim raster whose file name ends with the tag
rst_fns = glob.glob('/Users/cascade/Github/PopGridCompare/data/interim/*'+country+'.tif')

# print the matches as a sanity check
for rst in rst_fns:
    print(rst)

# per-pixel range / max / min across the matched rasters; defaults mean nothing is written
data_range, data_max, data_min = rst_range(rst_fns)

# data range: swap zero-range pixels for -999 so they can never pass the threshold
range_zeros = np.where(data_range == 0, -999, data_range)

# 1 where the range across rasters exceeds 25000, else 0
# (the variable keeps its "50K" name from the sibling cells, but the cutoff here is 25000)
range_zeros50K = (range_zeros > 25000).astype(int)

# 1 where the per-pixel minimum lies in (0, 1000], else 0
min1000 = ((data_min > 0) & (data_min <= 1000)).astype(int)

print(country)

# the product is 1 only where both flags are set; tally the 0s and 1s
print('How many min 1000 have range over 25000?', np.unique(min1000 * range_zeros50K, return_counts = True))

/Users/cascade/Github/PopGridCompare/data/interim/GHS15_rural_ECU.tif
/Users/cascade/Github/PopGridCompare/data/interim/ESRI16_rural_ECU.tif
/Users/cascade/Github/PopGridCompare/data/interim/WP16_rural_ECU.tif
/Users/cascade/Github/PopGridCompare/data/interim/GPWv4_rural_ECU.tif
/Users/cascade/Github/PopGridCompare/data/interim/LS15_rural_ECU.tif
rural_ECU
How many min 1000 have range over 50000? (array([0, 1]), array([550038,      8]))


### Find Rural 1000

In [157]:
# File Paths & FNs --- Update by geography urban/rural for MMZ, ECU, NPL
DATA_PATH = '/Users/cascade/Github/PopGridCompare/data/'

# args
country = 'rural_MMZ'

# get rst names
rst_fns = glob.glob('/Users/cascade/Github/PopGridCompare/data/interim/*'+country+'.tif')

print(country)

# Get number of rural pixels > 10,000
for rst in rst_fns:
    print(rst)

    # Read band 1 inside a context manager so each dataset handle is closed
    # before the next raster is opened.
    with rasterio.open(rst) as src:
        data = src.read(1)

    # 1 where the pixel value exceeds 10000, else 0; then tally the 0s and 1s.
    data = (data > 10000).astype(int)
    print(np.unique(data, return_counts = True))

rural_MMZ
/Users/cascade/Github/PopGridCompare/data/interim/LS15_rural_MMZ.tif
(array([0, 1]), array([3936977,      69]))
/Users/cascade/Github/PopGridCompare/data/interim/GPWv4_rural_MMZ.tif
(array([0, 1]), array([3937036,      10]))
/Users/cascade/Github/PopGridCompare/data/interim/ESRI16_rural_MMZ.tif
(array([0, 1]), array([3936930,     116]))
/Users/cascade/Github/PopGridCompare/data/interim/WP16_rural_MMZ.tif
(array([0, 1]), array([3937030,      16]))
/Users/cascade/Github/PopGridCompare/data/interim/GHS15_rural_MMZ.tif
(array([0, 1]), array([3936518,     528]))


### Nepal Outlier

In [160]:
# Paths and file names --- update `country` by geography (urban/rural) for MMZ, ECU, NPL
DATA_PATH = '/Users/cascade/Github/PopGridCompare/data/'  # not referenced again in this cell

# geography + country tag used to select the rasters
country = 'rural_NPL'

# every interim raster whose file name ends with the tag
rst_fns = glob.glob('/Users/cascade/Github/PopGridCompare/data/interim/*'+country+'.tif')

# list the matched files
for rst in rst_fns:
    print(rst)

/Users/cascade/Github/PopGridCompare/data/interim/WP16_rural_NPL.tif
/Users/cascade/Github/PopGridCompare/data/interim/ESRI16_rural_NPL.tif
/Users/cascade/Github/PopGridCompare/data/interim/GHS15_rural_NPL.tif
/Users/cascade/Github/PopGridCompare/data/interim/LS15_rural_NPL.tif
/Users/cascade/Github/PopGridCompare/data/interim/GPWv4_rural_NPL.tif


In [167]:
# GHS
# Select the GHS raster by file name rather than by list position: glob.glob returns
# matches in arbitrary order, so rst_fns[2] is not guaranteed to be the GHS15 file.
ghs_fns = [fn for fn in rst_fns if 'GHS15_' in fn]
if len(ghs_fns) != 1:
    raise ValueError('expected exactly one GHS15 raster in rst_fns, found %d' % len(ghs_fns))

# Read band 1 and close the dataset handle immediately afterwards.
with rasterio.open(ghs_fns[0]) as src:
    ghs = src.read(1)

# 0/1 mask: 1 where the GHS value exceeds 110000, else 0.
ghs = (ghs > 110000).astype(int)

In [168]:
# Tally the 0s and 1s in the GHS mask, i.e. how many pixels were flagged (value 1).
np.unique(ghs, return_counts = True)

(array([0, 1]), array([482153,      1]))

In [169]:
# For each raster, keep only the values at the pixels flagged in the 0/1 `ghs` mask
# (everything else is multiplied by 0) and print the distinct values that remain.
for rst in rst_fns:
    # Context manager closes each dataset handle once band 1 has been read.
    with rasterio.open(rst) as src:
        data = src.read(1)
    out = data * ghs
    print(rst)
    print(np.unique(out, return_counts = True), '\n')

/Users/cascade/Github/PopGridCompare/data/interim/WP16_rural_NPL.tif
(array([ -0.        , 729.90893555]), array([482153,      1])) 

/Users/cascade/Github/PopGridCompare/data/interim/ESRI16_rural_NPL.tif
(array([ 0, 28]), array([482153,      1])) 

/Users/cascade/Github/PopGridCompare/data/interim/GHS15_rural_NPL.tif
(array([    -0.        , 117461.80763054]), array([482153,      1])) 

/Users/cascade/Github/PopGridCompare/data/interim/LS15_rural_NPL.tif
(array([  0, 103]), array([482153,      1])) 

/Users/cascade/Github/PopGridCompare/data/interim/GPWv4_rural_NPL.tif
(array([  -0.        , 2392.41113281]), array([482153,      1])) 



# Idai costs
https://media.ifrc.org/ifrc/wp-content/uploads/sites/5/2019/09/2019-IFRC-CODN-EN.pdf <br>
Similarly, estimates of populations living in flood inundated areas ranged from WPE-16 <br>
identifying 817 thousand people (88% rural) to 1.28 million people (99% rural) by GPW-15.
- Cost assumption used in the cells below: $112 / person (the cells print totals in millions of dollars)

In [8]:
# Floods: lower-bound cost, in millions = cost per person * 817 thousand people / 10**6
cost_per_person = 112
people_thousands = 817
print('Costs low', cost_per_person * people_thousands * 10**3 / 10**6)

Costs low 91.504


In [9]:
# Floods: upper-bound cost, in millions = cost per person * 1.28 million people / 10**6
# (operands kept in the original left-to-right order so the float result is unchanged)
cost_per_person = 112
people_millions = 1.28
print('Costs', cost_per_person * people_millions * 10**6 / 10**6)

Costs 143.36
