# Zonal Stats

The objective of this notebook is to calculate zonal stats for a raster, given a set of polygons

1. Total population
2. Catagorical Values

### Dependencies

In [1]:
from rasterstats import zonal_stats
import rasterio
import geopandas as gpd
import operator
import pandas as pd

### Functions

In [2]:
def zone_stat(raster, band, polygon):
    """
    This function will calculate the zonal stats for each polygon within a raster
    requires gpd_df, raster, object and nodata value
    """
    
    band = raster.read(band)
    zone_stat = zonal_stats(polygon, band, affine=raster.meta['transform'], nodata = 0, stats = 'sum')
    return zone_stat

In [3]:
def zone_mode(raster, band, polygon):
    """ 
    This function will find the mode class within a polygon overlayed on top
    of a classified raster 
    requires gpd_df, raster, object
    """
    
    band = raster.read(band)
    zone_stat = zonal_stats(polygon, band, affine=raster.meta['transform'], categorical=True, category_map=cmap)
    return zone_stat

### Data In

In [4]:
# data_raw = '/Users/cascade/Github/NTL/data/raw/'
# data_temp = '/Users/cascade/Github/NTL/temp_data/'
# data_interim = '/Users/cascade/Github/NTL/data/interim/'
# ms_data = '/Users/cascade/Github/NTL/temp_data/MS_Data/'
# erl_data = '/Users/cascade/Github/NTL/temp_data/ERL_data/'
# downloads = '/Users/cascade/Downloads/'


data_interim = '../../../data/interim/'
erl_v2_data = '../../../temp_data/ERL19v2/'
downloads = '/Users/cascade/Downloads/'

In [5]:
poly_file = 'LS2015_polyFINAL'
poly_gpd = gpd.read_file(erl_v2_data+poly_file+'.shp')

In [7]:
file_out = poly_file+'_PopTotERLv2'

In [6]:
# Use Zeros raster in analysis because it gets ride of any negative values that are used as NaN
# GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip_zeros ... NaN and Neg values have been changed to zeros,
# and thus GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip_zeros & 2015 version are OK to USE 2019-02-21


zeros_file = data_interim+'LS15_w001001_Clip_zeros.tif'

#ghs_path = '/Users/cascade/Github/NTL/data/raw/ghs-pop/GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0/'
#zeros_file = ghs_path+'GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0.tif'


#raster = rasterio.open(raster_in)

In [8]:
raster_in = rasterio.open(zeros_file)

In [9]:
raster_in.crs

CRS({'init': 'epsg:4326'})

In [10]:
poly_gpd.head()

Unnamed: 0,osm_id,FID,country,osm_name,osm_type,lat,lon,clippoly,geometry
0,89369215,14962,Algeria,Tamanrasset,town,22.785454,5.532446,no,"POLYGON ((5.516667 22.816667, 5.533333 22.8166..."
1,89980948,14115,Algeria,In Salah,town,27.195033,2.482613,no,"POLYGON ((2.466667 27.216667, 2.483333 27.2166..."
2,252600742,396,Algeria,Boumerdès,town,36.758882,3.470596,no,"POLYGON ((2.95 36.816667, 3.025 36.816667, 3.0..."
3,253167052,633,Algeria,Thenia,town,36.724986,3.556935,no,"POLYGON ((3.691667 36.766667, 3.7 36.766667, 3..."
4,253167208,360,Algeria,Zemmouri,town,36.786406,3.601221,no,"POLYGON ((3.658333 36.825, 3.675 36.825, 3.675..."


### Analysis

In [11]:
poly_gpd.head()

Unnamed: 0,osm_id,FID,country,osm_name,osm_type,lat,lon,clippoly,geometry
0,89369215,14962,Algeria,Tamanrasset,town,22.785454,5.532446,no,"POLYGON ((5.516667 22.816667, 5.533333 22.8166..."
1,89980948,14115,Algeria,In Salah,town,27.195033,2.482613,no,"POLYGON ((2.466667 27.216667, 2.483333 27.2166..."
2,252600742,396,Algeria,Boumerdès,town,36.758882,3.470596,no,"POLYGON ((2.95 36.816667, 3.025 36.816667, 3.0..."
3,253167052,633,Algeria,Thenia,town,36.724986,3.556935,no,"POLYGON ((3.691667 36.766667, 3.7 36.766667, 3..."
4,253167208,360,Algeria,Zemmouri,town,36.786406,3.601221,no,"POLYGON ((3.658333 36.825, 3.675 36.825, 3.675..."


In [12]:
zonalstats = zone_stat(raster_in, 1, poly_gpd)

In [13]:
poly_gpd['PopTot'] = gpd.GeoDataFrame.from_dict(zonalstats)
poly_gpd

Unnamed: 0,osm_id,FID,country,osm_name,osm_type,lat,lon,clippoly,geometry,PopTot
0,89369215,14962,Algeria,Tamanrasset,town,22.785454,5.532446,no,"POLYGON ((5.516667 22.816667, 5.533333 22.8166...",101883.0
1,89980948,14115,Algeria,In Salah,town,27.195033,2.482613,no,"POLYGON ((2.466667 27.216667, 2.483333 27.2166...",19993.0
2,252600742,396,Algeria,Boumerdès,town,36.758882,3.470596,no,"POLYGON ((2.95 36.816667, 3.025 36.816667, 3.0...",4665640.0
3,253167052,633,Algeria,Thenia,town,36.724986,3.556935,no,"POLYGON ((3.691667 36.766667, 3.7 36.766667, 3...",90771.0
4,253167208,360,Algeria,Zemmouri,town,36.786406,3.601221,no,"POLYGON ((3.658333 36.825, 3.675 36.825, 3.675...",27491.0
5,253291208,1606,Algeria,Lakhdaria,town,36.563944,3.596907,no,"POLYGON ((3.6 36.591667, 3.608333 36.591667, 3...",57006.0
6,253292622,596,Algeria,Draâ Ben Khedda,town,36.733332,3.958769,no,"POLYGON ((3.975 36.775, 3.983333 36.775, 3.983...",29810.0
7,253292625,133,Algeria,Dellys,town,36.915798,3.913104,no,"POLYGON ((3.875 36.925, 3.916667 36.925, 3.916...",17492.0
8,258799889,11560,Algeria,El Menia,town,30.583668,2.883089,no,"POLYGON ((2.866667 30.616667, 2.891667 30.6166...",46515.0
9,262963952,7872,Algeria,Benaceur,town,33.110521,6.442111,no,"POLYGON ((6.425 33.116667, 6.45 33.116667, 6.4...",10678.0


In [14]:
# check for zeros or strange data points

neg_df = poly_gpd[poly_gpd.PopTot < 0]
neg_df

Unnamed: 0,osm_id,FID,country,osm_name,osm_type,lat,lon,clippoly,geometry,PopTot


In [15]:
# For GHS-2000 one point (FID 19267) is too small for zonal stats to work
# For GHS-2015 one point (FID 37189) is too small for zonal stats to work
# For WP-2015 one point (FID 8198) is too small for zonal stats to work
# For LS-2015 two points (FID 6111 & 10778) are too small for zonal stats to work

null = poly_gpd[poly_gpd.PopTot.isna()]
null

Unnamed: 0,osm_id,FID,country,osm_name,osm_type,lat,lon,clippoly,geometry,PopTot
4779,298699084,6111,Tanzania,Bugarama,town,-3.29088,29.5463,yes,"(POLYGON ((30.475 -2.571002484627931, 30.475 -...",
4787,44929726,10778,Ethiopia,Mandera,town,3.93842,41.85732,yes,"(POLYGON ((41.833333 3.95, 41.8369839160398 3....",


In [16]:
null.to_file(downloads+'LS2015_erros.shp', driver = 'ESRI Shapefile')

### AEZ Class 

In [17]:
cmap = {
    
101: 'Temperate / arid',
102: 'Temperate / Semi-arid',
103: 'Temperate / sub-humid',
104: 'Temperate / humid',
211: 'Subtropic - warm / arid',
212: 'Subtropic - warm / semiarid',
213: 'Subtropic - warm / subhumid',
214: 'Subtropic - warm / humid',
221: 'Subtropic - cool / arid',
222: 'Subtropic - cool / semiarid',
223: 'Subtropic - cool / subhumid',
224: 'Subtropic - cool / humid',
311: 'Tropic - warm / arid',
312: 'Tropic - warm / semiarid',
313: 'Tropic - warm / subhumid',
314: 'Tropic - warm / humid',
321: 'Tropic - cool / arid',
322: 'Tropic - cool / semiarid',
323: 'Tropic - cool / subhumid',
324: 'Tropic - cool / humid',
400: 'Boreal'
    
}

In [18]:
#Load

aezraster_zeros = rasterio.open(data_interim+'ssa-aez09-raster-zeros.tif')


In [19]:
aezraster_zeros.crs

CRS({'init': 'epsg:4326'})

#### Reproject polygons if needed

In [None]:
# CRS≈
# above can be reprojected as 'i'init': 'esri:54009'}'
# raster_in.meta['crs']

In [None]:
# poly_gpd.crs

In [None]:
# poly_gpd.crs = {'proj': 'moll', 'lon_0': 0, 'x_0': 0, 'y_0': 0, 'ellps': 'WGS84', 'units': 'm', 'no_defs': True}

In [None]:
# Reproject 

# poly_gpd = poly_gpd.to_crs({'init': 'epsg:4326'})
# poly_gpd.head()

#### Find aez_zone

In [20]:
# Find aez_zone

aez_class = zone_mode(aezraster_zeros, 1, poly_gpd)



In [21]:
foo = {}
cat =[]

for i in aez_class:
    if i == foo:
        mode = 'NoClass'
    else:
            mode = (max(i.items(), key=operator.itemgetter(1))[0])
    cat.append((mode))

In [22]:
len(cat)

4828

In [23]:
poly_gpd['aez_class'] = gpd.GeoDataFrame.from_dict(cat)
poly_gpd

Unnamed: 0,osm_id,FID,country,osm_name,osm_type,lat,lon,clippoly,geometry,PopTot,aez_class
0,89369215,14962,Algeria,Tamanrasset,town,22.785454,5.532446,no,"POLYGON ((5.516667 22.816667, 5.533333 22.8166...",101883.0,Tropic - cool / arid
1,89980948,14115,Algeria,In Salah,town,27.195033,2.482613,no,"POLYGON ((2.466667 27.216667, 2.483333 27.2166...",19993.0,Subtropic - warm / arid
2,252600742,396,Algeria,Boumerdès,town,36.758882,3.470596,no,"POLYGON ((2.95 36.816667, 3.025 36.816667, 3.0...",4665640.0,Subtropic - warm / subhumid
3,253167052,633,Algeria,Thenia,town,36.724986,3.556935,no,"POLYGON ((3.691667 36.766667, 3.7 36.766667, 3...",90771.0,Subtropic - warm / subhumid
4,253167208,360,Algeria,Zemmouri,town,36.786406,3.601221,no,"POLYGON ((3.658333 36.825, 3.675 36.825, 3.675...",27491.0,Subtropic - warm / subhumid
5,253291208,1606,Algeria,Lakhdaria,town,36.563944,3.596907,no,"POLYGON ((3.6 36.591667, 3.608333 36.591667, 3...",57006.0,Subtropic - warm / subhumid
6,253292622,596,Algeria,Draâ Ben Khedda,town,36.733332,3.958769,no,"POLYGON ((3.975 36.775, 3.983333 36.775, 3.983...",29810.0,Subtropic - warm / subhumid
7,253292625,133,Algeria,Dellys,town,36.915798,3.913104,no,"POLYGON ((3.875 36.925, 3.916667 36.925, 3.916...",17492.0,Subtropic - warm / subhumid
8,258799889,11560,Algeria,El Menia,town,30.583668,2.883089,no,"POLYGON ((2.866667 30.616667, 2.891667 30.6166...",46515.0,Subtropic - warm / arid
9,262963952,7872,Algeria,Benaceur,town,33.110521,6.442111,no,"POLYGON ((6.425 33.116667, 6.45 33.116667, 6.4...",10678.0,Subtropic - warm / arid


In [24]:
# Group by rainfall zone

arid = (['Temperate / arid', 
         'Subtropic - warm / arid', 
         'Subtropic - cool / arid', 
         'Tropic - warm / arid',
         'Tropic - cool / arid'], 'Arid')

semi_arid = (['Temperate / Semi-arid', 
              'Subtropic - warm / semiarid', 
              'Subtropic - cool / semiarid',
              'Tropic - warm / semiarid', 
              'Tropic - cool / semiarid'], 'Semi-arid')    

sub_humid = (['Temperate / sub-humid', 
              'Subtropic - warm / subhumid', 
              'Subtropic - cool / subhumid',
              'Tropic - warm / subhumid', 
              'Tropic - cool / subhumid'], 'Sub-humid')

humid = (['Temperate / humid', 
          'Subtropic - warm / humid', 
          'Subtropic - cool / humid', 
          'Tropic - warm / humid',
          'Tropic - cool / humid'], 'Humid')

boreal = (['Boreal'], 'Boreal')

na = (['NoClass', '0', 0], 'NA') # had to add 0.0

rain_list = [arid, semi_arid, sub_humid, humid, boreal, na]

In [25]:
def rain_zone(gpd_df, rain_list):
    "function adds a new col to a gpd_df based on rain fall zone"
    arr =[]
    for rain_zone in rain_list:
        for aez in rain_zone[0]:
            for i, row in gpd_df.iterrows():
                if row['aez_class'] == aez:
                    arr.append(rain_zone[1])
                
    gpd_df['rain_zone'] = arr
    
    return gpd_df

In [26]:
poly_gpd = rain_zone(poly_gpd, rain_list)

In [27]:
poly_gpd.head()

Unnamed: 0,osm_id,FID,country,osm_name,osm_type,lat,lon,clippoly,geometry,PopTot,aez_class,rain_zone
0,89369215,14962,Algeria,Tamanrasset,town,22.785454,5.532446,no,"POLYGON ((5.516667 22.816667, 5.533333 22.8166...",101883.0,Tropic - cool / arid,Arid
1,89980948,14115,Algeria,In Salah,town,27.195033,2.482613,no,"POLYGON ((2.466667 27.216667, 2.483333 27.2166...",19993.0,Subtropic - warm / arid,Arid
2,252600742,396,Algeria,Boumerdès,town,36.758882,3.470596,no,"POLYGON ((2.95 36.816667, 3.025 36.816667, 3.0...",4665640.0,Subtropic - warm / subhumid,Arid
3,253167052,633,Algeria,Thenia,town,36.724986,3.556935,no,"POLYGON ((3.691667 36.766667, 3.7 36.766667, 3...",90771.0,Subtropic - warm / subhumid,Arid
4,253167208,360,Algeria,Zemmouri,town,36.786406,3.601221,no,"POLYGON ((3.658333 36.825, 3.675 36.825, 3.675...",27491.0,Subtropic - warm / subhumid,Arid


#### Regions

In [None]:
### List of African Countries from the UN in OSM wiki

Northern_Africa = (['Algeria', 'Egypt', 'Libya', 'Morocco', 'Tunisia', 'Western Sahara'], 'Northern_Africa')

Eastern_Africa = ([
    'Burundi',
    'Comoros',
    'Djibouti',
    'Eritrea',
    'Ethiopia',
    'Kenya',
    'Madagascar',
    'Malawi',
    'Mauritius',
    #Mayotte,
    'Mozambique',
    'Réunion',
    'Rwanda',
    'Somalia',
    'Sudan',
    'Uganda',
    'Tanzania',
    'Zambia',
    'Zimbabwe'], 'Eastern_Africa')
    
Middle_Africa = ([
    'Angola',
    'Cameroon',
    'Central African Republic',
    'Chad',
    'Congo-Brazzaville',
    'Democratic Republic of the Congo', # Democratic Republic of the Congo needs to be used, not Congo-Kinshasa
    'Equatorial Guinea',
    'Gabon',
    'Sao Tome and Principe'], 'Middle_Africa')
    
Southern_Africa = ([
    'Botswana',
    'Lesotho',
    'Namibia',
    'South Africa',
    'Swaziland'], 'Southern_Africa')
    
Western_Africa = ([
    'Benin',
    'Burkina Faso',
    'Cape Verde',
    'Côte d\'Ivoire',
    'Gambia',
    'Ghana',
    'Guinea',
    'Guinea-Bissau',
    'Liberia',
    'Mali',
    'Mauritania',
    'Niger',
    'Nigeria',
    'Senegal',
    'Sierra Leone',
    'Togo'], 'Western_Africa')

In [None]:
def region(gpd_df, regions_list):
    "Function adds a new col to a df based on region"
    arr =[]
    for region in regions_list:
        for country in region[0]:
            for i, row in gpd_df.iterrows():
                if row['country'] == country:
                    #row['region'] = region[1] 
                    #df_copy.iloc[i] = row
                    #region[1]
                    arr.append(region[1])
    gpd_df['region'] = arr
    
    return gpd_df

In [None]:
regions = [Northern_Africa, Western_Africa, Eastern_Africa, Southern_Africa, Middle_Africa]

#### If needed, change crs back for gpd 

In [None]:
# print(poly_gpd.crs)
# poly_gpd = poly_gpd.to_crs({'proj': 'moll', 'lon_0': 0, 'x_0': 0, 'y_0': 0, 'ellps': 'WGS84', 'units': 'm', 'no_defs': True})
# poly_gpd.head()

#### write files out

In [28]:
poly_gpd.head()

Unnamed: 0,osm_id,FID,country,osm_name,osm_type,lat,lon,clippoly,geometry,PopTot,aez_class,rain_zone
0,89369215,14962,Algeria,Tamanrasset,town,22.785454,5.532446,no,"POLYGON ((5.516667 22.816667, 5.533333 22.8166...",101883.0,Tropic - cool / arid,Arid
1,89980948,14115,Algeria,In Salah,town,27.195033,2.482613,no,"POLYGON ((2.466667 27.216667, 2.483333 27.2166...",19993.0,Subtropic - warm / arid,Arid
2,252600742,396,Algeria,Boumerdès,town,36.758882,3.470596,no,"POLYGON ((2.95 36.816667, 3.025 36.816667, 3.0...",4665640.0,Subtropic - warm / subhumid,Arid
3,253167052,633,Algeria,Thenia,town,36.724986,3.556935,no,"POLYGON ((3.691667 36.766667, 3.7 36.766667, 3...",90771.0,Subtropic - warm / subhumid,Arid
4,253167208,360,Algeria,Zemmouri,town,36.786406,3.601221,no,"POLYGON ((3.658333 36.825, 3.675 36.825, 3.675...",27491.0,Subtropic - warm / subhumid,Arid


In [29]:
# write files out

poly_gpd.to_file(erl_v2_data+file_out+".shp", driver='ESRI Shapefile')
poly_gpd.to_csv(erl_v2_data+file_out+'.csv')

### zero out missing data for aezraster


In [None]:
# aezraster = rasterio.open(data_interim+'ssa-aez09-raster.tif')

In [None]:
# aezraster.meta

In [None]:
# import numpy as np

# np.unique(aezraster.read(1))

In [None]:
# maskaez = aezraster.read(1)
# maskaez[maskaez <= 0] = 0

In [None]:
# aez_kwargs = aezraster.meta
# aez_kwargs

In [None]:
# Update kwargs (change in data type)
# kwargs.update(dtype=rasterio.float32, count = 1)

# with rasterio.open(data_interim+'ssa-aez09-raster-zeros.tif', 'w', **aez_kwargs) as dst:
#         dst.write_band(1, maskaez.astype(rasterio.float64))

In [None]:
# import numpy as np
# np.unique(maskaez)

#### Mask out zeros for rasters

If you haven't be sure to make a new raster where NaN and neg. values are set to zero

In [None]:
# kwargs = raster_in.meta
# kwargs

In [None]:
# make mask of nodata as zeros
# mask = raster_in.read(1)
# mask[mask <= 0] = 0

In [None]:
# Update kwargs (change in data type)
# kwargs.update(dtype=rasterio.float32, count = 1)

# with rasterio.open(data_interim+'GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0_Clip_zeros.tif', 'w', **kwargs) as dst:
#         dst.write_band(1, mask.astype(rasterio.float32))