# Zonal Stats

The objective of this notebook is to calculate zonal statistics for a raster over a given set of polygons:

1. Total population
2. Categorical values

### Dependencies

In [1]:
from rasterstats import zonal_stats
import rasterio
import geopandas as gpd
import operator

### Functions

In [None]:
def zone_stat(raster, band, polygon, nodata=0, stats='sum'):
    """
    Compute zonal statistics of one raster band for each polygon.

    Parameters
    ----------
    raster : object exposing ``read(band)`` and ``meta['transform']``
        (the notebook passes a dataset returned by ``rasterio.open``).
    band : band index handed to ``raster.read``.
    polygon : vector features accepted by ``zonal_stats``
        (the notebook passes a GeoDataFrame).
    nodata : raster value treated as missing. Defaults to 0, the value
        that used to be hard-coded here.
    stats : statistic(s) requested from ``zonal_stats``. Defaults to
        'sum', the value that used to be hard-coded here.

    Returns
    -------
    The result of ``zonal_stats`` unchanged.
    """
    # Separate name so the ``band`` argument is not overwritten by the array.
    band_values = raster.read(band)
    return zonal_stats(
        polygon,
        band_values,
        affine=raster.meta['transform'],
        nodata=nodata,
        stats=stats,
    )

In [None]:
def zone_mode(raster, band, polygon, category_map=None, nodata=None):
    """
    Run a categorical zonal_stats of one band of a classified raster
    for every polygon.

    Despite the name, the mode is not selected here: the categorical
    result of ``zonal_stats`` is returned as-is and the caller picks the
    most frequent class from it.

    Parameters
    ----------
    raster : object exposing ``read(band)`` and ``meta['transform']``
        (the notebook passes a dataset returned by ``rasterio.open``).
    band : band index handed to ``raster.read``.
    polygon : vector features accepted by ``zonal_stats``
        (the notebook passes a GeoDataFrame).
    category_map : optional dict passed to ``zonal_stats`` as
        ``category_map``. When omitted, the notebook-level ``cmap`` is used.
    nodata : optional raster value treated as missing. The default
        (None) is passed straight through to ``zonal_stats``.

    Returns
    -------
    The result of ``zonal_stats`` unchanged.
    """
    if category_map is None:
        # Looked up at call time, so ``cmap`` may be defined in a later cell.
        category_map = cmap

    class_values = raster.read(band)
    return zonal_stats(
        polygon,
        class_values,
        affine=raster.meta['transform'],
        nodata=nodata,
        categorical=True,
        category_map=category_map,
    )

In [None]:
# Raster class code -> class label; passed to zonal_stats as category_map.
cmap = {
    # Temperate
    101: 'Temperate / arid',
    102: 'Temperate / Semi-arid',
    103: 'Temperate / sub-humid',
    104: 'Temperate / humid',
    # Subtropic, warm
    211: 'Subtropic - warm / arid',
    212: 'Subtropic - warm / semiarid',
    213: 'Subtropic - warm / subhumid',
    214: 'Subtropic - warm / humid',
    # Subtropic, cool
    221: 'Subtropic - cool / arid',
    222: 'Subtropic - cool / semiarid',
    223: 'Subtropic - cool / subhumid',
    224: 'Subtropic - cool / humid',
    # Tropic, warm
    311: 'Tropic - warm / arid',
    312: 'Tropic - warm / semiarid',
    313: 'Tropic - warm / subhumid',
    314: 'Tropic - warm / humid',
    # Tropic, cool
    321: 'Tropic - cool / arid',
    322: 'Tropic - cool / semiarid',
    323: 'Tropic - cool / subhumid',
    324: 'Tropic - cool / humid',
    # Boreal
    400: 'Boreal',
}
    
    

### Data In

In [None]:
# Folder outside the repository.
downloads = '/Users/cascade/Downloads/'

# Repository data folders.
data_interim = '/Users/cascade/Github/NTL/data/interim/'
data_raw = '/Users/cascade/Github/NTL/data/raw/'

# temp_data folder and its sub-folders.
data_temp = '/Users/cascade/Github/NTL/temp_data/'
erl_data = '/Users/cascade/Github/NTL/temp_data/ERL_data/'
ms_data = '/Users/cascade/Github/NTL/temp_data/MS_Data/'

In [None]:
# Polygon layer: path relative to data_temp, with the '.shp' extension added on read.
poly_file = 'ERL_data/GHS_POP_GPW42000_urbanmerge'
poly_gpd = gpd.read_file('{}{}.shp'.format(data_temp, poly_file))

In [None]:
# Stem for output files: the polygon stem with a '_PopTot' suffix.
file_out = '{}_PopTot'.format(poly_file)

In [None]:
# Use the "zeros" raster in the analysis because it gets rid of the negative
# values that are used as NaN:
# in GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip_zeros, NaN and negative
# values have been changed to zeros, and thus
# GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip_zeros and the 2015 version
# are OK to use (noted 2019-02-21).


zeros_file = data_interim+'GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip_zeros.tif'

# Alternative kept for reference: the raw GHS-POP raster.
#ghs_path = '/Users/cascade/Github/NTL/data/raw/ghs-pop/GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0/'
#zeros_file = ghs_path+'GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0.tif'


#raster = rasterio.open(raster_in)

In [None]:
# Open the population raster; the dataset is not closed here because
# later cells read from it.
raster_in = rasterio.open(zeros_file)

#### Reproject polygons if needed

In [None]:
# Display the raster's CRS. Original note on the value seen here:
# CRS({'proj': 'moll', 'lon_0': 0, 'x_0': 0, 'y_0': 0, 'ellps': 'WGS84', 'units': 'm', 'no_defs': True})
# For reprojection the above can be written as {'init': 'esri:54009'}
raster_in.meta['crs']

In [None]:
# Check whether poly_gpd already has a CRS
print(poly_gpd.crs)

# Assign the CRS explicitly; this replaces whatever value was printed above.
# Check with QGIS when in doubt.
poly_gpd.crs = {'init': 'epsg:4326'}

print(poly_gpd.crs)

In [None]:
# Preview the first rows of the polygon table.
poly_gpd.head()

In [None]:
# Reproject the polygons to ESRI:54009, then preview the result

poly_gpd = poly_gpd.to_crs({'init': 'esri:54009'})
poly_gpd.head()

#### Mask out zeros for rasters

If you haven't already, be sure to make a new raster in which NaN and negative values are set to zero.

In [None]:
# kwargs = raster_in.meta
# kwargs

In [None]:
# make mask of nodata as zeros
# mask = raster_in.read(1)
# mask[mask <= 0] = 0

In [None]:
# Update kwargs (change in data type)
# kwargs.update(dtype=rasterio.float32, count = 1)

# with rasterio.open(data_interim+'GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0_Clip_zeros.tif', 'w', **kwargs) as dst:
#         dst.write_band(1, mask.astype(rasterio.float32))

### Analysis

In [None]:
# Zonal statistics of band 1 of the population raster for every polygon.
zonalstats_towns = zone_stat(raster=raster_in, band=1, polygon=poly_gpd)

In [None]:
# zonalstats_towns comes from zone_stat(...), which requests the 'sum'
# statistic. Pull that value out of each result and assign positionally,
# rather than wrapping the dicts in a GeoDataFrame and relying on index
# alignment during column assignment.
poly_gpd['PopTot'] = [zone['sum'] for zone in zonalstats_towns]
poly_gpd

In [None]:
# Sanity check: list polygons whose PopTot is negative.

neg_df = poly_gpd.loc[poly_gpd['PopTot'] < 0]
neg_df

In [None]:
# Polygons whose PopTot is missing.
null = poly_gpd.loc[poly_gpd['PopTot'].isnull()]
null

### zero out missing data for aezraster


In [None]:
# aezraster = rasterio.open(data_interim+'ssa-aez09-raster.tif')

In [None]:
# aezraster.meta

In [None]:
# import numpy as np

# np.unique(aezraster.read(1))

In [None]:
# maskaez = aezraster.read(1)
# maskaez[maskaez <= 0] = 0

In [None]:
# aez_kwargs = aezraster.meta
# aez_kwargs

In [None]:
# Update kwargs (change in data type)
# kwargs.update(dtype=rasterio.float32, count = 1)

# with rasterio.open(data_interim+'ssa-aez09-raster-zeros.tif', 'w', **aez_kwargs) as dst:
#         dst.write_band(1, maskaez.astype(rasterio.float64))

In [None]:
# import numpy as np
# np.unique(maskaez)

In [None]:
# Open the zero-filled AEZ raster; the dataset is not closed here because
# later cells read from it.
aezraster_zeros = rasterio.open(data_interim+'ssa-aez09-raster-zeros.tif')


In [None]:
# Display the raster metadata.
aezraster_zeros.meta

In [None]:
# If needed, change the polygons' CRS back to EPSG:4326.
# NOTE(review): presumably this is meant to match the AEZ raster's CRS;
# confirm against aezraster_zeros.meta before running.

print(poly_gpd.crs)
poly_gpd = poly_gpd.to_crs({'init': 'epsg:4326'})
poly_gpd.head()

In [None]:
# Categorical zonal statistics of band 1 of the AEZ raster for every polygon.
aez_class = zone_mode(raster=aezraster_zeros, band=1, polygon=poly_gpd)

In [None]:
# Reduce each polygon's categorical result (a dict of class -> count) to the
# class with the highest count. An empty dict is labelled 'NoClass'. On a tie
# the first class in the dict's iteration order wins, as before.
cat = [
    max(counts, key=counts.get) if counts else 'NoClass'
    for counts in aez_class
]

In [None]:
# Number of class labels collected in cat.
len(cat)

In [None]:
# cat is a plain list of labels, so assign it directly (positionally).
# Wrapping it in a GeoDataFrame first added nothing and made the assignment
# depend on index alignment.
poly_gpd['aez_class'] = cat
poly_gpd.head(6)

In [None]:
# write files out

# poly_gpd.to_file(data_temp+file_out+".shp", driver='ESRI Shapefile')
# poly_gpd.to_csv(data_temp+file_out+'.csv')

# Attempt at some graphics

In [None]:
# Towns layer, read from the MS_Data folder.
towns = 'AFR_PPP_2015_adj_v2_pop_towns.shp'
towns_gpd = gpd.read_file('{}{}'.format(ms_data, towns))
towns_gpd.shape

In [None]:
# Cities layer, read from the MS_Data folder.
cities = 'AFR_PPP_2015_adj_v2_pop.shp'
cities_gpd = gpd.read_file('{}{}'.format(ms_data, cities))
cities_gpd.shape

In [None]:
# NOTE(review): `towns` is the file-name string, not the GeoDataFrame
# (`towns_gpd`), so this displays `str` — confirm which one was intended.
type(towns)

In [None]:
# pandas is imported in this cell rather than in the Dependencies cell.
import pandas as pd

# Stack the town and city tables into one frame and show its shape.
urban_concat = pd.concat([towns_gpd, cities_gpd])
urban_concat.shape

In [None]:
# Keep only rows whose PopTot is at most 250,000,000, then count them.
test_df = urban_concat.loc[urban_concat['PopTot'] <= 250000000]
len(test_df)

In [None]:
# keep=False drops every row whose PopTot value occurs more than once.
test_df_drop = test_df.drop_duplicates(subset='PopTot', keep=False)
len(test_df_drop)

In [None]:
# NOTE(review): `sns` is not imported anywhere in the visible notebook —
# presumably `import seaborn as sns` is required before this cell runs.
# Box plot of PopTot per country on a log-scaled x axis.
ax = sns.boxplot(x = 'PopTot', y = 'country', data = test_df)
ax.set(xscale="log")

In [None]:
# Import the pyplot submodule explicitly: a bare `import matplotlib` does not
# guarantee that `matplotlib.pyplot` is loaded. This still binds `matplotlib`.
import matplotlib.pyplot

#from matplotlib.pyplot import figure
#figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
#plt.savefig('WP2015.png', dpi=700,  bbox_inches='tight')

# NOTE(review): `sns` is not imported anywhere in the visible notebook —
# presumably `import seaborn as sns` is required before this cell runs.
# Box plot of PopTot per country (de-duplicated rows) on a log-scaled x axis.
ax = sns.boxplot(x = 'PopTot', y = 'country', data = test_df_drop)
ax.set(xscale="log")

# Resize the current figure and write it to disk.
fig = matplotlib.pyplot.gcf()
fig.set_size_inches(18.5, 10.5)
fig.savefig('test2png.png', dpi=100)

In [None]:
import matplotlib.pyplot as plt

# Save before showing, and save the figure that owns `ax` rather than the
# pyplot "current figure": after show() (or in a fresh notebook cell) the
# current figure can be a new, empty one, which would write a blank image.
ax.get_figure().savefig('WP2015.png', bbox_inches='tight')
plt.show()

In [None]:
# Save the figure that owns `ax` to WP2015.png with default savefig options.
fig = ax.get_figure()
fig.savefig('WP2015.png')

### Count

In [None]:
# Shapefile names for the city and town layers used in the Count section.
city_file = 'LS15_w001001_Clip_1500c300_polyoverlap.shp'
town_file = 'LS15_w001001_Clip_1500c300_polyoverlap_towns.shp'

In [None]:
# Read both layers from the downloads folder.
poly_gpd_city = gpd.read_file('{}{}'.format(downloads, city_file))
poly_gpd_town = gpd.read_file('{}{}'.format(downloads, town_file))

In [None]:
# (rows, columns) of the city layer.
poly_gpd_city.shape

In [None]:
# (rows, columns) of the town layer.
poly_gpd_town.shape

In [None]:
# Build a key per town row: FID rendered as text, followed by osm_type.
town_fid_text = poly_gpd_town['FID'].astype(str)
poly_gpd_town['Unique'] = town_fid_text + poly_gpd_town['osm_type']
poly_gpd_town.head()

In [None]:
# Build a key per city row: FID rendered as text, followed by osm_type.
city_fid_text = poly_gpd_city['FID'].astype(str)
poly_gpd_city['Unique'] = city_fid_text + poly_gpd_city['osm_type']
poly_gpd_city.head()

In [None]:
import pandas as pd
# Stack the city and town layers; this rebinds `urban_concat`, replacing
# the frame built in the graphics section.
urban_concat = pd.concat([poly_gpd_city, poly_gpd_town])


In [None]:
# Preview the first six rows of the combined layer.
urban_concat.head(6)

In [None]:
# keep=False drops every row whose key value occurs more than once.
# NOTE(review): no 'test' column is created anywhere in the visible notebook,
# while the 'Unique' key built on both layers is never used — presumably this
# should de-duplicate on 'Unique'. Confirm against the shapefile schema.
test_df_drop = urban_concat.drop_duplicates('test', keep=False)


In [None]:
# (rows, columns) remaining after de-duplication.
test_df_drop.shape

In [None]:
# Number of remaining rows per osm_type value.
test_df_drop['osm_type'].value_counts()

In [None]:
# Write the de-duplicated layer to 'test.shp' in the downloads folder.
test_df_drop.to_file(downloads+'test.shp', driver='ESRI Shapefile')
