# Zonal Stats

The objective of this notebook is to calculate zonal statistics for a raster, given a set of polygons:

1. Total population
2. Categorical values

### Dependencies

In [1]:
import operator
import os

import geopandas as gpd
import rasterio
from rasterstats import zonal_stats

### Functions

In [2]:
def zone_stat(raster, band, polygon, nodata=0, stats='sum'):
    """
    Calculate zonal statistics of one raster band for each polygon.

    Parameters
    ----------
    raster : open rasterio dataset
        Must provide ``read(band)`` and ``meta['transform']``.
    band : int
        Index of the band to read (passed straight to ``raster.read``).
    polygon : vector input accepted by ``rasterstats.zonal_stats``
        The notebook passes a GeoDataFrame of polygons.
    nodata : number, optional
        Pixel value treated as missing. Defaults to 0, the value this
        function previously hard-coded.
    stats : str or list, optional
        Statistic(s) to compute. Defaults to 'sum', the value this
        function previously hard-coded.

    Returns
    -------
    The result of ``rasterstats.zonal_stats`` for the given inputs.
    """
    # Use a distinct local name so the function name is not shadowed.
    band_values = raster.read(band)
    return zonal_stats(
        polygon,
        band_values,
        affine=raster.meta['transform'],
        nodata=nodata,
        stats=stats,
    )

In [3]:
def zone_mode(raster, band, polygon, category_map=None, nodata=None):
    """
    Count the pixels of each class inside every polygon of a classified raster.

    Despite its name, this function does not pick the mode itself: it returns
    the categorical result of ``rasterstats.zonal_stats``, from which the
    caller selects the most frequent class.

    Parameters
    ----------
    raster : open rasterio dataset
        Must provide ``read(band)`` and ``meta['transform']``.
    band : int
        Index of the band to read (passed straight to ``raster.read``).
    polygon : vector input accepted by ``rasterstats.zonal_stats``
        The notebook passes a GeoDataFrame of polygons.
    category_map : dict, optional
        Mapping from raster value to class label. When omitted, the
        notebook-level ``cmap`` dictionary is used (the previous behaviour).
    nodata : number, optional
        Pixel value treated as missing. Defaults to None, which leaves
        the choice to ``zonal_stats`` exactly as before.

    Returns
    -------
    The result of ``rasterstats.zonal_stats`` in categorical mode.
    """
    if category_map is None:
        # Looked up at call time, so the cell defining `cmap` may run after
        # the cell defining this function.
        category_map = cmap

    class_band = raster.read(band)
    return zonal_stats(
        polygon,
        class_band,
        affine=raster.meta['transform'],
        nodata=nodata,
        categorical=True,
        category_map=category_map,
    )

In [4]:
# Lookup from integer raster class code to a human-readable label.
# zone_mode hands this to zonal_stats as its category_map.
cmap = {
    # 1xx: temperate
    101: 'Temperate / arid',
    102: 'Temperate / Semi-arid',
    103: 'Temperate / sub-humid',
    104: 'Temperate / humid',
    # 21x: subtropic, warm
    211: 'Subtropic - warm / arid',
    212: 'Subtropic - warm / semiarid',
    213: 'Subtropic - warm / subhumid',
    214: 'Subtropic - warm / humid',
    # 22x: subtropic, cool
    221: 'Subtropic - cool / arid',
    222: 'Subtropic - cool / semiarid',
    223: 'Subtropic - cool / subhumid',
    224: 'Subtropic - cool / humid',
    # 31x: tropic, warm
    311: 'Tropic - warm / arid',
    312: 'Tropic - warm / semiarid',
    313: 'Tropic - warm / subhumid',
    314: 'Tropic - warm / humid',
    # 32x: tropic, cool
    321: 'Tropic - cool / arid',
    322: 'Tropic - cool / semiarid',
    323: 'Tropic - cool / subhumid',
    324: 'Tropic - cool / humid',
    # 400: boreal
    400: 'Boreal',
}
    
    

### Analysis

In [5]:
# Directory configuration.
# Everything is derived from two roots so the notebook can run on another
# machine by setting the NTL_ROOT / NTL_DOWNLOADS environment variables.
# When they are unset, the original locations are used unchanged.
ntl_root = os.environ.get('NTL_ROOT', '/Users/cascade/Github/NTL/')
downloads_root = os.environ.get('NTL_DOWNLOADS', '/Users/cascade/Downloads/')

# Later cells build file paths with plain string concatenation
# (e.g. erl_data + poly_file), so every directory must end with a path
# separator; joining with a trailing '' guarantees that.
data_raw = os.path.join(ntl_root, 'data', 'raw', '')
data_temp = os.path.join(ntl_root, 'temp_data', '')
data_interim = os.path.join(ntl_root, 'data', 'interim', '')
ms_data = os.path.join(data_temp, 'MS_Data', '')
erl_data = os.path.join(data_temp, 'ERL_data', '')
downloads = os.path.join(downloads_root, '')

In [6]:
# Polygon layer used as the zones for every zonal statistic below.
poly_file = 'GHS_POP_GPW42015_urbanmerge.shp'
poly_gpd = gpd.read_file(f'{erl_data}{poly_file}')

In [7]:
# Full path of the input population raster.
raster_in = data_interim+'GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0_Clip_4326.tif'
# File name (inside data_interim) for the zero-filled copy written further down.
zeros_file = 'GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0_Clip_4326_zeros.tif'
# Open the input raster for reading; the dataset handle stays open for the later cells.
raster = rasterio.open(raster_in)

In [8]:
# Base name (no directory, no extension) shared by the output shapefile and CSV.
out_prefix = 'GHS_POP_GPW42015'
out_suffix = '_final20190122'
file_out = out_prefix + out_suffix

In [9]:
# Inspect the raster metadata (driver, dtype, nodata, size, band count, CRS, transform)
raster.meta

{'driver': 'GTiff',
 'dtype': 'float32',
 'nodata': None,
 'width': 10002,
 'height': 7290,
 'count': 1,
 'crs': CRS({'init': 'epsg:4326'}),
 'transform': Affine(0.009928515009156095, 0.0, -28.58594819768976,
        0.0, -0.009928515009156095, 37.4464883503564)}

In [10]:
# Read band 1 and overwrite every non-positive pixel with 0, in place.
# Note: despite its name, `mask` holds the cleaned pixel values, not a
# boolean mask; it is written out as the zero-filled raster below.
mask = raster.read(1)
mask[mask <= 0] = 0

In [11]:
# Start the output profile from the source raster's metadata; it is passed
# as keyword arguments when the zero-filled GeoTIFF is created.
kwargs = raster.meta
kwargs

{'driver': 'GTiff',
 'dtype': 'float32',
 'nodata': None,
 'width': 10002,
 'height': 7290,
 'count': 1,
 'crs': CRS({'init': 'epsg:4326'}),
 'transform': Affine(0.009928515009156095, 0.0, -28.58594819768976,
        0.0, -0.009928515009156095, 37.4464883503564)}

In [12]:
# Force a single float32 band in the output profile, then write the
# zero-filled array as band 1 of a new GeoTIFF in data_interim.
kwargs.update(dtype=rasterio.float32, count=1)

zeros_path = data_interim + zeros_file
with rasterio.open(zeros_path, 'w', **kwargs) as dst:
    dst.write(mask.astype(rasterio.float32), 1)

In [13]:
# Open the zero-filled raster (data_interim + zeros_file) for reading.
raster_zeros = rasterio.open(data_interim+zeros_file)

In [14]:
# Zonal statistics of band 1 of the zero-filled raster for the polygons in poly_gpd.
zonalstats_towns = zone_stat(raster_zeros, 1, poly_gpd)

In [15]:
# Attach the per-polygon population sum as a plain column.
# zonalstats_towns holds one {'sum': value} record per polygon, in polygon
# order, so the values are assigned positionally. This avoids building a
# throwaway geometry-less GeoDataFrame and relying on implicit
# DataFrame-to-column coercion and index alignment.
poly_gpd['PopTot'] = [zone['sum'] for zone in zonalstats_towns]
poly_gpd.head(6)

Unnamed: 0,osm_id,FID,country,city,osm_type,lat,lon,geometry,PopTot
0,89369215,16863,Algeria,Tamanrasset,town,22.785454,5.532446,"POLYGON ((5.523842554283844 22.82143812548671,...",97099.29
1,252600742,187,Algeria,Boumerdès,town,36.758882,3.470596,"POLYGON ((3.879862753261432 36.92634664178389,...",6772831.0
2,253167052,187,Algeria,Thenia,town,36.724986,3.556935,"POLYGON ((3.879862753261432 36.92634664178389,...",6772831.0
3,253167208,187,Algeria,Zemmouri,town,36.786406,3.601221,"POLYGON ((3.879862753261432 36.92634664178389,...",6772831.0
4,253291208,1530,Algeria,Lakhdaria,town,36.563944,3.596907,"POLYGON ((3.52630149841732 36.592046118739, 3....",52793.11
5,253292622,187,Algeria,Draâ Ben Khedda,town,36.733332,3.958769,"POLYGON ((3.879862753261432 36.92634664178389,...",6772831.0


In [16]:
# Sanity check: list any polygons whose summed population is negative.
# Only negative totals are selected here; zero totals are not.

neg_df = poly_gpd.loc[poly_gpd['PopTot'] < 0]
neg_df

Unnamed: 0,osm_id,FID,country,city,osm_type,lat,lon,geometry,PopTot


### Zero out missing data for the AEZ raster


In [None]:
# aezraster = rasterio.open(data_interim+'ssa-aez09-raster.tif')

In [None]:
# aezraster.meta

In [None]:
# import numpy as np

# np.unique(aezraster.read(1))

In [None]:
# maskaez = aezraster.read(1)
# maskaez[maskaez <= 0] = 0

In [None]:
# aez_kwargs = aezraster.meta
# aez_kwargs

In [None]:
# Update kwargs (change in data type)
# kwargs.update(dtype=rasterio.float32, count = 1)

# with rasterio.open(data_interim+'ssa-aez09-raster-zeros.tif', 'w', **aez_kwargs) as dst:
#         dst.write_band(1, maskaez.astype(rasterio.float64))

In [None]:
# import numpy as np
# np.unique(maskaez)

In [17]:
# Open the zero-filled AEZ raster from data_interim.
# NOTE(review): the cells that would write this file are commented out above,
# so this presumably relies on a previously generated copy — confirm it exists.
aezraster_zeros = rasterio.open(data_interim+'ssa-aez09-raster-zeros.tif')

In [18]:
# Inspect the AEZ raster metadata (dtype, nodata, size, CRS, transform).
aezraster_zeros.meta

{'driver': 'GTiff',
 'dtype': 'float64',
 'nodata': -9999.0,
 'width': 9720,
 'height': 9159,
 'count': 1,
 'crs': CRS({'init': 'epsg:4326'}),
 'transform': Affine(0.0083333333333333, 0.0, -27.174992029555,
        0.0, -0.0083333333333333, 38.53333449158769)}

In [None]:
# Per-polygon class pixel counts from band 1 of the AEZ raster.
# NOTE(review): this call does not specify a nodata value — confirm that
# missing pixels are excluded from the counts as intended.
aez_class = zone_mode(aezraster_zeros, 1, poly_gpd)

In [None]:
# For every polygon pick the class with the highest pixel count.
# A polygon with an empty count dictionary gets the placeholder 'NoClass';
# ties go to the class that appears first in the dictionary.
cat = [
    max(counts, key=counts.get) if counts else 'NoClass'
    for counts in aez_class
]

In [None]:
# Number of per-polygon class labels (one per entry of aez_class);
# presumably should match the number of rows in poly_gpd — verify.
len(cat)

In [None]:
# Attach the dominant class label of each polygon as a plain column.
# `cat` is already a flat list with one label per polygon, so it is assigned
# directly instead of being wrapped in a throwaway geometry-less
# GeoDataFrame and coerced back into a column.
poly_gpd['aez_class'] = cat
poly_gpd.head(6)

In [None]:
# Persist the enriched polygons twice under the same base name:
# once as an ESRI Shapefile and once as a CSV.

out_base = erl_data + file_out
poly_gpd.to_file(out_base + ".shp", driver='ESRI Shapefile')
poly_gpd.to_csv(out_base + '.csv')

# Attempt at some graphics

In [None]:
# Load the town polygons from ms_data and show (rows, columns).
towns = 'AFR_PPP_2015_adj_v2_pop_towns.shp'
towns_gpd = gpd.read_file(f'{ms_data}{towns}')
towns_gpd.shape

In [None]:
# Load the city polygons from ms_data and show (rows, columns).
cities = 'AFR_PPP_2015_adj_v2_pop.shp'
cities_gpd = gpd.read_file(f'{ms_data}{cities}')
cities_gpd.shape

In [None]:
# NOTE(review): `towns` is the shapefile name (a str), not the loaded table,
# so this shows `str`; `type(towns_gpd)` was probably intended — confirm or remove.
type(towns)

In [None]:
import pandas as pd

# Stack the town rows and the city rows into one table and show (rows, columns).
urban_concat = pd.concat([towns_gpd, cities_gpd])
urban_concat.shape

In [None]:
# Keep only the rows whose PopTot is at most 250,000,000, then count them.
pop_cap = 250000000
test_df = urban_concat.loc[urban_concat['PopTot'] <= pop_cap]
len(test_df)

In [None]:
# Remove every row whose PopTot value occurs more than once
# (keep=False discards all copies, not just the repeats), then count the rest.
test_df_drop = test_df.drop_duplicates(subset='PopTot', keep=False)
len(test_df_drop)

In [None]:
# seaborn is used as `sns` here but is not imported by any earlier cell,
# so this cell raised NameError on a fresh kernel; import it before use.
import seaborn as sns

# Distribution of PopTot per country, on a logarithmic x axis.
ax = sns.boxplot(x='PopTot', y='country', data=test_df)
ax.set(xscale="log")

In [None]:
# Import pyplot explicitly: a bare `import matplotlib` does not guarantee
# that the `matplotlib.pyplot` submodule is loaded. seaborn (`sns`) is not
# imported by any earlier cell either, so import it here as well.
import matplotlib.pyplot as plt
import seaborn as sns

# Create the figure at its final size up front and draw onto its axes,
# instead of resizing whatever the current pyplot figure happens to be.
fig, ax = plt.subplots(figsize=(18.5, 10.5))

# Distribution of PopTot per country (duplicates removed), log-scaled x axis.
sns.boxplot(x='PopTot', y='country', data=test_df_drop, ax=ax)
ax.set(xscale="log")

fig.savefig('test2png.png', dpi=100)

In [None]:
import matplotlib.pyplot as plt

# Save the boxplot through the figure that owns `ax`, and do it before
# showing. Calling plt.savefig() after plt.show() in a fresh cell writes
# pyplot's current figure, which is no longer the boxplot, so the image
# came out empty.
fig = ax.get_figure()
fig.savefig('WP2015.png', bbox_inches='tight')
plt.show()

In [None]:
# Fetch the figure that owns `ax` and save it as WP2015.png with default savefig settings.
fig = ax.get_figure()
fig.savefig('WP2015.png')

### Count

In [None]:
# Shapefile names for the count analysis; both are read from the `downloads` directory.
city_file = 'LS15_w001001_Clip_1500c300_polyoverlap.shp'
town_file = 'LS15_w001001_Clip_1500c300_polyoverlap_towns.shp'

In [None]:
# Load the city and town polygon layers from the downloads directory.
poly_gpd_city = gpd.read_file(f'{downloads}{city_file}')
poly_gpd_town = gpd.read_file(f'{downloads}{town_file}')

In [None]:
# (rows, columns) of the city layer.
poly_gpd_city.shape

In [None]:
# (rows, columns) of the town layer.
poly_gpd_town.shape

In [None]:
# Build a string key per town row: the FID (as text) followed by osm_type.
poly_gpd_town['Unique'] = poly_gpd_town['FID'].astype(str) + poly_gpd_town['osm_type']
poly_gpd_town.head()

In [None]:
# Build a string key per city row: the FID (as text) followed by osm_type.
poly_gpd_city['Unique'] = poly_gpd_city['FID'].astype(str) + poly_gpd_city['osm_type']
poly_gpd_city.head()

In [None]:
import pandas as pd
# Stack city rows and town rows into one table.
# Note: this rebinds `urban_concat`, which the graphics section also assigns.
urban_concat = pd.concat([poly_gpd_city, poly_gpd_town])


In [None]:
# Preview the first six rows of the combined table.
urban_concat.head(6)

In [None]:
# Keep only rows whose 'Unique' key (FID + osm_type) occurs exactly once;
# keep=False discards every copy of a repeated key.
# The original deduplicated on a column named 'test', which no cell in this
# notebook creates, while the freshly built 'Unique' key was never used.
test_df_drop = urban_concat.drop_duplicates(subset='Unique', keep=False)


In [None]:
# (rows, columns) remaining after dropping duplicates.
test_df_drop.shape

In [None]:
# Number of remaining rows per osm_type value.
test_df_drop['osm_type'].value_counts()

In [None]:
# Write the de-duplicated table to test.shp in the downloads directory as an ESRI Shapefile.
test_df_drop.to_file(downloads+'test.shp', driver='ESRI Shapefile')
