# Zonal Stats

By Cascade Tuholske, June 2020

This notebook computes zonal statistics of gridded population rasters for the given geographies (polygons). <br><br>
**NOTE** CRS should be epsg:4326 for everything!

In [1]:
#### Dependencies
import os
from glob import glob

import numpy as np
import pandas as pd
import rasterio
import geopandas as gpd
from rasterstats import zonal_stats, gen_zonal_stats

In [2]:
#### File Paths & FNs
# Root data folder. Keep the trailing slash: other cells build paths by plain
# string concatenation (e.g. DATA_PATH+'raw/...').
# NOTE(review): absolute, machine-specific path -- must be edited to run elsewhere.
DATA_PATH = '/Users/cascade/Github/PopGridCompare/data/'

In [7]:
#### Load the study-area polygons (South Florida shapefile)
polys_fn = DATA_PATH+'raw/South_FL/study_area_wgs84.shp'
polys = gpd.read_file(polys_fn)
col = 'GEOID' # id column used to join the zonal stats back onto the polygons
fn_out = DATA_PATH+'interim/South_FL_stats.shp' # output shapefile; update for each study area

In [8]:
# Keep only the geometry and the id column; be sure `col` names the right
# id / admin level for the shapefile that was loaded
polys = polys[['geometry', col]]

In [11]:
# Get tif files
# Build the pattern from DATA_PATH so the search folder stays in step with the
# other paths in this notebook instead of repeating the absolute path.
rst_fns = glob(DATA_PATH+'interim/*_matched.tif')
rst_fns

['/Users/cascade/Github/PopGridCompare/data/interim/GHS15_matched.tif',
 '/Users/cascade/Github/PopGridCompare/data/interim/LS15_matched.tif',
 '/Users/cascade/Github/PopGridCompare/data/interim/GPWv4_matched.tif',
 '/Users/cascade/Github/PopGridCompare/data/interim/ESRI16_matched.tif',
 '/Users/cascade/Github/PopGridCompare/data/interim/WP16_matched.tif']

In [12]:
def zone_loop(polys_in, rst_list, stats_type, id_col=None):
    """ Function loops through rasters, calcs zonal_stats and returns stats as a data frame.

    For every raster the statistic is computed per polygon and appended to a copy
    of polys_in as a column named '<dataset>_<stats_type>', where <dataset> is the
    raster file name with the '_matched.tif' suffix removed.

    Args:
        polys_in = polygons (must contain the id column)
        rst_list = list of paths & fns of rasters
        stats_type = a single stats type for each polygon (see zonal stats), e.g. 'sum'
        id_col = column used to join the stats back onto the polygons; defaults to
                 the notebook-level `col`. It should identify each polygon uniquely,
                 otherwise the inner merge repeats rows.

    Returns:
        copy of polys_in with one extra column per raster

    Raises:
        KeyError if the id column is not a column of polys_in
    """

    # Fall back to the notebook-level `col` so existing three-argument calls keep working
    key = col if id_col is None else id_col
    if key not in polys_in.columns:
        raise KeyError("id column '{}' not found in polys_in".format(key))

    # copy polys to write out
    polys_out = polys_in.copy()

    for rst in rst_list:

        # Get data name from the file name alone, so it does not matter which
        # folder the raster lives in
        data = os.path.basename(rst).split('_matched.tif')[0]
        print('Started', data)
        stat_col = data+'_'+stats_type

        # Run zonal stats
        zs_feats = zonal_stats(polys_in, rst, stats=stats_type, geojson_out=True)
        zgdf = gpd.GeoDataFrame.from_features(zs_feats, crs=polys_in.crs)

        # Rename columns and merge
        zgdf = zgdf.rename(columns={stats_type: stat_col})
        polys_out = polys_out.merge(zgdf[[key, stat_col]], on = key, how = 'inner')

    return polys_out


In [13]:
# Run zonal stats loop: adds one '<dataset>_sum' column per raster in rst_fns
polys_sum = zone_loop(polys, rst_fns, 'sum')
polys_sum.head()

Started GHS15
Started LS15
Started GPWv4
Started ESRI16
Started WP16


Unnamed: 0,geometry,GEOID,GHS15_sum,LS15_sum,GPWv4_sum,ESRI16_sum,WP16_sum
0,POLYGON ((-81.65759818781882 28.49245932407797...,1209593198,193868.584355,248285.0,195620.4375,231276.0,197838.453125
1,POLYGON ((-81.26431207196599 28.44867332377709...,1209590910,38140.738175,30531.0,38790.6875,49513.0,40333.136719
2,"POLYGON ((-81.65740018980451 28.5531833386954,...",1209593705,99218.275478,89858.0,100557.328125,112560.0,101692.03125
3,POLYGON ((-81.49881514576593 28.61414535753242...,1209592522,612005.965026,673996.0,617417.9375,704152.0,630493.1875
4,POLYGON ((-81.70939512676496 25.96409570166286...,1202191027,16113.665862,14625.0,16167.994141,15024.0,17052.830078


In [20]:
#### merge
# Re-read the full shapefile to recover the attribute columns that were dropped
# when polys was subset to geometry + id
polys_fn = DATA_PATH+'raw/South_FL/study_area_wgs84.shp'
polys = gpd.read_file(polys_fn)

# Drop the stats table's geometry by name (not by column position) so only one
# geometry column survives the merge, and join on `col` -- the same id column
# the stats were computed with -- rather than a hard-coded name
gpd_out = polys.merge(polys_sum.drop(columns='geometry'), on = col, how = 'inner')

In [21]:
# Preview the merged table: original shapefile attributes plus the *_sum columns
gpd_out.head()

Unnamed: 0,STATEFP,COUNTYFP,COUSUBFP,COUSUBNS,GEOID,NAME,NAMELSAD,LSAD,CLASSFP,MTFCC,...,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,GHS15_sum,LS15_sum,GPWv4_sum,ESRI16_sum,WP16_sum
0,12,95,93198,1935935,1209593198,Southwest Orange,Southwest Orange CCD,22,Z5,G4040,...,420104600.0,60596528.0,28.4259659,-81.5373704,POLYGON ((-81.65759818781882 28.49245932407797...,193868.584355,248285.0,195620.4375,231276.0,197838.453125
1,12,95,90910,1935752,1209590910,East Orange,East Orange CCD,22,Z5,G4040,...,763838400.0,28292027.0,28.4749322,-81.0466829,POLYGON ((-81.26431207196599 28.44867332377709...,38140.738175,30531.0,38790.6875,49513.0,40333.136719
2,12,95,93705,1935977,1209593705,Winter Garden-Ocoee,Winter Garden-Ocoee CCD,22,Z5,G4040,...,86867910.0,13063092.0,28.5647852,-81.5431604,"POLYGON ((-81.65740018980451 28.5531833386954,...",99218.275478,89858.0,100557.328125,112560.0,101692.03125
3,12,95,92522,1935881,1209592522,Orlando,Orlando CCD,22,Z5,G4040,...,495117800.0,45963649.0,28.5003242,-81.3643411,POLYGON ((-81.49881514576593 28.61414535753242...,612005.965026,673996.0,617417.9375,704152.0,630493.1875
4,12,21,91027,1935761,1202191027,Everglades,Everglades CCD,22,Z5,G4040,...,2905216000.0,259425143.0,25.9463237,-81.2859612,POLYGON ((-81.70939512676496 25.96409570166286...,16113.665862,14625.0,16167.994141,15024.0,17052.830078


In [22]:
#### Save it out
gpd_out = gpd.GeoDataFrame(polys_sum)
gpd_out.to_file(fn_out)

# Old Code

In [None]:
#### Functions
def zonal_func(polys_in, rst_in, stats_type, save, fn_out = None):
    """ Runs zonal stats on a set of polygons for a given raster, see rasterstats for stats type.
    Returns geodata frame
    Args:
        polys_in = polygons as a shape file read into memory
        rst_in = path to raster file to run zonal stats on
        stats_type = stats type for each polygon (see zonal stats)
        save = True will save out a file
        fn_out = file name and path to save out shape files; required when save is True
    Raises:
        ValueError if save is set but fn_out was not given
    """

    # Fail fast, before the zonal stats run, if there is nowhere to save to
    if save and fn_out is None:
        raise ValueError('fn_out must be provided when save is True')

    # Run Zonal Stats & Set to gpd df
    zs_feats = zonal_stats(polys_in, rst_in, stats=stats_type, geojson_out=True)
    zgdf = gpd.GeoDataFrame.from_features(zs_feats, crs=polys_in.crs)

    if save:
        zgdf.to_file(fn_out)

    return zgdf

In [None]:
#### Run Zonal Stats
# NOTE(review): the pattern matches every .tif in interim/ and glob does not
# guarantee an order, so rst_fns[0] is not necessarily the LS15 raster that the
# output file name suggests -- confirm before re-using this cell.
# NOTE(review): this cell rebinds the notebook-level rst_fns and fn_out.
rst_fns = glob('/Users/cascade/Github/PopGridCompare/data/interim/*.tif')
fn_out = DATA_PATH+'interim/NGA_2_LS15.shp'
rst_fn = rst_fns[0]
zstats = zonal_func(polys, rst_fn, 'sum', save = True, fn_out =fn_out)