# CHIRTMax Poly

This is the cleaned version of cpt_UHI (see it for old code and more details). The goal is to build a routine that calculates the average CHIRTMax temperature for each city using the GHS-UCDB. I will compare these with the urban polygons I built for the ERL paper. 

by Cascade Tuholske 2019-08-15

### Dependencies

In [2]:
#import xarray 

import rasterio 
import numpy as np
import pandas as pd
import geopandas as gpd
from rasterstats import zonal_stats
from rasterio import features
import os
from ftplib import FTP
import xarray as xr

In [3]:
# File Paths <---- For testing use the chirts in the pop raster 
DATA_IN_GHS = '/Users/cascade/Github/PopRaster/data/raw/JRC/ghs-ucdb/'
DATA_IN_CHIRT = '/Users/cascade/Github/PopRaster/data/raw/CHIRT/'
DATA_OUT = '/Users/cascade/Github/PopRaster/data/interim/'

### LOOK AT CHIRTS RASTERS 

In [3]:
import os
import fnmatch
import re

# Walk the CHIRTS directory and print the name of every GeoTIFF in it
for fn in os.listdir(DATA_IN_CHIRT):
    # anything that is not a .tif is skipped
    if not fn.endswith('.tif'):
        continue
    print(fn)

CHIRTSmax.2016.07.tif
CHIRTSmax.1983.01.tif


### Turn Polygons into rasters

In [None]:
# File Names In
shp_fn = 'GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_0.shp'
rst_fn = 'CHIRTSmax.1983.01.tif'

# File Names Out ---> made with 1983 CHIRT as TEMPLATE
out_fn_ut = 'GHS_UCDB_Raster_untouched.tif'
out_fn_t = 'GHS_UCDB_Raster_Raster_touched.tif'

In [None]:
# Open the file with GeoPANDAS read_file
polys = gpd.read_file(DATA_IN_GHS+shp_fn)
rst = rasterio.open(DATA_IN_CHIRT+rst_fn)

In [None]:
def poly_to_raster (rst, polys, value, touched, out_fn, fill_value):
    """Function makes a raster from a list of polygons and writes it to disk
    
    Args:   rst = input raster already read in as a rasterio object to act as a template;
                  its meta data, band-1 array (shape and dtype) and transform are reused
            polys = input polygons already read in as a gpd dataframe
            value = name of the col in polys with the value to burn into raster
            touched = bool, if True all pixels touched (not centers) are burned into raster
            out_fn = out file name 
            fill_value = background value; every pixel is set to this before the polygons
                         are burned in, and it is recorded as the nodata value of the output
    
    Returns: None, the burned array is written to out_fn as band 1
    """

    meta = rst.meta.copy() # copy meta data from rst so the template's dict is untouched
    # the background of the output is fill_value, so declare it as nodata rather than
    # inheriting whatever nodata the template had; only band 1 is written, so count is 1
    meta.update(nodata=fill_value, count=1)

    out_arr = rst.read(1) # get an array with the template's shape and dtype to burn shapes
    out_arr.fill(fill_value) # revalue every pixel to the background value before burning in polygons
    
    # extract geom and values to burn; the loop variable gets its own name so it does
    # not shadow the `value` argument (the column name)
    shapes = ((geom, burn_value) for geom, burn_value in zip(polys['geometry'], polys[value]))
    
    # burn shapes into the array
    # NOTE(review): because `out=` is supplied, the background comes from out_arr; `fill`
    # appears to have no effect here - confirm against the rasterio docs
    burned = features.rasterize(shapes=shapes, fill=0, out=out_arr, transform=rst.transform, all_touched=touched)
    
    # write our raster to disk
    with rasterio.open(out_fn, 'w', **meta) as out:
        out.write(burned, 1)

In [None]:
# Test out function
# Function works, though we need to deal with pixels that are used in more than one city

out = DATA_OUT+out_fn_ut
poly_to_raster(rst, polys, 'ID_HDC_G0', False, out, -9999)

#### Burn Results

Function works, though we need to deal with pixels that overlap with more than one city
Big difference between touched and untouched rasterized outputs
Ask Chris and Kelly what they think

<img src="../../Screenshots/GHS-UCDB-Raster-TouchVUn.png">

### Find Avg Temp w/ Xarray

In [None]:
# Load a ChirtMax File & GHS Raster
chirt_fn = 'CHIRTSmax.1983.01.tif'
ghs_fn = 'GHS_UCDB_Raster_Raster_touched.tif' # <<<---- NOTE TOUCHED v UN_TOUCHED

chirt = rasterio.open(DATA_IN_CHIRT+chirt_fn)
ghs = rasterio.open(DATA_OUT+ghs_fn)


In [None]:
# check meta data
print('chirt meta')
print(chirt.meta)
print('ghs meta')
print(ghs.meta)

In [None]:
# Get arrays

chirt_arr = chirt.read(1)
ghs_arr = ghs.read(1)

In [None]:
# Make arrays into xarray DataArray

chirt_da = xr.DataArray(chirt_arr, dims = ['y', 'x']) # y and x are our 2-d labels
ghs_da = xr.DataArray(ghs_arr, dims = ['y', 'x'])

In [None]:
# Make xarray dataset

ds = xr.Dataset(data_vars = 
                    {'ghs' : (['y', 'x'], ghs_da),
                    'chirt' : (['y', 'x'], chirt_da),})

In [None]:
# Mask values from chirt that are ocean -9999

ds_mask = ds.where(ds.chirt != -9999, drop = False)

In [None]:
ds_mask

In [None]:
# Mask values that are not GHS polys

ds_mask = ds_mask.where(ds_mask.ghs > 0, drop = False)

In [None]:
ds_mask

In [None]:
# write out the CHIRT w/ the non-GHS pixels masked to a .tif file

out_arr = np.array(ds_mask.chirt) # get masked chirt array
meta = chirt.meta

def raster_write(meta, array, file_out):
    """ function to write out a single-band float32 raster file with an np array
    
    Args:   meta = raster meta data (dict of keyword arguments for rasterio.open);
                   it is copied, so the caller's dict is not modified
            array = np array to write, cast to float32 before writing
            file_out = file out path and name
    
    Returns: None, the array is written to file_out as band 1
    """
    
    # copy first: updating the caller's dict in place would silently change
    # the dtype / count of whatever else is still using that meta data
    kwargs = dict(meta)

    # Update kwargs (change in data type)
    kwargs.update(dtype=rasterio.float32, count = 1)

    with rasterio.open(file_out, 'w', **kwargs) as dst:
        dst.write(array.astype(rasterio.float32), 1)

raster_write(meta, out_arr, DATA_OUT+'CHIRTSmax.1983.01_GHSMaskv4.tif') 

#### Gut check

From Chirt_GHS_Mask_Test.tif, it looks like I am isolating the correct pixels in the chirt dataset to exclude ocean and include the GHS cities.

Recall this first group is using the 'touched' raster

Update 2019-08-19 **BE SURE TO FIRST MASK -9999 from the CHIRT raster, then from the GHS RASTER**

In [None]:
# find the mean CHIRT max for each GHS ID

avg = ds_mask.groupby('ghs').mean(xr.ALL_DIMS) # <--------------- double check this is the correct notation

In [None]:
# cords are the GHS side here, data variable are the chirt avg, type is a xarray dataset

avg

In [None]:
# turn GHS IDS and avg. CHIRTMax values into 1-D numpy arrays of equal length

avg_ID = np.array(avg.ghs)
avg_chirt = np.array(avg.chirt)

print(len(avg_ID))
print(len(avg_chirt))

In [None]:
# pair each GHS city ID with its mean CHIRTMax in a two-column DataFrame

df_avg = pd.DataFrame({'chirtMax': avg_chirt, 'ID_HDC_G0': avg_ID})

#### Merge avg temp back into the original GHS-UCDB polygons

In [None]:
# Open org. ghs-ucdb polys
shp_fn = 'GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_0.shp'
ghs_polys = gpd.read_file(DATA_IN_GHS+shp_fn)

In [None]:
# Note that 68 GHS polys don't have a chirtsMAX value likely because as poly ids are burned in pixels can only take
# on one value thus if a pixel contains more than one polygon only one value will be burned

len(ghs_polys)

In [None]:
# isolate col needed from original ghs-ucdb polygons

df_ghs = gpd.GeoDataFrame()
df_ghs['geometry'] = ghs_polys.geometry
df_ghs['ID_HDC_G0'] = ghs_polys.ID_HDC_G0

In [None]:
# rebuild the avg. CHIRTMax table (one row per GHS ID) that gets merged onto the polygons

df_avg = pd.DataFrame({'chirtMax': avg_chirt, 'ID_HDC_G0': avg_ID})

In [None]:
# merge the df

df_merge = pd.merge(df_ghs, df_avg, on='ID_HDC_G0', how = 'outer')

In [None]:
# write out the merged tmax polys and look in QGIS 
out_fn = 'GHS-CHIRTS-Poly-Test.shp'
df_merge.to_file(DATA_OUT+out_fn)

#### make polygons of dropped GHS w/o temp

- I need to figure out why 68 polygons are not getting captured in the raster.
- I think what happens is that when the GHS-ID values get burned into a pixel, if more than one GHS-UCDB poly is in a pixel, the first or second GHS-ID value gets burned in, but not both obv. because a pixel can only take on one value. 
- I am not sure what the solution is ...

UPDATE 2019-08-19 Not to worry about this

In [None]:
ids = df_avg.ID_HDC_G0

In [None]:
df_merge_drop = df_merge[np.isnan(df_merge.chirtMax)]

In [None]:
df_merge_drop.to_file(DATA_OUT+'GHS-CHIRTS-Poly-Test-MissingTMax.shp')

### Next Step is to build this out as a full script

Build this as a .py file ... eg a freakin' program. Write a damn computer program

CHECK W/ 1983.01: why do the POLYGONS have so few Avgs? It could be the ds_mask > 0 step and missing values


In [6]:
# Open files before loop
# Everything opened here is reused for every CHIRTS file processed later on

# Directories (absolute paths on the author's machine; adjust when running elsewhere)
CHIRT_DIR = '/Users/cascade/Github/PopRaster/data/raw/CHIRT/' # <<--- path to loop through
SHP_DIR = '/Users/cascade/Github/PopRaster/data/raw/JRC/ghs-ucdb/'
POLY_RST_DIR = '/Users/cascade/Github/PopRaster/data/interim/'
DATA_OUT = '/Users/cascade/Github/PopRaster/data/interim/' # same value as the DATA_OUT set near the top of the notebook

# Open Polygon Raster
# presumably the GHS-UCDB IDs rasterized by poly_to_raster with touched=True - verify
# NOTE(review): this dataset handle is not closed anywhere in the visible code
polyRst_fn = 'GHS_UCDB_Raster_Raster_touched.tif'
polyRst = rasterio.open(POLY_RST_DIR+polyRst_fn)

# Open the file with GeoPANDAS read_file
shp_fn = 'GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_0.shp'
shps = gpd.read_file(SHP_DIR+shp_fn)

In [11]:
# Isolate the SHP polygon columns to merge back in later:
# the geometry, the city ID used as merge key (ID_HDC_G0), and the CTR_MN_NM / P75 / P90 /
# P00 / P15 attributes (presumably country name and population by epoch - verify against
# the GHS-UCDB documentation)
# NOTE(review): building a fresh GeoDataFrame column by column may not carry over the CRS
# of shps - confirm before writing shapefiles from the merged result

df_ghs = gpd.GeoDataFrame()
df_ghs['geometry'] = shps.geometry
df_ghs['ID_HDC_G0'] = shps.ID_HDC_G0
df_ghs['CTR_MN_NM'] = shps.CTR_MN_NM
df_ghs['P75'] = shps.P75
df_ghs['P90'] = shps.P90
df_ghs['P00'] = shps.P00
df_ghs['P15'] = shps.P15

In [None]:
# Get polyRst data as Xarray, 
polyRst_da = xr.DataArray(polyRst.read(1), dims = ['y', 'x'])

In [None]:
test = rasterio.open(CHIRT_DIR+'CHIRTSmax.2016.07.tif')

In [None]:
import os
import fnmatch
import re

# open urban polys

# open urban poly raster 

# start the merge target from a copy of the GHS polygon frame, so the per-date
# columns are added to the copy and df_ghs itself stays unchanged
df_merge = df_ghs.copy()

# DOUBLE CHECK THIS DUDE AND WRITE OUT SOME RASTERS TO TEST IT

In [None]:
import os
import fnmatch
import re

# make a copy of the ghs polys; one column per CHIRTS date is merged onto the copy,
# so df_ghs itself stays unchanged
df_merge = df_ghs.copy()

# sorted() makes the order of the date columns reproducible, since os.listdir
# returns names in arbitrary order
for fn in sorted(os.listdir(CHIRT_DIR)):
    # find all the tif files, skip everything else
    if not fn.endswith('.tif'):
        continue

    # Get the date of each chirt file, e.g. 'CHIRTSmax.1983.01.tif' -> '1983.01'
    date = (fn.split('CHIRTSmax.')[1].split('.tif')[0])
    print(date)

    # Open CHIRT Data and turn data into array; the with-block closes the file
    # handle as soon as band 1 has been read
    with rasterio.open(CHIRT_DIR+fn) as tempRst:
        temp_arr = tempRst.read(1)

    # Meta data check: both layers are put on the same y/x grid below, so the
    # temperature array must have the same shape as the polygon ID raster
    # TODO: also compare the transform / CRS against the polygon raster
    if temp_arr.shape != polyRst_da.shape:
        raise ValueError('Shape of {} {} does not match the polygon raster {}'.format(
            fn, temp_arr.shape, polyRst_da.shape))

    # Make arrays into xarray DataArray
    tempRst_da = xr.DataArray(temp_arr, dims = ['y', 'x']) # y and x are our 2-d labels

    # Make xarray dataset
    ds = xr.Dataset(data_vars =
                {'ghs' : (['y', 'x'], polyRst_da),
                'temp' : (['y', 'x'], tempRst_da),})

    # UPDATED 2019-08-19 Mask the CHIRTS PIXELS FIRST, THEN GHS
    # Mask values that are ocean (-9999) in chirt, for both ghs and temp in our ds.
    # The temperature variable of this dataset is named 'temp', so that is the
    # name that has to be tested here
    ds_mask = ds.where(ds.temp != -9999, drop = False)

    # Mask pixels for both ghs and chirts where ghs cities are not present
    ds_mask = ds_mask.where(ds_mask.ghs > 0, drop = False)

    # Group poly_IDs find temp
    avg = ds_mask.groupby('ghs').mean(xr.ALL_DIMS)

    # turn GHS IDS and avg. CHIRTMax values into 1-D numpy arrays of equal length
    avg_ID = np.array(avg.ghs)
    avg_temp = np.array(avg.temp)

    print(len(avg_ID))
    print(len(avg_temp))

    ###### CHECK W/ 1983.01 POLYGONS have so few Avgs could be with the ds_mask! ! ! ! !

    # turn chirt max and IDS into a DF; the column holding the averages is named
    # after the file's date
    df_avg = pd.DataFrame()
    df_avg[date] = avg_temp
    df_avg['ID_HDC_G0'] = avg_ID

    # merge the df; the outer join keeps polygons that got no average for this date
    df_merge = df_merge.merge(df_avg, on='ID_HDC_G0', how = 'outer')

In [None]:
df_merge.columns

In [None]:
df_merge.to_file(DATA_OUT+'GHS-CHIRTS-Poly-Loop-Test.shp')

In [None]:
out1983 = df_merge[np.isnan(df_merge['1983.01'])]

In [None]:
# Write the polygons that have no 1983.01 value to a shapefile for inspection
# NOTE(review): the file name says 198701 while out1983 was filtered on the '1983.01'
# column - likely a typo for 198301; confirm before renaming
out1983.to_file(DATA_OUT+'GHS-CHIRTS-198701-Drop.shp')

### XARRAY TEST

In [None]:
a = xr.DataArray([[0,0,0], [0,19,19,], [19,0,0]], dims=('x', 'y'))
# b = xr.DataArray(np.ones(25).reshape(5, 5), dims=('x', 'y'))

b = xr.DataArray([[0,0,0], [1,1,1,], [0,0,0]], dims=('x', 'y'))


In [None]:
a

In [None]:
# Make xarray dataset

ds = xr.Dataset(data_vars = 
                    {'a' : (['y', 'x'], a),
                    'b' : (['y', 'x'], b),})

In [None]:
ds

In [None]:
ds_mask = ds.where(ds.a > 0, drop = False)

In [None]:
ds_mask.b