# New Raster Mask

Created 2019-01-15
This notebook takes the code from the Raster Mask notebook and cleans it up so it can be used to do two things:

1. Create new rasters with a simple mask applied
2. Compute connected components of two masks

Check the CRS of each raster before running: the rasters used here are in
'EPSG:4326' (WGS84) or in projection code 54009 (Mollweide).

In [1]:
import rasterio
from rasterio.mask import mask
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import numpy as np
import matplotlib.pyplot as plt

### Inputs

In [2]:
# Directory prefixes, relative to the notebook's working directory. Each ends
# with '/' because file names are appended by plain string concatenation
# (e.g. raster_raw + file).
temp_data = '../../temp_data/'  # only referenced by the commented-out shapefile export
raster_interim = '../../data/interim/'  # not referenced in the visible cells
raster_raw = '../../data/'  # directory holding the input population raster

### Functions

In [3]:
# Let's try to make the mask directly with rasterio

def raster_mask_poly(raster, band, density, crs):
    """Threshold one raster band into a 0/1 mask and polygonize the mask.

    Parameters
    ----------
    raster : open rasterio dataset
        Its ``read``, ``transform`` and ``crs`` members are used.
    band : int
        Band number passed to ``raster.read``.
    density : float or int
        Threshold: cells ``>= density`` become 1, cells ``< density`` become 0.
    crs : CRS
        Target CRS handed to ``rasterio.warp.transform_geom``.

    Returns
    -------
    tuple
        ``(mask, geom, poly_gdf)`` where ``mask`` is the thresholded array,
        ``geom`` is the re-projected GeoJSON-like dict of the LAST shape
        yielded by ``rasterio.features.shapes`` (``None`` if nothing was
        yielded) and ``poly_gdf`` is a GeoDataFrame with one Polygon per
        coordinate ring of that shape.

    Notes
    -----
    Only the last shape is used. In this notebook's run that shape's first
    ring was the raster bounding box, which the caller drops afterwards.
    NOTE(review): this relies on the order in which ``shapes`` yields
    features -- confirm before reusing on other rasters.
    """
    data = raster.read(band)

    # Evaluate both comparisons on the original values before writing.
    # Thresholding in two sequential steps turns the freshly written zeros
    # into ones whenever density <= 0.
    below = data < density
    at_or_above = data >= density
    data[below] = 0
    data[at_or_above] = 1

    # Walk the shapes without re-projecting each one: only the final shape is
    # kept, so transforming the earlier ones is wasted work.
    geom = None
    for geom, _value in rasterio.features.shapes(data, transform=raster.transform):
        pass

    rings = []
    if geom is not None:
        # Re-project from the dataset's own CRS to the requested one.
        geom = rasterio.warp.transform_geom(raster.crs, crs, geom, precision=6)
        rings = geom['coordinates']

    # Build the frame in one step instead of growing it row by row with .loc.
    poly_gdf = gpd.GeoDataFrame(geometry=[Polygon(ring) for ring in rings])

    return data, geom, poly_gdf

In [4]:
def raster_mask(raster, band, density):
    """Return a 0/1 mask of one raster band thresholded at ``density``.

    Parameters
    ----------
    raster : object with a ``read(band)`` method returning a NumPy array
        (e.g. an open rasterio dataset).
    band : int
        Band number passed to ``raster.read``.
    density : float or int
        Threshold: cells ``>= density`` become 1, cells ``< density`` become 0.

    Returns
    -------
    numpy.ndarray
        The array returned by ``raster.read``, modified in place. Cells for
        which neither comparison is true (e.g. NaN) are left unchanged.
    """
    data = raster.read(band)

    # Evaluate both comparisons on the original values before writing.
    # Thresholding in two sequential steps turns the freshly written zeros
    # into ones whenever density <= 0.
    below = data < density
    at_or_above = data >= density
    data[below] = 0
    data[at_or_above] = 1

    return data

In [5]:
# Let's try to make polygons

def raster_poly(raster, band, crs):
    """Polygonize one raster band and return the rings as a GeoDataFrame.

    Parameters
    ----------
    raster : open rasterio dataset
        Its ``read``, ``transform`` and ``crs`` members are used.
    band : int
        Band number passed to ``raster.read``.
    crs : CRS
        Target CRS handed to ``rasterio.warp.transform_geom``.

    Returns
    -------
    geopandas.GeoDataFrame
        One Polygon per coordinate ring of the LAST shape yielded by
        ``rasterio.features.shapes``; empty if nothing was yielded.
        NOTE(review): only the last shape is used, so the result depends on
        the order in which ``shapes`` yields features -- confirm this is
        intended.
    """
    values = raster.read(band)

    # Walk the shapes without re-projecting each one: only the final shape is
    # kept, so transforming the earlier ones is wasted work.
    geom = None
    for geom, _value in rasterio.features.shapes(values, transform=raster.transform):
        pass

    rings = []
    if geom is not None:
        # Re-project from the dataset's own CRS to the requested one.
        geom = rasterio.warp.transform_geom(raster.crs, crs, geom, precision=6)
        rings = geom['coordinates']

    # Build the frame in one step instead of growing it row by row with .loc.
    return gpd.GeoDataFrame(geometry=[Polygon(ring) for ring in rings])

In [6]:
def raster_gpd(array, raster, crs):
    """Polygonize a NumPy array using a raster's georeferencing.

    Parameters
    ----------
    array : numpy.ndarray
        Array passed to ``rasterio.features.shapes``.
    raster : open rasterio dataset
        Supplies the affine ``transform`` and the source ``crs``.
    crs : CRS
        Target CRS handed to ``rasterio.warp.transform_geom``.

    Returns
    -------
    geopandas.GeoDataFrame
        One Polygon (in ``crs`` coordinates) per coordinate ring of the LAST
        shape yielded by ``rasterio.features.shapes``; empty if nothing was
        yielded.
        NOTE(review): only the last shape is used, so the result depends on
        the order in which ``shapes`` yields features -- confirm this is
        intended.
    """
    # Walk the shapes without re-projecting each one: only the final shape is
    # kept, so transforming the earlier ones is wasted work.
    geom = None
    for geom, _value in rasterio.features.shapes(array, transform=raster.transform):
        pass

    rings = []
    if geom is not None:
        # Re-project from the dataset's own CRS to the requested one.
        geom = rasterio.warp.transform_geom(raster.crs, crs, geom, precision=6)
        rings = geom['coordinates']

    # Build the frame in one step instead of growing it row by row with .loc.
    return gpd.GeoDataFrame(geometry=[Polygon(ring) for ring in rings])

In [7]:
def raster_write(meta, array, file_out):
    """Write a NumPy array to disk as a single-band float32 raster.

    Parameters
    ----------
    meta : mapping
        Raster metadata used as keyword arguments for ``rasterio.open``
        (e.g. the ``meta`` of the source dataset). It is not modified.
    array : numpy.ndarray
        Data to write; it is cast to float32.
    file_out : str
        Output path including the file name.
    """
    # Work on a copy: updating the caller's mapping in place would silently
    # change its dtype/count for every later use.
    kwargs = dict(meta)

    # The output is always one float32 band, whatever the source metadata says.
    kwargs.update(dtype=rasterio.float32, count=1)

    with rasterio.open(file_out, 'w', **kwargs) as dst:
        dst.write_band(1, array.astype(rasterio.float32))

### Polygons for >= 300 ppl per km2

In [9]:
# Open the input population raster from the raw-data directory and remember
# its CRS, which is used below as the target CRS for the polygons.
file = 'AFR_PPP_2015_adj_v2.tif'
raster = rasterio.open(raster_raw+file)  # dataset stays open for the cells below
crs = raster.crs

In [10]:
# Display the dataset metadata (CRS, dtype, size, nodata, transform).
raster.meta

{'count': 1,
 'crs': CRS({'init': 'epsg:4326'}),
 'driver': 'GTiff',
 'dtype': 'float32',
 'height': 11161,
 'nodata': -3.4028234663852886e+38,
 'transform': Affine(0.00833333329999305, 0.0, -33.3254172868223,
       0.0, -0.00833333329999305, 41.791250202105076),
 'width': 12575}

In [11]:
# Threshold band 1 at 300 and polygonize the resulting mask; the target CRS is
# the raster's own CRS.
mask300m, geom300, poly_gdf300 = raster_mask_poly(raster, 1, 300, crs)

In [13]:
# Inspect the first rows and the number of polygons.
print(poly_gdf300.head(6))
print(len(poly_gdf300))

                                            geometry
0  POLYGON ((-33.325417 41.79125, -33.325417 -51....
1  POLYGON ((19.341249 -34.608749, 19.357916 -34....
2  POLYGON ((9.857915999999999 37.34125, 9.857915...
3  POLYGON ((19.224583 -34.417083, 19.224583 -34....
4  POLYGON ((19.391249 -34.600416, 19.391249 -34....
5  POLYGON ((19.157916 -34.417083, 19.157916 -34....
60626


In [14]:
# Drop the first row, which is the bounding box of the Africa raster, before
# writing the polygons out.
poly_gdf300 = poly_gdf300.drop(poly_gdf300.index[0], axis = 0)
print(poly_gdf300.head(6))
print(len(poly_gdf300))

                                            geometry
1  POLYGON ((19.341249 -34.608749, 19.357916 -34....
2  POLYGON ((9.857915999999999 37.34125, 9.857915...
3  POLYGON ((19.224583 -34.417083, 19.224583 -34....
4  POLYGON ((19.391249 -34.600416, 19.391249 -34....
5  POLYGON ((19.157916 -34.417083, 19.157916 -34....
6  POLYGON ((19.341249 -34.592083, 19.341249 -34....
60625


In [17]:
#write out polygons 
# poly_gdf300.to_file(temp_data+'AFR_PPP_2015_adj_v2_300.shp', driver='ESRI Shapefile')

### Connected Pixels 1500 - 300

In [None]:
import os

# Switch the working directory to the project root so that the relative
# 'data/...' paths and the local `src` package used below resolve.
# NOTE(review): the path is machine-specific; adjust it when running elsewhere.
os.getcwd()  # result is discarded; only the final expression of a cell is displayed
os.chdir('/home/cascade/tana-crunch-cascade/projects/NTL/')
os.getcwd()

# Project-local helpers; must be imported after the chdir above.
from src import raster_funcs

In [None]:
# Open the GHS population raster. Note that `file` is now an open rasterio
# dataset (earlier in the notebook it held a file-name string) and `crs` is
# rebound to this dataset's CRS.
file = rasterio.open('data/GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip.tif')
crs = file.crs

In [None]:
# Build two independent 0/1 masks of band 1, thresholded at 300 and at 1500.
# Each call reads the band again, so the arrays do not share data.
mask300 = raster_mask(file, 1, 300)
mask1500 = raster_mask(file, 1, 1500)

In [None]:
# Combine the two masks with the project helper (defined in src/raster_funcs,
# not shown here). NOTE(review): presumably keeps the pixels of one mask that
# are connected to the other -- confirm against the helper's implementation.
mask1500c300 = raster_funcs.remove_isolated_pixels(mask1500, mask300)

In [None]:
# Polygonize the combined mask using the GHS raster's transform; the target
# CRS is the raster's own CRS.
poly_gdf_1500c300 = raster_gpd(mask1500c300, file, crs)

In [None]:
# Inspect the first rows and the number of polygons.
print((poly_gdf_1500c300.head(6)))
print(len(poly_gdf_1500c300))

In [None]:
# Drop the first row, which is expected to be the bounding box of the raster
# (as it was for the Africa raster above), keeping the result in a new variable.
poly_gdf_1500c300_drop = poly_gdf_1500c300.drop(poly_gdf_1500c300.index[0], axis = 0)
print((poly_gdf_1500c300_drop.head(6)))
print(len(poly_gdf_1500c300_drop))

In [None]:
# poly_gdf_1500c300_drop.to_file('data/GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip.tif_1500c300.shp', driver='ESRI Shapefile')