Preprocessing planet imagery for median mosaic

1. mask clouds
2. assign nodata for partial overlap scenes within bounds of supercell
3. median for each band and composite

In [2]:
from shapely.geometry import Polygon
import pyproj
import numpy as np
import geopandas as gpd
import rasterio as rio
from rasterio import mask
import rasterstats as rstats
import skimage as ski
import matplotlib.pyplot as plt
import glob
import os

sr_pattern = "/home/rave/cloud-free-planet/mosaic-tests/*SR*"
img_paths = glob.glob(sr_pattern)

def check_udm_and_mask(img_path):
    
    def udm_mask(image_path, udm_path):
        """
        Masks a planet image by it's udm
        """
    
        img = rio.open(image_path)
        img_meta = img.profile
        img_array = np.array([img.read(1), img.read(2), img.read(3), img.read(4)])
        mask = rio.open(udm_path).read(1)[..., :] == 0 # 0 is the value in the udm that corresponds to good data
        masked = np.where(mask,img_array, 0)
        with rio.open(img_path[0:-12]+'masked.tif', 'w', **img_meta) as dst:
            dst.write(masked) # when mask is true yield img, otherwise yield nan
        
    if os.path.isfile(img_path) and os.path.isfile(img_path[0:-12]+'_DN_udm_clip.tif'):
        udm_mask(img_path, img_path[0:-12]+'_DN_udm_clip.tif')
    else:
        pass
    
map(check_udm_and_mask, img_paths)

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [22]:
masked_pattern = "/home/rave/cloud-free-planet/mosaic-tests/*masked*"
masked_paths = glob.glob(masked_pattern)

In [25]:
os.path.basename(masked_paths[0])

'20180602_095703_1018_3B_AnalyticMSmasked.tif'

In [42]:
import functools
def save_single_bands(t):
    """
    Takes a band index and a path. Saves the
    specified band of each tif and names it by the band index. 
    """
    (band_index, path) = t
    img = rio.open(path)
    meta = img.profile
    meta.update(count=1) # update since we are writing a single band
    arr = img.read(band_index)
    dst_path = os.path.join(os.path.dirname(path),"Band_"+str(band_index)+"_"+os.path.basename(path))
    with rio.open(dst_path, 'w', **meta) as dst:
        dst.write(arr, 1)
for i in [1, 2, 3, 4]:
    map(save_single_bands, [(i, x) for x in masked_paths])

The above functions take raw analytic_sr that has been clipped and the udm masks and masks the data, then saves each band seperately out. Median compositing is done in GRASS GIS, because couldn't figure out how to median composite partially overlapping arrays wtih rasterio and numpy

# Code for creating Coarse Africa Grid in Decimal Degrees, WGS84 Datum

Below is the coarse layout defined in geopyspark to merge model outputs. Each tile is .0512 degrees by .0512 degrees. need to make a shapely multi line string or polygon bject that can be used to save out tiles as geojsons (only if they intersect an aoi) so that we can use each tile geojson with porder

coarse_layout = gps.LayoutDefinition(gps.Extent(-17.541, -34.845, 51.4766, 37.5518), gps.TileLayout(1348, 1414, 4096, 4096))

In [4]:
def layout_to_geojson(extent=(-17.541, -34.845, 51.4766, 37.5518), stepsize=.0512, output_name="/dev/data/tile_grid.geojson"):
    """
    :param extent: (lonmin, latmin, lonmax, latmax) checked this by 
    looking at coarse layout in run_geopyspark
    
    :param stepsize: width and height of tile in degrees, checked 
    this by measuring extent of probability image tiles
    
    :returns: a geodataframe where each row is a tile polygon
    """
    
    (lonmin, latmin, lonmax, latmax) = extent
    cols = (lonmax - lonmin)/.0512
    rows = (latmax - latmin)/.0512
    # Top left corner of grid, where we start to build the gdf
    XleftOrigin = lonmin
    XrightOrigin = lonmin + stepsize
    YtopOrigin = latmax
    YbottomOrigin = latmax - stepsize
    polygons = []
    
    for i in range(int(cols)):
        
        Ytop = YtopOrigin
        Ybottom =YbottomOrigin
        for j in range(int(rows)):
            polygons.append(Polygon([(XleftOrigin, Ytop), (XrightOrigin, Ytop), (XrightOrigin, Ybottom), (XleftOrigin, Ybottom)])) 
            Ytop = Ytop - stepsize
            Ybottom = Ybottom - stepsize
        XleftOrigin = XleftOrigin + stepsize
        XrightOrigin = XrightOrigin + stepsize

    grid = gpd.GeoDataFrame({'geometry':polygons})
    grid.to_file(output_name, driver = "GeoJSON")
    
layout_to_geojson()

In [17]:
aoi1_polygon = gpd.read_file("../cfg/aois/1.geojson")['geometry'][0]
grid_gdf = gpd.read_file("/dev/data/tile_grid.geojson")

aoi1_grid_geometries = grid_gdf.intersection(aoi1_polygon)
aoi1_grid_geometries= aoi1_grid_geometries[aoi1_grid_geometries.is_empty==False]

test_geom=aoi1_grid_geometries.iloc[6]

gpd.GeoSeries([test_geom]).to_file('test_tile.geojson', driver= "GeoJSON")

In [56]:
gpd.GeoSeries([test_geom]).to_file('test_tile.geojson', driver= "GeoJSON")

ssh for idlist

In [None]:
porder idlist --input "/home/ubuntu/planet/cloud-free-planet/notebooks/test_tile.geojson" --start "2018-07-01" --end "2018-07-31" --item "PSScene4Band" --asset "analytic_sr" --number 10000 --outfile "/home/ubuntu/planet/cloud-free-planet/notebooks/test_aoi1_id.csv" --cmin 0 --cmax 1 --overlap 0


ssh command for ordering

In [None]:
porder order --name test-aoi1-month07 --idlist ~/planet/cloud-free-planet/notebooks/test_aoi1_id.txt --item PSScene4Band --asset analytic_sr --boundary test_tile.geojson --aws ~/planet/cloud-free-planet/notebooks/aws.yaml --op aws

In [58]:
from subprocess import Popen, PIPE
p = Popen("porder",stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()

OSError: [Errno 2] No such file or directory

In [6]:
stderr

'usage: porder [-h]\n              {quota,base64,idlist,difflist,idsplit,order,download,multipart,multiproc}\n              ...\nporder: error: too few arguments\n'