# Cleanup/PostProcess LULC

In [1]:
%load_ext chime
%matplotlib inline

import os
from glob import glob
import pickle
import chime

import rasterio as rio
from rasterio.mask import mask
import numpy as np
import geopandas as gpd
from skimage.morphology import binary_closing, binary_opening, square, remove_small_holes, disk, square, remove_small_objects
from scipy.ndimage import distance_transform_edt
import gdal

from skimage.filters.rank import modal, mean_bilateral
from datetime import datetime, time
from time import sleep
from joblib import Parallel, delayed

import nbimporter
#from KeyFunctions import *
from utils import *

--------------------------
### Cleanup
1. Smooth out roads: apply binary closing 2-3 times with a square structuring element (selem).
1. Remove dangles from structures: apply binary opening 2-3 times with a square selem.
1. Fill holes in the structure class?
1. Fill holes in the asphalt class?
1. Fill holes in the impervious class?
1. Fill holes in the pool class?
1. Within x distance of roads, replace everything that is not asphalt or dense veg with asphalt.
1. ID shadows?


In [2]:
def smoothValue(array, value, selem=square(5)):
    """Close small gaps in one class and burn the closed footprint back in.

    A boolean mask of the pixels equal to ``value`` is passed through
    ``binary_closing`` twice with ``selem``; every pixel that is True in the
    resulting mask is then set to ``value``.

    Parameters
    ----------
    array : array of class codes. It is modified in place.
    value : class code to smooth.
    selem : structuring element handed to ``binary_closing``.

    Returns
    -------
    The same ``array`` object that was passed in.
    """
    closed_mask = binary_closing(binary_closing(array == value, selem), selem)
    array[closed_mask] = value
    return array


def smoothStructures(array):
    """Regularise the structure class (code 9) with opening/closing passes.

    Steps, in order:
      1. Every current structure pixel is reset to 255 in ``array``.
      2. The structure mask goes through three rounds of
         ``binary_opening`` with ``square(3)`` followed by ``binary_closing``
         with ``square(5)``.
      3. The resulting mask is written back as 9, holes in class 9 are
         filled via ``fillHolesInClass(array, 9, 90)``, and the mask is
         applied once more with ``np.where``.

    ``array`` is modified in place by steps 1-3; the returned array is the
    new array produced by the final ``np.where``. Structure pixels removed by
    the opening remain 255 (a modal-filter fill for them was sketched in an
    earlier revision but is not enabled).
    """
    footprint = array == 9
    # Blank the class first so pixels dropped by the opening are left as 255.
    array[footprint] = 255

    opening_selem = square(3)
    closing_selem = square(5)
    for _ in range(3):
        footprint = binary_closing(
            binary_opening(footprint, opening_selem), closing_selem
        )

    array[footprint] = 9
    array = fillHolesInClass(array, 9, 90)
    return np.where(footprint, 9, array)


def fillHolesInClass(array, class_num, size_max):
    """Fill small holes inside one class, in place.

    The mask of pixels equal to ``class_num`` is passed to
    ``remove_small_holes`` (``area_threshold=size_max``, ``connectivity=1``)
    and every pixel of the filled mask is set to ``class_num``.

    Parameters
    ----------
    array : array of class codes. It is modified in place.
    class_num : class code whose holes are filled.
    size_max : value forwarded as ``area_threshold``.

    Returns
    -------
    The same ``array`` object that was passed in.
    """
    members = array == class_num
    # Class pixels are parked at 255 before the filled mask is written back.
    array[members] = 255
    array[remove_small_holes(members, area_threshold=size_max, connectivity=1)] = class_num
    return array
    

def removeClassSmaller(array, class_num, min_size):
    """Return a copy of ``array`` with small patches of one class set to 255.

    Connected patches of ``class_num`` are filtered with
    ``remove_small_objects`` (``min_size=min_size``, ``connectivity=1``).
    Pixels of the class that survive the filter keep their value; pixels of
    the class that do not survive become 255; all other pixels are unchanged.
    The input array is not modified.
    """
    is_class = array == class_num
    survivors = remove_small_objects(is_class, min_size=min_size, connectivity=1)
    # Version of the raster with the whole class blanked out.
    class_blanked = np.where(is_class, 255, array)
    # Surviving patches take the original values, everything else the blanked ones.
    return np.where(survivors, array, class_blanked)


def getGeometryMask(geometry, raster):
    """Return a boolean array that is True where ``geometry`` covers ``raster``.

    Parameters
    ----------
    geometry : a single shapely geometry, or an iterable of geometries.
    raster : open rasterio dataset to rasterise against.

    Returns
    -------
    The inverse of the mask of the masked array returned by
    ``rasterio.mask.mask(..., crop=False, filled=False)``. Callers in this
    notebook index ``[0]`` to get the first band.
    NOTE(review): pixels that rasterio already treats as masked (e.g. the
    dataset's nodata value) presumably also come back False - confirm.
    """
    # mask() expects an iterable of geometries. Any single shapely geometry is
    # wrapped here, not only "Polygon": multi-part geometries (e.g. the
    # MultiPolygon a unary_union can produce) are not iterable in Shapely 2.0.
    # ``geom_type`` replaces the deprecated ``type`` attribute and is a plain
    # string only on a single geometry.
    if isinstance(getattr(geometry, "geom_type", None), str):
        geometry = [geometry]
    masked, _ = mask(raster, geometry, all_touched=False, crop=False, filled=False)

    return ~masked.mask

In [58]:
def _applyReclassRules(lulc, bands, washes_mask, railroad_mask, pondsLakes_mask):
    """Return a copy of ``lulc`` with the threshold and burn-in rules applied.

    Rules run top to bottom and each one sees the result of the previous
    ones, except where a condition tests the untouched ``lulc`` input.
    Class codes as used by the comments in this notebook: 1 pond/lake,
    2 pool, 3 bare earth, 4 sparse veg, 5 dense veg, 6 irrigated veg,
    7 asphalt, 8 impervious, 9 structure, 10 shadow, 255 no data.
    ``bands`` maps band description -> 2-D array from the ortho stack.
    """
    out = lulc.copy()

    # Structure lower than 5 (feet, per the original note) -> impervious.
    out[(out==9) & (bands["HAG"]<5)] = 8

    # Red bare earth. An earlier revision used redness>=20, greenness<=12,
    # blueness<=5 on a different value scale.
    out[(bands["REDness"]>=35336) & (bands["GREENness"]<=34307) & (bands["BLUEness"]<=33409) & (lulc!=9) & (bands["DPR"]<=3)] = 3

    # Drop pools that are not very blue and do not have a low NIRness value.
    # This previously tested out==33023, which never matches a class code:
    # the class value 2 had been rescaled together with the band thresholds.
    out[(out==2) & (bands["BLUEness"]<38547) & (bands["NIRness"]>23769)] = 255
    # Pool with a low pool index -> no data.
    out[(out==2) & (bands["NDPI"] < 40959)] = 255
    # Pond/lake with NDPI below the threshold -> pool.
    # NOTE(review): the original comment said "high pool index" although the
    # condition is "<" - confirm which is intended.
    out[(out==1) & (bands["NDPI"] < 49151)] = 2

    # No-data pixels that are very green with a high index and low brightness -> irrigated.
    out[(out==255) & (bands["MSAVI"]>=29556) & (bands["GREENness"]>=34307) & (bands["RGBNmean"]<=19273)] = 6

    # Sparse veg with DPR < 2 -> dense veg (tree overhang over a road).
    out[(bands["DPR"]<2) & (out==4)] = 5

    # Burn roads (DPR == 0) in as asphalt unless the input class was dense veg.
    out[(bands["DPR"]==0) & (lulc!=5)] = 7

    # Burn shadows in where brightness is low, but not over pond or pool.
    out[(bands["RGBNmean"]<10282) & (out!=1) & (lulc!=2)] = 10

    # Impervious inside the major-wash polygons -> bare earth.
    out[(washes_mask) & (out == 8)] = 3

    # Inside the railroad buffer, anything that is not structure or asphalt -> bare earth.
    out[(railroad_mask) & (out != 9) & (out != 7)] = 3

    # Every pixel inside the known pond/lake polygons -> pond/lake.
    out[pondsLakes_mask] = 1

    # Impervious and asphalt with DPR > 100 -> bare earth.
    out[(out==8) & (bands["DPR"]>100)] = 3
    out[(out==7) & (bands["DPR"]>100)] = 3

    return out


def _smoothClasses(out):
    """Run the morphological clean-up passes, working from tall classes down to ground."""
    out = smoothValue(out, 5, selem=disk(2)) # dense veg (ideally trees)
    out = smoothStructures(out) # structures
    out = smoothValue(out, 4, selem=disk(2)) # sparse veg
    out = smoothValue(out, 7, selem=square(5)) # asphalt
    out = fillHolesInClass(out, 7, 150) # fill asphalt holes below threshold 150
    out = smoothValue(out, 8, selem=square(5)) # concrete/impervious
    out = smoothValue(out, 3, selem=square(5)) # bare ground
    out = smoothValue(out, 6, selem=square(5)) # class 6 (irrigated veg per the rule above)
    out = smoothValue(out, 2, selem=disk(2)) # pools
    out = removeClassSmaller(out, 1, 10000) # drop small ponds/lakes
    out = smoothValue(out, 1, selem=disk(5)) # ponds
    out = smoothValue(out, 5, selem=disk(2)) # dense veg again, after everything else
    return out


def cleanClassifiedFile(path, row, lulc_dir, ortho_dir, overwrite=False):
    """Post-process one classified LULC tile and write it as ``*_cleaned.tif``.

    The classified raster and the matching ortho band stack are located with
    ``findFile``. Threshold rules and vector burn-ins are applied, the classes
    are smoothed, and the result is written (LZW, nodata 255, with a colormap)
    to ``lulc_dir + "_cleaned"``.

    Reads the module-level geometries ``railroads``, ``washes`` and
    ``pondsLakes``, which must be defined before this is called.

    Parameters
    ----------
    path, row : tile identifiers forwarded to ``findFile``.
    lulc_dir : directory holding the classified rasters.
    ortho_dir : directory holding the ortho band stacks.
    overwrite : when False and the output already exists, nothing is
        recomputed and the existing path is returned.

    Returns
    -------
    Path of the cleaned file, or None when the input files cannot be found.
    """
    try:
        lulc_file = findFile(path, row, lulc_dir)
        ortho_file = findFile(path, row, ortho_dir)
    except Exception:
        lulc_file = ortho_file = None
    # findFile presumably can also signal "not found" by returning None
    # (another cell filters its results with pd.isnull) - treat both the same.
    if lulc_file is None or ortho_file is None:
        print(f"Couldn't find necessary files for {path}_{row}")
        return None

    postProcess_dir = lulc_dir + "_cleaned"
    os.makedirs(postProcess_dir, exist_ok=True)

    print(postProcess_dir)

    ofile = os.path.join(postProcess_dir, os.path.basename(lulc_file).replace(".tif","_cleaned.tif"))
    if os.path.exists(ofile) and not overwrite:
        return ofile

    with rio.open(lulc_file) as src:
        lulc = src.read(1)
        kwargs = src.profile
        railroad_mask = getGeometryMask(railroads, src)[0]
        washes_mask = getGeometryMask(washes, src)[0]
        pondsLakes_mask = getGeometryMask(pondsLakes, src)[0]

    with rio.open(ortho_file) as src:
        descs = src.descriptions
        data = src.read()

    bands = {desc: data[ib] for ib, desc in enumerate(descs)}

    # RGBNmean is not present in every stack; derive it from the first four bands.
    if "RGBNmean" not in descs:
        print("Adding RGBNmean")
        bands["RGBNmean"] = np.nanmean(data[:4], axis=0).astype(data.dtype)

    kwargs.update(compress="lzw", nodata=255)

    # All processing happens before the output is opened, so an exception here
    # cannot leave behind a stub file that a later overwrite=False run would
    # mistake for a finished tile.
    out = _applyReclassRules(lulc, bands, washes_mask, railroad_mask, pondsLakes_mask)
    out = _smoothClasses(out)

    colors = {
        1: (12,42,235, 255),
        2: (41, 210, 219,255),
        3: (255, 214, 117, 255),
        4: (171, 224, 85, 255),
        5: (12, 100, 1, 255),
        6: (0, 192, 32, 255),
        7: (62, 62, 62, 255),
        8: (160, 160, 160, 255),
        9: (160, 37, 6, 255),
        10: (0, 0, 0, 255),
        255: (255,1,166, 255)
    }

    with rio.open(ofile, "w", **kwargs) as dst:
        if out.max() == 0:
            # NOTE(review): as before, the (empty) output file is still
            # created and its path returned in this case - confirm that
            # downstream steps want it.
            print(f"Bad output for {ofile}")
            return ofile

        dst.write(out, 1)
        dst.write_colormap(1, colors)

        print(f"Wrote out to {ofile}")

    return ofile  
    %chime

In [43]:
%%time
# Input directories: segmented ortho band stacks and their classified LULC rasters.
orthos_loc = "../EPCExtent_30cm/Orthos_Segmentedv3"
lulcs_loc = "../EPCExtent_30cm/Orthos_Segmented_Classifiedv3"

# Every classified tile, reduced to the first two "_"-separated tokens of its file name.
lulc_finished = glob(lulcs_loc + "/*.tif")
paths_and_rows = [os.path.basename(tif).split("_")[:2] for tif in lulc_finished]

# Vector layers dissolved to one geometry each; cleanClassifiedFile reads these as globals.
railroads = gpd.read_file("../OtherData/PC_Railroad/railroad.shp").unary_union.buffer(20)
washes = gpd.read_file("../OtherData/Major_Washes_in_Eastern_Pima_County/Major_Washes_in_Eastern_Pima_County.shp").unary_union
pondsLakes = gpd.read_file("../OtherData/TrainingData/pondsLakesPolys.gpkg").unary_union
%chime

  for f in features_lst:


Wall time: 1.99 s


In [None]:
#%%time
#pavedRoads = gpd.read_file("../OtherData/OSMRoadsPimaPaved_20201124.gpkg").buffer(10).unary_union

In [60]:
# Serial pass over every classified tile, regenerating outputs that already exist.
for tile_id in paths_and_rows:
    cleanClassifiedFile(
        path=tile_id[0],
        row=tile_id[1],
        lulc_dir=lulcs_loc,
        ortho_dir=orthos_loc,
        overwrite=True,
    )

../EPCExtent_30cm/Orthos_Segmentedv3_cleaned
Adding RGBNmean
Wrote out to ../EPCExtent_30cm/Orthos_Segmentedv3_cleaned\W1004789_W449850_TrainingStackV3_classLGBNewishGBLM_cleaned.tif
../EPCExtent_30cm/Orthos_Segmentedv3_cleaned
Adding RGBNmean
Wrote out to ../EPCExtent_30cm/Orthos_Segmentedv3_cleaned\W989789_W439850_TrainingStackV3_classLGBNewishGBLM_cleaned.tif


In [26]:
cleaned_tifs = Parallel(n_jobs=8, verbose=5)(delayed(cleanClassifiedFile)(path=pr[0], row=pr[1], lulc_dir=lulcs_loc, ortho_dir=orthos_loc, overwrite=False) for pr in paths_and_rows)
%chime

print(f"{len(paths_and_rows)} files to process")

cleaned_tifs = [f for f in cleaned_tifs if f != None]
print(cleaned_tifs)

#vrt = buildVRT(lulcs_loc, "EPC_30cmOrthoSegmented_Classified_cleaned.vrt")

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.


1 files to process
[]


[Parallel(n_jobs=8)]: Done   1 out of   1 | elapsed:    2.0s finished


In [13]:
import shapely
from shapely.geometry import *
import pandas as pd

# Tile index; each row carries the path/row identifiers used by findFile.
tindex = gpd.read_file("../EPCExtent_30cm/Ortho_5kSubIndex.gpkg")

# Attach the classified raster found for each tile.
tindex["LULCFile"] = tindex.apply(
    lambda tile: findFile(path=tile.path, row=tile.row, directory="../EPCExtent_30cm/Orthos_Segmented_Classified"),
    axis=1,
)

# Keep only the tiles for which a classified raster was found.
tindex = tindex[tindex.LULCFile.notnull()]

# Prioritise central Tucson and work outwards: rank tiles by the distance of
# their centroid from the centroid of the central tile.
tindex["centroid"] = tindex.geometry.centroid
central_tile = tindex[(tindex.path == "W1004789") & (tindex.row == "W449850")]
central_point = central_tile.centroid.values[0]

tindex["DistToCenter"] = tindex.centroid.apply(lambda pt: int(pt.distance(central_point)))
tindex = tindex.sort_values(by="DistToCenter")

  for f in features_lst:


In [15]:
target_files = tindex.iloc[0:int(len(tindex)/2)]
target_files.shape
cleaned_tifs = Parallel(n_jobs=8, verbose=5)(delayed(cleanClassifiedFile)(path=row.path, row=row.row, lulc_dir=lulcs_loc, ortho_dir=orthos_loc, overwrite=False) for i, row in target_files.iterrows())
%chime

print(f"{len(paths_and_rows)} files to process")

cleaned_tifs = [f for f in cleaned_tifs if f != None]
outVRT = os.path.join("../EPCExtent_30cm/Orthos_Segmented_Classified_cleaned", "EPC_30cmOrthoSegmented_Classified_cleaned.vrt")
vrt = gdal.BuildVRT(outVRT, cleaned_tifs)
del vrt
print(f"Created {outVRT}")
finished()

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.7s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:  2.2min
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:  7.8min
[Parallel(n_jobs=8)]: Done 272 tasks      | elapsed: 16.0min
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed: 27.4min
[Parallel(n_jobs=8)]: Done 632 tasks      | elapsed: 42.2min
[Parallel(n_jobs=8)]: Done 866 tasks      | elapsed: 59.6min
[Parallel(n_jobs=8)]: Done 1136 tasks      | elapsed: 80.7min
[Parallel(n_jobs=8)]: Done 1442 tasks      | elapsed: 104.1min
[Parallel(n_jobs=8)]: Done 1784 tasks      | elapsed: 132.6min
[Parallel(n_jobs=8)]: Done 2005 out of 2005 | elapsed: 150.6min finished


4011 files to process
Created EPC_30cmOrthoSegmented_Classified_cleaned.vrt


In [16]:
# Pass over the whole index. With overwrite=False, tiles whose cleaned output
# already exists just return their path without being recomputed.
# NOTE(review): results are appended unfiltered, so the list can hold None
# entries and repeat paths already collected - confirm before reusing it.
cleaned_tifs.extend(
    Parallel(n_jobs=4, verbose=5)(
        delayed(cleanClassifiedFile)(
            path=tile.path,
            row=tile.row,
            lulc_dir=lulcs_loc,
            ortho_dir=orthos_loc,
            overwrite=False,
        )
        for _, tile in tindex.iterrows()
    )
)


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    3.9s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   19.8s
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:   46.7s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  2.2min
[Parallel(n_jobs=4)]: Done 640 tasks      | elapsed:  3.2min
[Parallel(n_jobs=4)]: Done 874 tasks      | elapsed:  4.4min
[Parallel(n_jobs=4)]: Done 1144 tasks      | elapsed:  5.8min
[Parallel(n_jobs=4)]: Done 1450 tasks      | elapsed:  7.3min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  9.0min
[Parallel(n_jobs=4)]: Done 2170 tasks      | elapsed: 32.4min
[Parallel(n_jobs=4)]: Done 2584 tasks      | elapsed: 90.7min
[Parallel(n_jobs=4)]: Done 3034 tasks      | elapsed: 148.8min
[Parallel(n_jobs=4)]: Done 3520 tasks      | elapsed: 216.6min
[Parallel(n_jobs=4)]: Done 4011 out of 4011 | elapsed: 286.4mi