In [None]:
import geopandas as gps
import rasterio                  # I/O raster data (netcdf, height, geotiff, ...)
import rasterio.mask
import rasterio.warp             # Reproject raster samples
import rasterio.merge
from rasterio.transform import rowcol
from rasterio import features
import fiona                     # I/O vector data (shape, geojson, ...)
import pyproj                    # Change coordinate reference system
from osgeo import gdal
import pandas as pd
import shapely
from shapely.geometry import box, Point
import json

import numpy as np               # numerical array manipulation
import time
import os
from PIL import Image
import PIL.ImageDraw
from core.visualize import display_images

import matplotlib.pyplot as plt  # plotting tools
%matplotlib inline
from tqdm import tqdm_notebook as tqdm
from tqdm import trange
import warnings                  # ignore annoying warnings
warnings.filterwarnings("ignore")

%reload_ext autoreload
%autoreload 2
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from multiprocessing import Pool

In [None]:
# All required settings (including the input and output paths) live in a separate config module, e.g. config/Preprocessing.py.
# Fill in that file before continuing with this notebook.
# hbh: for this scene a separate config named Preprocessing_within was created to keep it apart from the original.
# NOTE(review): the import below still loads `Preprocessing`, not `Preprocessing_within` - confirm which one is intended.
from config import Preprocessing   
# If the config lives in a different folder (e.g. configLargeCluster), import from that folder instead,
# e.g. `from configLargeCluster import Preprocessing`.
config = Preprocessing.Configuration()

In [None]:
# Notebook-level settings read as bare names by the cells below. They must be defined here,
# otherwise a fresh kernel fails with NameError in the folder check and in the extraction function.
# The absolute paths are machine specific - adjust them before running.
output_dir=r'G:\5_lakemapping\sample635_12\output'
raw_image_base_dir=r'G:\5_lakemapping\sample_img'
# Alternative locations used on the Linux workstation:
# output_dir=r'/media/nkd/backup/5_lakemapping/sample600/output'
# raw_image_base_dir=r'/media/nkd/backup/5_lakemapping/sample_img'

# Raw rasters are looked up as <prefix><area id><raw_image_file_type> inside raw_image_base_dir.
raw_ndwi_image_prefix = 'ndwi_int8_'
raw_bands_image_prefix = 'bands_int16_'
raw_image_file_type = '.tif'
extracted_file_type = '.png'

# File-name stems of the extracted layers.
extracted_ndwi_filename = 'ndwi'
extracted_bands_filename = ['blue','green','red','swir']
extracted_annotation_filename = 'annotation'

# Number of sample types; one sub-folder output<i> per type is expected inside output_dir.
type_num=6
ndwi_band = [0]# If raster has multiple channels, then bands will be [0, 1, ...] otherwise simply [0]
# Band indices of the multi-band raster, paired one-to-one with extracted_bands_filename.
four_bands = [0,1,2,3]

In [None]:
# hbh: every sample type needs an already existing output folder; warn when one of them
# still holds files, because stale files from an earlier run may survive next to the new ones.
for type_idx in range(type_num):
    path_to_write = os.path.join(output_dir, f'output{type_idx}')
    assert os.path.exists(path_to_write)
    if os.listdir(path_to_write):
        print('Warning: path_to_write{} is not empty! The old files in the directory may not be overwritten!!'.format(type_idx))

In [None]:
# Load the training areas and the training polygons from the folder named in the config.
training_area_path = os.path.join(config.training_base_dir, config.training_area_fn)
training_polygon_path = os.path.join(config.training_base_dir, config.training_polygon_fn)
trainingArea = gps.read_file(training_area_path)
trainingPolygon = gps.read_file(training_polygon_path)

# Quick look at both layers. The area layer must provide the 'id' and 'type' columns
# that are read when polygons are assigned to areas further down.
print(trainingPolygon.shape,trainingArea.shape)
trainingPolygon
trainingArea
n_polygons, n_areas = trainingPolygon.shape[0], trainingArea.shape[0]
print(f'Read a total of {n_polygons} object polygons and {n_areas} training areas.')
print('Polygons will be assigned to training areas in the next steps.')

In [None]:
# Both layers must share one coordinate reference system before geometries are compared.
# On a mismatch the areas are reprojected (there are fewer of them, so this is the cheaper side).
polygon_crs = trainingPolygon.crs
if trainingArea.crs != polygon_crs:
    print('Training area CRS does not match training_polygon CRS')
    targetCRS = polygon_crs
    trainingArea = trainingArea.to_crs(targetCRS)
for layer in (trainingPolygon, trainingArea):
    print(layer.crs)
assert trainingPolygon.crs == trainingArea.crs

In [None]:
# As input we receive two shapefiles: the first contains the training areas (rectangles), the other contains the polygons of the lakes/objects inside those training areas.
# The first task is to determine the parent training area of each polygon.

def dividePolygonsInTrainingAreas(trainingPolygon, trainingArea):
    '''Assign the annotated polygons to the training areas they intersect.

    Areas are visited in index order. A polygon is handed to the first area it
    intersects and is then removed from the candidates, so every polygon ends up
    in at most one area.

    Parameters
    ----------
    trainingPolygon : GeoDataFrame
        Annotated object polygons. The frame itself is not modified.
    trainingArea : GeoDataFrame
        Training areas; must provide the columns 'geometry', 'id' and 'type'.

    Returns
    -------
    dict
        Maps each area index to a dict with the keys 'polygons' (list of polygon
        rows), 'bounds' (list with the area's bounding coordinates), 'id' and 'type'.
    '''
    # Filtering below always builds new frames, so the caller's frame is never mutated.
    remaining = trainingPolygon
    # Bounds of all areas are computed once instead of once per area.
    areaBounds = trainingArea.bounds
    splitPolygons = {}
    for i in tqdm(trainingArea.index):
        area = trainingArea.loc[i]
        # One vectorised test per area replaces the per-polygon Python loop.
        hits = remaining.intersects(area['geometry'])
        spTemp = [row for _, row in remaining[hits].iterrows()]
        splitPolygons[i] = {'polygons':spTemp,'bounds':list(areaBounds.loc[i]),'id':area['id'] ,'type':area['type']}
        # Assigned polygons are removed so later areas only test the leftovers.
        remaining = remaining[~hits]
    return splitPolygons

# areasWithPolygons maps every training-area index to that area's object polygons and metadata.
areasWithPolygons = dividePolygonsInTrainingAreas(trainingPolygon, trainingArea)
print('Assigned training polygons in {} training areas'.format(len(areasWithPolygons)))

In [None]:
# Summarise the assignment per area instead of dumping every polygon row of every area.
for area_idx, area_info in areasWithPolygons.items():
    print(f"area {area_idx}: id={area_info['id']}, type={area_info['type']}, "
          f"polygons={len(area_info['polygons'])}, bounds={area_info['bounds']}")

In [None]:
def extractAreasThatOverlapWithTrainingData(areaInfo, writePath):
    """Crop the raw rasters of one training area and write them together with an annotation mask.

    The NDWI raster and the multi-band raster belonging to ``areaInfo['id']`` are read from
    ``raw_image_base_dir``, cropped to the bounding box of the area and written to ``writePath``
    as single-band float32 files:

    * ``<extracted_ndwi_filename>_sup_<id>.png``        - cropped NDWI
    * ``<extracted_annotation_filename>_sup_<id>.png``  - polygons burned in with value 1
    * ``<name>_sup_<id>.png`` for every name in ``extracted_bands_filename`` - cropped bands

    All files are created with the profile of the source NDWI raster (size, transform,
    block size, band count and dtype are overridden); the '.png' suffix is only a file name.

    Parameters
    ----------
    areaInfo : dict
        Needs the keys 'polygons' (rows with a 'geometry' entry), 'bounds' and 'id'.
    writePath : str
        Output folder; created when it does not exist.
    """
    os.makedirs(writePath, exist_ok=True)

    polygonsInAreaDf = gps.GeoDataFrame(areaInfo['polygons'])
    img_id=str(areaInfo['id'])
    bboxArea = box(*areaInfo['bounds'])

    # --- NDWI -------------------------------------------------------------------------
    ndwi_path = os.path.join(raw_image_base_dir,raw_ndwi_image_prefix+img_id+raw_image_file_type)
    # The context manager closes the source raster once the crop and profile are taken.
    with rasterio.open(ndwi_path) as ndwiImg:
        sm_ndwi = rasterio.mask.mask(ndwiImg, [bboxArea], all_touched=True, crop=True )
        profile_ndwi = ndwiImg.profile
    profile_ndwi['height'] = sm_ndwi[0].shape[1]
    profile_ndwi['width'] = sm_ndwi[0].shape[2]
    profile_ndwi['transform'] = sm_ndwi[1]
    # That's a problem with rasterio, if the height and the width are less then 256 it throws:
    # ValueError: blockysize exceeds raster height
    # So set the blockxsize and blockysize to prevent this problem
    profile_ndwi['blockxsize'] = 32
    profile_ndwi['blockysize'] = 32
    profile_ndwi['count'] = 1
    profile_ndwi['dtype'] = rasterio.float32
    dt_ndwi = sm_ndwi[0][0].astype(profile_ndwi['dtype'])
    with rasterio.open(os.path.join(writePath, extracted_ndwi_filename+'_sup_{}.png'.format(img_id)), 'w', **profile_ndwi) as dst:
        dst.write(dt_ndwi, 1)

    # --- annotation -------------------------------------------------------------------
    # A frame built from an empty polygon list has no 'geometry' column, hence the guard.
    polygons = polygonsInAreaDf['geometry'].tolist() if len(polygonsInAreaDf) else []

    with rasterio.open(os.path.join(writePath,extracted_annotation_filename+'_sup_{}.png'.format(img_id)), 'w+', **profile_ndwi) as out:
        out_arr = out.read(1)
        if polygons:
            # Polygons are burned with value 1 into the array read from the new file.
            out_arr = features.rasterize(polygons, fill=0, default_value=1,out=out_arr, transform=out.transform)
        # Areas without polygons keep the untouched background: rasterize may reject
        # an empty shape list (ValueError in some rasterio versions).
        out.write(out_arr, 1)

    # --- spectral bands ---------------------------------------------------------------
    bands_path = os.path.join(raw_image_base_dir,raw_bands_image_prefix+img_id+raw_image_file_type)
    with rasterio.open(bands_path) as bandsImg:
        sm_bands = rasterio.mask.mask(bandsImg, [bboxArea], all_touched=True, crop=True )
    for band, imFn in zip(four_bands, extracted_bands_filename):
        dt_bands = sm_bands[0][band].astype(profile_ndwi['dtype'])
        with rasterio.open(os.path.join(writePath, imFn+'_sup_{}.png'.format(img_id)), 'w', **profile_ndwi) as dst:
            dst.write(dt_bands, 1)


In [None]:
# Write the cropped rasters and the annotation of every training area into the
# output folder that belongs to the area's sample type.
for areaInfo in areasWithPolygons.values():
    path_to_write = os.path.join(output_dir, 'output' + str(areaInfo['type']))
    extractAreasThatOverlapWithTrainingData(areaInfo, path_to_write)

In [None]:
# Display one extracted sample: NDWI, green, swir and the annotation mask side by side.
sample_type = 0   # sub-folder output<sample_type> filled by the extraction cell
sample_id = 55    # training-area id to preview - adjust to an id that exists in that folder
# The extraction function names its files <stem>_sup_<id>.png.
sampleImage = '_sup_{}.png'.format(sample_id)
path_to_write = os.path.join(output_dir, 'output{}'.format(sample_type))


def _read_sample_layer(stem):
    """Return the layer file <stem><sampleImage> from path_to_write as a numpy array."""
    # The context manager releases the file handle; np.array forces the pixel data to load first.
    with Image.open(os.path.join(path_to_write, stem + sampleImage)) as img:
        return np.array(img)


fn = os.path.join(path_to_write, extracted_ndwi_filename + sampleImage)
read_NDWI_img = _read_sample_layer(extracted_ndwi_filename)
read_green_img = _read_sample_layer('green')
read_swir_img = _read_sample_layer('swir')
# Built from the file-name stem directly: replacing 'ndwi' inside the full path could also
# hit a folder name.
read_annotation = _read_sample_layer(extracted_annotation_filename)

# Stack the layers as channels and add a leading batch axis for display_images.
all_images = np.array([read_NDWI_img,read_green_img,read_swir_img, read_annotation])
display_images(np.expand_dims(np.transpose(all_images, axes=(1,2,0)), axis=0),['ndwi','green','swir','annotation'])