In [1]:
import geopandas as gps
import rasterio                  # I/O raster data (netcdf, height, geotiff, ...)
import rasterio.mask
import rasterio.warp             # Reproject raster samples
import rasterio.merge
from rasterio.transform import rowcol
from rasterio import features
import fiona                     # I/O vector data (shape, geojson, ...)
import pyproj                    # Change coordinate reference system
from osgeo import gdal
import pandas as pd
import shapely
from shapely.geometry import box, Point
import json

import numpy as np               # numerical array manipulation
import time
import os
from PIL import Image
import PIL.ImageDraw
from core.visualize import display_images

import matplotlib.pyplot as plt  # plotting tools
%matplotlib inline
from tqdm import tqdm_notebook as tqdm
from tqdm import trange
import warnings                  # ignore annoying warnings
warnings.filterwarnings("ignore")

%reload_ext autoreload
%autoreload 2
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from multiprocessing import Pool

In [2]:
# training_base_dir=r'D:\lakemapping\2_dataset\output600'
# raw_image_base_dir=r'G:\5_lakemapping\sample_img_3'
output_dir=r'/media/nkd/backup/5_lakemapping/sample600/output'
raw_image_base_dir=r'/media/nkd/backup/5_lakemapping/sample_img'
raw_ndwi_image_prefix = 'ndwi_int8_'
raw_bands_image_prefix = 'bands_int16_'
raw_image_file_type = '.tif'
extracted_file_type = '.png'
extracted_ndwi_filename = 'ndwi'
extracted_bands_filename = ['blue','green','red','swir']
extracted_annotation_filename = 'annotation'
type_num=6
ndwi_band = [0]# If raster has multiple channels, then bands will be [0, 1, ...] otherwise simply [0]
four_bands = [0,1,2,3]

In [3]:
#hbh: check whether the output dir(must be present) of each type is empty
for i in range(0,type_num):
    path_to_write=os.path.join(output_dir,'output'+str(i))
    assert os.path.exists(path_to_write)
    if not len(os.listdir(path_to_write))==0:
         print('Warning: path_to_write{} is not empty! The old files in the directory may not be overwritten!!'.format(i))



In [16]:
#Read the training area 、 training polygons
# trainingArea = gps.read_file(r"D:\lakemapping\2_dataset\sampleAnnotations\area")
# trainingPolygon = gps.read_file(r"D:\lakemapping\2_dataset\sampleAnnotations\polygon")
trainingArea = gps.read_file(r"/media/nkd/backup/5_lakemapping/sample600/sample/area")
trainingPolygon = gps.read_file(r"/media/nkd/backup/5_lakemapping/sample600/sample/polygon")

print(trainingPolygon.shape,trainingArea.shape)# area:id, geomerry;   polygon:id, geometry 
trainingPolygon
trainingArea
print(f'Read a total of {trainingPolygon.shape[0]} object polygons and {trainingArea.shape[0]} training areas.')
print(f'Polygons will be assigned to training areas in the next steps.')

(84192, 2) (200, 5)


Unnamed: 0,CLASS_NAME,geometry
0,Lake,"POLYGON Z ((-87.75822 75.51463 0.00000, -87.75..."
1,Lake,"POLYGON Z ((-87.65832 75.50690 0.00000, -87.65..."
2,Lake,"POLYGON Z ((-87.73765 75.49621 0.00000, -87.73..."
3,Lake,"POLYGON Z ((-87.68015 75.49181 0.00000, -87.68..."
4,Lake,"POLYGON Z ((-87.75031 75.48534 0.00000, -87.75..."
...,...,...
84187,Lake,"POLYGON Z ((112.29534 62.21211 0.00000, 112.29..."
84188,Lake,"POLYGON Z ((112.30298 62.20591 0.00000, 112.30..."
84189,Lake,"POLYGON Z ((112.17056 62.20896 0.00000, 112.17..."
84190,Lake,"POLYGON Z ((112.30145 62.20339 0.00000, 112.30..."


Unnamed: 0,Shape_Leng,Shape_Area,id,type,geometry
0,0.000000,84.266275,10198,2,"POLYGON Z ((-3.15840 51.46483 0.00000, -3.2822..."
1,0.000000,156.689881,10203,2,"POLYGON Z ((-3.99269 56.16025 0.00000, -3.7520..."
2,0.000000,417.358125,10228,2,"POLYGON Z ((-0.39148 52.69972 0.00000, -0.1020..."
3,0.000000,402.500829,10247,2,"POLYGON Z ((4.29607 51.33661 0.00000, 4.59752 ..."
4,0.000000,352.732846,10277,2,"POLYGON Z ((8.09904 53.23757 0.00000, 8.33101 ..."
...,...,...,...,...,...
195,0.000000,0.000000,15346,1,"POLYGON Z ((54.02269 72.97526 0.00000, 54.3949..."
196,0.000000,0.000000,15355,1,"POLYGON Z ((56.16821 70.93170 0.00000, 56.3962..."
197,0.000000,0.000000,15381,3,"POLYGON Z ((61.22978 75.61631 0.00000, 61.5317..."
198,0.000000,242.767959,2796,1,"POLYGON Z ((-16.56337 12.58014 0.00000, -16.44..."


Read a total of 84192 object polygons and 200 training areas.
Polygons will be assigned to training areas in the next steps.


In [13]:
# Check if the training areas and the training polygons have the same crs     
if trainingArea.crs  != trainingPolygon.crs:
    print('Training area CRS does not match training_polygon CRS')
    targetCRS = trainingPolygon.crs #Areas are less in number so conversion should be faster
    trainingArea = trainingArea.to_crs(targetCRS)
print(trainingPolygon.crs)
print(trainingArea.crs)
assert trainingPolygon.crs == trainingArea.crs

GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]


In [17]:
# As input we received two shapefile, first one contains the training areas/rectangles and other contains the polygon of lakes/objects in those training areas
# The first task is to determine the parent training area for each polygon.

def dividePolygonsInTrainingAreas(trainingPolygon, trainingArea):
    '''Assign annotated ploygons in to the training areas.'''
    # For efficiency, assigned polygons are removed from the list, we make a copy here. 
    cpTrainingPolygon = trainingPolygon.copy()
    splitPolygons = {}
    for i in tqdm(trainingArea.index):
        spTemp = [] 
        allocated = []
        print("area's index:",i)
        for j in cpTrainingPolygon.index:
            if cpTrainingPolygon.loc[j]['geometry'].intersects(trainingArea.loc[i]['geometry']):
                spTemp.append(cpTrainingPolygon.loc[j])
                allocated.append(j)      
        splitPolygons[i] = {'polygons':spTemp,'bounds':list(trainingArea.bounds.loc[i]),'id':trainingArea.loc[i]['id'] ,'type':trainingArea.loc[i]['type']}
        cpTrainingPolygon = cpTrainingPolygon.drop(allocated)#assigned polygons are removed from the list
    return splitPolygons

# areasWithPolygons contains the object polygons for each area!
areasWithPolygons = dividePolygonsInTrainingAreas(trainingPolygon, trainingArea)
print(f'Assigned training polygons in {len(areasWithPolygons)} training areas')

  0%|          | 0/200 [00:00<?, ?it/s]

area's index: 0
area's index: 1
area's index: 2
area's index: 3
area's index: 4
area's index: 5
area's index: 6
area's index: 7
area's index: 8
area's index: 9
area's index: 10
area's index: 11
area's index: 12
area's index: 13
area's index: 14
area's index: 15
area's index: 16
area's index: 17
area's index: 18
area's index: 19
area's index: 20
area's index: 21
area's index: 22
area's index: 23
area's index: 24
area's index: 25
area's index: 26
area's index: 27
area's index: 28
area's index: 29
area's index: 30
area's index: 31
area's index: 32
area's index: 33
area's index: 34
area's index: 35
area's index: 36
area's index: 37
area's index: 38
area's index: 39
area's index: 40
area's index: 41
area's index: 42
area's index: 43
area's index: 44
area's index: 45
area's index: 46
area's index: 47
area's index: 48
area's index: 49
area's index: 50
area's index: 51
area's index: 52
area's index: 53
area's index: 54
area's index: 55
area's index: 56
area's index: 57
area's index: 58
area's 

In [None]:
print(areasWithPolygons)

In [10]:
def extractAreasThatOverlapWithTrainingData(areaInfo, writePath):
    """Iterates over raw NDWI images and using findOverlap() extract areas that overlap with training data. 
    The overlapping areas in raw images are written in a separate file, and annotation file are created from polygons in the overlapping areas.
    """
    if not os.path.exists(writePath):
        os.makedirs(writePath)
        
    polygonsInAreaDf = gps.GeoDataFrame(areaInfo['polygons'])
    img_id=str(areaInfo['id'])
    bboxArea = box(*areaInfo['bounds'])

    #draw annotation png
    # polygons = []
    # for i in polygonsInAreaDf.index:
    #     gm = polygonsInAreaDf.loc[i]['geometry']
    #     polygons.append(gm)
                
    # with rasterio.open(os.path.join(writePath,extracted_annotation_filename+'_{}.png'.format(img_id)), 'w+', **profile) as out:
    #     out_arr = out.read(1)
    #     burned = features.rasterize(polygons, fill=0, default_value=1,out=out_arr, transform=out.transform)
    #     out.write_band(1, burned)

    #draw ndwi ong
    ndwiImg = rasterio.open(os.path.join(raw_image_base_dir,raw_ndwi_image_prefix+img_id+raw_image_file_type))  
    sm_ndwi = rasterio.mask.mask(ndwiImg, [bboxArea], all_touched=True, crop=True )
    profile_ndwi = ndwiImg.profile  
    profile_ndwi['height'] = sm_ndwi[0].shape[1]
    profile_ndwi['width'] = sm_ndwi[0].shape[2]
    profile_ndwi['transform'] = sm_ndwi[1]
        # That's a problem with rasterio, if the height and the width are less then 256 it throws: ValueError: blockysize exceeds raster height 
        # So set the blockxsize and blockysize to prevent this problem
    profile_ndwi['blockxsize'] = 32
    profile_ndwi['blockysize'] = 32
    profile_ndwi['count'] = 1
    profile_ndwi['dtype'] = rasterio.float32
    dt_ndwi = sm_ndwi[0][0].astype(profile_ndwi['dtype'])
    with rasterio.open(os.path.join(writePath, extracted_ndwi_filename+'_{}.png'.format(img_id)), 'w', **profile_ndwi) as dst:
        dst.write(dt_ndwi, 1) 

    #draw red green blue png
    # bandsImg = rasterio.open(os.path.join(raw_image_base_dir,raw_bands_image_prefix+img_id+raw_image_file_type))
    # sm_bands = rasterio.mask.mask(bandsImg, [bboxArea], all_touched=True, crop=True )
    # profile_bands = bandsImg.profile  
    # profile_bands['height'] = sm_bands[0].shape[1]
    # profile_bands['width'] = sm_bands[0].shape[2]
    # profile_bands['transform'] = sm_bands[1]
    #     # That's a problem with rasterio, if the height and the width are less then 256 it throws: ValueError: blockysize exceeds raster height 
    #     # So set the blockxsize and blockysize to prevent this problem
    # profile_bands['blockxsize'] = 32
    # profile_bands['blockysize'] = 32
    # profile_bands['count'] = len(four_bands)
    # profile_bands['dtype'] = rasterio.float32
    # for band, imFn in zip(four_bands, extracted_bands_filename):
    #     dt_bands = sm_bands[0][band].astype(profile['dtype'])
    #     with rasterio.open(os.path.join(writePath, imFn+'_{}.png'.format(img_id)), 'w', **profile_bands) as dst:
    #         dst.write(dt_bands, 1) 


In [18]:
for key,value in zip(areasWithPolygons.keys(),areasWithPolygons.values()):
    path_to_write=os.path.join(output_dir,'output{}'.format(value['type']))
    extractAreasThatOverlapWithTrainingData(value,path_to_write)

In [9]:
# Display extracted image 
sampleImage = '_55.png'
# path_to_write=os.path.join(training_base_dir,'output\output4')
path_to_write=os.path.join(training_base_dir,'output588\output0' )
fn = os.path.join(path_to_write, extracted_NDWI_filename + sampleImage)
NDWI_img = Image.open(fn)
read_NDWI_img = np.array(NDWI_img)

# redBands = os.path.join(path_to_write, 'red'+ sampleImage)
# red_img = Image.open(redBands)
# read_red_img = np.array(red_img)

greenBands = os.path.join(path_to_write, 'green'+ sampleImage)
green_img = Image.open(greenBands)
read_green_img = np.array(green_img)

# blueBands = os.path.join(path_to_write, 'blue'+ sampleImage)
# blue_img = Image.open(blueBands)
# read_blue_img = np.array(blue_img)

swirBands = os.path.join(path_to_write, 'swir'+ sampleImage)
swir_img = Image.open(swirBands)
read_swir_img = np.array(swir_img)
# print(read_NDWI_img.shape)
# print(read_Bands_img.shape)
annotation_im = Image.open(fn.replace(extracted_ndwi_filename ,config.extracted_annotation_filename))
read_annotation = np.array(annotation_im)
# print(read_annotation.shape)
# print(read_annotation)

all_images = np.array([read_NDWI_img,read_green_img,read_swir_img, read_annotation])#,read_red_img,read_blue_img
# print(all_images.shape[1])
display_images(np.expand_dims(np.transpose(all_images, axes=(1,2,0)), axis=0),['ndwi','green','swir','annotation'])

NameError: name 'training_base_dir' is not defined