<table style="font-size: 1em; padding: 0; margin: 0;">
<p style="border: 1px solid #ff5733; border-left: 15px solid #ff5733; padding: 10px; text-align:justify;">
    <strong style="color: #ff5733">Deliverable</strong>  
    <br/>The deliverable for Part 2 is a Jupyter notebook showing a workflow to create test and training datasets, consisting of folders of imagery and corresponding label imagery, ready for training a semantic segmentation model in Keras. This will mostly test your understanding of the generic workflow for preparing a dataset to train and test a deep learning model, which is an essential component of the remaining Parts.
    </p>
</table>

##### NWPU image labels

To get you started on the NWPU lakes dataset for model training, three label files are provided, covering a total of 70 lake images (10% of the total number). These files are ```nwpu_labels\nwpu_lakes_30samples.json```, ```nwpu_labels\nwpu_lakes_20samplesA.json```, and ```nwpu_labels\nwpu_lakes_20samplesB.json```. You may use these as, respectively, train, test and validation sets, or alternatively in any way you choose. You are strongly recommended to create more of your own label images. Deep neural networks tend to work well when they have lots (and lots!) of examples to learn from.

In [1]:
import json, os
import numpy as np
import rasterio
import matplotlib
import matplotlib.pyplot as plt
# import some drawing tools from PIL
from PIL import Image, ImageDraw
%matplotlib inline

In [2]:
def preparar_datos(data, images, i):
    """Collect the polygon vertices annotated for one image.

    Parameters
    ----------
    data : dict
        Parsed annotation JSON keyed by image filename. Each entry has a
        'regions' collection whose items carry
        ['shape_attributes']['all_points_x'] and ['all_points_y'].
        'regions' may be a dict (keyed by region id) or a plain list.
    images : sequence of str
        Image filenames, i.e. keys of `data`.
    i : int
        Index into `images` of the image to process.

    Returns
    -------
    tuple
        ``(Y, X, name)`` where ``Y`` is the list of 'all_points_y' lists (one
        per region), ``X`` is the list of 'all_points_x' lists, and ``name``
        is ``images[i]``. The y-lists come first on purpose: the image
        coordinates used by the caller are flipped relative to the JSON
        coordinates.

    Raises
    ------
    KeyError
        If the image, or a region's polygon point lists, are missing.
    """
    nombre = images[i]
    regions = data[nombre]['regions']
    # Accept both layouts: a dict of regions (iterate its values, which keeps
    # the original key order) or a list of regions.
    shapes = regions.values() if isinstance(regions, dict) else regions

    X = []
    Y = []
    for region in shapes:  # one entry per annotated polygon
        attrs = region['shape_attributes']
        X.append(attrs['all_points_x'])
        Y.append(attrs['all_points_y'])

    return Y, X, nombre  # flipped order, see docstring

In [3]:
def genera_mascara(all_images,X,Y,i):
    """Rasterise a set of polygons into a label mask sized like one image.

    Parameters
    ----------
    all_images : sequence of array-like
        Image arrays. Only the shape of ``all_images[i]`` is used: its first
        two dimensions give the mask shape (2-D and 3-D arrays both work).
    X, Y : sequence of sequences of numbers
        Per-polygon vertex coordinates. Each vertex is drawn at horizontal
        position ``x`` and vertical position ``y`` of the PIL canvas, so
        ``X`` values index axis 1 of the mask and ``Y`` values index axis 0.
    i : int
        Index into `all_images` of the image the mask belongs to.

    Returns
    -------
    numpy.ndarray
        Float array with shape ``np.shape(all_images[i])[:2]``. Each pixel
        holds the number of polygons covering it (0 = background), so
        overlapping polygons produce values above 1.
    """
    # Leading two dimensions of the image array define the mask grid.
    nx, ny = np.shape(all_images[i])[:2]
    mask = np.zeros((nx, ny))

    for x, y in zip(X, Y):
        # ImageDraw.Draw().polygon wants the coordinates interleaved as
        # [x0, y0, x1, y1, ...]; stacking and reading column-major does that.
        polygon = np.vstack((x, y)).reshape((-1,), order='F').tolist()

        # PIL sizes are (width, height); a (ny, nx) canvas converts to an
        # (nx, ny) array, which always matches `mask` regardless of whether
        # the image is square, wide or tall.
        img = Image.new('L', (ny, nx), 0)
        ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)

        # accumulate this polygon's footprint
        mask = mask + np.array(img)
    return mask

## Archivo : nwpu_labels\nwpu_lakes_30samples.json

In [4]:
# Load the polygon annotations; the context manager closes the file promptly
with open(os.path.join('nwpu_labels', 'nwpu_lakes_30samples.json')) as f:
    data = json.load(f)
# The top-level keys are the image filenames; sort them for a stable order
images = sorted(data.keys())
print(images)
print(len(images))

['lake_001.jpg', 'lake_002.jpg', 'lake_003.jpg', 'lake_004.jpg', 'lake_005.jpg', 'lake_006.jpg', 'lake_007.jpg', 'lake_008.jpg', 'lake_009.jpg', 'lake_010.jpg', 'lake_011.jpg', 'lake_012.jpg', 'lake_013.jpg', 'lake_014.jpg', 'lake_015.jpg', 'lake_016.jpg', 'lake_017.jpg', 'lake_018.jpg', 'lake_019.jpg', 'lake_020.jpg', 'lake_642.jpg', 'lake_648.jpg', 'lake_660.jpg', 'lake_678.jpg', 'lake_681.jpg', 'lake_682.jpg', 'lake_690.jpg', 'lake_693.jpg', 'lake_694.jpg', 'lake_695.jpg']
30


In [5]:
# Read every labelled image into memory, in the same order as `images`
all_images = []
for nombre_imagen in images:
    ruta_imagen = os.path.join('nwpu_images', 'data', nombre_imagen)
    with rasterio.open(ruta_imagen) as src:
        # .T reverses the axis order of the array returned by read()
        pixeles = src.read().T
    all_images.append(pixeles)

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


In [6]:
# Make sure the output folder exists so this cell also runs on a fresh checkout
os.makedirs('nwpu_mask', exist_ok=True)

sup = len(images)
for i in range(sup):
    # preparar_datos returns the y-lists first, so here X holds the JSON
    # y coordinates and Y the JSON x coordinates
    X, Y, stitle = preparar_datos(data, images, i)
    mascara = genera_mascara(all_images, X, Y, i)
    # Save the mask in a folder, under the name of its associated image.
    # NOTE(review): imsave colour-maps 2-D arrays and the .jpg name implies
    # lossy JPEG output; also the images were transposed on load, so the saved
    # mask may be transposed relative to the file on disk — confirm both
    # against whatever consumes these masks.
    matplotlib.image.imsave(os.path.join('nwpu_mask', stitle), mascara.astype('uint8'))

## Archivo : nwpu_labels\nwpu_lakes_20samplesA.json

In [7]:
# Load the polygon annotations; the context manager closes the file promptly
with open(os.path.join('nwpu_labels', 'nwpu_lakes_20samplesA.json')) as f:
    data2A = json.load(f)
# The top-level keys are the image filenames; sort them for a stable order
images2A = sorted(data2A.keys())
print(images2A)
print(len(images2A))

['lake_021.jpg', 'lake_022.jpg', 'lake_023.jpg', 'lake_024.jpg', 'lake_025.jpg', 'lake_026.jpg', 'lake_028.jpg', 'lake_029.jpg', 'lake_030.jpg', 'lake_031.jpg', 'lake_032.jpg', 'lake_033.jpg', 'lake_034.jpg', 'lake_035.jpg', 'lake_036.jpg', 'lake_037.jpg', 'lake_038.jpg', 'lake_039.jpg', 'lake_040.jpg', 'lake_041.jpg']
20


In [8]:
# Read every labelled image into memory, in the same order as `images2A`
all_images2A = []
for nombre_imagen in images2A:
    ruta_imagen = os.path.join('nwpu_images', 'data', nombre_imagen)
    with rasterio.open(ruta_imagen) as src:
        # .T reverses the axis order of the array returned by read()
        pixeles = src.read().T
    all_images2A.append(pixeles)

In [9]:
# Make sure the output folder exists so this cell also runs on a fresh checkout
os.makedirs('nwpu_mask', exist_ok=True)

sup2A = len(images2A)
for i in range(sup2A):
    # preparar_datos returns the y-lists first, so here X holds the JSON
    # y coordinates and Y the JSON x coordinates
    X, Y, stitle = preparar_datos(data2A, images2A, i)
    mascara = genera_mascara(all_images2A, X, Y, i)
    # Save the mask in a folder, under the name of its associated image.
    # NOTE(review): imsave colour-maps 2-D arrays and the .jpg name implies
    # lossy JPEG output; also the images were transposed on load, so the saved
    # mask may be transposed relative to the file on disk — confirm both
    # against whatever consumes these masks.
    matplotlib.image.imsave(os.path.join('nwpu_mask', stitle), mascara.astype('uint8'))

## Archivo : nwpu_labels\nwpu_lakes_20samplesB.json

In [10]:
# Load the polygon annotations; the context manager closes the file promptly
with open(os.path.join('nwpu_labels', 'nwpu_lakes_20samplesB.json')) as f:
    data2B = json.load(f)
# The top-level keys are the image filenames; sort them for a stable order
images2B = sorted(data2B.keys())
print(images2B)
print(len(images2B))

['lake_042.jpg', 'lake_043.jpg', 'lake_044.jpg', 'lake_045.jpg', 'lake_046.jpg', 'lake_047.jpg', 'lake_048.jpg', 'lake_049.jpg', 'lake_050.jpg', 'lake_051.jpg', 'lake_052.jpg', 'lake_053.jpg', 'lake_054.jpg', 'lake_055.jpg', 'lake_056.jpg', 'lake_057.jpg', 'lake_058.jpg', 'lake_059.jpg', 'lake_060.jpg', 'lake_061.jpg']
20


In [11]:
# Read every labelled image into memory, in the same order as `images2B`
all_images2B = []
for nombre_imagen in images2B:
    ruta_imagen = os.path.join('nwpu_images', 'data', nombre_imagen)
    with rasterio.open(ruta_imagen) as src:
        # .T reverses the axis order of the array returned by read()
        pixeles = src.read().T
    all_images2B.append(pixeles)

In [12]:
# Make sure the output folder exists so this cell also runs on a fresh checkout
os.makedirs('nwpu_mask', exist_ok=True)

sup2B = len(images2B)
for i in range(sup2B):
    # preparar_datos returns the y-lists first, so here X holds the JSON
    # y coordinates and Y the JSON x coordinates
    X, Y, stitle = preparar_datos(data2B, images2B, i)
    mascara = genera_mascara(all_images2B, X, Y, i)
    # Save the mask in a folder, under the name of its associated image.
    # NOTE(review): imsave colour-maps 2-D arrays and the .jpg name implies
    # lossy JPEG output; also the images were transposed on load, so the saved
    # mask may be transposed relative to the file on disk — confirm both
    # against whatever consumes these masks.
    matplotlib.image.imsave(os.path.join('nwpu_mask', stitle), mascara.astype('uint8'))