# Deep Learning for Satellite Image Classification

## Milestone 3

In [1]:
%matplotlib inline
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.layers import Concatenate, Conv2DTranspose
from tensorflow.keras.models import Model
from random import shuffle
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import numpy as np
import json, os, glob
import zipfile
import shutil
import rasterio

In [2]:
DATA_DIR = '/home/aki/ManningLiveProjects/2_Data/'
MODELS_DIR = '/home/aki/ManningLiveProjects/3_Models/'

shutil.rmtree(MODELS_DIR+'nwpu_images', ignore_errors=True)
shutil.rmtree(MODELS_DIR+'nwpu_label_images', ignore_errors=True)
os.mkdir(MODELS_DIR+'nwpu_label_images')
os.mkdir(MODELS_DIR+'nwpu_label_images'+os.sep+'data')

zipfile.ZipFile(DATA_DIR+'NWPU_images.zip', 'r').extractall(MODELS_DIR)
os.rename(MODELS_DIR+'images', MODELS_DIR+'nwpu_images')

subdirecs = [x[0] for x in os.walk(MODELS_DIR+'nwpu_images')][1:]
to_delete = [s for s in subdirecs if 'lake' not in s]
for k in to_delete:
    shutil.rmtree(k, ignore_errors=True) 
os.rename(MODELS_DIR+'nwpu_images'+os.sep+'lake',MODELS_DIR+'nwpu_images'+os.sep+'data')    

Load multiple VGG-JSON into a dict.  Dict key is the image file name.

In [3]:
vgg_json_files = ['nwpu_lakes_30samples.json', 'nwpu_lakes_20samplesA.json', 'nwpu_lakes_20samplesB.json']

image_files = {}
for f in vgg_json_files:
    image_files.update(json.load(open(DATA_DIR+'nwpu_labels/'+f)))

Add image data to dict.

In [4]:
for k, v in image_files.items():
    img_data = rasterio.open(MODELS_DIR+'nwpu_images/data/'+k)
    v['img_data'] = img_data.read().T
    image_files[k] = v
    

  s.start()


At this point image_files dict contains VGG_JSON (i.e. label) and image data.   This way you don't have to maintain two arrays with matching order like in the example.

In [5]:
def get_region_data(regions):
    X = []; Y = [] #pre-allocate lists to fill in a for loop
    for k in regions: #cycle through each polygon
        # get the x and y points from the dictionary
        X.append(regions[k]['shape_attributes']['all_points_x'])
        Y.append(regions[k]['shape_attributes']['all_points_y'])
    return Y,X #image coordinates are flipped relative to json coordinates


def write_mask(filename, filedata):
    regions = filedata['regions']
    X, Y = get_region_data(regions)
    
    # get the dimensions of the image
    image_data = filedata['img_data']
    nx, ny, nz = np.shape(image_data)
    mask = np.zeros((ny,nx))
    
    for x,y in zip(X,Y):
        # the ImageDraw.Draw().polygon function we will use to create the mask
        # requires the x's and y's are interweaved, which is what the following
        # one-liner does    
        polygon = np.vstack((x,y)).reshape((-1,),order='F').tolist()
        
        # create a mask image of the right size and infill according to the polygon
        if nx>ny:
            x,y = y,x 
            img = Image.new('L', (ny, nx), 0)
        elif ny>nx:
            x,y = y,x 
            img = Image.new('L', (ny, nx), 0)            
        else:
            img = Image.new('L', (nx, ny), 0)
        ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)
        # turn into a numpy array
        m = np.flipud(np.rot90(np.array(img)))
        try:
            mask = mask + m
        except:
            mask = mask + m.T
    matplotlib.image.imsave(MODELS_DIR+'nwpu_label_images/data/'+filename+"_mask.jpg", mask.astype('uint8'))  

In [6]:
for filename, filedata in image_files.items():
    write_mask(filename, filedata)

In [7]:
def image_batch_generator(files, batch_size = 32, sz = (512, 512)):
  
  while True: # this is here because it will be called repeatedly by the training function
    
    #extract a random subset of files of length "batch_size"
    batch = np.random.choice(files, size = batch_size)    
    
    #variables for collecting batches of inputs (x) and outputs (y)
    batch_x = []
    batch_y = []
    
    #cycle through each image in the batch
    for f in batch:

        #preprocess the raw images 
        rawfile = f'nwpu_images/data/{f}'
        raw = Image.open(rawfile)
        raw = raw.resize(sz)
        raw = np.array(raw)

        #check the number of channels because some of the images are RGBA or GRAY
        if len(raw.shape) == 2:
            raw = np.stack((raw,)*3, axis=-1)
        else:
            raw = raw[:,:,0:3]
            
        #get the image dimensions, find the min dimension, then square the image off    
        nx, ny, nz = np.shape(raw)
        n = np.minimum(nx,ny)
        raw = raw[:n,:n,:] 
            
        batch_x.append(raw)
        
        #get the masks. 
        maskfile = rawfile.replace('nwpu_images','nwpu_label_images')+'_mask.jpg'
        mask = Image.open(maskfile)
        # the mask is 3-dimensional so get the max in each channel to flatten to 2D
        mask = np.max(np.array(mask.resize(sz)),axis=2)
        # water pixels are always greater than 100
        mask = (mask>100).astype('int')
        
        mask = mask[:n,:n]

        batch_y.append(mask)

    #preprocess a batch of images and masks 
    batch_x = np.array(batch_x)/255. #divide image by 255 to normalize
    batch_y = np.array(batch_y)
    batch_y = np.expand_dims(batch_y,3) #add singleton dimension to batch_y

    yield (batch_x, batch_y) #yield both the image and t

In [8]:
batch_size = 4
prop_train = 0.5

file_names = list(image_files.keys())
split = int(prop_train * len(file_names))

#split into training and testing
train_files = file_names[0:split]
test_files  = file_names[split:]

train_generator = image_batch_generator(train_files, batch_size = batch_size)
test_generator  = image_batch_generator(test_files, batch_size = batch_size)

In [9]:
batch_size = 1

train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        featurewise_center=False,
        featurewise_std_normalization=False,
        shear_range=0,
        zoom_range=0,
        rotation_range=90,
        horizontal_flip=True)

img_generator = train_datagen.flow_from_directory(
        MODELS_DIR+'nwpu_images',
        target_size=(128, 128),
        batch_size=batch_size,
        class_mode=None, seed=42, shuffle=False)

test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        featurewise_center=False,
        featurewise_std_normalization=False,    
        shear_range=0,
        zoom_range=0,
        rotation_range=90,
        horizontal_flip=True)

mask_generator = test_datagen.flow_from_directory(
        MODELS_DIR+'nwpu_label_images',
        target_size=(128, 128),
        batch_size=batch_size,
        class_mode=None, seed=42, shuffle=False)

train_generator = (pair for pair in zip(img_generator, mask_generator))

Found 700 images belonging to 1 classes.
Found 70 images belonging to 1 classes.
