In [1]:
import cv2 as cv
import keras
import keras.applications as apps
import numpy as np
import os
import densenet
import densenet_noise
import glob
import tqdm
import matplotlib.pyplot as plt
import matplotlib
import shutil
import random
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Network constants
TILE_SIZE = 221
INPUT_CHANNELS = 2 # HH, HV
CLASSES = 1 # regression
RANDOM_SEED = 1 # for consistency in test/train split
TRAINING_PATH = 'training_data/20*'

### Three networks used

In [7]:
# Network 1: No modifications, simple densenet
normal_net = densenet.DenseNet121(include_top=True,
                      weights=None,
                      input_shape=(TILE_SIZE, TILE_SIZE, INPUT_CHANNELS),
                      pooling=None,
                      classes=CLASSES)
# Network 2: Densenet with initial gaussian noise layrer 
noise_net = densenet_noise.DenseNet121(include_top=True,
                      weights=None,
                      input_shape=(TILE_SIZE, TILE_SIZE, INPUT_CHANNELS),
                      pooling=None,
                      classes=CLASSES)
# Network 3: Normal Densenet but using weights from network 2 initially
net = densenet.DenseNet121(include_top=True,
                      weights=None,
                      input_shape=(TILE_SIZE, TILE_SIZE, INPUT_CHANNELS),
                      pooling=None,
                      classes=CLASSES)

  str(input_shape[-1]) + ' input channels.')


In [4]:
# Utility methods to process/load images
def is_data_valid(folder_path):
    valid = os.path.exists(os.path.join(folder_path, 'imagery_HH.tif'))
    valid = valid and os.path.exists(os.path.join(folder_path, 'imagery_HV.tif')) 
    valid = valid and os.path.exists(os.path.join(folder_path, 'conc.tiff'))
    return valid

def read_data(folder):
    hh_image = cv.imread(os.path.join(folder, 'imagery_HH.tif'), cv.IMREAD_GRAYSCALE)
    hv_image = cv.imread(os.path.join(folder, 'imagery_HV.tif'), cv.IMREAD_GRAYSCALE)
    conc_image = cv.imread(os.path.join(folder, 'conc.tiff'), cv.IMREAD_GRAYSCALE)
    return hh_image, hv_image, conc_image

def tile_image(HH, HV, conc, tile_size=221):
    # This function assumes all images are the same shape
    tile_center = tile_size//2
    amount_x = (HH.shape[0]//tile_size) -1
    amount_y = (HH.shape[1]//tile_size) -1
    sar_tiles = []
    conc_tiles = []
    for i in range(amount_x):
        for j in range(amount_y):
            x_bounds = [tile_size*i, tile_size*(i+1)]
            y_bounds = [tile_size*j, tile_size*(j+1)]
            conc_tile = conc[x_bounds[0]:x_bounds[1], y_bounds[0]:y_bounds[1]]
            # check if the center of the tile is land or not
            if conc_tile[tile_center, tile_center] != 255:
                # set all values within the SAR image to by 255 if that part of the image is land
                HH_tile = np.where(conc_tile == 255,255,HH[x_bounds[0]:x_bounds[1],y_bounds[0]:y_bounds[1]])
                HV_tile = np.where(conc_tile == 255, 255, HV[x_bounds[0]:x_bounds[1],y_bounds[0]:y_bounds[1]])
                tile = np.stack([HH_tile, HV_tile],axis=-1)
                sar_tiles.append(tile)
                conc_tiles.append([conc_tile[tile_center, tile_center]])
    sar_tiles = np.asarray(sar_tiles).astype(np.float32)/255
    conc_tiles = np.asarray(conc_tiles).astype(np.float32)/100
    
    return sar_tiles, conc_tiles

def gen_tile_data(folder_name, images, labels, pos, dry_run=False, multiplier=1):
    amount = 0
    try:
        SHIFT = TILE_SIZE//multiplier
        hh_image, hv_image, conc_image = read_data(folder_name)
        conc_image_big = cv.resize(conc_image, hh_image.shape[0:2][::-1])
        for _ in range(multiplier):
            hh_image = hh_image[SHIFT:, SHIFT:]
            hv_image = hv_image[SHIFT:, SHIFT:]
            conc_image_big = conc_image_big[SHIFT:, SHIFT:]
            im_tiles, c_tiles = tile_image(hh_image, hv_image, conc_image_big)
            if not dry_run:
                images[pos+amount:pos + amount+ len(im_tiles)] = im_tiles
                labels[pos+amount:pos + amount + len(c_tiles)] = c_tiles
            amount = amount + len(im_tiles)
    except:
        print(folder_name)
    if dry_run:
        return amount
    return images, labels, pos + amount

Generating the test/train split by randomly selecting a series of image folders for each. Images are kept independent from each other

In [5]:
folders = [f for f in glob.glob(os.path.join('training_data/20*')) if is_data_valid(f)]
random.seed(RANDOM_SEED)
rand_folders = sorted(folders, key=lambda f: random.random())
# 17% test/train split (roughly as using independent images)
train_amount = len(rand_folders)//6
training_folders = rand_folders[:-train_amount]
testing_folders = rand_folders[-train_amount:]

In [9]:
len(training_folders)

23

In [10]:
testing_folders

['training_data/20110405',
 'training_data/20101009B',
 'training_data/20110217',
 'training_data/20110223']

## Allocating arrays pre-emptively to save space

In [10]:
train_length = 0
for k, folder in enumerate(tqdm.tqdm(training_folders)):
    train_length += gen_tile_data(folder, None, None, pos=0, dry_run=True)
test_length = 0
for k, folder in enumerate(tqdm.tqdm(testing_folders)):
    test_length += gen_tile_data(folder, None, None, pos=0, dry_run=True)
    
training_images = np.zeros((train_length, TILE_SIZE, TILE_SIZE, 2), dtype=np.float32)
training_labels = np.zeros((train_length, 1), dtype=np.float32)

testing_images = np.zeros((test_length, TILE_SIZE, TILE_SIZE, 2), dtype=np.float32)
testing_labels = np.zeros((test_length, 1), dtype=np.float32)

 13%|█▎        | 3/23 [00:04<00:27,  1.38s/it]

training_data/20110717B


 74%|███████▍  | 17/23 [00:27<00:09,  1.60s/it]

training_data/20110214


100%|██████████| 23/23 [00:38<00:00,  1.67s/it]
100%|██████████| 4/4 [00:07<00:00,  1.97s/it]


In [None]:
# ALTERNATE
if False:
    length = 0
    for k, folder in enumerate(tqdm.tqdm(training_folders + testing_folders)):
        length += gen_tile_data(folder, None, None, pos=0, dry_run=True)
    images = np.zeros((length, TILE_SIZE, TILE_SIZE, 2), dtype=np.float32)
    labels = np.zeros((length, 1), dtype=np.float32)

    pos = 0
    for k, folder in enumerate(tqdm.tqdm(training_folders + testing_folders)):
        images, labels, pos = gen_tile_data(folder, images, labels, pos)
    training_length = int(length - length//7)
    testing_images = images[training_length:]
    testing_labels = labels[training_length:]
    
    images = images[:training_length]
    labels = labels[:training_length]
    training_images = images
    training_labels = labels
#     pos = 0
#     for k, folder in enumerate(tqdm.tqdm(testing_folders)):
#         testing_images, testing_labels, pos = gen_tile_data(folder, testing_images, testing_labels, pos)
# training_images = np.zeros((train_length, TILE_SIZE, TILE_SIZE, 2), dtype=np.float32)
# training_labels = np.zeros((train_length, 1), dtype=np.float32)
# 
# testing_images = np.zeros((test_length, TILE_SIZE, TILE_SIZE, 2), dtype=np.float32)
# testing_labels = np.zeros((test_length, 1), dtype=np.float32)

Generating the training data from the images and their corresponding concentration labels

In [11]:
pos = 0
for k, folder in enumerate(tqdm.tqdm(training_folders)):
    training_images, training_labels, pos = gen_tile_data(folder, training_images, training_labels, pos)

pos = 0
for k, folder in enumerate(tqdm.tqdm(testing_folders)):
    testing_images, testing_labels, pos = gen_tile_data(folder, testing_images, testing_labels, pos)

 13%|█▎        | 3/23 [00:02<00:18,  1.08it/s]

training_data/20110717B


 74%|███████▍  | 17/23 [00:19<00:06,  1.16s/it]

training_data/20110214


100%|██████████| 23/23 [00:28<00:00,  1.23s/it]
100%|██████████| 4/4 [00:06<00:00,  1.56s/it]


In [None]:
# mixing the training data

### Training

Network #1

In [12]:
optimizer = keras.optimizers.Adam(lr=0.001)
loss = 'mean_squared_error'
normal_net.compile(optimizer, loss=loss)

In [9]:
checkpointer = ModelCheckpoint(filepath='weights/normal_checkpoint.hdf5', verbose=1, save_best_only=True)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1),
                                    cooldown=0, patience=3, min_lr=1e-5)

In [10]:
normal_net.load_weights('weights/normal_checkpoint_R.hdf5')

OSError: Unable to open file (Bad object header version number)

In [11]:
hist_loss_normal = []
val_loss_normal = []

In [None]:
# Using loop to save loss data between epochs
for _ in range(25):
    hist = normal_net.fit(x=training_images, y=training_labels, epochs=1, validation_data=(testing_images, testing_labels), callbacks=[checkpointer, lr_reducer])
    hist_loss_normal += hist.history['loss']
    val_loss_normal += hist.history['val_loss']

Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 samples, validate on 5190 samples
Epoch 1/1

Epoch 00001: val_loss did not improve
Train on 20385 sampl

In [19]:
# TODO: tune hyper params
optimizer = keras.optimizers.Adam(lr=0.003)
# metrics = ['accuracy']
loss = 'mean_absolute_error'
# loss = 'mean_squared_error'

noise_net.compile(optimizer, loss=loss)


In [18]:
checkpointer = ModelCheckpoint(filepath='weights/noise_checkpoint_FINAL.hdf5', verbose=1, save_best_only=True)
lr_reducer = ReduceLROnPlateau(monitor='loss', factor=np.sqrt(0.1),
                                    cooldown=0, patience=3, min_lr=1e-5)

In [14]:
hist_loss_noise = []
val_loss_noise = []

Due to the server re-setting it was necessary to reload the model several times from a checkpoint

In [17]:
noise_net.load_weights('weights/noise_checkpoint_MORE1.hdf5')

In [24]:
hist = noise_net.fit(x=training_images, y=training_labels, epochs=25, validation_data=(testing_images, testing_labels), callbacks=[checkpointer, lr_reducer])
hist_loss_noise += hist.history['loss']
val_loss_noise += hist.history['val_loss']

Train on 20385 samples, validate on 5190 samples
Epoch 1/25

Epoch 00001: val_loss did not improve
Epoch 2/25

Epoch 00002: val_loss did not improve
Epoch 3/25

Epoch 00003: val_loss did not improve
Epoch 4/25

Epoch 00004: val_loss did not improve
Epoch 5/25

Epoch 00005: val_loss did not improve
Epoch 6/25
  896/20385 [>.............................] - ETA: 4:21 - loss: 0.1399

KeyboardInterrupt: 

In [22]:
keras.backend.set_value(optimizer.lr,0.003/np.sqrt(10))

In [21]:
keras.backend.get_value(optimizer.lr)

0.003

In [25]:
noise_net.save_weights('weights/noise_FINAL.hdf5')

In [26]:
net.load_weights('weights/noise_FINAL.hdf5')

In [27]:
checkpointer = ModelCheckpoint(filepath='weights/net_checkpoint_FINAL.hdf5', verbose=1, save_best_only=True)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1),
                                    cooldown=0, patience=3, min_lr=1e-5)

In [28]:
# TODO: tune hyper params
optimizer = keras.optimizers.Adam(lr=0.001)
# metrics = ['accuracy']
loss = 'mean_absolute_error'
# loss = 'mean_squared_error'

net.compile(optimizer, loss=loss)


In [None]:
hist = net.fit(x=training_images, y=training_labels, epochs=15, validation_data=(testing_images, testing_labels), callbacks=[checkpointer, lr_reducer])
hist_loss_noise += hist.history['loss']
val_loss_noise += hist.history['val_loss']

Train on 20385 samples, validate on 5190 samples
Epoch 1/15

In [37]:
keras.backend.set_value(optimizer.lr,0.00005 )

In [32]:
keras.backend.get_value(optimizer.lr)

0.0005

In [None]:
fig = plt.figure()
plt.plot(val_loss_noise, label='test')
plt.plot(hist_loss_noise, label='train')
plt.ylabel('MAE')
plt.xlabel('epoch')
plt.legend()
fig.get_axes()[0].set_ylim(0, 0.2)
plt.grid()

fig = plt.figure()
plt.plot(np.power(val_loss_noise,2), label='test')
plt.plot(np.power(hist_loss_noise,2), label='train')
plt.ylabel('SMAE')
plt.xlabel('epoch')
plt.legend()
fig.get_axes()[0].set_ylim(0, 0.2)
plt.grid()

In [18]:
shutil.copy2('weights/noise_checkpoint.hdf5', 'weights/noisy_SAR_DENSE.h5')

'weights/noisy_SAR_DENSE.h5'

In [19]:
os.listdir('weights/')

['noise_weights_absssss.hdf5',
 'weights.hdf5',
 'noise_weights_undo.hdf5',
 'noise_weights_abs2222.hdf5',
 'densenet_reset_v1.h5',
 'noise_weights.hdf5',
 'noise_weights_rand1.hdf5',
 'noise_weights_abs222244.hdf5',
 'noise_weights_abs.hdf5',
 'noisy_SAR_DENSE.h5',
 'noise_checkpoint.hdf5',
 'noise_weights_undo_2.hdf5',
 'noise_weights_temp.hdf5',
 'noise_weights_abs22.hdf5']

In [None]:
import keras.backend as K

In [None]:
print('x')

In [None]:
 K.get_value(net.optimizer.lr)

In [None]:
K.set_value(net.optimizer.lr, K.get_value(net.optimizer.lr)/5)

In [None]:
net_noise.load_weights('check.h5')

In [None]:
print('h')

In [None]:
val_loss += hist.history['val_loss']

In [None]:
hist_loss

In [None]:
hist = net.fit(x=training_images, y=training_labels, epochs=1, batch_size=32, validation_data=(testing_images, testing_labels))
hist_loss += hist.history['loss']

In [None]:
for i in range(3):
    hist = net.fit(x=training_images, y=training_labels, epochs=3, validation_data=(testing_images, testing_labels))
    hist_loss += hist.history['loss']

In [None]:
fig = plt.figure()
plt.plot(val_loss_noise, label='test')
plt.plot(hist_loss_noise, label='train')
plt.ylabel('MAE')
plt.xlabel('epoch')
plt.legend()
# fig.get_axes()[0].set_ylim(0, 0.1)
plt.grid()

fig = plt.figure()
plt.plot(np.power(val_loss_noise,2), label='test')
plt.plot(np.power(hist_loss_noise,2), label='train')
plt.ylabel('SMAE')
plt.xlabel('epoch')
plt.legend()
# fig.get_axes()[0].set_ylim(0, 0.1)
plt.grid()

In [None]:
plt.plot(training_labels.flatten())

In [None]:
hh_image, hv_image, conc_image = read_data(testing_folders[2])
conc_image_big = cv.resize(conc_image, hh_image.shape[0:2][::-1])


In [None]:
del hh_image
del hv_image
del conc_image

In [None]:
del training_images

In [None]:
for test_folder in testing_folders:
    hh_image, hv_image, conc_image = read_data(test_folder)
    conc_image_big = cv.resize(conc_image, hh_image.shape[0:2][::-1])
    conc = predict_image_fine(hh_image, hv_image, conc_image_big, net)
    image_name = os.path.basename(test_folder) + ".tiff"
    cv.imwrite(image_name, conc)
    print(image_name)

In [None]:
conc = predict_image_fine(hh_image, hv_image, conc_image_big, net)

In [None]:
n = matplotlib.colors.Normalize(vmin=0.,vmax=1.)

In [None]:
# plt.figure(figsize=(11,11))
plt.imshow(hh_image)

In [None]:
conc.shape

In [None]:
plt.imshow(conc_image)
plt.colorbar()

In [None]:
plt.imshow(conc, norm=n)
plt.colorbar()

In [None]:
testing_folders

In [None]:
plt.imshow(conc_image)
plt.colorbar()

In [None]:
import matplotlib

In [None]:
plt.imshow(conc,norm=n)
plt.colorbar()

In [None]:
plt.imshow(conc, norm=n)
plt.colorbar()

In [None]:
conc_test = conc.copy()

In [None]:
conc_re = cv.resize(conc_test, conc_image.shape[0:2][::-1])

In [None]:
conc_re.dtype

In [None]:
plt.imshow(conc_re)
plt.colorbar()

In [None]:
mask = conc_image == 255

In [None]:
masked_conc = np.ma.masked_array(conc_image, mask)

In [None]:
plt.imshow(masked_conc/100, norm=n)
plt.colorbar()

In [None]:
plt.imshow(conc_image - mask)

In [None]:
plt.imshow(conc_image)
plt.colorbar()

In [None]:
test = net.predict(training_images[1000:1001])

In [None]:
test

In [None]:
plt.imshow(hh_image)

In [None]:
net.save_weights('densenet_reset_v2.h5')

In [None]:
net.load_weights('densenet_reset_v1.h5')

In [None]:
# after we have saved the network we want to predict an image and compare it to the result


In [None]:
plt.plot(hist.history['loss'])

In [None]:
plt.imshow(training_images[100,:,:,0])

In [None]:
plt

In [None]:
hist.history.keys()