In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Activation, Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, AveragePooling2D
from tensorflow.keras.layers import Reshape, Conv2DTranspose, UpSampling2D
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

import matplotlib.pyplot as plt
import pandas as pd
import os
from PIL import Image
from sklearn.cluster import KMeans
from skimage.color import rgb2hsv, rgb2lab
from minisom import MiniSom
from matplotlib.gridspec import GridSpec
from collections import defaultdict, Counter
from pylab import pcolor

In [None]:
# Cap TensorFlow's memory use on the first GPU by replacing it with a single
# virtual (logical) device that has a fixed memory limit.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            # NOTE(review): memory_limit is presumably in MB (1024 * 8.5 = 8704) — confirm
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024 * 8.5)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # NOTE(review): presumably raised when the GPU was already initialized
        # before this cell ran; the error is only printed, not re-raised
        print(e)

# Pet Image Dataset

In [None]:
# load data: fill the 'img' column of pets_df with one resized array per JPEG
pet_dir = './pet_images/'

pets_df = pd.DataFrame(columns=['img', 'img_mask'])

k = 0
# load image data
# sorted() keeps the order deterministic; row k is later paired with the k-th trimap
for filename in sorted(os.listdir(pet_dir)):
    # skip macOS AppleDouble sidecars ("._name.jpg"), exactly as the trimap
    # loader does: they are not images and would shift the img/mask pairing
    if filename.startswith('._') or not filename.endswith('.jpg'):
        continue
    # PIL sizes are (width, height), so the stored array is (240, 160, channels)
    pets_df.loc[k, 'img'] = np.asarray(Image.open(pet_dir + filename).resize((160, 240)))
    k = k+1

In [None]:
# load data: fill the 'img_mask' column of pets_df with one resized trimap per PNG
pet_dir1 = './trimaps/'

k = 0
# load ground truth data
for filename in sorted(os.listdir(pet_dir1)):
    # skip macOS AppleDouble sidecars and anything that is not a PNG
    if filename.startswith('._') or not filename.endswith(".png"):
        continue
    trimap = Image.open(pet_dir1 + filename)
    # Masks hold class ids, not intensities: nearest-neighbour resampling keeps
    # only values that exist in the file, whereas an interpolating filter can
    # blend neighbouring ids into labels that were never annotated.
    pets_df.loc[k, 'img_mask'] = np.asarray(trimap.resize((160, 240), resample=Image.NEAREST))
    k = k+1

In [None]:
# preview the first rows of the image / mask table
pets_df.head()

In [None]:
# pull both columns out as 1-D arrays with one image array per entry
# (presumably object dtype, since every cell holds an ndarray)
pet, mask = pets_df['img'].to_numpy(), pets_df['img_mask'].to_numpy()

In [None]:
# check if all images have same shape: print index and shape of every outlier
expected_shape = (240, 160, 3)
for i, img in enumerate(pet):
    if img.shape != expected_shape:
        print(i, img.shape)

In [None]:
# remove images with wrong shape (and their masks, so both arrays stay aligned).
# The indices are derived from the same shape test as the check above instead of
# being hard-coded, so re-running this cell is a no-op rather than deleting
# twelve more (valid) images each time.
bad_idx = [i for i in range(pet.shape[0]) if pet[i].shape != (240, 160, 3)]
pet = np.delete(pet, bad_idx, axis=0)
mask = np.delete(mask, bad_idx, axis=0)

In [None]:
# get data in right format and normalize
# turn the array-of-arrays into one dense (n, 240, 160, 3) block ...
pet_feats = np.asarray(list(pet)).reshape(-1, 240, 160, 3)
# ... and scale 8-bit channel values into [0, 1] as float32
pet_feats = pet_feats.astype('float32') / 255
pet_feats.shape

In [None]:
# get random image function
def get_rand_images(original, others):
    """Plot 8 randomly chosen images, each followed by its counterpart.

    ``original`` must expose ``shape[0]``; both arguments are indexed with the
    same random positions, so ``others`` needs at least as many entries.
    The 16 pictures are drawn into the first 16 slots of a 5 x 5 subplot grid.
    """
    picks = np.random.randint(0, original.shape[0], 8)
    # interleave: original[p] is immediately followed by others[p]
    gallery = [source[p] for p in picks for source in (original, others)]
    plt.figure(figsize=(64, 64))
    grid = 5
    for slot, picture in enumerate(gallery[:16], start=1):
        plt.subplot(grid, grid, slot)
        plt.imshow(picture)
        plt.axis('off')

In [None]:
# original and ground truth mask
# (8 random photos, each followed by the mask stored at the same index)
get_rand_images(pet_feats, mask)

In [None]:
## Save pets dataset as numpy array
# (later cells read this back as 'pets.npy')

np.save('pets', pet_feats)

## KMeans Segmentation

In [None]:
# comment out if data loaded
# reads back the normalized image array written by np.save('pets', ...)
pets = np.load('pets.npy')

In [None]:
# sanity check; the cells below assume (7378, 240, 160, 3)
pets.shape

In [None]:
# get hsv and lab conversions for data
# Buffers are sized from the loaded array rather than a hard-coded image count,
# so a dataset of a different size neither overflows nor leaves zero padding.
hsv = np.zeros(pets.shape)
lab = np.zeros(pets.shape)

for i in range(pets.shape[0]):
    hsv[i] = rgb2hsv(pets[i])
    lab[i] = rgb2lab(pets[i])

In [None]:
# show the RGB, HSV and Lab versions of 8 randomly chosen images
idx = np.random.randint(0, pets.shape[0], 8)
images = []
for ind in idx:
    images.append(pets[ind])
    images.append(hsv[ind])
    images.append(lab[ind])
plt.figure(figsize=(64, 64))
# plot every collected picture (8 x 3 = 24); the previous fixed count of 16
# silently dropped the last eight, and 24 still fit the 5 x 5 grid
num_images = len(images)
rows = 4
for i in range(num_images):
    plt.subplot(rows+1, rows+1, i+1)
    plt.imshow(images[i])
    plt.axis('off')

In [None]:
# load all data in one array: RGB + HSV + Lab -> 9 feature channels per pixel
# Uses `pets` (the array this section loads) instead of `pet_feats`, which only
# exists if the data-preparation cells ran earlier in the same kernel.
z = np.zeros(pets.shape[:3] + (9,))

for i in range(z.shape[0]):
    z[i] = np.dstack((pets[i], hsv[i], lab[i]))

z.shape

In [None]:
# reshape: one (pixels, features) table per image, 240 * 160 = 38400 rows
z = z.reshape(7378, 38400, 9)
z.shape

In [None]:
# run kmeans: each image is clustered on its own into 2 groups of pixels
labels = np.zeros((7378, 38400))
kmeans = KMeans(n_clusters=2, init='k-means++', n_init=10, max_iter=1000, random_state=42)
for i, pixel_table in enumerate(z):
    labels[i] = kmeans.fit_predict(pixel_table)
    # progress marker every 500 images
    if i % 500 == 0:
        print(i)

In [None]:
# still flat here: one row of per-pixel cluster ids per image
labels.shape

In [None]:
# reshape kmeans output back into 240 x 160 label images
labels = labels.reshape(-1, 240, 160)
labels.shape

In [None]:
## Save kmeans mask images in numpy array
# (labels was reshaped to (n, 240, 160) in the cell above)

np.save('pets_kmeans1', labels)

In [None]:
# 8 random images, each followed by its 2-cluster label map
get_rand_images(pets, labels)

## Autoencoder and KMeans

In [None]:
# reload the saved image array so this section can start from disk
pet_feats = np.load('pets.npy')

In [None]:
# enforce the (n, 240, 160, 3) layout that the encoder's input_shape expects
pet_feats = np.reshape(pet_feats, [-1, 240, 160, 3])
pet_feats.shape

In [None]:
# network parameters
input_shape = (240, 160, 3)  # same layout pet_feats is reshaped to
batch_size = 16              # passed to autoencoder.fit
kernel_size = 9              # used by every Conv2D / Conv2DTranspose in the loops below
latent_dim = 250             # width of the 'latent_vector' Dense layer

In [None]:
# encoder/decoder number of CNN layers and filters per layer
layer_filters = [64, 128, 256, 512]

# first build the encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
# one Conv2D -> BatchNormalization -> MaxPooling2D stage per entry in layer_filters
for filters in layer_filters:
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               activation='relu',
               kernel_initializer = 'he_normal',
               strides=1,
               padding='same')(x)
    x = BatchNormalization()(x)
    # NOTE(review): with Keras' default pooling stride each stage should halve
    # height and width (240x160 -> 15x10 after four stages) — confirm in summary()
    x = MaxPooling2D(pool_size=2)(x)

# keep the pre-flatten feature-map shape; the decoder rebuilds a tensor of this shape
shape = K.int_shape(x)

# generate latent vector
x = Flatten()(x)
# no activation is given, so the latent layer is linear
latent = Dense(latent_dim, name='latent_vector')(x)

# instantiate encoder model
encoder = Model(inputs,
                latent,
                name='encoder')
encoder.summary()

In [None]:
# build the decoder model
latent_inputs = Input(shape=(latent_dim,), name='decoder_input')

# expand the latent vector back to the encoder's last feature-map size
# (`shape` comes from the encoder cell)
x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

# stack of Conv2DTranspose layers using the encoder's filter counts in reverse
# order (512, 256, 128, 64), each with stride 2
for filters in layer_filters[::-1]:
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        activation='relu',
                        kernel_initializer = 'he_normal',
                        strides=2,
                        padding='same')(x)

# reconstruct the input
# 3 output channels with a sigmoid, so values lie in (0, 1) like the /255-scaled input
outputs = Conv2DTranspose(filters=3,
                          kernel_size=1,
                          activation='sigmoid',
                          padding='same',
                          kernel_initializer = 'he_normal',
                          name='decoder_output')(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

In [None]:
# autoencoder = encoder + decoder
# instantiate autoencoder model
# (the decoder is applied to the encoder's output, sharing the `inputs` tensor)
autoencoder = Model(inputs,
                    decoder(encoder(inputs)),
                    name='autoencoder')
autoencoder.summary()

# Mean Square Error (MSE) loss function, adagrad optimizer
# (the optimizer is given by name, so Keras' default Adagrad settings apply)
autoencoder.compile(loss='mse', optimizer='adagrad')

In [None]:
checkpoint_path = "pet_autoencoder.h5"
# NOTE(review): checkpoint_dir is not used anywhere in the visible notebook
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
# (weights only; the filename has no format fields, so one file is reused)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

In [None]:
# train the autoencoder
# input and target are the same array: the network learns to reconstruct its input
autoencoder.fit(pet_feats,
                pet_feats,
                epochs=10,
                batch_size=batch_size,
                callbacks=[cp_callback])

In [None]:
# Reconstruct every image with the trained autoencoder, one batch at a time.
# The output buffer and batch size follow the data / notebook settings instead of
# hard-coded 7378 and 16, and each write targets exactly the rows of its batch
# (the old fixed-width slice relied on NumPy clipping it on the final short batch).
pets_decoded = np.zeros(pet_feats.shape)
dataset = tf.data.Dataset.from_tensor_slices(pet_feats)
dataset = dataset.batch(batch_size=batch_size)
start = 0
for batch in dataset:
    stop = start + batch.shape[0]
    pets_decoded[start:stop] = autoencoder.predict_on_batch(batch)
    start = stop

In [None]:
# should match the shape of pet_feats
pets_decoded.shape

In [None]:
# persist the reconstructions (read back later as 'pets_autoencoder_images.npy')
np.save('pets_autoencoder_images', pets_decoded)

In [None]:
# compare 8 random inputs with their reconstructions
get_rand_images(pet_feats, pets_decoded)

In [None]:
# Per-pixel features: reconstruction RGB + original RGB -> 6 channels.
# The buffer is sized from pet_feats (which also drives the loop) rather than a
# hard-coded 7378, so the two can no longer disagree.
z = np.zeros(pet_feats.shape[:3] + (6,))

for i in range(pet_feats.shape[0]):
    z[i] = np.dstack((pets_decoded[i], pet_feats[i]))

z.shape

In [None]:
# flatten each image to a (38400 pixels, 6 features) table and cluster it on its own
z = z.reshape(7378, 38400, 6)

labels2 = np.zeros((7378, 38400))
kmeans = KMeans(n_clusters=2, init='k-means++', n_init=10, max_iter=1000, random_state=42)
for i, pixel_table in enumerate(z):
    labels2[i] = kmeans.fit_predict(pixel_table)
    # progress marker every 500 images
    if i % 500 == 0:
        print(i)

In [None]:
# NOTE(review): labels2 is saved flat, (n_images, 38400), unlike pets_kmeans1
# which was reshaped to (n, 240, 160) before saving
np.save('pets_kmeans2', labels2)

In [None]:
# labels2 holds one flat row of 38400 cluster ids per image; imshow needs a 2-D
# array, so view it as 240 x 160 label images for plotting (labels2 itself is unchanged)
get_rand_images(pet_feats, np.reshape(labels2, [-1, 240, 160]))

## Self-organizing maps, Autoencoder, KMeans

In [None]:
def segment_with_SOM(image, nx, ny, sigma=1., n=1500, random_seed=None):
    """Quantize the colours of ``image`` with a self-organizing map (MiniSom).

    Every pixel is one training sample. After training, each pixel is replaced
    by the value ``som.quantization`` returns for it.

    Parameters
    ----------
    image : array of shape (height, width, channels)
    nx, ny : int
        SOM grid dimensions passed to ``MiniSom`` as ``x`` and ``y``.
    sigma : float
        Forwarded to ``MiniSom``.
    n : int
        Iteration count passed to ``train_random``.
    random_seed : int or None
        Forwarded to ``MiniSom``; set it for reproducible maps. The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    clustered : float64 array with the same shape as ``image``
    starting_weights : copy of the SOM weights taken before training
    final_weights : SOM weights after training
    """
    # unpacking also rejects inputs that are not (height, width, channels)
    height, width, n_channels = image.shape
    pixels = np.reshape(image, (height * width, n_channels))

    som = MiniSom(x=nx, y=ny, input_len=n_channels, sigma=sigma,
                  learning_rate=0.2, random_seed=random_seed)
    som.random_weights_init(pixels)
    # copy: training may update the weight array in place
    starting_weights = som.get_weights().copy()
    som.train_random(pixels, n)

    qnt = som.quantization(pixels)

    # A row-major reshape puts sample i back at unravel_index(i, (height, width)),
    # which is what the former per-pixel Python loop did one element at a time.
    clustered = np.asarray(qnt, dtype=np.float64).reshape(image.shape)

    final_weights = som.get_weights()

    return clustered, starting_weights, final_weights

In [None]:
# SOM colour quantization of every pet image on a 1 x 3 map (sigma 0.1).
# The buffer is sized from pet_feats (which also drives the loop) rather than a
# hard-coded 7378.
c = np.zeros(pet_feats.shape)
for i in range(pet_feats.shape[0]):
    c[i], _, _ = segment_with_SOM(pet_feats[i], 1, 3, .1)
    # progress marker every 500 images
    if i % 500 == 0:
        print(i)

In [None]:
# persist the SOM-quantized images (read back later as 'pet_som.npy')
np.save('pet_som', c)

In [None]:
# 8 random images, each followed by its SOM-quantized version
get_rand_images(pet_feats, c)

In [None]:
# Per-pixel features: reconstruction + original + SOM-quantized image -> 9 channels.
# The buffer is sized from the data rather than a hard-coded image count.
z = np.zeros(pet_feats.shape[:3] + (9,))

for i in range(z.shape[0]):
    z[i] = np.dstack((pets_decoded[i], pet_feats[i], c[i]))

z.shape

In [None]:
# flatten each image to a (38400 pixels, 9 features) table and cluster it on its own
z = z.reshape(7378, 38400, 9)

labels3 = np.zeros((7378, 38400))
# note: random centroid initialisation here, not k-means++
kmeans = KMeans(n_clusters=2, init='random', n_init=10, max_iter=1000, random_state=42)
for i, pixel_table in enumerate(z):
    labels3[i] = kmeans.fit_predict(pixel_table)
    if i % 500 == 0:
        print(i)

In [None]:
# save kmeans images
# NOTE(review): labels3 is still flat, (n_images, 38400), when it is written
np.save('pets_kmeans3', labels3)

In [None]:
# labels3 holds one flat row of 38400 cluster ids per image; imshow needs a 2-D
# array, so view it as 240 x 160 label images for plotting (labels3 itself is unchanged)
get_rand_images(pet_feats, np.reshape(labels3, [-1, 240, 160]))

## All features of pets dataset

In [None]:
# comment out if data already loaded
# originals, autoencoder reconstructions and SOM-quantized images saved above
pets, pet_auto, pet_som = np.load('pets.npy'), np.load('pets_autoencoder_images.npy'), np.load('pet_som.npy')

In [None]:
## Can't load this many images into memory at once so only going to use first 1000

# colour-space conversions for the first 1000 images only
hsv = np.zeros((1000, 240, 160, 3))
lab = np.zeros((1000, 240, 160, 3))

for i in range(len(hsv)):
    rgb = pets[i]
    hsv[i] = rgb2hsv(rgb)
    lab[i] = rgb2lab(rgb)

# load all features into one numpy array
# 5 sources x 3 channels = 15 features per pixel, joined along the channel axis
z = np.zeros((1000, 240, 160, 15))

for i in range(len(z)):
    z[i] = np.concatenate((pets[i], pet_auto[i], pet_som[i], hsv[i], lab[i]), axis=2)

z.shape

In [None]:
# reshape and run k-means

# one (38400 pixels, 15 features) table per image
z = z.reshape(1000, 38400, 15)

labels4 = np.zeros((1000, 38400))
kmeans = KMeans(n_clusters=2, init='random', n_init=10, max_iter=1000, random_state=42)
for i, pixel_table in enumerate(z):
    labels4[i] = kmeans.fit_predict(pixel_table)
    # progress marker every 250 images
    if i % 250 == 0:
        print(i)

In [None]:
# Only the first labels4.shape[0] images were clustered, so restrict the originals
# to that range (otherwise a random index can run past labels4), and view the
# flat label rows as 240 x 160 images so imshow can draw them.
get_rand_images(pets[:labels4.shape[0]], np.reshape(labels4, [-1, 240, 160]))

# Drone Image Dataset

In [None]:
drone_dir1 = './original_images/'
drone_df = pd.DataFrame(columns=['img', 'img_mask'])

# load drone image data
# every JPEG (in sorted order) becomes row k of the 'img' column
k = 0
for filename in sorted(os.listdir(drone_dir1)):
    if not filename.endswith('.jpg'):
        continue
    resized = Image.open(drone_dir1 + filename).resize((384, 576))
    drone_df.loc[k, 'img'] = np.asarray(resized)
    k += 1

In [None]:
drone_dir2 = './label_images_semantic/'

# load mask data
k=0
for filename in sorted(os.listdir(drone_dir2)):
    if not filename.endswith('.png'):
        continue
    label_img = Image.open(drone_dir2 + filename)
    # Masks hold class ids, not intensities: nearest-neighbour resampling keeps
    # only ids that exist in the file, whereas an interpolating filter can blend
    # neighbouring ids into classes that were never annotated.
    drone_df.loc[k, 'img_mask'] = np.asarray(label_img.resize((384, 576), resample=Image.NEAREST))
    k = k+1

In [None]:
# pull both columns out as 1-D arrays with one image array per entry
drone, drone_mask = drone_df['img'].to_numpy(), drone_df['img_mask'].to_numpy()

In [None]:
# print index and shape of every drone image that is not (576, 384, 3)
expected_shape = (576, 384, 3)
for i, img in enumerate(drone):
    if img.shape != expected_shape:
        print(i, img.shape)

In [None]:
# 8 random drone images, each followed by the mask stored at the same index
get_rand_images(drone, drone_mask)

In [None]:
# turn the array-of-arrays into one dense (n, 576, 384, 3) block ...
drone_feats = np.asarray(list(drone)).reshape(-1, 576, 384, 3)
# ... and scale 8-bit channel values into [0, 1] as float32
drone_feats = drone_feats.astype('float32') / 255

In [None]:
# persist the normalized drone images (read back later as 'drone_feats.npy')
np.save('drone_feats', drone_feats)

## KMeans Segmentation

In [None]:
# sanity check; the cells below assume (400, 576, 384, 3)
drone_feats.shape

In [None]:
# HSV and Lab conversions of every drone image.
# Buffers are sized from drone_feats (which also drives the loop) rather than a
# hard-coded 400.
drone_hsv = np.zeros(drone_feats.shape)
drone_lab = np.zeros(drone_feats.shape)

for i in range(drone_feats.shape[0]):
    drone_hsv[i] = rgb2hsv(drone_feats[i])
    drone_lab[i] = rgb2lab(drone_feats[i])

In [None]:
# show the RGB, HSV and Lab versions of 8 randomly chosen drone images
idx = np.random.randint(0, drone_feats.shape[0], 8)
images = [source[ind] for ind in idx for source in (drone_feats, drone_hsv, drone_lab)]

plt.figure(figsize=(64, 64))
num_images = 24
rows = 4
# 24 pictures go into the first 24 slots of a 5 x 5 grid
for slot, picture in enumerate(images[:num_images], start=1):
    plt.subplot(rows+1, rows+1, slot)
    plt.imshow(picture)
    plt.axis('off')

In [None]:
# Per-pixel features: RGB + HSV + Lab -> 9 channels.
# The buffer is sized from drone_feats rather than a hard-coded 400.
d_z = np.zeros(drone_feats.shape[:3] + (9,))

for i in range(d_z.shape[0]):
    d_z[i] = np.dstack((drone_feats[i], drone_hsv[i], drone_lab[i]))

d_z.shape

In [None]:
# one (pixels, features) table per image: 576 * 384 = 221184 rows
d_z = d_z.reshape(400, 221184, 9)

d_z.shape

In [None]:
# Cluster each drone image with as many clusters as its mask has classes.
d_labels1 = np.zeros((400, 221184))
for i in range(d_z.shape[0]):
    # Count the distinct label values over the WHOLE mask. The previous inner loop
    # overwrote its Counter on every row, so only the last row of the mask decided
    # n_clusters (and it clobbered the notebook-level name `c`).
    n_classes = len(np.unique(drone_mask[i]))
    kmeans = KMeans(random_state=42, init='random', n_clusters=n_classes, max_iter=1000)
    d_labels1[i] = kmeans.fit_predict(d_z[i])
    # progress marker every 50 images
    if i % 50 == 0:
        print(i)

In [None]:
# still flat here: one row of per-pixel cluster ids per image
d_labels1.shape

In [None]:
# back to 576 x 384 label images
d_labels1 = d_labels1.reshape(-1, 576, 384)

In [None]:
# saved after the reshape to (n, 576, 384) above
np.save('drone_kmeans1', d_labels1)

In [None]:
# 8 random drone images, each followed by its cluster label map
get_rand_images(drone_feats, d_labels1)

## Autoencoder and KMeans

In [None]:
# enforce the (n, 576, 384, 3) layout that the encoder's input_shape expects
drone_feats = np.reshape(drone_feats, [-1, 576, 384, 3])
drone_feats.shape

In [None]:
# network parameters
# NOTE: these rebind the same names the pet autoencoder cells used
input_shape = (576, 384, 3)  # same layout drone_feats is reshaped to
batch_size = 1               # passed to autoencoder.fit
kernel_size = 3              # used by every Conv2D / Conv2DTranspose in the loops below
latent_dim = 250             # width of the 'latent_vector' Dense layer

In [None]:
# encoder/decoder number of CNN layers and filters per layer
layer_filters = [16, 32, 64, 128, 256, 512]

# first build the encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
# one Conv2D -> BatchNormalization -> MaxPooling2D stage per entry in layer_filters
for filters in layer_filters:
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               activation='relu',
               kernel_initializer = 'he_normal',
               strides=1,
               padding='same')(x)
    x = BatchNormalization()(x)
    # NOTE(review): with Keras' default pooling stride each stage should halve
    # height and width (576x384 -> 9x6 after six stages) — confirm in summary()
    x = MaxPooling2D(pool_size=2)(x)

# keep the pre-flatten feature-map shape; the decoder rebuilds a tensor of this shape
shape = K.int_shape(x)

# generate latent vector
x = Flatten()(x)
# no activation is given, so the latent layer is linear
latent = Dense(latent_dim, name='latent_vector')(x)

# instantiate encoder model
encoder = Model(inputs,
                latent,
                name='encoder')
encoder.summary()

In [None]:
# build the decoder model
latent_inputs = Input(shape=(latent_dim,), name='decoder_input')

# expand the latent vector back to the encoder's last feature-map size
# (`shape` comes from the encoder cell)
x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

# stack of Conv2DTranspose layers using the encoder's filter counts in reverse
# order (512, 256, 128, 64, 32, 16), each with stride 2
for filters in layer_filters[::-1]:
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        activation='relu',
                        kernel_initializer = 'he_normal',
                        strides=2,
                        padding='same')(x)

# reconstruct the input
# 3 output channels with a sigmoid, so values lie in (0, 1) like the /255-scaled input
outputs = Conv2DTranspose(filters=3,
                          kernel_size=1,
                          activation='sigmoid',
                          padding='same',
                          kernel_initializer = 'he_normal',
                          name='decoder_output')(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

In [None]:
# autoencoder = encoder + decoder
# instantiate autoencoder model
# (this rebinds `autoencoder`, replacing the pet model object in the namespace)
autoencoder = Model(inputs,
                    decoder(encoder(inputs)),
                    name='autoencoder')
autoencoder.summary()

# Mean Square Error (MSE) loss function, adagrad optimizer
# (the optimizer is given by name, so Keras' default Adagrad settings apply)
autoencoder.compile(loss='mse', optimizer='adagrad')

In [None]:
checkpoint_path = "drone_autoencoder.h5"
# NOTE(review): checkpoint_dir is not used anywhere in the visible notebook
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
# (weights only; the filename has no format fields, so one file is reused)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

In [None]:
# train the autoencoder
# input and target are the same array: the network learns to reconstruct its input
autoencoder.fit(drone_feats,
                drone_feats,
                epochs=100,
                batch_size=batch_size,
                callbacks=[cp_callback])

In [None]:
# Reconstruct every drone image with the trained autoencoder, one batch at a time.
# The output buffer and batch size follow the data / notebook settings instead of
# hard-coded 400 and 1, and each write targets exactly the rows of its batch.
drone_decoded = np.zeros(drone_feats.shape)
dataset = tf.data.Dataset.from_tensor_slices(drone_feats)
dataset = dataset.batch(batch_size=batch_size)
start = 0
for batch in dataset:
    stop = start + batch.shape[0]
    drone_decoded[start:stop] = autoencoder.predict_on_batch(batch)
    start = stop

In [None]:
# should match the shape of drone_feats
drone_decoded.shape

In [None]:
# persist the reconstructions (read back later as 'drone_autoencoder_images.npy')
np.save('drone_autoencoder_images', drone_decoded)

In [None]:
# compare 8 random inputs with their reconstructions
get_rand_images(drone_feats, drone_decoded)

In [None]:
# Per-pixel features: reconstruction RGB + original RGB -> 6 channels.
# The buffer is sized from drone_feats (which also drives the loop) rather than a
# hard-coded 400.
d_z2 = np.zeros(drone_feats.shape[:3] + (6,))

for i in range(drone_feats.shape[0]):
    d_z2[i] = np.dstack((drone_decoded[i], drone_feats[i]))

d_z2.shape

In [None]:
# one (221184 pixels, 6 features) table per image
d_z2 = d_z2.reshape(400, 221184, 6)

d_z2.shape

In [None]:
# cluster every drone image on its own into 5 groups of pixels
d_labels2 = np.zeros((400, 221184))

kmeans = KMeans(n_clusters=5, init='random', max_iter=1000, random_state=42)
for i, pixel_table in enumerate(d_z2):
    d_labels2[i] = kmeans.fit_predict(pixel_table)
    # progress marker every 50 images
    if i % 50 == 0:
        print(i)

In [None]:
# NOTE(review): d_labels2 is saved flat, (n_images, 221184), unlike drone_kmeans1
# which was reshaped to (n, 576, 384) before saving
np.save('drone_kmeans2', d_labels2)

In [None]:
# d_labels2 holds one flat row of 221184 cluster ids per image; imshow needs a 2-D
# array, so view it as 576 x 384 label images for plotting (d_labels2 itself is unchanged)
get_rand_images(drone_feats, np.reshape(d_labels2, [-1, 576, 384]))

## Self-organizing Maps

In [None]:
# reload the saved drone images so this section can start from disk
drone_feats = np.load('drone_feats.npy')

In [None]:
# reload the saved autoencoder reconstructions
drone_decoded = np.load('drone_autoencoder_images.npy')

In [None]:
# SOM colour quantization of every drone image on a 1 x 5 map (sigma 0.1).
# The buffer is sized from drone_feats (which also drives the loop) rather than a
# hard-coded 400.
d_maps = np.zeros(drone_feats.shape)
for i in range(drone_feats.shape[0]):
    d_maps[i], _, _ = segment_with_SOM(drone_feats[i], 1, 5, .1)
    # progress marker every 50 images
    if i % 50 == 0:
        print(i)

In [None]:
# spot-check a single SOM-quantized image (index 157)
plt.imshow(d_maps[157])

In [None]:
# Per-pixel features: reconstruction + original + SOM-quantized image -> 9 channels.
# The buffer is sized from drone_feats rather than a hard-coded 400.
d_z3 = np.zeros(drone_feats.shape[:3] + (9,))

for i in range(d_z3.shape[0]):
    d_z3[i] = np.dstack((drone_decoded[i], drone_feats[i], d_maps[i]))

d_z3.shape

In [None]:
# one (221184 pixels, 9 features) table per image, clustered independently
d_z3 = d_z3.reshape(400, 221184, 9)

d_labels3 = np.zeros((400, 221184))
for i, pixel_table in enumerate(d_z3):
    # a fresh estimator per image, always seeded with 42
    kmeans = KMeans(n_clusters=5, init='k-means++', n_init=10, max_iter=1000, random_state=42)
    d_labels3[i] = kmeans.fit_predict(pixel_table)
    if i % 50 == 0:
        print(i)

In [None]:
# back to 576 x 384 label images before saving
d_labels3 = d_labels3.reshape(-1, 576, 384)

np.save('drone_kmeans3', d_labels3)

In [None]:
# 8 random drone images, each followed by its 5-cluster label map
get_rand_images(drone_feats, d_labels3)

## All features

In [None]:
# load all features into array: reconstruction, original, SOM, HSV and Lab
# (5 sources x 3 channels = 15 features per pixel).
# The buffer is sized from drone_feats rather than a hard-coded 400.
d_z4 = np.zeros(drone_feats.shape[:3] + (15,))

for i in range(d_z4.shape[0]):
    d_z4[i] = np.dstack((drone_decoded[i], drone_feats[i], d_maps[i], drone_hsv[i], drone_lab[i]))

d_z4.shape

In [None]:
# reshape and run kmeans
# one (221184 pixels, 15 features) table per image, clustered independently
d_z4 = d_z4.reshape(400, 221184, 15)

d_labels4 = np.zeros((400, 221184))
for i, pixel_table in enumerate(d_z4):
    # a fresh estimator per image, always seeded with 42
    kmeans = KMeans(n_clusters=5, init='k-means++', n_init=10, max_iter=1000, random_state=42)
    d_labels4[i] = kmeans.fit_predict(pixel_table)
    if i % 50 == 0:
        print(i)

In [None]:
# reshape and save kmeans
# (flat per-image rows become 576 x 384 label images before writing)
d_labels4 = np.reshape(d_labels4, [-1, 576, 384])

np.save('drone_kmeans4', d_labels4)

In [None]:
# 8 random drone images, each followed by its 5-cluster label map
get_rand_images(drone_feats, d_labels4)

## Try 5 original images for kmeans

In [None]:
# comment out if data loaded
# reads back the normalized drone images saved as 'drone_feats.npy'
drone_feats = np.load('drone_feats.npy')

In [None]:
# get 5 copies of each image: the RGB channels repeated five times -> 15 channels.
# The buffer is sized from drone_feats rather than a hard-coded 400.
d_z5 = np.zeros(drone_feats.shape[:3] + (15,))

for i in range(d_z5.shape[0]):
    d_z5[i] = np.dstack((drone_feats[i], drone_feats[i], drone_feats[i], drone_feats[i], drone_feats[i]))

d_z5.shape

In [None]:
# reshape and run kmeans
# one (221184 pixels, 15 features) table per image, clustered independently
d_z5 = d_z5.reshape(400, 221184, 15)

d_labels5 = np.zeros((400, 221184))
for i, pixel_table in enumerate(d_z5):
    # a fresh estimator per image, always seeded with 42
    kmeans = KMeans(n_clusters=5, init='k-means++', n_init=10, max_iter=1000, random_state=42)
    d_labels5[i] = kmeans.fit_predict(pixel_table)
    if i % 50 == 0:
        print(i)

In [None]:
# d_labels5 holds one flat row of 221184 cluster ids per image; imshow needs a 2-D
# array, so view it as 576 x 384 label images for plotting (d_labels5 itself is unchanged)
get_rand_images(drone_feats, np.reshape(d_labels5, [-1, 576, 384]))