In [None]:
import tensorflow as tf
##Ask TensorFlow for the name of the GPU device it can see (the recorded output below is '/device:GPU:0')
tf.test.gpu_device_name()

'/device:GPU:0'

In [None]:
###Imports

from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import os
import random
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

#Imports for models
import tensorflow as tf 
from tqdm import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Activation, Input, Conv2DTranspose, Lambda
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
from skimage.io import imread, imshow
from skimage.transform import resize

seed = 42
##Seed the global RNGs. `np.random.seed` has to be *called*: assigning to it
##replaces the function with an int and breaks every later `np.random.seed(...)` call.
np.random.seed(seed)
##`random.shuffle` / `random.choice` are used further down, so seed Python's RNG as well
random.seed(seed)


In [None]:
###Image Filtering

def filterDataset(dataDir, classes=None, mode='train'):
  """Load a COCO annotation file and collect the images to work with.

  Args:
    dataDir: dataset root; annotations are read from
      '<dataDir>/annotations/instances_<mode>2017.json'.
    classes: optional list of category names. When given, the result is the
      union of the images containing at least one of these categories;
      when None, every image in the annotation file is used.
    mode: split name inserted into the annotation file name.

  Returns:
    (unique_images, dataset_size, coco): the shuffled list of image records
    without repeats, its length, and the COCO api object.
  """
  ##Initialize COCO api for instance annotations
  annFile = '{}/annotations/instances_{}2017.json'.format(dataDir, mode)
  coco = COCO(annFile)

  if classes is not None:
    images = []
    ##Iterate for each individual class in the list
    for className in classes:
      catIds = coco.getCatIds(catNms = className)
      if not catIds:
        ##An empty category filter would make getImgIds return *every* image,
        ##so an unknown/misspelled class name must not reach it.
        print('filterDataset: no category named {!r}, skipping'.format(className))
        continue
      ##Get all images from the given category
      imgIds = coco.getImgIds(catIds = catIds)
      images += coco.loadImgs(imgIds)
  else:
    images = coco.loadImgs(coco.getImgIds())

  ##Filter out repeated images (an image holding several requested classes is
  ##loaded once per class). Keyed on the image id so this is a single pass.
  seen_ids = set()
  unique_images = []
  for image in images:
    if image['id'] not in seen_ids:
      seen_ids.add(image['id'])
      unique_images.append(image)

  random.shuffle(unique_images)
  dataset_size = len(unique_images)

  return unique_images, dataset_size, coco

##Dataset root, the category names to keep, and the split to load
dataDir = '/content/drive/My Drive/datasets/COCO_2017_Practice'
classes = ['tv', 'cell phone', 'laptop']
mode = 'val'

##Shuffled list of image records, its length, and the COCO api object for this split
images, dataset_size, coco = filterDataset(dataDir, classes, mode)


loading annotations into memory...
Done (t=1.86s)
creating index...
index created!


In [None]:
##Sanity check: show how many images were selected and a small sample,
##rather than dumping every record into the cell output
print(dataset_size)
print(images[:3])

[{'license': 6, 'file_name': '000000532761.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000532761.jpg', 'height': 480, 'width': 640, 'date_captured': '2013-11-21 22:03:44', 'flickr_url': 'http://farm8.staticflickr.com/7133/7544629434_724e339b8d_z.jpg', 'id': 532761}, {'license': 1, 'file_name': '000000031296.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000031296.jpg', 'height': 425, 'width': 640, 'date_captured': '2013-11-24 22:26:16', 'flickr_url': 'http://farm6.staticflickr.com/5534/10153588564_c0f7f12928_z.jpg', 'id': 31296}, {'license': 1, 'file_name': '000000068833.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000068833.jpg', 'height': 480, 'width': 640, 'date_captured': '2013-11-14 16:32:09', 'flickr_url': 'http://farm8.staticflickr.com/7152/6427642465_a62732d040_z.jpg', 'id': 68833}, {'license': 3, 'file_name': '000000440475.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000440475.jpg', 'height': 427, 'width': 640, 'date_c

In [None]:
def getClassName(classID, cats):
    """Look up a category name by id.

    Returns the 'name' of the first entry in `cats` whose 'id' equals
    `classID`, or None when no entry matches.
    """
    matches = (cat['name'] for cat in cats if cat['id'] == classID)
    return next(matches, None)

def getImage(imageObj, img_folder, input_image_size):
    """Read one image, scale it to [0, 1], resize it and return 3 channels.

    Args:
        imageObj: image record; only its 'file_name' entry is read.
        img_folder: folder the file lives in.
        input_image_size: target size, indexed as (rows, cols) to match the
            batch arrays the generator allocates.

    Returns:
        Float array of shape (input_image_size[0], input_image_size[1], 3).
    """
    ##Read and normalize an image
    train_img = io.imread(img_folder + '/' + imageObj['file_name'])/255.0
    ##Resize. cv2.resize takes dsize as (width, height), i.e. (cols, rows),
    ##so the two entries have to be swapped for non-square targets.
    target_rows, target_cols = input_image_size[0], input_image_size[1]
    train_img = cv2.resize(train_img, (target_cols, target_rows))

    if train_img.ndim == 2: ##Black and white image: repeat it on 3 channels
        return np.stack((train_img,)*3, axis=-1)
    if train_img.shape[2] >= 3: ##RGB, or RGB + extra (e.g. alpha) channels: keep RGB only
        return train_img[:, :, :3]
    ##Remaining case (grey + extra channel): use the first channel as grey
    return np.stack((train_img[:, :, 0],)*3, axis=-1)
    
def getNormalMask(imageObj, classes, coco, catIds, input_image_size):
    """Build a class-index mask for one image.

    Every annotation of the image belonging to `catIds` is rasterized and its
    pixels are set to `classes.index(category name) + 1`; pixels without any
    annotation stay 0. Where annotations overlap the larger value wins.

    Args:
        imageObj: image record; only its 'id' entry is read.
        classes: list of category names defining the pixel values.
        coco: COCO api object holding the annotations.
        catIds: category ids to include.
        input_image_size: target size, indexed as (rows, cols).

    Returns:
        Array of shape (input_image_size[0], input_image_size[1], 1).
    """
    annIds = coco.getAnnIds(imageObj['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    cats = coco.loadCats(catIds)
    mask_rows, mask_cols = input_image_size[0], input_image_size[1]
    train_mask = np.zeros((mask_rows, mask_cols))
    for ann in anns:
        className = getClassName(ann['category_id'], cats)
        pixel_value = classes.index(className)+1
        ##Nearest-neighbour keeps label values intact; the default (bilinear)
        ##blends neighbours and would turn the edge of a class-2 region into class 1.
        ##dsize is (width, height), hence the swapped order.
        new_mask = cv2.resize(coco.annToMask(ann)*pixel_value, (mask_cols, mask_rows),
                              interpolation=cv2.INTER_NEAREST)
        train_mask = np.maximum(new_mask, train_mask)

    ##Add extra dimension for parity with train_img size [X * X * 3]
    train_mask = train_mask.reshape(mask_rows, mask_cols, 1)
    return train_mask
    
def getBinaryMask(imageObj, coco, catIds, input_image_size):
    """Build a foreground/background mask for one image.

    Pixels covered by any annotation of the image belonging to `catIds`
    are 1, all others 0.

    Args:
        imageObj: image record; only its 'id' entry is read.
        coco: COCO api object holding the annotations.
        catIds: category ids to include.
        input_image_size: target size, indexed as (rows, cols).

    Returns:
        Array of shape (input_image_size[0], input_image_size[1], 1).
    """
    annIds = coco.getAnnIds(imageObj['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    mask_rows, mask_cols = input_image_size[0], input_image_size[1]
    train_mask = np.zeros((mask_rows, mask_cols))
    for ann in anns:
        ##Nearest-neighbour resizing only copies existing 0/1 values, so no
        ##thresholding is needed afterwards. dsize is (width, height), hence
        ##the swapped order.
        new_mask = cv2.resize(coco.annToMask(ann), (mask_cols, mask_rows),
                              interpolation=cv2.INTER_NEAREST)
        train_mask = np.maximum(new_mask, train_mask)

    ##Add extra dimension for parity with train_img size [X * X * 3]
    train_mask = train_mask.reshape(mask_rows, mask_cols, 1)
    return train_mask



In [None]:
def dataGeneratorCoco(images, classes, coco, folder, 
                      input_image_size=(224,224), batch_size=4, mode='train', mask_type='binary'):
    """Endlessly yield (image batch, mask batch) pairs built from COCO records.

    Args:
        images: list of image records (dicts with 'id' and 'file_name').
        classes: category names; used to select annotations and, for
            'normal' masks, to derive pixel values.
        coco: COCO api object holding the annotations.
        folder: dataset root; images are read from '<folder>/<mode>2017/'.
        input_image_size: (rows, cols) of every yielded image and mask.
        batch_size: number of samples per yielded batch.
        mode: split name used in the image folder name.
        mask_type: 'binary' (0/1 mask) or 'normal' (class-index mask).

    Yields:
        img: float array (batch_size, rows, cols, 3)
        mask: float array (batch_size, rows, cols, 1)

    Raises:
        ValueError: on the first `next()` if `mask_type` is unknown or there
            are fewer images than `batch_size`.
    """
    ##Fail early: otherwise an unknown mask_type leaves train_mask undefined
    ##(or silently reuses the previous sample's mask).
    if mask_type not in ("binary", "normal"):
        raise ValueError("mask_type must be 'binary' or 'normal', got {!r}".format(mask_type))

    ##Private copy so reshuffling between passes does not reorder the caller's list
    images = list(images)
    dataset_size = len(images)
    if dataset_size < batch_size:
        raise ValueError("need at least batch_size={} images, got {}".format(batch_size, dataset_size))

    img_folder = '{}/{}2017/'.format(folder, mode)
    catIds = coco.getCatIds(catNms=classes)
    
    c = 0
    while(True):
        img = np.zeros((batch_size, input_image_size[0], input_image_size[1], 3))
        mask = np.zeros((batch_size, input_image_size[0], input_image_size[1], 1))

        for i in range(c, c+batch_size): #Initially from 0 to batch_size, when c = 0
            imageObj = images[i]
            
            ###Retrieve Image
            train_img = getImage(imageObj, img_folder, input_image_size)
            
            ###Create Mask
            if mask_type=="binary":
                train_mask = getBinaryMask(imageObj, coco, catIds, input_image_size)
            else:
                train_mask = getNormalMask(imageObj, classes, coco, catIds, input_image_size)
            
            ##Add to respective batch sized arrays
            img[i-c] = train_img
            mask[i-c] = train_mask
            
        c+=batch_size
        ##Start over (reshuffled) only when the next full batch would run past the
        ##end; a batch ending exactly at the last image is still valid.
        if(c + batch_size > dataset_size):
            c=0
            random.shuffle(images)
        yield img, mask

In [None]:
##Batch layout and mask flavour for the preview generator
batch_size = 4
input_image_size = (224,224)
mask_type = 'normal'

##Endless generator of (image batch, mask batch) pairs over the filtered images
val_gen = dataGeneratorCoco(images, classes, coco, dataDir,
                            input_image_size, batch_size, mode, mask_type)

In [None]:
###Function to visualize generated data
def visualizeGenerator(gen):
    """Draw one batch from `gen` and plot up to four samples of it.

    The figure has two 2x2 grids side by side: the left one shows the first
    channel of each mask, the right one shows the matching images.

    Args:
        gen: generator yielding (img, mask) batches, with img indexable as
            img[j] (an image) and mask[j] as a (rows, cols, channels) array.
    """
    img, mask = next(gen)
    ##The grids hold four cells; smaller batches just leave cells empty
    ##instead of raising IndexError.
    n_show = min(4, len(img))
    
    fig = plt.figure(figsize=(20, 10))
    outerGrid = gridspec.GridSpec(1, 2, wspace=0.1, hspace=0.1)
    
    for i in range(2):
        innerGrid = gridspec.GridSpecFromSubplotSpec(2, 2,
                        subplot_spec=outerGrid[i], wspace=0.05, hspace=0.05)

        for j in range(n_show):
            ax = plt.Subplot(fig, innerGrid[j])
            if(i==1):
                ax.imshow(img[j])
            else:
                ax.imshow(mask[j][:,:,0])
                
            ax.axis('off')
            fig.add_subplot(ax)        
    plt.show()

In [None]:
###Visualizing data generated by dataGeneratorCoco
##Consumes one batch from val_gen

visualizeGenerator(val_gen)

In [None]:
###Defining augmentation generator

def augmentationGenerator(gen, augGeneratorArgs, seed=None):
  """Wrap a (img, mask) batch generator and augment every batch it yields.

  Args:
    gen: generator yielding (img, mask) batches; img is multiplied by 255
      before augmentation and divided by 255.0 afterwards.
    augGeneratorArgs: keyword arguments for ImageDataGenerator. The mask
      generator gets the same arguments minus 'brightness_range'.
    seed: optional int. With a seed, the sequence of per-batch augmentation
      seeds is reproducible; with None it differs from run to run.

  Yields:
    (img_aug, mask_aug): the augmented image batch and mask batch.
  """
  ##Initialize image data generator using the provided arguments
  image_gen = ImageDataGenerator(**augGeneratorArgs)
  ##Remove mask's brightness arguments
  augGeneratorArgs_mask = augGeneratorArgs.copy()
  augGeneratorArgs_mask.pop('brightness_range', None)
  ##Initialize mask data generator using modified arguments
  mask_gen = ImageDataGenerator(**augGeneratorArgs_mask)
  ##NOTE(review): masks go through the same geometric transforms with the
  ##generator's default interpolation; for class-index masks confirm that the
  ##augmented values are still valid labels.

  ##Private RNG for the per-batch seeds: `seed` really controls them, and
  ##other users of the global `random` / `np.random` state cannot disturb it.
  seed_rng = random.Random(seed)

  for img, mask in gen:
    batch_seed = seed_rng.randrange(9999)
    ##It is important to keep the seeds synchronized to make sure the images still correspond to the masks
    g_x = image_gen.flow(255*img, batch_size = img.shape[0], seed = batch_seed, shuffle = True)
    g_y = mask_gen.flow(mask, batch_size = mask.shape[0], seed = batch_seed, shuffle = True)
    img_aug = next(g_x) / 255.0
    mask_aug = next(g_y)

    yield img_aug, mask_aug


In [None]:
###Applying augmentation


##Augmentation settings handed to ImageDataGenerator
augGeneratorArgs = {
    'featurewise_center': False,
    'samplewise_center': False,
    'rotation_range': 5,
    'width_shift_range': 0.01,
    'height_shift_range': 0.01,
    'brightness_range': (0.8,1.2),
    'shear_range': 0.01,
    'zoom_range': [1, 1.25],
    'horizontal_flip': True,
    'vertical_flip': False,
    'fill_mode': 'reflect',
    'data_format': 'channels_last',
}


##Wrap val_gen so that every batch it produces comes out augmented
aug_gen = augmentationGenerator(val_gen, augGeneratorArgs)

In [None]:
###Visualizing data generated by dataGeneratorCoco and augmented by augmentationGenerator
##Consumes one batch from aug_gen (and therefore one from the val_gen it wraps)
visualizeGenerator(aug_gen)

In [None]:
###Model time!

#Settings for the training run
_classes = ['tv','laptop']
train = 'train'
val = 'val'
input_image_size = (224, 224)
batch_size = 8
binary = 'binary'
normal = 'normal'


##Filtered image lists; each split gets its own COCO api object
train_set, train_size, coco_train = filterDataset(dataDir, classes=_classes, mode=train)
val_set, val_size, coco_val = filterDataset(dataDir, classes=_classes, mode=val)

##Batch generators producing 'normal' masks for both splits
train_gen = dataGeneratorCoco(train_set, _classes, coco_train, dataDir,
                              input_image_size=input_image_size, batch_size=batch_size,
                              mode=train, mask_type=normal)
val_gen = dataGeneratorCoco(val_set, _classes, coco_val, dataDir,
                            input_image_size=input_image_size, batch_size=batch_size,
                            mode=val, mask_type=normal)

loading annotations into memory...
Done (t=22.14s)
creating index...
index created!
loading annotations into memory...
Done (t=0.53s)
creating index...
index created!


In [None]:
##Sanity check: number of unique training images returned by filterDataset
print(train_size)

7069


In [None]:
##Visualizing data from train_generator
##Consumes one batch from train_gen
visualizeGenerator(train_gen)

In [None]:
IMG_WIDTH = 224
IMG_HEIGHT = 224
IMG_CHANNELS = 3
epochs = 25
##A step consumes one batch of `batch_size` images, so one pass over a split
##is size // batch_size steps (not `size` steps).
steps_per_epoch = max(1, train_size // batch_size)
validation_steps = max(1, val_size // batch_size)
##'normal' masks hold 0 for background and index+1 for every entry of _classes
NUM_CLASSES = len(_classes) + 1

##Creating the model

initializer = "he_normal"

def _convBlock(x, filters, dropout_rate):
    """Two 3x3 same-padded ReLU convolutions with dropout in between."""
    x = tf.keras.layers.Conv2D(filters, (3,3), activation="relu", kernel_initializer=initializer, padding="same")(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)
    return tf.keras.layers.Conv2D(filters, (3,3), activation="relu", kernel_initializer=initializer, padding="same")(x)

def _upBlock(x, skip, filters, dropout_rate):
    """Upsample x by 2, concatenate the skip connection on the channel axis, then convolve."""
    x = tf.keras.layers.Conv2DTranspose(filters, (2,2), strides=(2,2), padding="same")(x)
    x = tf.keras.layers.concatenate([x, skip], axis=3)
    return _convBlock(x, filters, dropout_rate)

###Building U-Net Model

##Input Layer (rows, cols, channels)
inputs = Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
##No rescaling layer here: getImage already divides the pixels by 255.0,
##so dividing again would squash the inputs into [0, 1/255].

##Contraction
c1 = _convBlock(inputs, 16, 0.1)
p1 = tf.keras.layers.MaxPooling2D((2,2))(c1)

c2 = _convBlock(p1, 32, 0.1)
p2 = tf.keras.layers.MaxPooling2D((2,2))(c2)

c3 = _convBlock(p2, 64, 0.2)
p3 = tf.keras.layers.MaxPooling2D((2,2))(c3)

c4 = _convBlock(p3, 128, 0.2)
p4 = tf.keras.layers.MaxPooling2D((2,2))(c4)

c5 = _convBlock(p4, 256, 0.3)

##Expansion
c6 = _upBlock(c5, c4, 128, 0.2)
c7 = _upBlock(c6, c3, 64, 0.2)
c8 = _upBlock(c7, c2, 32, 0.1)
c9 = _upBlock(c8, c1, 16, 0.1)

##Output Layer: one softmax score per class for every pixel
##(softmax over a single channel is constantly 1 and cannot learn anything)
outputs = tf.keras.layers.Conv2D(NUM_CLASSES, (1,1), activation="softmax")(c9)

##Defining Model
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])

##Compiling Model
##The masks store integer class ids (not one-hot vectors), which is what the
##sparse variant of categorical cross-entropy expects.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

##Calling Model Summary
#model.summary()

##Defining callbacks
callbacks = [
             tf.keras.callbacks.ModelCheckpoint('/content/drive/My Drive/THESIS/COCO_PRACTICE_SEG.h5', verbose=1, save_best_only=True),
             tf.keras.callbacks.EarlyStopping(patience=2, monitor="val_loss"),
             tf.keras.callbacks.TensorBoard(log_dir='logs')]

##Training the model
results = model.fit(x = train_gen, 
                    validation_data = val_gen, 
                    steps_per_epoch = steps_per_epoch, 
                    validation_steps = validation_steps, 
                    epochs = epochs, 
                    callbacks = callbacks, ##previously defined but never passed in
                    verbose = True)


FileNotFoundError: ignored

In [None]:
##`! '<path>'` asks the shell to *execute* the JPEG; check that the file exists instead
os.path.exists('/content/drive/My Drive/datasets/COCO_2017_Practice/train2017/000000074924.jpg')

UnicodeDecodeError: ignored