In [None]:
import numpy as np
from tensorflow.keras import backend as K
import os
import threading
import multiprocessing.pool
from functools import partial
from PIL import Image as pil_image
import matplotlib.pyplot as plt
import tensorflow as tf
from IPython.display import clear_output
from tensorflow.keras.callbacks import Callback, TensorBoard
from ipynb.fs.full.CNN_LSTM_Model import *
import matplotlib.pyplot as plt
from IPython.display import clear_output
import datetime

In [None]:
# Target frame size as (height, width); used as the default `target_size` of the iterators.
IMG_SIZE = (240, 320)
# Default length of the second batch axis (frames per sequence).
FRAMES_PER_STEP = 4
# Default length of the first batch axis (sequences per batch).
BATCH_SIZE = 8
# Layout string the iterators compare against to pick the channel axis.
DATA_FORMAT = 'channels_last'
# Dropout value handed to the model builder.
DROPOUT = 0.7

# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort
# seen on some installs — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK']='True'
# Disabled TF1-style session configuration (GPU memory growth).
#config = tf.ConfigProto()
#config.gpu_options.allow_growth = True
#sess = tf.Session(config=config)

In [None]:
#!pip3 install tensorflow-gpu

In [None]:
class ImageDataGenerator(object):
    """Factory for batch iterators over in-memory arrays or image directories.

    # Arguments
        data_format: `'channels_last'`, `'channels_first'` or `None`.
            `None` falls back to `K.image_data_format()`.

    # Raises
        ValueError: if `data_format` is not one of the two supported values.
    """

    def __init__(self, data_format = None):
        if data_format is None:
            data_format = K.image_data_format()
        # Validation and attribute setup run for every call. They used to be
        # nested under the `is None` branch, so an explicitly passed
        # `data_format` left the instance without `self.data_format`.
        if data_format not in {'channels_last', 'channels_first'}:
            raise ValueError('`data_format` should be `"channels_last"` (channel after row and '
                             'column) or `"channels_first"` (channel before row and column). '
                             'Received arg: %s' % (data_format,))
        self.data_format = data_format
        print('Supported image data format')
        # Axis positions inside a 4D (batch, ...) image tensor.
        if data_format == 'channels_last':
            self.channel_axis = 3
            self.row_axis = 1
            self.col_axis = 2
        else:
            self.channel_axis = 1
            self.row_axis = 2
            self.col_axis = 3

    def flow(self, x, y = None, batch_size = BATCH_SIZE, seed = None):
        """Returns a `NumpyArrayIterator` over the arrays `x` (and `y`).

        # Arguments
            x: rank-4 array-like of images.
            y: optional rank-4 array-like of targets.
            batch_size: number of samples per batch.
            seed: optional integer random seed.
        """
        # The generator itself is passed as the third positional argument;
        # `batch_size` is forwarded by keyword so it is no longer dropped.
        return NumpyArrayIterator(x,
                                  y,
                                  self,
                                  batch_size = batch_size,
                                  seed = seed,
                                  data_format = self.data_format)

    def flow_from_directory(self, directory,
                            target_size= IMG_SIZE,
                            frames_per_step = FRAMES_PER_STEP,
                            batch_size = BATCH_SIZE,
                            seed=None):
        """Returns a `DirectoryIterator` reading frames below `directory`.

        # Arguments
            directory: root folder; every sub-folder is treated as one class.
            target_size: `(height, width)` every image is resized to.
            frames_per_step: number of frames per sequence.
            batch_size: number of sequences per batch.
            seed: optional integer random seed.
        """
        return DirectoryIterator(directory,
                                 self,
                                 target_size = target_size,
                                 frames_per_step = frames_per_step,
                                 data_format = self.data_format,
                                 batch_size = batch_size,
                                 seed = seed)

    def change_dims(self, image):
        """Returns `image` with a new leading axis of length 1."""
        return np.expand_dims(image, axis=0)

    # Original (misspelled) method name, kept so existing callers still work.
    chnage_dims = change_dims
        
        

In [None]:
class Iterator(object):
    """Base class that hands out batches of sample indices.

    Subclasses provide `next()`, which `__next__` delegates to. They are
    expected to pull from `self.index_generator` while holding `self.lock`.

    # Arguments
        n: total number of samples to iterate over.
        batch_size: number of sequences per batch.
        frames_per_step: number of indices consumed per sequence, so one
            full batch covers `batch_size * frames_per_step` indices.
        seed: optional integer; when given, NumPy's global RNG is reseeded
            with `seed + total_batches_seen` before every batch.
        shuffle: whether to permute the indices at the start of every pass.
    """

    def __init__(self,
                 n,
                 batch_size,
                 frames_per_step,
                 seed,
                 shuffle = False):
        self.n = n
        self.batch_size = batch_size
        self.batch_index = 0
        self.total_batches_seen = 0
        self.lock = threading.Lock()
        # `shuffle` and `seed` are passed by keyword: passed positionally,
        # `seed` used to land in the `shuffle` slot of `_flow_index`.
        self.index_generator = self._flow_index(n, batch_size, frames_per_step,
                                                shuffle = shuffle, seed = seed)

    def reset(self):
        """Restarts the batch counter of the current pass."""
        self.batch_index = 0

    def _flow_index(self, n, batch_size = BATCH_SIZE, frames_per_step = FRAMES_PER_STEP, shuffle = False, seed = None):
        """Infinite generator of `(index_slice, start_offset, batch_size)`.

        `index_slice` holds up to `batch_size * frames_per_step` indices;
        the last slice of a pass may be shorter.
        """
        # Ensure self.batch_index = 0
        self.reset()
        chunk = batch_size * frames_per_step
        while True:
            if seed is not None:
                np.random.seed(seed + self.total_batches_seen)
            if self.batch_index == 0:
                # Start of a pass: rebuild (and optionally permute) the indices.
                if shuffle:
                    index_array = np.random.permutation(n)
                else:
                    index_array = np.arange(n)
            current_index = (self.batch_index * chunk) % n
            if n > current_index + chunk:
                current_batch_size = chunk
                self.batch_index += 1
            else:
                # Last (possibly partial) slice; the next call starts a new pass.
                current_batch_size = n - current_index
                self.batch_index = 0
            self.total_batches_seen += 1
            yield(index_array[current_index: current_index + current_batch_size],
                  current_index,
                  batch_size)

    def __iter__(self):
        # needed if you want to do something like:
        # for x, y in data_gen.flow(...):
        return self

    def __next__(self, *args, ** kwargs):
        return self.next(*args, **kwargs)
             

In [None]:
class NumpyArrayIterator(Iterator):
    """Iterator yielding batches from in-memory rank-4 arrays.

    # Arguments
        x: rank-4 array-like of input images.
        y: rank-4 array-like of targets, or `None` for inputs only.
        image_data_generator: the generator object that created this
            iterator (third positional argument, as `ImageDataGenerator.flow`
            passes it); stored on the instance.
        batch_size: number of samples per batch.
        shuffle: whether to permute the sample order on every pass.
        seed: optional integer random seed.
        data_format: `'channels_last'`, `'channels_first'` or `None`
            (falls back to `K.image_data_format()`).

    # Raises
        ValueError: if `x` and `y` differ in length or are not rank 4.
    """

    def __init__(self,
                 x,
                 y,
                 image_data_generator = None,
                 batch_size = BATCH_SIZE,
                 shuffle = True,
                 seed = None,
                 data_format = None):
        # The comparison used to be `len(x) != len(x)`, which is never true.
        if y is not None and len(x) != len(y):
            raise ValueError('X (images tensor) and y (labels) '
                             'should have the same length. '
                             'Found: X.shape = %s, y.shape = %s' %
                             (np.asarray(x).shape, np.asarray(y).shape))
        if data_format is None:
            data_format = K.image_data_format()

        self.x = self._checked_array(x, data_format)
        # `y` is optional: keep `None` instead of converting it to an array.
        self.y = None if y is None else self._checked_array(y, data_format)

        self.image_data_generator = image_data_generator
        self.data_format = data_format

        n = self.x.shape[0]
        # One index per sample, hence frames_per_step = 1.
        super(NumpyArrayIterator, self).__init__(n, batch_size, 1, seed)
        # Build the index generator explicitly so that `shuffle` and `seed`
        # reach `_flow_index` under the right parameter names. The generator
        # created by the base constructor has not been started yet.
        self.index_generator = self._flow_index(n, batch_size, 1, shuffle, seed)

    @staticmethod
    def _checked_array(data, data_format):
        """Converts `data` to a float array; checks rank and channel count."""
        import warnings  # local import: not imported at the top of this file

        array = np.asarray(data, dtype = K.floatx())
        if array.ndim != 4:
            raise ValueError('Input data in `NumpyArrayIterator` '
                             'should have rank 4. You passed an array '
                             'with shape %s' % (array.shape,))
        channels_axis = 3 if data_format == 'channels_last' else 1
        if array.shape[channels_axis] not in {1, 3, 4}:
            warnings.warn('NumpyArrayIterator is set to use the '
                          'data format convention "' + data_format + '" '
                          '(channels on axis ' +
                          str(channels_axis) + '), i.e. expected '
                          'either 1, 3 or 4 channels on axis ' +
                          str(channels_axis) + '. '
                          'However, it was passed an array with shape ' + str(array.shape) +
                          ' (' + str(array.shape[channels_axis]) + ' channels).')
        return array

    def next(self):
        """Returns the next batch.

        # Returns
            `(batch_x, batch_y)`, or only `batch_x` when no `y` was given.
            Each batch has shape `(samples, 1) + array.shape[1:]`.
        """
        # keeps under lock only the mechanism which advances, so indexing of each batch.
        with self.lock:
            index_array, _, _ = next(self.index_generator)

        # Gather the selected samples and insert a length-1 axis after the
        # batch axis. The batch is sized by the indices actually received,
        # so the last batch of a pass may be shorter.
        batch_x = np.expand_dims(self.x[index_array], axis=1)
        if self.y is None:
            return batch_x
        # Targets come from `self.y`; they used to be overwritten with `x`.
        batch_y = np.expand_dims(self.y[index_array], axis=1)
        return batch_x, batch_y


In [None]:
def load_img(path, grayscale=False, target_size=None):
    """Opens an image file as a PIL image in a fixed colour mode.

    # Arguments
        path: location of the image file.
        grayscale: when true the image is converted to mode `'L'`,
            otherwise to mode `'RGB'`.
        target_size: `None` to keep the stored size, or a tuple of ints
            `(img_height, img_width)` to resize to.
    # Returns
        A PIL Image instance.
    # Raises
        ImportError: if PIL is not available.
    """
    if pil_image is None:
        raise ImportError('Could not import PIL.Image. '
                          'The use of `array_to_img` requires PIL.')
    wanted_mode = 'L' if grayscale else 'RGB'
    image = pil_image.open(path)
    # Convert only when the stored mode differs from the requested one.
    if image.mode != wanted_mode:
        image = image.convert(wanted_mode)
    if target_size:
        # PIL sizes are (width, height); `target_size` is (height, width).
        width_height = (target_size[1], target_size[0])
        if image.size != width_height:
            image = image.resize(width_height)
    return image

In [None]:
def img_to_array(img, data_format=None):
    """Turns a PIL image into a 3D float array in the requested layout.

    # Arguments
        img: PIL Image instance.
        data_format: `'channels_first'`, `'channels_last'` or `None`
            (falls back to `K.image_data_format()`).
    # Returns
        A 3D Numpy array of dtype `K.floatx()`.
    # Raises
        ValueError: if `data_format` is unknown or the image data is
            neither 2D nor 3D.
    """
    if data_format is None:
        data_format = K.image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format: ', data_format)

    # The array produced from the image is (height, width[, channel]).
    arr = np.asarray(img, dtype=K.floatx())
    channels_first = data_format == 'channels_first'
    rank = len(arr.shape)
    if rank not in (2, 3):
        raise ValueError('Unsupported image shape: ', arr.shape)
    if rank == 2:
        # Single-channel image: add the missing channel axis.
        rows, cols = arr.shape
        new_shape = (1, rows, cols) if channels_first else (rows, cols, 1)
        return arr.reshape(new_shape)
    # Rank 3: move the channel axis to the front only when requested.
    return arr.transpose(2, 0, 1) if channels_first else arr

In [None]:
def show(img):
    """Draws `img` on the current axes, titled 'Image', with the axis hidden."""
    plt.imshow(img)
    plt.title('Image')
    plt.axis('off')

In [None]:
def show_array_of_images(images, title='ImageNet predictions'):
    """Plots every image in `images` on a two-column grid of subplots.

    # Arguments
        images: sequence of image arrays accepted by
            `tf.keras.preprocessing.image.array_to_img`.
        title: figure title; defaults to the previously hard-coded text.
    """
    count = len(images)
    # Up to four images keep the 2x2 layout. More rows are added beyond
    # that, where a fixed 2x2 grid raised for the fifth image.
    n_rows = max(2, (count + 1) // 2)
    plt.figure(figsize=(10, 9))
    plt.subplots_adjust(hspace=0.5)
    for i in range(count):
        plt.subplot(n_rows, 2, i + 1)
        plt.imshow(tf.keras.preprocessing.image.array_to_img(images[i]))
        plt.title(i)
        plt.axis('off')
    # Set once, after the loop, and only when something was drawn.
    if count:
        plt.suptitle(title)

In [None]:
def create_mask(pred_mask):
    """Returns the arg-max mask of the first element of `pred_mask`.

    The arg-max is taken over the last axis, a trailing axis of length 1
    is added, and the first entry along the leading axis is returned.
    """
    winners = tf.argmax(pred_mask, axis=-1)
    return tf.expand_dims(winners, axis=-1)[0]

In [None]:
def show_array_of_forground_mask(images, title='ImageNet predictions'):
    """Plots the arg-max mask of every score map in `images`.

    # Arguments
        images: sequence of score maps; each item is either a single
            rank-3 map or a rank-4 batch whose first element is used.
        title: figure title; defaults to the previously hard-coded text.
    """
    count = len(images)
    # Two columns; rows grow with the number of images (at least 2).
    n_rows = max(2, (count + 1) // 2)
    plt.figure(figsize=(10, 9))
    plt.subplots_adjust(hspace=0.5)
    for i in range(count):
        score_map = images[i]
        # `create_mask` returns element 0 along the leading axis. A rank-3
        # map therefore needs a leading batch axis; without it one spatial
        # axis is dropped and `array_to_img` gets a rank-2 array.
        if len(score_map.shape) == 3:
            score_map = score_map[tf.newaxis, ...]
        plt.subplot(n_rows, 2, i + 1)
        plt.imshow(tf.keras.preprocessing.image.array_to_img(create_mask(score_map)))
        plt.title(i)
        plt.axis('off')
    if count:
        plt.suptitle(title)

In [None]:
# NOTE(review): this cell re-defines `show_array_of_images`, which an earlier
# cell of this notebook already defines; after running all cells this later
# definition is the one in effect. Consider removing one of the two cells.
def show_array_of_images(images, title='ImageNet predictions'):
    """Plots every image in `images` on a two-column grid of subplots.

    # Arguments
        images: sequence of image arrays accepted by
            `tf.keras.preprocessing.image.array_to_img`.
        title: figure title; defaults to the previously hard-coded text.
    """
    count = len(images)
    # Up to four images keep the 2x2 layout. More rows are added beyond
    # that, where a fixed 2x2 grid raised for the fifth image.
    n_rows = max(2, (count + 1) // 2)
    plt.figure(figsize=(10, 9))
    plt.subplots_adjust(hspace=0.5)
    for i in range(count):
        plt.subplot(n_rows, 2, i + 1)
        plt.imshow(tf.keras.preprocessing.image.array_to_img(images[i]))
        plt.title(i)
        plt.axis('off')
    # Set once, after the loop, and only when something was drawn.
    if count:
        plt.suptitle(title)

In [None]:
class DirectoryIterator(Iterator):
    """Iterator yielding batches of frame sequences read from disk.

    Every sub-folder of `directory` is treated as one class and must contain
    an `input` and a `groundtruth` folder. Files are paired by their position
    in the sorted listings of those two folders.

    # Arguments
        directory: root folder of the dataset.
        image_data_generator: generator object that created this iterator.
        target_size: `(height, width)` every image is resized to.
        frames_per_step: number of frames per sequence.
        batch_size: number of sequences per batch.
        data_format: `'channels_last'`, `'channels_first'` or `None`
            (falls back to `K.image_data_format()`).
        seed: optional integer random seed.

    # Raises
        ValueError: if a class folder holds fewer ground-truth images than
            input images.
    """

    def __init__(self,
                directory,
                image_data_generator,
                target_size = IMG_SIZE,
                frames_per_step = FRAMES_PER_STEP,
                batch_size = BATCH_SIZE,
                data_format = None,
                seed = None):
        if data_format is None:
            # Call the backend function. Storing the function object itself
            # made `img_to_array` reject the value as an unknown data format.
            data_format = K.image_data_format()
        self.directory = directory
        self.frames_per_step = frames_per_step
        self.image_data_generator = image_data_generator
        self.target_size = tuple(target_size)
        self.data_format = data_format

        # Every sub-directory is one class; sorted for a stable class index.
        classes = [subdir for subdir in sorted(os.listdir(directory))
                   if os.path.isdir(os.path.join(directory, subdir))]
        self.num_class = len(classes)
        self.class_indices = dict(zip(classes, range(self.num_class)))
        class_dirs = [os.path.join(directory, subdir) for subdir in classes]

        self.samples = 0
        self.input_files = []
        self.groundtruth_files = []

        pool = multiprocessing.pool.ThreadPool()
        try:
            # first, count the number of samples and classes
            function_partial = partial(count_valid_files_in_directory,
                                       follow_links=False)
            self.samples = sum(pool.map(function_partial, class_dirs))

            print('Found %d images belonging to %d classes.' %
                  (self.samples, self.num_class))

            # second, build an index of the images in the different class subfolders
            results = [pool.apply_async(list_valid_filenames_in_directory,
                                        (dirpath, False))
                       for dirpath in class_dirs]
            # Results are consumed in submission order, so the file order is
            # deterministic.
            for dirpath, res in zip(class_dirs, results):
                input_file, groundtruth_file = res.get()
                if len(groundtruth_file) < len(input_file):
                    # Frames are paired by position; a missing target would
                    # shift or break the pairing later on.
                    raise ValueError('Found %d input images but only %d ground-truth '
                                     'images in %s' %
                                     (len(input_file), len(groundtruth_file), dirpath))
                self.input_files += input_file
                self.groundtruth_files += groundtruth_file
        finally:
            # Release the worker threads even when listing fails.
            pool.close()
            pool.join()

        self.classes = np.zeros((self.samples,), dtype='int32')
        super(DirectoryIterator, self).__init__(self.samples, batch_size, frames_per_step, seed)

    def next(self):
        """Returns the next `(batch_x, batch_y)` pair.

        # Returns
            `batch_x`: RGB frames scaled to [0, 1], shape
                `(batch, frames_per_step) + frame shape with 3 channels`.
            `batch_y`: grayscale ground truth scaled to [0, 1], same layout
                with 1 channel.
            Slots with no index available (last batch of a pass) stay zero.
        """
        # Keeps under lock only the mechanism which advances
        # the indexing of each batch.
        with self.lock:
            index_array, _, current_batch_size = next(self.index_generator)

        # the transformation of images is not under the thread lock so it can be done in parallel
        batch_x = self._load_frames(self.input_files, index_array,
                                    current_batch_size, grayscale=False)
        batch_y = self._load_frames(self.groundtruth_files, index_array,
                                    current_batch_size, grayscale=True)
        return batch_x, batch_y

    def _load_frames(self, files, index_array, batch_size, grayscale):
        """Loads the images selected by `index_array` into one batch array."""
        channels = 1 if grayscale else 3
        if self.data_format == 'channels_first':
            frame_shape = (channels,) + self.target_size
        else:
            frame_shape = self.target_size + (channels,)
        batch = np.zeros((batch_size, self.frames_per_step) + frame_shape,
                         dtype=K.floatx())

        n_sequences = len(index_array) // self.frames_per_step
        for frame in range(self.frames_per_step):
            for seq in range(n_sequences):
                # Frame `frame` of sequence `seq` sits `frame * batch_size`
                # positions after the sequence's first index. The stride is
                # this iterator's own batch size, not the global BATCH_SIZE.
                # NOTE(review): this makes a sequence out of indices that are
                # `batch_size` apart rather than adjacent — confirm intended.
                position = seq + frame * batch_size
                if position >= len(index_array):
                    continue
                fname = files[index_array[position]]
                img = load_img(fname, grayscale=grayscale, target_size=self.target_size)
                arr = img_to_array(img, data_format=self.data_format)
                arr /= 255.0
                batch[seq, frame] = arr
        return batch

In [None]:
def count_valid_files_in_directory(directory, follow_links):
    """Counts the image files in the `input` sub-folder of `directory`.

    # Arguments
        directory: folder that contains an `input` sub-folder.
        follow_links: accepted for interface compatibility; not used.
    # Returns
        Number of entries whose name ends (case-insensitively) in
        `.png`, `.jpg` or `.jpeg`.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    input_file_path = directory + '/input'
    return sum(1 for entry in os.listdir(input_file_path)
               if entry.lower().endswith(image_suffixes))

In [None]:
def list_valid_filenames_in_directory(directory, follow_links):
    """Lists the image files of the `input` and `groundtruth` sub-folders.

    # Arguments
        directory: folder containing `input` and `groundtruth` sub-folders.
        follow_links: accepted for interface compatibility; not used.
    # Returns
        Tuple `(input_names, groundtruth_names)`: two lists of paths, each
        ordered by sorted file name and restricted to names ending
        (case-insensitively) in `.png`, `.jpg` or `.jpeg`.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    def collect(folder):
        # Paths are the folder joined with each matching file name.
        return [os.path.join(folder, entry)
                for entry in sorted(os.listdir(folder))
                if entry.lower().endswith(image_suffixes)]

    input_names = collect(directory + '/input')
    groundtruth_names = collect(directory + '/groundtruth')
    return input_names, groundtruth_names

In [None]:
# Dataset locations. The defaults are the original absolute paths; set the
# environment variables to run the notebook against another location.
train_data_path = os.environ.get(
    'THESIS_TRAIN_DATA_PATH',
    "\\Users\\bchau\\Projects\\Thesis\\train_dataset\\badWeather")
validation_data_path = os.environ.get(
    'THESIS_VALIDATION_DATA_PATH',
    "\\Users\\bchau\\Projects\\Thesis\\validation_dataset\\badWeather")

train_gen = ImageDataGenerator()
train_data = train_gen.flow_from_directory(train_data_path, target_size= IMG_SIZE, frames_per_step=FRAMES_PER_STEP, batch_size=BATCH_SIZE)

validation_gen = ImageDataGenerator()
validation_data = validation_gen.flow_from_directory(validation_data_path, target_size= IMG_SIZE, frames_per_step=FRAMES_PER_STEP, batch_size=BATCH_SIZE)

# Every batch consumes BATCH_SIZE * FRAMES_PER_STEP samples, so that is the
# divisor for one pass over the data. Dividing by BATCH_SIZE alone made each
# epoch run FRAMES_PER_STEP passes. `max(1, ...)` keeps small datasets from
# producing zero steps.
SAMPLES_PER_BATCH = BATCH_SIZE * FRAMES_PER_STEP

STEPS_PER_EPOCH = max(1, train_data.samples // SAMPLES_PER_BATCH)

VALIDATION_STEPS_PER_EPOCH = max(1, validation_data.samples // SAMPLES_PER_BATCH)

In [None]:
# Pull one batch and display the frames of its sequence at index 3:
# first the inputs, then the matching ground truth.
train_data_batch = next(train_data)
input_img, ground_img = train_data_batch[0][3], train_data_batch[1][3]

show_array_of_images(input_img)
show_array_of_images(ground_img)

In [None]:
# Model input shape: (frames, height, width, RGB channels). The spatial size
# comes from IMG_SIZE so the model and the data iterators cannot drift apart.
input_dim = [FRAMES_PER_STEP] + list(IMG_SIZE) + [3]
model = Models.ecoder_decoder_cnn_lstm(input_dim, DROPOUT)

In [None]:
# `BinaryAccuracy` thresholds the predictions before comparing them with the
# targets. Plain `Accuracy` tests exact equality of prediction and label,
# which is almost never true for probabilistic outputs.
# NOTE(review): assumes the model emits per-pixel probabilities, as the
# binary cross-entropy loss suggests — confirm.
# Metric names are set explicitly so the `model_history.history` keys read by
# the plotting cells stay stable when this cell is re-run in the same kernel.
model.compile(optimizer='adam',
              loss= 'binary_crossentropy',
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy'),
                        tf.keras.metrics.Recall(name='recall'),
                        tf.keras.metrics.Precision(name='precision'),
                        tf.keras.metrics.FalseNegatives(name='false_negatives'),
                        tf.keras.metrics.FalsePositives(name='false_positives'),
                        tf.keras.metrics.TrueNegatives(name='true_negatives'),
                        tf.keras.metrics.TruePositives(name='true_positives')])

In [None]:
# The callback defined below is used to observe how the model improves while it is training.
class DisplayCallback(Callback):
    """Keras callback that redraws sample predictions at the end of every epoch."""
    def on_epoch_end(self, epoch, logs=None):
        # Replace the previous epoch's output instead of appending to it.
        clear_output(wait=True)
        # NOTE(review): `show_predictions` is defined in a later cell of this
        # notebook; that cell has to be executed before training starts,
        # otherwise this call raises NameError at the end of the first epoch.
        show_predictions()
        print ('\nSample Prediction after epoch {}\n'.format(epoch+1))
        
# Define the TensorBoard callback here; logs go to a timestamped sub-directory of logs/fit/.
# NOTE(review): `tensorboard_callback` is not in the `callbacks` list of the
# `model.fit` call in this notebook — confirm whether that is intended.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir,
                                   histogram_freq=1)

In [None]:
# Train on the directory iterators. Both iterators loop forever, so the
# number of batches per epoch is set by `steps_per_epoch` / `validation_steps`.
# Only `DisplayCallback` is registered as a callback.
model_history = model.fit(train_data,
          epochs=500,
          validation_data=validation_data,
          steps_per_epoch=STEPS_PER_EPOCH,
          validation_steps=VALIDATION_STEPS_PER_EPOCH,
          verbose=1, callbacks=[DisplayCallback()])

In [None]:
# Plot precision against recall.
# NOTE(review): the values are read from `model_history.history`, presumably
# one training-set value per epoch, so this is a trajectory over epochs and
# not a threshold-swept precision-recall curve — confirm this is what is wanted.

recall = model_history.history['recall']
precision = model_history.history['precision']
plt.figure()
plt.step(recall, precision, where='post' )
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.xlim((0, 1))
plt.ylim((0, 1))
plt.show()
plt.close()

In [None]:
# Plot the loss and accuracy recorded by `model.fit`, for training and validation.
loss = model_history.history['loss']
val_loss = model_history.history['val_loss']

acc = model_history.history['accuracy']
val_acc = model_history.history['val_accuracy']

plt.figure()
plt.plot(model_history.epoch, loss, 'r', label='Training loss')
plt.plot(model_history.epoch, val_loss, 'bo', label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss Value')
plt.ylim([0, 1])
plt.legend()
plt.show()

plt.figure()
# The legend entries name accuracy; they used to be labelled as loss.
plt.plot(model_history.epoch, acc, 'r', label='Training accuracy')
plt.plot(model_history.epoch, val_acc, 'bo', label='Validation accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy Value')
plt.ylim([0, 1])
plt.legend()
plt.show()

In [None]:
# NOTE(review): `show_predictions` is defined in the cell that follows this
# one; on a fresh kernel run that cell first, or this call raises NameError.
show_predictions()

In [None]:
def show_predictions(test_data_path=None, sample_index=3):
    """Predicts on one batch of test data and plots one predicted sequence.

    Uses the module-level `model`. A new directory iterator is built on
    every call, so the first batch of the test set is the one shown.

    # Arguments
        test_data_path: root folder of the test dataset; `None` uses the
            original hard-coded location.
        sample_index: position inside the batch of the sequence to plot;
            defaults to the previously hard-coded 3.
    """
    # Load unseen data for foreground extraction
    if test_data_path is None:
        test_data_path = "\\Users\\bchau\\Projects\\Thesis\\test_dataset\\badWeather"
    test_gen = ImageDataGenerator()
    test_data = test_gen.flow_from_directory(test_data_path, target_size= IMG_SIZE, frames_per_step=FRAMES_PER_STEP, batch_size=BATCH_SIZE)
    test_samples = test_data.next()

    # Predict the foreground from the inputs only (element 0 of the batch).
    predictions = model.predict(test_samples[0], batch_size=BATCH_SIZE, steps=None, verbose=1)
    predicted_img = predictions[sample_index]

    # plot score map of foreground
    show_array_of_images(predicted_img)

    # plot output of foreground
    show_array_of_forground_mask(predicted_img)