# Imports

In [0]:
import argparse
import json
import importlib
import glob
import os

In [0]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dropout, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Mount gdrive for data access

- This whole project, including the datasets, is maintained on Google Drive for ease of access while using Colab. 
- The drive must therefore be mounted so that it can serve as the target location for I/O operations.

In [0]:
def gdrive_mount():
    """Mount Google Drive into the Colab runtime at ``/content/gdrive``."""
    from google.colab import drive as colab_drive

    mount_point = '/content/gdrive'
    colab_drive.mount(mount_point)

gdrive_mount()

# Utilities

 - Utility functions for ease of use.
 - Image based functions such as `random_crop`, `random_horizontal_flip` are used for data augmentation purposes while generating Training data.
 - Label based functions such as `normalize_labels` and `calc_mean_score` are used for statistical purposes for input image labels (rating distributions).

In [0]:
def load_json(file_path):
    """Read the JSON document stored at ``file_path`` and return the parsed object."""
    with open(file_path, 'r') as handle:
        parsed = json.load(handle)
    return parsed


def save_json(data, target_file):
    """Write ``data`` to ``target_file`` as JSON with a 2-space indent and sorted keys."""
    dump_options = {'indent': 2, 'sort_keys': True}
    with open(target_file, 'w') as handle:
        json.dump(data, handle, **dump_options)


def load_config(config_file):
    """Return the configuration parsed from the JSON file ``config_file``."""
    return load_json(config_file)


def random_crop(img, crop_dims):
    """Cut a randomly positioned window of size ``crop_dims`` out of ``img``.

    Args:
        img: array indexed as (height, width, channels).
        crop_dims: sequence whose first two entries are the crop height and width.

    Returns:
        The slice ``img[top:top+crop_h, left:left+crop_w, :]`` at a random offset.

    Raises:
        AssertionError: if the crop is taller or wider than the image.
    """
    img_h = img.shape[0]
    img_w = img.shape[1]
    crop_h = crop_dims[0]
    crop_w = crop_dims[1]
    assert img_h >= crop_h, 'image height is less than crop height'
    assert img_w >= crop_w, 'image width is less than crop width'
    # the horizontal offset is drawn before the vertical one; keep this order
    # so that seeded runs stay reproducible
    left = np.random.randint(0, img_w - crop_w + 1)
    top = np.random.randint(0, img_h - crop_h + 1)
    bottom = top + crop_h
    right = left + crop_w
    return img[top:bottom, left:right, :]


def random_horizontal_flip(img):
    """Mirror ``img`` along its width axis with probability 0.5.

    Args:
        img: 3-dimensional array in (height, width, channels) layout with 3 channels.

    Returns:
        Either ``img`` itself or a view of it with the width axis reversed.

    Raises:
        AssertionError: if ``img`` is not 3-dimensional or does not have 3 channels.
    """
    assert img.ndim == 3, 'input tensor must have 3 dimensions (height, width, channels)'
    assert img.shape[2] == 3, 'image not in channels last format'
    keep_unchanged = not (np.random.random() < 0.5)
    if keep_unchanged:
        return img
    # reversing the second axis mirrors the image left-to-right
    return img[:, ::-1, ...]


def load_image(img_file, target_size):
    """Load ``img_file`` with Keras ``load_img`` using ``target_size`` and return it as a NumPy array."""
    loaded = tf.keras.preprocessing.image.load_img(img_file, target_size=target_size)
    return np.asarray(loaded)


def normalize_labels(labels):
    """Scale ``labels`` so that all of its entries together sum to 1."""
    counts = np.array(labels)
    total = counts.sum()
    return counts / total


def calc_mean_score(score_dist):
    """Return the mean score of ``score_dist`` over the score buckets 1 to 10.

    The distribution is normalized first, so raw vote counts are accepted.
    """
    probabilities = normalize_labels(score_dist)
    bucket_scores = np.arange(1, 11)
    weighted = probabilities * bucket_scores
    return weighted.sum()


def ensure_dir_exists(dir):
    """Create ``dir`` (including missing parents) unless the path already exists."""
    if os.path.exists(dir):
        return
    os.makedirs(dir)


def ensure_file_exists(file):
    """Create an empty file at ``file`` if nothing exists at that path yet.

    Missing parent directories are created as well. An existing file is left
    untouched.
    """
    if os.path.exists(file):
        return
    parent = os.path.dirname(file)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # ``os`` has no ``touch`` function. Opening in append mode creates the file
    # and never truncates it, even if it appears between the check and the open.
    with open(file, 'a'):
        pass


def load_samples(samples_file):
    """Return the samples parsed from the JSON file ``samples_file``."""
    samples = load_json(samples_file)
    return samples

# Loss function

The special feature of NIMA is the use of the Earth Mover’s Loss (EML) as the loss function, which differs from the Categorical Cross Entropy (CCE) loss generally applied in Deep Learning classification tasks. The EML can be understood as the amount of “earth” that needs to be moved to make two probability distributions equal. A useful attribute of this loss function is that it captures the inherent order of the classes.

For example, in our scenario, the scores 4, 5, and 6 are more related than 1, 5, and 10. In other words, **we want to punish a prediction of 4 more if the true score is 10 than when the true score is 5.** CCE does not capture this relationship, which is often not required in object classification tasks (e.g. misclassifying a tree as a dog is as bad as classifying it as a cat).

![Earth Mover's Distance](https://devblogs.nvidia.com/wp-content/uploads/2018/10/pastedImage0-3-1024x253.png)

_Image source: https://devblogs.nvidia.com/deep-learning-hotel-aesthetics-photos/_


$$EMD(p, \hat{p})=\bigg(\frac{1}{N}\sum_{k=1}^{N} {|CDF_{p}(k)-CDF_{\hat p}(k)|}^r\bigg)^{1/r}$$

$p: \textit{ground truth probability mass functions}$

$\hat{p}: \textit{estimated probability mass functions}$

$N: \textit{number of ordered classes}$

$CDF_p(k): \textit{cumulative distribution function as} \sum_{i=1}^{k}{p_{s_{i}}}$

$r: \textit{r-norm}$

***

$r = 2$ is used to penalize the Euclidean distance between the CDFs. It allows easier optimization when working with gradient descent.


In [0]:
def earth_movers_distance(y_true, y_pred):
    """Earth mover's distance loss with r = 2 between true and predicted distributions.

    Both inputs are accumulated along the last axis to obtain their CDFs. The
    root of the mean squared CDF difference is taken along that axis, and the
    result is averaged over the remaining entries.
    """
    cdf_gap = K.cumsum(y_true, axis=-1) - K.cumsum(y_pred, axis=-1)
    rms_gap = K.sqrt(K.mean(K.square(cdf_gap), axis=-1))
    return K.mean(rms_gap)

# Data Generator

- Every `tf.keras.utils.Sequence` must implement the `__getitem__` and the `__len__` methods.
- The method `__getitem__` should return a complete batch.
- The method `on_epoch_end` is used to modify the dataset between epochs.
- The method `__data_generator` builds the arrays of images and labels for the samples in `batch_samples`, after performing image augmentation on every image and applying `basenet_preprocess` to the resulting image array.

In [0]:
class TrainDataGenerator(tf.keras.utils.Sequence):
    '''Keras Sequence yielding augmented training batches ``(X, y)``.

    Every sample is a mapping with an ``'image_id'`` and a ``'label'`` entry. The image
    is read from ``img_dir/<image_id>.<img_format>``, resized to ``img_load_dims``,
    randomly cropped to ``img_crop_dims`` and randomly flipped horizontally. The label
    is normalized so that it sums to 1.

    Inheriting from the Keras Sequence base object allows multiprocessing to be used
    when the generator is handed to the model's training routine.
    '''
    def __init__(self, samples, img_dir, batch_size, n_classes, basenet_preprocess, img_format,
                 img_load_dims=(256, 256), img_crop_dims=(224, 224), shuffle=True):
        super().__init__()
        self.samples = samples
        self.img_dir = img_dir
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.basenet_preprocess = basenet_preprocess  # Keras basenet specific preprocessing function
        self.img_load_dims = img_load_dims  # dimensions that images get resized into when loaded
        self.img_crop_dims = img_crop_dims  # dimensions that images get randomly cropped to
        self.shuffle = shuffle
        self.img_format = img_format
        self.on_epoch_end()  # call ensures that samples are shuffled in first epoch if shuffle is set to True

    def __len__(self):
        '''Return the number of batches per epoch (the last batch may be smaller).'''
        return int(np.ceil(len(self.samples) / self.batch_size))

    def __getitem__(self, index):
        '''Return batch number ``index`` as a tuple ``(X, y)``.'''
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]  # get batch indexes
        batch_samples = [self.samples[i] for i in batch_indexes]  # get batch samples
        X, y = self.__data_generator(batch_samples)
        return X, y

    def on_epoch_end(self):
        '''Rebuild the sample order, reshuffling it when shuffling is enabled.'''
        self.indexes = np.arange(len(self.samples))
        # truthiness instead of ``is True`` so that values such as 1 or np.True_ also enable shuffling
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generator(self, batch_samples):
        '''Load, augment and preprocess the images of ``batch_samples`` and normalize their labels.'''
        # pre-allocate the image and label arrays of the batch; every row is filled below
        X = np.empty((len(batch_samples), *self.img_crop_dims, 3))
        y = np.empty((len(batch_samples), self.n_classes))

        for i, sample in enumerate(batch_samples):
            # load and randomly augment image
            img_file = os.path.join(self.img_dir, '{}.{}'.format(sample['image_id'], self.img_format))
            img = load_image(img_file, self.img_load_dims)
            img = random_crop(img, self.img_crop_dims)
            img = random_horizontal_flip(img)
            X[i, ] = img

            # normalize labels
            y[i, ] = normalize_labels(sample['label'])

        # apply basenet specific preprocessing
        # input is 4D numpy array of RGB values within [0, 255]
        X = self.basenet_preprocess(X)

        return X, y

In [0]:
class TestDataGenerator(tf.keras.utils.Sequence):
    '''Keras Sequence yielding validation/prediction batches ``(X, y)`` without augmentation.

    Every sample is a mapping with an ``'image_id'`` entry and an optional ``'label'``
    entry. The image is read from ``img_dir/<image_id>.<img_format>`` and resized to
    ``img_load_dims``. Samples are served in their given order. The label row of a
    sample without a label is all zeros.

    Inheriting from the Keras Sequence base object allows multiprocessing to be used
    when the generator is handed to the model.
    '''
    def __init__(self, samples, img_dir, batch_size, n_classes, basenet_preprocess, img_format,
                 img_load_dims=(224, 224)):
        super().__init__()
        self.samples = samples
        self.img_dir = img_dir
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.basenet_preprocess = basenet_preprocess  # Keras basenet specific preprocessing function
        self.img_load_dims = img_load_dims  # dimensions that images get resized into when loaded
        self.img_format = img_format
        self.on_epoch_end()  # call initializes the (unshuffled) sample order

    def __len__(self):
        '''Return the number of batches per epoch (the last batch may be smaller).'''
        return int(np.ceil(len(self.samples) / self.batch_size))

    def __getitem__(self, index):
        '''Return batch number ``index`` as a tuple ``(X, y)``.'''
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]  # get batch indexes
        batch_samples = [self.samples[i] for i in batch_indexes]  # get batch samples
        X, y = self.__data_generator(batch_samples)
        return X, y

    def on_epoch_end(self):
        '''Reset the sample order; no shuffling is applied.'''
        self.indexes = np.arange(len(self.samples))

    def __data_generator(self, batch_samples):
        '''Load and preprocess the images of ``batch_samples`` and normalize the labels that are present.'''
        # pre-allocate the image array of the batch; every row is filled below
        X = np.empty((len(batch_samples), *self.img_load_dims, 3))
        # labels are optional here, so start from zeros instead of uninitialized memory
        y = np.zeros((len(batch_samples), self.n_classes))

        for i, sample in enumerate(batch_samples):
            # load image (no augmentation)
            img_file = os.path.join(self.img_dir, '{}.{}'.format(sample['image_id'], self.img_format))
            X[i, ] = load_image(img_file, self.img_load_dims)

            # normalize labels
            if sample.get('label') is not None:
                y[i, ] = normalize_labels(sample['label'])

        # apply basenet specific preprocessing
        # input is 4D numpy array of RGB values within [0, 255]
        X = self.basenet_preprocess(X)

        return X, y

# Model Builder

- The method `_get_base_module` imports the base model from the Keras Applications. They are canned architectures with pre-trained weights.
- The method `build` is used to specify the architecture of network. The method `keras.applications.<model_name>` (`BaseCnn` in this case) can have following parameters:
  - `include_top`:	whether to include the fully-connected layer at the top of the network.
  - `weights`:	one of `None` (random initialization), `'imagenet'` (pre-training on ImageNet), or the path to the weights file to be loaded.
  - `input_tensor`:	optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
  - `input_shape`:	optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be (299, 299, 3) with 'channels_last' data format or (3, 299, 299) with 'channels_first' data format). It should have exactly 3 input channels, and width and height should be no smaller than 75. E.g. (150, 150, 3) would be one valid value.
  - `pooling`:	Optional pooling mode for feature extraction when include_top is False.
     - `None`: means that the output of the model will be the 4D tensor output of the last convolutional block.
     - `'avg'`: means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor.
     - `'max'`: means that global max pooling will be applied.
  - `classes`:	optional number of classes to classify images into, only to be specified if include_top is True, and if no weights argument is specified.
  - `classifier_activation`:	A str or callable. The activation function to use on the "top" layer. Ignored unless include_top=True. Set classifier_activation=None to return the logits of the "top" layer.


In [0]:
class Nima:
    """Builder for a NIMA model: a Keras Applications base CNN plus a dropout/softmax head.

    Args:
        base_model_name: name of the model class inside ``tensorflow.keras.applications``
            (e.g. ``'MobileNet'``, ``'InceptionV3'``).
        n_classes: number of units of the softmax output layer.
        learning_rate: learning rate handed to the Adam optimizer in ``compile``.
        dropout_rate: rate of the dropout layer placed before the output layer.
        loss: loss function used in ``compile``.
        decay: learning-rate decay handed to the Adam optimizer when non-zero.
        weights: ``weights`` argument forwarded to the base model constructor.
    """

    def __init__(self, base_model_name, n_classes=10, learning_rate=0.001, dropout_rate=0, loss=earth_movers_distance,
                 decay=0, weights='imagenet'):
        self.n_classes = n_classes
        self.base_model_name = base_model_name
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate
        self.loss = loss
        self.decay = decay
        self.weights = weights
        self._get_base_module()

    def _get_base_module(self):
        """Import the Keras Applications module of the base model into ``self.base_module``."""
        # these two module names cannot be derived by lower-casing the class name
        if self.base_model_name == 'InceptionV3':
            self.base_module = importlib.import_module('tensorflow.keras.applications.inception_v3')
        elif self.base_model_name == 'InceptionResNetV2':
            self.base_module = importlib.import_module('tensorflow.keras.applications.inception_resnet_v2')
        else:
            self.base_module = importlib.import_module('tensorflow.keras.applications.'+self.base_model_name.lower())

    def build(self):
        """Create ``self.base_model`` and the full ``self.nima_model`` on top of it."""
        # get base model class
        BaseCnn = getattr(self.base_module, self.base_model_name)

        # load pre-trained model
        self.base_model = BaseCnn(input_shape=(224, 224, 3), weights=self.weights, include_top=False, pooling='avg')

        # add dropout and dense layer
        x = Dropout(self.dropout_rate)(self.base_model.output)
        x = Dense(units=self.n_classes, activation='softmax')(x)

        self.nima_model = Model(self.base_model.inputs, x)

    def compile(self):
        """Compile ``self.nima_model`` with Adam and the configured loss."""
        # ``learning_rate`` replaces the deprecated ``lr`` alias
        optimizer_kwargs = {'learning_rate': self.learning_rate}
        # a zero decay is the optimizer default, so it is only passed when set;
        # this avoids handing the legacy argument to optimizers that reject it
        if self.decay:
            optimizer_kwargs['decay'] = self.decay
        self.nima_model.compile(optimizer=Adam(**optimizer_kwargs), loss=self.loss)

    def preprocessing_function(self):
        """Return the ``preprocess_input`` function of the base model's module."""
        return self.base_module.preprocess_input

# Train

- The method `ModelCheckpoint` can have the following parameters:
  - `filepath`
  - `monitor`: quantity to monitor.
  - `verbose`: verbose mode, 0 or 1.
  - `save_best_only`:	if `save_best_only=True`, the latest best model according to the quantity monitored will not be overwritten. If filepath doesn't contain formatting options like `{epoch}` then filepath will be overwritten by each new better model.
  - `mode`:	one of `{auto, min, max}`. If `save_best_only=True`, the decision to overwrite the current save file is made based on either the maximization or the minimization of the monitored quantity. For `val_acc`, this should be `max`, for `val_loss` this should be `min`, etc. In `auto` mode, the direction is automatically inferred from the name of the monitored quantity.
  - `save_weights_only`:	if `True`, then only the model's weights will be saved, else the full model is saved.
  - `save_freq`:	`'epoch'` or integer. When using `'epoch'`, the callback saves the model after each epoch. When using integer, the callback saves the model at end of this many batches.

- The method `fit_generator` is deprecated and needs to be replaced with `fit`. It can have the following parameters:
  - `generator`
  - `steps_per_epoch`: Integer or `None`. Total number of steps (batches of samples) before declaring one epoch finished and starting the next epoch. 
  - `epochs`: Integer. Number of epochs to train the model. An epoch is an iteration over the entire x and y data provided. Note that in conjunction with initial_epoch, epochs is to be understood as "final epoch". The model is not trained for a number of iterations given by epochs, but merely until the epoch of index epochs is reached.
  - `verbose`: 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch.
  - `callbacks`: List of keras.callbacks.Callback instances. List of callbacks to apply during training.
  - `validation_data`: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. validation_data will override validation_split. validation_data could be:
    - `tuple (x_val, y_val)` of Numpy arrays or tensors
    - `tuple (x_val, y_val, val_sample_weights)` of Numpy arrays
    - `dataset`
  - `validation_steps`: Only relevant if `validation_data` is provided and is a tf.data dataset. Total number of steps (batches of samples) to draw before stopping when performing validation at the end of every epoch. If `validation_steps` is None, validation will run until the validation_data dataset is exhausted. In the case of an infinitely repeated dataset, it will run into an infinite loop. If `validation_steps` is specified and only part of the dataset will be consumed, the evaluation will start from the beginning of the dataset at each epoch. This ensures that the same validation samples are used every time.
  - `validation_freq`: Only relevant if validation data is provided. Integer or collections_abc.Container instance (e.g. list, tuple, etc.). If an integer, specifies how many training epochs to run before a new validation run is performed, e.g. validation_freq=2 runs validation every 2 epochs. If a Container, specifies the epochs on which to run validation, e.g. validation_freq=[1, 2, 10] runs validation at the end of the 1st, 2nd, and 10th epochs.
  - `class_weight`: Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss function (during training only). This can be useful to tell the model to "pay more attention" to samples from an under-represented class.
  - `max_queue_size`: Integer. Used for generator or keras.utils.Sequence input only. Maximum size for the generator queue. If unspecified, max_queue_size will default to 10.
  - `workers`: Integer. Used for generator or keras.utils.Sequence input only. Maximum number of processes to spin up when using process-based threading. If unspecified, workers will default to 1. If 0, will execute the generator on the main thread.
  - `use_multiprocessing`: Boolean. Used for generator or keras.utils.Sequence input only. If True, use process-based threading. If unspecified, use_multiprocessing will default to False. Note that because this implementation relies on multiprocessing, you should not pass non-picklable arguments to the generator as they can't be passed easily to children processes.
  - `shuffle`: Boolean (whether to shuffle the training data before each epoch) or str (for 'batch').
  - `initial_epoch`: Integer. Epoch at which to start training (useful for resuming a previous training run).


In [0]:
def train(
    base_model_name,
    n_classes,
    samples,
    image_dir,
    batch_size,
    epochs_train_dense,
    epochs_train_all,
    learning_rate_dense,
    learning_rate_all,
    dropout_rate,
    weights_dir,
    log_dir,
    img_format='jpg',
    existing_weights=None,
    multiprocessing_data_load=False,
    num_workers_data_load=2,
    decay_dense=0,
    decay_all=0,
    **kwargs
):
    """Train a NIMA model in two phases: first the new head only, then all layers.

    Args:
        base_model_name: name of the Keras Applications base model.
        n_classes: number of output classes (score buckets).
        samples: samples to split into a training set and a 5 % validation set.
        image_dir: directory holding the image files of the samples.
        batch_size: batch size of both data generators.
        epochs_train_dense: number of epochs with the base model frozen.
        epochs_train_all: number of additional epochs with all layers trainable.
        learning_rate_dense: learning rate of the first phase.
        learning_rate_all: learning rate of the second phase.
        dropout_rate: dropout rate of the model head.
        weights_dir: directory the weight checkpoints are written to.
        log_dir: TensorBoard log directory.
        img_format: file extension of the image files.
        existing_weights: optional weights file loaded before training starts.
        multiprocessing_data_load: whether the generators run in separate processes.
        num_workers_data_load: number of data loading workers.
        decay_dense: learning-rate decay of the first phase.
        decay_all: learning-rate decay of the second phase.
        **kwargs: further configuration entries; accepted and ignored.
    """

    # build NIMA model and load existing weights if they were provided in config
    nima = Nima(
        base_model_name, n_classes, learning_rate_dense, dropout_rate, decay=decay_dense
    )
    nima.build()

    if existing_weights is not None:
        nima.nima_model.load_weights(existing_weights)

    # split samples in train and validation set, and initialize data generators
    samples_train, samples_test = train_test_split(
        samples, test_size=0.05, shuffle=True, random_state=10207
    )

    training_generator = TrainDataGenerator(
        samples_train,
        image_dir,
        batch_size,
        n_classes,
        nima.preprocessing_function(),
        img_format=img_format,
    )

    validation_generator = TestDataGenerator(
        samples_test,
        image_dir,
        batch_size,
        n_classes,
        nima.preprocessing_function(),
        img_format=img_format,
    )

    # initialize callbacks TensorBoard and ModelCheckpoint
    tensorboard = TensorBoard(
        log_dir=log_dir, update_freq='batch'
    )

    model_save_name = (
        'weights_' + base_model_name.lower() + '_{epoch:02d}_{val_loss:.3f}.hdf5'
    )
    model_file_path = os.path.join(weights_dir, model_save_name)
    model_checkpointer = ModelCheckpoint(
        filepath=model_file_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
    )
    callbacks = [tensorboard, model_checkpointer]

    def set_base_trainable(trainable):
        # freeze or unfreeze every layer of the base model
        for layer in nima.base_model.layers:
            layer.trainable = trainable

    def run_phase(initial_epoch, final_epoch):
        # (re)compile so that trainable flags and optimizer settings take effect, then fit.
        # ``fit`` accepts Sequence objects directly and replaces the deprecated ``fit_generator``.
        nima.compile()
        nima.nima_model.summary()
        nima.nima_model.fit(
            x=training_generator,
            validation_data=validation_generator,
            epochs=final_epoch,
            initial_epoch=initial_epoch,
            verbose=1,
            use_multiprocessing=multiprocessing_data_load,
            workers=num_workers_data_load,
            max_queue_size=30,
            callbacks=callbacks,
        )

    # phase 1: train only the dense head, base model frozen
    set_base_trainable(False)
    run_phase(0, epochs_train_dense)

    # phase 2: train all layers with the second learning rate and decay,
    # continuing the epoch count where phase 1 stopped
    set_base_trainable(True)
    nima.learning_rate = learning_rate_all
    nima.decay = decay_all
    run_phase(epochs_train_dense, epochs_train_dense + epochs_train_all)

    K.clear_session()


# Call for Train

In [0]:
# Directory the training images are read from.
IMAGE_DIR = '/content/gdrive/My Drive/TID2013/distorted_images'

# Output directories for weight checkpoints and TensorBoard logs; created when missing.
WEIGHTS_DIR = '/content/gdrive/My Drive/GC-IQA_Prism/IQA_Prism/weights/E1'
LOG_DIR = '/content/gdrive/My Drive/GC-IQA_Prism/IQA_Prism/logs/E1'
ensure_dir_exists(WEIGHTS_DIR)
ensure_dir_exists(LOG_DIR)

# Training configuration; its entries are passed to train() as keyword arguments below.
CONFIG_DIR = '/content/gdrive/My Drive/GC-IQA_Prism/IQA_Prism/config'
CONFIG_FILE = os.path.join(CONFIG_DIR, 'config_technical_gpu.json')
config = load_config(CONFIG_FILE)

# Labelled training samples loaded from JSON.
SAMPLES_DIR = '/content/gdrive/My Drive/GC-IQA_Prism/IQA_Prism/labels/TID2013'
SAMPLES_FILE = os.path.join(SAMPLES_DIR, 'tid_labels_train.json')
samples = load_samples(SAMPLES_FILE)

# Run the training with the paths above plus everything from the config file.
train(samples=samples, weights_dir=WEIGHTS_DIR, log_dir=LOG_DIR, image_dir=IMAGE_DIR, **config)

# Predict

In [0]:
def image_file_to_json(img_path):
    """Describe a single image file as a directory plus a one-element sample list.

    Args:
        img_path: path of the image file.

    Returns:
        Tuple ``(img_dir, samples)``, where ``samples`` is ``[{'image_id': <id>}]``
        and the id is the file name without its extension.
    """
    img_dir, file_name = os.path.split(img_path)
    # splitext removes only the final extension, so dots inside the name
    # (e.g. 'photo.v2.jpg') stay part of the image id
    img_id = os.path.splitext(file_name)[0]

    return img_dir, [{'image_id': img_id}]


def image_dir_to_json(img_dir, img_type='jpg'):
    """List the images of one type in a directory as samples.

    Args:
        img_dir: directory to search (not recursive).
        img_type: file extension to match, without the leading dot.

    Returns:
        List of ``{'image_id': <id>}`` dicts, one per matching file, where the id
        is the file name without its extension.
    """
    pattern = os.path.join(img_dir, '*.' + img_type)
    # splitext removes only the final extension, so dots inside the name
    # (e.g. 'photo.v2.jpg') stay part of the image id
    return [
        {'image_id': os.path.splitext(os.path.basename(img_path))[0]}
        for img_path in glob.glob(pattern)
    ]


def predict(base_model_name, weights_file, image_source, predictions_file, img_format='jpg'):
    """Predict mean scores for one image or for a directory of images.

    Args:
        base_model_name: name of the Keras Applications base model the weights belong to.
        weights_file: weights file loaded into the NIMA model.
        image_source: path of a single image file, or of a directory of images.
        predictions_file: JSON file the predictions are saved to; ``None`` skips saving.
        img_format: file extension of the images, used both to find the images of a
            directory and to build the file names that are loaded.

    The samples, each extended by a ``'mean_score_prediction'`` entry, are printed as
    JSON and, if requested, written to ``predictions_file``.
    """
    # load samples
    if os.path.isfile(image_source):
        image_dir, samples = image_file_to_json(image_source)
    else:
        image_dir = image_source
        # search for the requested format instead of a hard-coded 'jpg'
        samples = image_dir_to_json(image_dir, img_type=img_format)

    # build model and load weights
    nima = Nima(base_model_name, weights=None)
    nima.build()
    nima.nima_model.load_weights(weights_file)

    # initialize data generator
    data_generator = TestDataGenerator(samples, image_dir, 64, 10, nima.preprocessing_function(),
                                       img_format=img_format)

    # get predictions; ``predict`` accepts Sequence objects and replaces the deprecated ``predict_generator``
    predictions = nima.nima_model.predict(data_generator, workers=8, use_multiprocessing=True, verbose=1)

    # calc mean scores and add to samples
    for i, sample in enumerate(samples):
        # plain float keeps the value JSON serializable
        sample['mean_score_prediction'] = float(calc_mean_score(predictions[i]))

    print(json.dumps(samples, indent=2))

    if predictions_file is not None:
        save_json(samples, predictions_file)


# Call for predict

In [0]:
# Base model whose trained weights are loaded for prediction.
BASE_MODEL_NAME = "MobileNet"
# Weights checkpoint file to load.
WEIGHTS_FILE = '/content/gdrive/My Drive/GC-IQA_Prism/IQA_Prism/weights/E1/weights_mobilenet_50_0.074.hdf5'
# Image file or directory to score.
IMAGE_SOURCE = '/content/gdrive/My Drive/GC-IQA_Prism/IQA_Prism/test_images'
# JSON file the predictions are saved to.
PREDICTIONS_FILE = '/content/gdrive/My Drive/GC-IQA_Prism/IQA_Prism/predictions/E1.json'

# Make sure the output file exists before predicting.
ensure_file_exists(PREDICTIONS_FILE)

predict(base_model_name=BASE_MODEL_NAME, weights_file=WEIGHTS_FILE, image_source=IMAGE_SOURCE, predictions_file=PREDICTIONS_FILE)