In [2]:
import numpy as np
import os
import glob
from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb
from skimage.transform import resize
from skimage.io import imsave, imread
import tensorflow as tf
from tensorflow import data as tfdata
from keras import backend as K
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
from keras.models import Model
from keras.callbacks import TensorBoard
from keras.preprocessing.image import img_to_array, load_img

Using TensorFlow backend.


In [2]:
from keras.preprocessing.image import img_to_array, load_img

In [3]:
sess = tf.Session()
K.set_session(sess)
sess = None

#### Define Path for Generating Training and Valididation TFRecord 

In [113]:
'''PATH_TRAIN_RAW_IMG = r"../dd2424_project/tiny-imagenet-200/train"
PATH_VAL_RAW_IMG = r"../dd2424_project/tiny-imagenet-200/val/images"
PATH_TRAIN_TF = "tf_data/train_images.tfrecord"
PATH_VAL_TF = "tf_data/val_images.tfrecord"
PATH_TRAIN_RAW_IMG = r"../dd2424_project/imagenet/imagenet_12k"
PATH_VAL_RAW_IMG = r"../dd2424_project/imagenet/imagenet_12k"
PATH_TRAIN_TF = "tf_data_imagenet/train_images.tfrecord"
PATH_VAL_TF = "tf_data_imagenet/val_images.tfrecord"
PATH_TRAIN_RAW_IMG = r"../dd2424_project/imagenet_cat/images"
PATH_VAL_RAW_IMG = r"../dd2424_project/imagenet_cat/images"
PATH_TRAIN_TF = "tf_data_imagenet_cat/train_images.tfrecord"
PATH_VAL_TF = "tf_data_imagenet_cat/val_images.tfrecord"'''
PATH_TRAIN_RAW_IMG = r"../dd2424_project/imagenet_"
PATH_VAL_RAW_IMG = r"../dd2424_project/imagenet_"
PATH_TRAIN_TF = "tf_data_imagenet_5k/train_images.tfrecord"
PATH_VAL_TF = "tf_data_imagenet_5k/val_images.tfrecord"

In [8]:
if not os.path.exists('model_weights/'):
    os.makedirs('model_weights/')
if not os.path.exists('predicted_images/'):
    os.makedirs('predicted_images/')
if not os.path.exists('tf_data/'):
    os.makedirs('tf_data/')

#### Define required funtion

In [6]:
## Get pre-trained InceptionResNetV2

inception_act = None
def get_inception():
    global feature_extract_model, inception_act
    
    inception_act = InceptionResNetV2(input_shape=(EMBEDDING_IMG_SIZE, EMBEDDING_IMG_SIZE, 3),
                                             include_top=True, weights='imagenet')
    inception_act.graph = tf.get_default_graph()
    inception_act.trainable = False

In [7]:
## Get last layer output of InceptionResNetV2

def get_last_layer_inception(grayscaled_img, batch_size=100):
    feature_map = inception_act.predict(grayscaled_img, batch_size=batch_size)
    return feature_map

In [8]:
## Extract high-level features from InceptionResNetV2 

def extract_features(grayscaled_rgb, batch_size=100):
    if inception_act is None:
        get_inception()
        
    grayscaled_rgb_resized = []
    
    for img in grayscaled_rgb:
        img = resize(img, (EMBEDDING_IMG_SIZE, EMBEDDING_IMG_SIZE, 3), mode='constant')
        grayscaled_rgb_resized.append(img)
        
    grayscaled_rgb_resized = np.array(grayscaled_rgb_resized)
    grayscaled_rgb_resized = preprocess_input(grayscaled_rgb_resized)
    
    with inception_act.graph.as_default():
        features = get_last_layer_inception(grayscaled_rgb_resized, batch_size)
        features = features.reshape((-1,1000))
    
    return features

In [9]:
## process batch of image for training

def process_img_batch(X, batch_size):
    grayscaled_rgb = gray2rgb(rgb2gray(X))
    lab_batch = rgb2lab(X)
    X_batch = lab_batch[:, :, :, 0]
    X_batch = X_batch.reshape(X_batch.shape + (1,))
    X_batch = 2 * X_batch / 100 - 1.
    Y_batch= lab_batch[:, :, :, 1:] / 127
    features = extract_features(grayscaled_rgb, batch_size)
    
    return ([X_batch, features], Y_batch)

In [10]:
## serialize batch of image into TFRecords

def serialize_records(X, writer, batch_size):
    [X_batch, features], Y_batch = process_img_batch(X, batch_size)
    
    for j, (img_l, embed, y) in enumerate(zip(X_batch, features, Y_batch)):
        img_l = resize(img_l, (IMG_SIZE, IMG_SIZE, 1), mode='constant')
        y = resize(y, (IMG_SIZE, IMG_SIZE, 2), mode='constant')
        
        example_dict = {
            'image_l': tf.train.Feature(float_list=tf.train.FloatList(value=img_l.flatten())),
            'image_ab': tf.train.Feature(float_list=tf.train.FloatList(value=y.flatten())),
            'image_features' : tf.train.Feature(float_list=tf.train.FloatList(value=embed.flatten()))
        }
        example_features = tf.train.Features(feature=example_dict)
        example = tf.train.Example(features=example_features)
        writer.write(example.SerializeToString())

In [None]:
## Generate TFRecords for training and validation data
## the TFRecords contain L* component of the image, ab component, extracted feature from InceptionResNetV2
def img_to_tf_records(path_to_images, tf_record_name, total_size, _type, batch_size=100, n_class = None):
    if os.path.exists(tf_record_name):
        print("remove old TF records!")
    else:
        if _type == 'train':
            folders = glob.glob(path_to_images + '*')
            files = []
            for i, folder in enumerate(folders):
                print(folder)
                if i < n_class:
                    files_temp = glob.glob(folder + '/images/*')
                    files_temp = files_temp[0:(total_size//n_class)]
                    files.append(files_temp)
            files = [y for x in files for y in x]
            
            
        elif _type == 'val':
            folders = glob.glob(path_to_images + '*')
            files = []
            for i, folder in enumerate(folders):
                print(folder)
                if i < 5:
                    files_temp = glob.glob(folder + '/images/*')
                    files_temp = files_temp[-110:-10]
                    files.append(files_temp)
            files = [y for x in files for y in x]
            

        print("panjang file: " + str(len(files)))
        files = sorted(files)

        options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
        writer = tf.python_io.TFRecordWriter(tf_record_name, options)

        size = max(EMBEDDING_IMG_SIZE, IMG_SIZE)

        X_buffer = []
        done_count = 0
        for i, file in enumerate(files):
            #print(file)
            try:
                X = imread(file)
            except:
                continue
            #print(file)
            X = resize(X, (size, size, 3), mode = 'constant')

            X_buffer.append(X)
            done_count += 1

            if len(X_buffer) >= batch_size:
                X_buffer = np.array(X_buffer)
                serialize_records(X_buffer, writer, batch_size)
                del X_buffer
                X_buffer = []
                print("Done %d / %d images" % (done_count, total_size))

        if len(X_buffer) != 0:
            X_buffer = np.array(X_buffer)
            serialize_records(X_buffer, writer, batch_size)
            del X_buffer

        print("Done %d / %d images" % (done_count, total_size))
        print("Finished preprocessed data to TFRecoeds")
        writer.close()

In [14]:
## Generate train TFRecords

def generate_train_data(batch_size, total_size, n_class):
    img_to_tf_records(PATH_TRAIN_RAW_IMG, PATH_TRAIN_TF, total_size, 'train', batch_size, n_class)

In [15]:
## Generate validation TFRecords

def generate_val_data(batch_size, total_size):
    img_to_tf_records(PATH_VAL_RAW_IMG, PATH_VAL_TF, total_size, 'val', batch_size)

In [13]:
## Read TFRecords and load it as Dataset

def construct_dataset(path_to_record, batch_size, sess):
    def parse_record(serialized_example):
        features = tf.parse_single_example(serialized_example,
                                          features={
                                              'image_l': tf.FixedLenFeature([IMG_SIZE, IMG_SIZE, 1], tf.float32),
                                              'image_ab': tf.FixedLenFeature([IMG_SIZE, IMG_SIZE, 2], tf.float32),
                                              'image_features': tf.FixedLenFeature([1000, ], tf.float32)
                                          })
        l, ab, embed = features['image_l'], features['image_ab'], features['image_features']
        return l, ab, embed
    
    dataset = tfdata.TFRecordDataset([path_to_record], 'ZLIB')
    dataset = dataset.map(parse_record, num_parallel_calls=2)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    dataset = dataset.shuffle(buffer_size=5)
    iterator = dataset.make_initializable_iterator()
    
    sess.run(iterator.initializer)
    next_batch = iterator.get_next()
    
    return dataset, next_batch

In [16]:
## Helper function to read data from TFRecords for training

def train_generator(batch_size):
    with tf.Session() as train_gen_session:
        dataset, next_batch = construct_dataset(PATH_TRAIN_TF, batch_size, train_gen_session)
        
        while True:
            try:
                l, ab, features = train_gen_session.run(next_batch)
                yield([l, features], ab)
            except:
                iterator = dataset.make_initializable_iterator()
                train_gen_session.run(iterator.initializer)
                next_batch = iterator.get_next()
                
                l, ab, features = train_gen_session.run(next_batch)
                yield([l, features], ab)

In [17]:
## Helper function to read data from TFRecords for validation

def val_batch_generator(batch_size):
    with tf.Session() as val_gen_session:
        dataset, next_batch = construct_dataset(PATH_VAL_TF, batch_size, val_gen_session)
        
        while True:
            try:
                l, ab, features = val_gen_session.run(next_batch)
                yield([l, features], ab)
            except:
                iterator = dataset.make_initializable_iterator()
                val_gen_session.run(iterator.initializer)
                next_batch = iterator.get_next()
                
                l, ab, features = val_gen_session.run(next_batch)
                yield([l, features], ab)

In [18]:
## Generate L*, a*, b* components and extracted features of the input image 
## The outputs are used for testing (predict the a* and b* component)

def preprocess_input_img_batch(X, batch_size=100):
    X_processed = X/255.
    X_grayscaled = gray2rgb(rgb2gray(X_processed))
    X_features = extract_features(X_grayscaled, batch_size)
    X_lab = rgb2lab(X_grayscaled)[:, :, :, 0]
    X_lab = X_lab.reshape(X_lab.shape + (1,))
    X_lab = 2*X_lab/100-1.
    
    return X_lab, X_features

In [50]:
## Write predicted picture in testing to png file

def postprocess_output_img(X_lab, y, orig_X, orig_Y, image_size=None):
    y *= 127.
    X_lab = (X_lab + 1) * 50.
    
    image_size = IMG_SIZE if image_size is None else image_size
    
    for i in range(len(y)):
        cur = np.zeros((image_size, image_size, 3))
        cur[:, :, 0] = X_lab[i, :, :, 0]
        cur[:, :, 1:] = y[i]
        res = lab2rgb(cur)
        res = resize(res, (orig_X[i], orig_Y[i], 3))
        imsave("predicted_images_cat/img_%d_predicted.png" % (i+1), res)

In [5]:
IMG_SIZE = 128 ## all images are resized to IMGxIMG
EMBEDDING_IMG_SIZE = 299 ## embedding size, InceptionResNetV2 only accept image with size of 299x299x3

#### Generate train and validation data (as TFRecords)

In [35]:
generate_train_data(100, 5000, 5)

../dd2424_project/imagenet_obelisk
../dd2424_project/imagenet_palm
../dd2424_project/imagenet_cat
../dd2424_project/imagenet_shinkansen
../dd2424_project/imagenet_people
panjang file: 5000
Done 100 / 5000 images
Done 200 / 5000 images
Done 300 / 5000 images
Done 400 / 5000 images
Done 500 / 5000 images
Done 600 / 5000 images
Done 700 / 5000 images
Done 800 / 5000 images
Done 900 / 5000 images
Done 1000 / 5000 images
Done 1100 / 5000 images
Done 1200 / 5000 images
Done 1300 / 5000 images
Done 1400 / 5000 images
Done 1500 / 5000 images
Done 1600 / 5000 images
Done 1700 / 5000 images
Done 1800 / 5000 images
Done 1900 / 5000 images
Done 2000 / 5000 images
Done 2100 / 5000 images
Done 2200 / 5000 images
Done 2300 / 5000 images
Done 2400 / 5000 images
Done 2500 / 5000 images
Done 2600 / 5000 images
Done 2700 / 5000 images
Done 2800 / 5000 images
Done 2900 / 5000 images
Done 3000 / 5000 images
Done 3100 / 5000 images
Done 3200 / 5000 images
Done 3300 / 5000 images
Done 3400 / 5000 images
Done

In [36]:
generate_val_data(100, 500)

../dd2424_project/imagenet_obelisk
../dd2424_project/imagenet_palm
../dd2424_project/imagenet_cat
../dd2424_project/imagenet_shinkansen
../dd2424_project/imagenet_people
panjang file: 500
Done 100 / 500 images
Done 200 / 500 images
Done 300 / 500 images
Done 400 / 500 images
Done 500 / 500 images
Done 500 / 500 images
Finished preprocessed data to TFRecoeds


#### Construct the Deep Autoencoder CNN model

In [37]:
from keras.layers import Conv2D, Input, Reshape, RepeatVector, concatenate, UpSampling2D, Flatten, Conv2DTranspose
from keras.models import Model
from keras.optimizers import Adam

In [38]:
def y_true_min(yt, yp):
    return K.min(yt)
def y_true_max(yt, yp):
    return K.max(yt)
def y_pred_min(yt, yp):
    return K.min(yp)
def y_pred_max(yt, yp):
    return K.max(yp)

In [39]:
def gen_model(img_size=128):
    
    # encoder
    encoder_input = Input(shape=(img_size, img_size, 1))
    encoder = Conv2D(64, (3, 3), padding='same', activation='relu', strides=(2,2))(encoder_input)
    encoder = Conv2D(128, (3, 3), padding='same', activation='relu')(encoder)
    encoder = Conv2D(128, (3, 3), padding='same', activation='relu', strides=(2,2))(encoder)
    encoder = Conv2D(256, (3, 3), padding='same', activation='relu')(encoder)
    encoder = Conv2D(256, (3, 3), padding='same', activation='relu', strides=(2,2))(encoder)
    encoder = Conv2D(512, (3, 3), padding='same', activation='relu')(encoder)
    encoder = Conv2D(512, (3, 3), padding='same', activation='relu')(encoder)
    encoder = Conv2D(256, (3, 3), padding='same', activation='relu')(encoder)
    
    # fusion
    batch, height, width, channels = K.int_shape(encoder)
    
    embed_input = Input(shape=(1000,))
    fusion = RepeatVector(height * width)(embed_input)
    fusion = Reshape((height, width, 1000))(fusion)
    fusion = concatenate([encoder, fusion], axis=-1)
    fusion = Conv2D(256, (1, 1), padding='same', activation='relu')(fusion)
    
    # decoder
    decoder = Conv2D(128, (3, 3), padding='same', activation='relu')(fusion)
    decoder = UpSampling2D((2, 2))(decoder)
    decoder = Conv2D(64, (3, 3), padding='same', activation='relu')(decoder)
    decoder = UpSampling2D((2, 2))(decoder)
    decoder = Conv2D(32, (3, 3), padding='same', activation='relu')(decoder)
    decoder = Conv2D(16, (3, 3), padding='same', activation='relu')(decoder)
    decoder = Conv2D(2, (3, 3), padding='same', activation='tanh')(decoder)
    decoder = UpSampling2D((2, 2))(decoder)
    
    model = Model(inputs=[encoder_input, embed_input], outputs=decoder, name='colorizer')
    model.compile(optimizer=Adam(1e-3), loss='mse', metrics=[y_true_max, y_true_min, y_pred_max, y_pred_min])
    
    print("Model built and compiled")
    return model
    

In [40]:
model = gen_model()
model.summary()

Model built and compiled
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 128, 128, 1)  0                                            
__________________________________________________________________________________________________
conv2d_218 (Conv2D)             (None, 64, 64, 64)   640         input_4[0][0]                    
__________________________________________________________________________________________________
conv2d_219 (Conv2D)             (None, 64, 64, 128)  73856       conv2d_218[0][0]                 
__________________________________________________________________________________________________
conv2d_220 (Conv2D)             (None, 32, 32, 128)  147584      conv2d_219[0][0]                 
____________________________________________________________________________________

#### Experiment

In [42]:
# train model
from keras.callbacks import ModelCheckpoint

n_image = 5000
batch_size = 125

tensorboard = TensorBoard(log_dir="./board")
checkpoint = ModelCheckpoint('model_weights_full_imagenet/color_model_5k.h5', monitor='loss', verbose=1, save_best_only=True, save_weights_only=True)
callbacks = [checkpoint, tensorboard]

model.fit_generator(generator=train_generator(batch_size),
                   steps_per_epoch=n_image//batch_size,
                   epochs=150,
                   verbose=1,
                   callbacks=callbacks,
                   validation_data=val_batch_generator(batch_size),
                   validation_steps=1)

Epoch 1/150

Epoch 00001: loss improved from inf to 0.01379, saving model to model_weights_full_imagenet/color_model_5k.h5
Epoch 2/150

Epoch 00002: loss improved from 0.01379 to 0.01303, saving model to model_weights_full_imagenet/color_model_5k.h5
Epoch 3/150

Epoch 00003: loss improved from 0.01303 to 0.01284, saving model to model_weights_full_imagenet/color_model_5k.h5
Epoch 4/150

Epoch 00004: loss did not improve from 0.01284
Epoch 5/150

Epoch 00005: loss improved from 0.01284 to 0.01281, saving model to model_weights_full_imagenet/color_model_5k.h5
Epoch 6/150

Epoch 00006: loss improved from 0.01281 to 0.01274, saving model to model_weights_full_imagenet/color_model_5k.h5
Epoch 7/150

Epoch 00007: loss did not improve from 0.01274
Epoch 8/150

Epoch 00008: loss did not improve from 0.01274
Epoch 9/150

Epoch 00009: loss did not improve from 0.01274
Epoch 10/150

Epoch 00010: loss did not improve from 0.01274
Epoch 11/150

Epoch 00011: loss improved from 0.01274 to 0.01266, sa


Epoch 00024: loss did not improve from 0.01254
Epoch 25/150

Epoch 00025: loss did not improve from 0.01254
Epoch 26/150

Epoch 00026: loss did not improve from 0.01254
Epoch 27/150

Epoch 00027: loss did not improve from 0.01254
Epoch 28/150

Epoch 00028: loss improved from 0.01254 to 0.01250, saving model to model_weights_full_imagenet/color_model_5k.h5
Epoch 29/150

Epoch 00029: loss did not improve from 0.01250
Epoch 30/150

Epoch 00030: loss did not improve from 0.01250
Epoch 31/150

Epoch 00031: loss did not improve from 0.01250
Epoch 32/150

Epoch 00032: loss did not improve from 0.01250
Epoch 33/150

Epoch 00033: loss did not improve from 0.01250
Epoch 34/150

Epoch 00034: loss did not improve from 0.01250
Epoch 35/150

Epoch 00035: loss did not improve from 0.01250
Epoch 36/150

Epoch 00036: loss did not improve from 0.01250
Epoch 37/150

Epoch 00037: loss did not improve from 0.01250
Epoch 38/150

Epoch 00038: loss did not improve from 0.01250
Epoch 39/150

Epoch 00039: loss


Epoch 00048: loss did not improve from 0.01250
Epoch 49/150

Epoch 00049: loss did not improve from 0.01250
Epoch 50/150

Epoch 00050: loss did not improve from 0.01250
Epoch 51/150

Epoch 00051: loss did not improve from 0.01250
Epoch 52/150

Epoch 00052: loss did not improve from 0.01250
Epoch 53/150

Epoch 00053: loss did not improve from 0.01250
Epoch 54/150

Epoch 00054: loss did not improve from 0.01250
Epoch 55/150

Epoch 00055: loss did not improve from 0.01250
Epoch 56/150

Epoch 00056: loss did not improve from 0.01250
Epoch 57/150

Epoch 00057: loss did not improve from 0.01250
Epoch 58/150

Epoch 00058: loss did not improve from 0.01250
Epoch 59/150

Epoch 00059: loss did not improve from 0.01250
Epoch 60/150

Epoch 00060: loss did not improve from 0.01250
Epoch 61/150

Epoch 00061: loss did not improve from 0.01250
Epoch 62/150

Epoch 00062: loss did not improve from 0.01250
Epoch 63/150

Epoch 00063: loss did not improve from 0.01250
Epoch 64/150

Epoch 00064: loss did n


Epoch 00096: loss did not improve from 0.01250
Epoch 97/150

Epoch 00097: loss did not improve from 0.01250
Epoch 98/150

Epoch 00098: loss did not improve from 0.01250
Epoch 99/150

Epoch 00099: loss improved from 0.01250 to 0.01245, saving model to model_weights_full_imagenet/color_model_5k.h5
Epoch 100/150

Epoch 00100: loss did not improve from 0.01245
Epoch 101/150

Epoch 00101: loss did not improve from 0.01245
Epoch 102/150

Epoch 00102: loss did not improve from 0.01245
Epoch 103/150

Epoch 00103: loss did not improve from 0.01245
Epoch 104/150

Epoch 00104: loss did not improve from 0.01245
Epoch 105/150

Epoch 00105: loss did not improve from 0.01245
Epoch 106/150

Epoch 00106: loss did not improve from 0.01245
Epoch 107/150

Epoch 00107: loss did not improve from 0.01245
Epoch 108/150

Epoch 00108: loss did not improve from 0.01245
Epoch 109/150

Epoch 00109: loss did not improve from 0.01245
Epoch 110/150

Epoch 00110: loss did not improve from 0.01245
Epoch 111/150

Epoch


Epoch 00144: loss did not improve from 0.01245
Epoch 145/150

Epoch 00145: loss did not improve from 0.01245
Epoch 146/150

Epoch 00146: loss did not improve from 0.01245
Epoch 147/150

Epoch 00147: loss did not improve from 0.01245
Epoch 148/150

Epoch 00148: loss did not improve from 0.01245
Epoch 149/150

Epoch 00149: loss did not improve from 0.01245
Epoch 150/150

Epoch 00150: loss did not improve from 0.01245




Exception ignored in: <generator object val_batch_generator at 0x7fcc4907bfc0>
Traceback (most recent call last):
  File "<ipython-input-17-fd606f3dff74>", line 15, in val_batch_generator
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1587, in __exit__
    self._default_graph_context_manager.__exit__(exec_type, exec_value, exec_tb)
  File "/usr/lib/python3.5/contextlib.py", line 77, in __exit__
    self.gen.throw(type, value, traceback)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 5257, in get_controller
    context.context().context_switches.pop()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/context.py", line 199, in pop
    self.stack.pop()
IndexError: pop from empty list
Exception ignored in: <generator object train_generator at 0x7fcc4905f200>
Traceback (most recent call last):
  File "<ipython-input-16-054f9dc49f37>", line 15, in train_generator
  File "/usr/local/lib/p

<keras.callbacks.History at 0x7fcc49257208>

In [None]:
# test the model to colorize grayscaled images
TEST_IMAGE_PATH = r"../dd2424_project/imagenet_cat/images"

batch_size = 10
image_size = 128

X = []
orig_X = []
orig_Y = []
files = glob.glob(TEST_IMAGE_PATH+'*/*')
files = files[-10:]

model = gen_model()
model.load_weights('model_weights_class_overfit/color_model_cat.h5')

for i, file in enumerate(files):
    img = img_to_array(load_img(file))/255.
    temp_orig_X, temp_orig_Y = img.shape[0:2]
    orig_X.append(temp_orig_X)
    orig_Y.append(temp_orig_Y)
    img = resize(img, (image_size, image_size, 3))*255.
    X.append(img)
    
X = np.array(X, dtype='float')
X_lab, X_features = preprocess_input_img_batch(X, batch_size=batch_size)
predictions = model.predict([X_lab, X_features], batch_size, verbose=1)

postprocess_output_img(X_lab, predictions, orig_X, orig_Y, image_size=image_size)

Model built and compiled


