# Image Colorization Final Project
Authors: Aret Tinoco, Keshav Gupta, Hal Halberstadt

Dataset: https://www.kaggle.com/datasets/darthgera/colorization

---

## Imports

We need a few unusual libraries to convert the RGB values of our target images into HLS format, plus a few more that make the data easier to view and display.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import colorsys
from pathlib import Path
from PIL import Image # for resizing images

import tensorflow as tf
from tensorflow.keras import models, layers, Input, Model, callbacks, utils
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img

from IPython.display import display, HTML

Display settings for ease of reading.

In [2]:
# Let pandas print wide frames: more columns, a wider line, longer cell text.
pd.set_option('display.max_columns', 200)
pd.set_option('display.width', 120)
pd.set_option('display.max_colwidth', 50)

# Inject CSS so the notebook's `.container` element spans the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

Next, we specify the directory that the data is loaded from.

In [3]:
import os

# Root folder of the downloaded dataset. The default is the original author's
# local copy; set the COLORIZATION_DATA_DIR environment variable to run the
# notebook against a copy stored elsewhere without editing this cell.
data_dir = Path(os.environ.get("COLORIZATION_DATA_DIR",
                               "C:/Users/smhal/Desktop/archive2"))

# Shape used for the model input and for every batch the generator builds.
img_shape = (512, 512, 3)

# Sub-folders of data_dir, by index:
# 0 = colour training, 1 = B/W training, 2 = colour validation, 3 = B/W validation.
folder_paths = ['color', 'bw', 'color_val', 'bw_val']

---

## Useful Functions

We need a helper function that converts each image from RGB to HLS.

In [4]:
def hls_conv(image, width=512, height=512, max_value=255.0):
    """Convert an RGB image to HLS, one pixel at a time, using ``colorsys``.

    ``colorsys.rgb_to_hls`` is defined for channel values in ``[0, 1]``.
    Feeding it raw 0-255 values yields lightness on a 0-255 scale and a
    meaningless (often negative) saturation, so every pixel is divided by
    ``max_value`` before it is converted.

    Args:
        image: Array-like (for example a PIL image or ndarray) with the RGB
            channels on the last axis.
        width: Maximum number of indices converted along the FIRST axis.
        height: Maximum number of indices converted along the SECOND axis.
            Both limits are clamped to the real image size, so images smaller
            than the defaults no longer raise ``IndexError``.
        max_value: Largest possible channel value of ``image``. Use ``255.0``
            for 8-bit images (the default) or ``1.0`` for data that is
            already normalised.

    Returns:
        A new ``float32`` array with the same shape as ``image`` whose last
        axis holds ``(hue, lightness, saturation)``, each in ``[0, 1]``.
        Pixels outside the ``width`` x ``height`` window are left as the
        unconverted input values.
    """
    image_ = np.array(image, dtype='float32')

    # Never index past the actual image, whatever limits were requested.
    rows = min(width, image_.shape[0])
    cols = min(height, image_.shape[1])

    for x in range(rows):
        for y in range(cols):
            pixel = image_[x, y]
            # Plain Python floats keep colorsys' arithmetic in double
            # precision; the result is stored back as float32.
            r = float(pixel[0]) / max_value
            g = float(pixel[1]) / max_value
            b = float(pixel[2]) / max_value
            image_[x, y] = colorsys.rgb_to_hls(r, g, b)
    return image_

---

## Data Generator

Since we cannot hope to hold all 18000 images in memory in our kernel, we use a generator that loads the images from disk in batches and trains on each batch as it is produced.

In [5]:
class DataGenerator(utils.Sequence):
    '''
    adapted from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

    Generates batches of (black & white image, colour image in HLS) pairs
    for training.

    The two path lists are paired by position: list_IDs_bw[i] is the input
    for the target list_IDs_color[i].  Note the parameter order (colour
    first, then B/W); passing both lists by keyword avoids swapping them.

    Parameters
    ----------
    list_IDs_color : sequence of paths to the colour (target) images
    list_IDs_bw    : sequence of paths to the black & white (input) images
    batch_size     : number of image pairs per batch
    dim            : shape of one image; dim[:2] is also the size every
                     image is resized to when it is loaded
    n_channels     : stored for compatibility, not used by the generator
    shuffle        : reshuffle the sample order at the end of every epoch

    Raises
    ------
    ValueError if the two path lists differ in length.
    '''
    def __init__(self, list_IDs_color, list_IDs_bw, batch_size=8,
                 dim=img_shape, n_channels=3, shuffle=True):
        super().__init__()
        if len(list_IDs_color) != len(list_IDs_bw):
            raise ValueError(
                'list_IDs_color and list_IDs_bw must have the same length, '
                'got %d and %d' % (len(list_IDs_color), len(list_IDs_bw)))
        self.dim = dim
        self.batch_size = batch_size
        # Keep the two lists separate.  Previously both were written to
        # self.list_IDs, so the B/W list was overwritten and inputs and
        # targets were read from the same files.
        self.list_IDs_bw = list(list_IDs_bw)
        self.list_IDs_color = list(list_IDs_color)
        # Retained under its old name (and old value) for existing readers.
        self.list_IDs = self.list_IDs_color
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        # Number of full batches per epoch; a trailing partial batch is dropped.
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        # Sample positions that make up batch number `index`
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Look the same positions up in both lists so the pairs stay aligned
        list_IDs_temp_bw = [self.list_IDs_bw[k] for k in indexes]
        list_IDs_temp_color = [self.list_IDs_color[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp_bw, list_IDs_temp_color)

        return X, y

    def on_epoch_end(self):
        # Rebuild the sample order, shuffled if requested
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp_bw, list_IDs_temp_color):
        # float32 buffers: np.empty defaults to float64, which doubled the
        # batch memory and silently undid the float32 conversions below.
        X = np.empty((self.batch_size, *self.dim), dtype='float32')
        y = np.empty((self.batch_size, *self.dim), dtype='float32')

        # Resize on load so images of any size fit the batch buffers
        target_size = tuple(self.dim[:2])

        for i, (bw_path, color_path) in enumerate(
                zip(list_IDs_temp_bw, list_IDs_temp_color)):
            # B/W image
            X[i] = np.asarray(load_img(bw_path, target_size=target_size),
                              dtype='float32')

            # target image, converted from RGB to HLS
            color_img = load_img(color_path, target_size=target_size)
            y[i] = hls_conv(color_img, width=self.dim[0], height=self.dim[1])

        return X, y

In [6]:
# Generator settings kept from the tutorial this cell was adapted from.
# NOTE: `params` is not passed to DataGenerator below, which therefore runs
# with its own defaults.
params = {'dim': img_shape,
          'n_channels': 1,
          'shuffle': True}

# Datasets
bw_dir_train = data_dir / folder_paths[1]
color_dir_train = data_dir / folder_paths[0]

# The colour list must come from the colour folder (it used to glob the B/W
# folder a second time).  Both listings are sorted because glob order is not
# guaranteed and the generator pairs the files by position.
# Assumes a B/W image and its colour version share a file name - TODO confirm.
partition_bw = sorted(bw_dir_train.glob('*.jpg'))
partition_color = sorted(color_dir_train.glob('*.jpg'))

# Generators
# Keyword arguments: the constructor takes the colour list first, so the
# previous positional call (bw, color) handed the lists over swapped.
training_generator = DataGenerator(list_IDs_color=partition_color,
                                   list_IDs_bw=partition_bw)

In [7]:
# Generator settings kept from the tutorial this cell was adapted from.
# NOTE: `params` is not passed to DataGenerator below, which therefore runs
# with its own defaults.
params = {'dim': img_shape,
          'n_channels': 1,
          'shuffle': True}

bw_dir_val = data_dir / folder_paths[3]
color_dir_val = data_dir / folder_paths[2]

# Sorted because glob order is not guaranteed and the generator pairs the
# files by position.
# Assumes a B/W image and its colour version share a file name - TODO confirm.
partition_bw = sorted(bw_dir_val.glob('*.jpg'))
partition_color = sorted(color_dir_val.glob('*.jpg'))

# Keyword arguments: the constructor takes the colour list first, so the
# previous positional call (bw, color) handed the lists over swapped.
validation_generator = DataGenerator(list_IDs_color=partition_color,
                                     list_IDs_bw=partition_bw)

---

## Model(s)

Next, I want to read the data from the files and make sure I am getting the right data from the right files.

In [8]:
# Discard Keras state left behind by models built in earlier runs of this cell.
K.clear_session()

act_fun = 'relu'
filters = 3
inputs = Input(img_shape)

# Two stacked separable convolutions.  'same' padding keeps the spatial size
# and `filters` matches the three image channels, so the output has the same
# shape as the input.
x = inputs
for _ in range(2):
    x = layers.SeparableConv2D(filters, 2, padding='same',
                               activation=act_fun)(x)

# Currently disabled: pooling and a second pair of convolutions.
# x = layers.MaxPooling2D(2, padding='same', strides=1)(x)
# x = layers.SeparableConv2D(filters, 2, padding='same', activation=act_fun)(x)
# x = layers.SeparableConv2D(filters, 2, padding='same', activation=act_fun)(x)

# The last convolution is used directly as the model output.
output = x
embedding_model = Model(inputs, output)

In [9]:
# Print the layer-by-layer overview (output shapes and parameter counts).
embedding_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 512, 512, 3)]     0         
_________________________________________________________________
separable_conv2d (SeparableC (None, 512, 512, 3)       24        
_________________________________________________________________
separable_conv2d_1 (Separabl (None, 512, 512, 3)       24        
Total params: 48
Trainable params: 48
Non-trainable params: 0
_________________________________________________________________


In [10]:
# input0 = Input(img_shape, name='input0')
# input1 = Input(img_shape, name='input1')
# input2 = Input(img_shape, name='input2')

# sub0 = embedding_model(input0)
# sub1 = embedding_model(input1)
# sub2 = embedding_model(input2)

# pred = layers.Concatenate(axis=1)([sub0, sub1, sub2])

# model = Model([input0, input1, input2], pred)

# model.compile(optimizer='rmsprop', loss='mse',  metrics=['accuracy'])

In [11]:
# model.summary()

---

## Training

In [None]:

# The model regresses per-pixel colour values, so 'accuracy' (a
# classification metric) carries no information here; mean absolute error is
# reported next to the MSE loss instead.
embedding_model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

# validation_data: the validation generator was built but never used, so no
# validation loss was reported during training.
# NOTE(review): use_multiprocessing=True / workers=32 were dropped.  The
# generator class is defined inside the notebook and data_dir points at a
# Windows path; process-based workers in that setup presumably caused the
# stall right after "Epoch 1/5" - confirm before re-enabling.
embedding_model.fit(training_generator,
                    validation_data=validation_generator,
                    epochs=5)

Epoch 1/5


---

## Conclusions

Para1...