<a href="https://colab.research.google.com/github/elilaird/CS83212-Advanced-Neural-Networks/blob/main/Lab2_Style_Transfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lab 2: Style Transfer

**Group Members:**
* Clay Harper
* Eli Laird


In [1]:
import tensorflow as tf
from tensorflow import keras

print(f'Tensorflow version: {tf.__version__}')
print(f'Keras version: {keras.__version__}')

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from PIL import Image
from io import BytesIO
import requests
from tqdm import tqdm
import copy

from tensorflow.keras.preprocessing import image
from tensorflow.keras import models, Model, Sequential
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Conv2D, Input, UpSampling2D

Tensorflow version: 2.4.1
Keras version: 2.4.0


## VGG Manipulation

Here, we need to manipulate the given VGG code (courtesy of Justin Ledford) to make use of pooling layers or strided convolutions alternatively.  We chose to use strided convolutions because it is less computationally expensive.

In [2]:
# Load VGG
pre_trained_model = tf.keras.applications.VGG19(include_top=False,
                                                      weights='imagenet')

def vgg_layers(inputs, target_layer):
    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(inputs)
    if target_layer == 1:
        return x
    # Strides instead of maxpooling 
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', strides=2)(x)
    # x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    if target_layer == 2:
        return x
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', strides=2)(x)
    # x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    if target_layer == 3:
        return x
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv4', strides=2)(x)
    # x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    if target_layer == 4:
        return x
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv4', strides=2)(x)
    # x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    return x

def load_weights(trained_model, model):
    layer_names = [layer.name for layer in trained_model.layers]

    for layer in model.layers:
        b_name = layer.name.encode()
        if b_name in layer_names:
            layer.set_weights(trained_model.get_layer(b_name).get_weights())
            layer.trainable = False

def VGG19(trained_model, input_tensor=None, input_shape=None, target_layer=1):
    """
    VGG19, up to the target layer (1 for relu1_1, 2 for relu2_1, etc.)
    """
    if input_tensor is None:
        inputs = Input(shape=input_shape)
    else:
        inputs = Input(tensor=input_tensor, shape=input_shape)
    model = Model(inputs, vgg_layers(inputs, target_layer), name='vgg19')
    load_weights(trained_model, model)
    return model

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


Create an encoder network from the pretrained VGG network 

In [3]:
target_layer = 3
vgg_model = VGG19(pre_trained_model, input_shape=(300, 300, 3), target_layer=target_layer)
vgg_model.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 300, 300, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 300, 300, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 150, 150, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 75, 75, 256)       295168    
Total params: 555,328
Trainable params: 555,328
Non-trainable params: 0
_______________________________________________________

## Decoder Network Architecture

In [4]:
def decoder_layers(inputs, layer):
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='decoder_block5_conv1')(inputs)
    if layer == 5:
        return x

    x = UpSampling2D((2, 2), name='decoder_block4_upsample')(x)
    x = Conv2D(512, (4, 4), activation='relu', padding='same', name='decoder_block4_conv4')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='decoder_block4_conv3')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='decoder_block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='decoder_block4_conv1')(x)
    if layer == 4:
        return x

    x = UpSampling2D((2, 2), name='decoder_block3_upsample')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='decoder_block3_conv4')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='decoder_block3_conv3')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='decoder_block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='decoder_block3_conv1')(x)
    if layer == 3:
        return x

    x = UpSampling2D((2, 2), name='decoder_block2_upsample')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='decoder_block2_conv2')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='decoder_block2_conv1')(x)
    if layer == 2:
        return x

    x = UpSampling2D((2, 2), name='decoder_block1_upsample')(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='decoder_block1_conv2')(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='decoder_block1_conv1')(x)
    if layer == 1:
        return x

## Encoder-Decoder Architecture

In [5]:
LAMBDA=1

def l2_loss(x):
  return K.sum(K.square(x)) / 2

class EncoderDecoder:
  def __init__(self, trained_model, input_shape=(256, 256, 3), target_layer=5,
                decoder_path=None):
    self.input_shape = input_shape
    self.target_layer = target_layer
    self.trained_model = trained_model

    self.encoder = VGG19(trained_model, input_shape=input_shape, target_layer=target_layer)
    if decoder_path:
      self.decoder = load_model(decoder_path)
    else:
      self.decoder = self.create_decoder(target_layer)

    self.model = Sequential()
    self.model.add(self.encoder)
    self.model.add(self.decoder)

    self.loss = self.create_loss_fn(self.encoder)

    self.model.compile('adam', self.loss)

  def create_loss_fn(self, encoder):
    def get_encodings(inputs):
      encoder = VGG19(trained_model, inputs, self.input_shape, self.target_layer)
      return encoder.output

    def loss(img_in, img_out):
      encoding_in = get_encodings(img_in)
      encoding_out = get_encodings(img_out)
      return l2_loss(img_out - img_in) + \
              LAMBDA*l2_loss(encoding_out - encoding_in)
    return loss

  def summary(self):
    self.model.summary()

  def create_decoder(self, target_layer):
    inputs = Input(shape=self.encoder.output_shape[1:])
    layers = decoder_layers(inputs, target_layer)
    output = Conv2D(3, (3, 3), activation='relu', padding='same',
                    name='decoder_out')(layers)
    return Model(inputs, output, name='decoder_%s' % target_layer)

  def export_decoder(self):
    self.decoder.save('decoder_%s.h5' % self.target_layer)


## Train Two Decoders 

Decoders will be created based on the outputs of 2 different layers in the encoder model.

In [6]:
encoder_decoder = EncoderDecoder(pre_trained_model, target_layer=target_layer)
encoder_decoder.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Functional)           (None, 64, 64, 256)       555328    
_________________________________________________________________
decoder_3 (Functional)       (None, 256, 256, 3)       15411459  
Total params: 15,966,787
Trainable params: 15,966,787
Non-trainable params: 0
_________________________________________________________________


In [18]:
import sys
import os

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import Callback
#from scipy.misc import imresize, imsave DEPRACATED
from cv2 import resize
import numpy as np


#from model import EncoderDecoder
#from util import count_num_samples

TRAIN_PATH = 'data'
TARGET_SIZE = (256, 256)
BATCH_SIZE = 4
epochs = 2

datagen = ImageDataGenerator()
gen = datagen.flow_from_directory(TRAIN_PATH, target_size=TARGET_SIZE,
                                  batch_size=BATCH_SIZE, class_mode=None)


def create_gen(img_dir, target_size, batch_size):
    datagen = ImageDataGenerator()
    gen = datagen.flow_from_directory(img_dir, target_size=target_size,
                                      batch_size=batch_size, class_mode=None)

    def tuple_gen():
        for img in gen:
            if img.shape[0] != batch_size:
                continue

            # (X, y)
            yield (img, img)

    return tuple_gen()

# This needs to be in scope where model is defined
class OutputPreview(Callback):
    def __init__(self, model, test_img_path, increment, preview_dir_path):
        test_img = image.load_img(test_img_path)
        test_img = resize(src=test_img, dsize=(256,256,3)) #imresize(test_img, (256, 256, 3))
        test_target = image.img_to_array(test_img)
        test_target = np.expand_dims(test_target, axis=0)
        self.test_img = test_target
        self.model = model

        self.preview_dir_path = preview_dir_path

        self.increment = increment
        self.iteration = 0

    def on_batch_end(self, batch, logs={}):
        if (self.iteration % self.increment == 0):
            output_img = self.model.predict(self.test_img)[0]
            fname = '%d.jpg' % self.iteration
            out_path = os.path.join(self.preview_dir_path, fname)
            imsave(out_path, output_img)

        self.iteration += 1


gen = create_gen(TRAIN_PATH, TARGET_SIZE, BATCH_SIZE)

num_samples = 1 #count_num_samples(TRAIN_PATH)
steps_per_epoch = num_samples // BATCH_SIZE

target_layer = 1 #int(sys.argv[1])

encoder_decoder = EncoderDecoder(target_layer=target_layer)

callbacks = [OutputPreview(encoder_decoder, './doge.jpg', 5000, './preview-%d' % target_layer)]
encoder_decoder.model.fit_generator(gen, steps_per_epoch=steps_per_epoch,
        epochs=epochs, callbacks=callbacks)
encoder_decoder.export_decoder()

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


TypeError: ignored