# Lab 1: CNN Visualization

*Team Members:*
- Yasmin Femerling
- Alejandro de Leon

March 21, 2021

In this lab we implement a photo realistic style transfer algorithm using the work of Li et al. in their universal style transfer paper. 

In [1]:
from tensorflow import keras 
from tensorflow.keras import layers, applications
from tensorflow.keras.layers import Input, Conv2D, UpSampling2D
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Conv2D, Input, MaxPooling2D
import tensorflow.keras.backend as K
from tensorflow.keras.utils import get_file

import sys
import os
import h5py

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.callbacks import Callback
import numpy as np
from PIL import Image


- vgg tiene stride de 1
- convolutional layers de 3x3
- cambiamos el numero de las layers
- cambiamos el size del conv2d del upsampling pq tiene que ser un factor

In [2]:
def decoder_layers(inputs, layer):
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='decoder_block5_conv1')(inputs)
    if layer == 1:
        return x

    x = UpSampling2D((2, 2), name='decoder_block4_upsample')(x)
    x = Conv2D(512, (4, 4), activation='relu', padding='same', name='decoder_block4_conv4')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='decoder_block4_conv3')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='decoder_block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='decoder_block4_conv1')(x)
    if layer == 2:
        return x

    x = UpSampling2D((2, 2), name='decoder_block3_upsample')(x)
    x = Conv2D(256, (4, 4), activation='relu', padding='same', name='decoder_block3_conv4')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='decoder_block3_conv3')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='decoder_block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='decoder_block3_conv1')(x)
    if layer == 3:
        return x

    x = UpSampling2D((2, 2), name='decoder_block2_upsample')(x)
    x = Conv2D(128, (4, 4), activation='relu', padding='same', name='decoder_block2_conv2')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='decoder_block2_conv1')(x)
    if layer == 4:
        return x

    x = UpSampling2D((2, 2), name='decoder_block1_upsample')(x)
    x = Conv2D(64, (4, 4), activation='relu', padding='same', name='decoder_block1_conv2')(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='decoder_block1_conv1')(x)
    if layer == 5:
        return x

In [9]:

def count_num_samples(directory):
    total=0
    for root, dirs, files in os.walk(directory):
        total += len(files)
    return total

WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'

MEAN_PIXEL = np.array([103.939, 116.779, 123.68])

WEIGHTS_PATH = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
                        WEIGHTS_PATH_NO_TOP,
                        cache_subdir='models',
                        file_hash='253f8cb515780f3b799900260a226db6')

def vgg_layers(inputs, target_layer):
    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(inputs)
    if target_layer == 1:
        return x
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    if target_layer == 2:
        return x
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    if target_layer == 3:
        return x
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv4')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    if target_layer == 4:
        return x
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv4')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    return x


def load_weights(model):
    f = h5py.File(WEIGHTS_PATH)
    layer_names = [name for name in f.attrs['layer_names']]

    for layer in model.layers:
        b_name = layer.name.encode()
        if b_name in layer_names:
            g = f[b_name]
            weights = [g[name] for name in g.attrs['weight_names']]
            layer.set_weights(weights)
            layer.trainable = False

    f.close()


def VGG19(input_tensor=None, input_shape=None, target_layer=1):
    """
    VGG19, up to the target layer (1 for relu1_1, 2 for relu2_1, etc.)
    """
    if input_tensor is None:
        inputs = Input(shape=input_shape)
    else:
        inputs = Input(tensor=input_tensor, shape=input_shape)       
        
    print("Before model", inputs)
    
    model = Model(inputs, vgg_layers(inputs, target_layer))
    load_weights(model)
    return model


def preprocess_input(x):
    # Convert 'RGB' -> 'BGR'
    if type(x) is np.ndarray:
        x = x[..., ::-1]
    else:
        x = tf.reverse(x, [-1])

    return x - MEAN_PIXEL

- training decoder from early layer ('block2_conv2)

In [10]:
LAMBDA=1

def l2_loss(x):
    return K.sum(K.square(x)) / 2

def create_loss_fn(y_true,y_pred):
    out_pred = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(y_pred)
    out_true = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(y_true)
    loss = l2_loss(y_pred - y_true) + l2_loss(out_pred - out_true)
        
    return loss
    
## STYLE-TRANSFER LOSS
CONTENT_TRAINING_SIZE = (256, 256, 3)

class EncoderDecoder:
    def __init__(self, input_shape=(256, 256, 3), target_layer=5,
                 decoder_path=None):
        self.input_shape = input_shape
        self.target_layer = target_layer
        
        print("Creando modelo...")
        self.encoder = VGG19(input_shape=self.input_shape, target_layer=target_layer)
        if decoder_path:
            self.decoder = load_model(decoder_path)
        else:
            self.decoder = self.create_decoder(target_layer)
        

        self.model = Sequential()
        self.model.add(self.encoder)
        self.model.add(self.decoder)
        
        self.finalmodel = Model(self.model.inputs, [self.encoder, self.model])

        self.model.compile('adam', create_loss_fn)
        self.model.summary()
        print("Termine modelo...")
        
    def create_loss_fn(self):
        def get_encodings(inputs):
            self.encoder = VGG19(inputs, self.input_shape, self.target_layer)
            return encoder.output

        def loss(img_in, img_out):
            encoding_in = get_encodings(img_in)
            encoding_out = get_encodings(img_out)
            return l2_loss(img_out - img_in) + \
                   LAMBDA*l2_loss(encoding_out - encoding_in)
        return loss

    def create_decoder(self, target_layer):
        inputs = Input(shape=self.encoder.output_shape[1:])
        layers = decoder_layers(inputs, target_layer)
        output = Conv2D(3, (3, 3), activation='relu', padding='same',
                        name='decoder_out')(layers)
        return Model(inputs, output, name='decoder_%s' % target_layer)

    def export_decoder(self):
        self.decoder.save('decoder_%s.h5' % self.target_layer)

Train model

In [11]:
TRAIN_PATH = 'data/train'
VAL_PATH = 'data/val'
TARGET_SIZE = (256, 256)
BATCH_SIZE = 4
epochs = 2
target_layer = 2

from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

datagen = ImageDataGenerator()
gen = datagen.flow_from_directory(TRAIN_PATH, target_size=TARGET_SIZE,
                                  batch_size=BATCH_SIZE, class_mode=None)


def create_gen(img_dir, target_size, batch_size):
    datagen = ImageDataGenerator()
    gen = datagen.flow_from_directory(img_dir, target_size=target_size,
                                      batch_size=batch_size, class_mode=None)

    def tuple_gen():
        for img in gen:
            if img.shape[0] != batch_size:
                continue

            # (X, y)
            yield (img, img)

    return tuple_gen()


# Creating generators
train_gen = create_gen(TRAIN_PATH, TARGET_SIZE, BATCH_SIZE)
#validation_gen = create_gen(VAL_PATH, TARGET_SIZE, BATCH_SIZE)

# Steps per epoc calculation
num_samples = count_num_samples(TRAIN_PATH)
steps_per_epoch = num_samples // BATCH_SIZE

# Validation steps calculation
#num_samples = count_num_samples(VAL_PATH)
#validation_steps = num_samples // BATCH_SIZE

# Initializing encoder-decoder
K.clear_session()
encoder_decoder = EncoderDecoder(target_layer=target_layer)


# Training model
H = encoder_decoder.model.fit(train_gen, 
                              steps_per_epoch = steps_per_epoch,
                              epochs = epochs, 
                             )

# Plot model training loss and validation loss
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, epochs), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, epochs), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, epochs), H.history["accuracy"], label="train_acc")
plt.plot(np.arange(0, epochs), H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss and Accuracy")
plt.legend(loc="lower left")
plt.savefig("plot-%d.png" % target_layer)

# Saving decoder
encoder_decoder.export_decoder()

Found 100000 images belonging to 200 classes.
Found 100000 images belonging to 200 classes.
Creando modelo...
Before model Tensor("input_1:0", shape=(None, 256, 256, 3), dtype=float32)


  f = h5py.File(WEIGHTS_PATH)


ValueError: in user code:

    <ipython-input-5-9aa955534e49>:8 create_loss_fn  *
        out_true = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(y_true)
    c:\program files\python38\lib\site-packages\tensorflow\python\keras\engine\base_layer_v1.py:766 __call__  **
        self._maybe_build(inputs)
    c:\program files\python38\lib\site-packages\tensorflow\python\keras\engine\base_layer_v1.py:2106 _maybe_build
        self.build(input_shapes)
    c:\program files\python38\lib\site-packages\tensorflow\python\keras\layers\convolutional.py:188 build
        input_channel = self._get_input_channel(input_shape)
    c:\program files\python38\lib\site-packages\tensorflow\python\keras\layers\convolutional.py:360 _get_input_channel
        raise ValueError('The channel dimension of the inputs '

    ValueError: The channel dimension of the inputs should be defined. Found `None`.


In [None]:
whos