In [1]:
import numpy as np
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt

# NST

- input: content image, style image
- output: generate image like (content + style) from noise image by using gradient descent

## References

- https://keras.io/examples/generative/neural_style_transfer/

# Load VGG-19 Model

In [2]:
# neural style transfer output

content_layers = ['block5_conv2']
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]

In [3]:
def load_vgg():
    vgg = tf.keras.applications.vgg19.VGG19(include_top=False, weights='imagenet')
    # vgg.trainable = False
    
    # extracted feature
    layers = content_layers + style_layers
    outputs_dict = dict([(layer, vgg.get_layer(layer).output) for layer in layers])
    
    # model for nst
    feature_extractor = tf.keras.Model(inputs=vgg.input, outputs=outputs_dict)
    
    return feature_extractor

# Image Preprocessing

In [4]:
def preprocess_image(image_path, image_size=(500, 500)):
    """
        return: tensor type array. shape (1, height, width, channel).
    """
    
    
    img = tf.keras.preprocessing.image.load_img(
        image_path, target_size=image_size 
    )
    
    # image to numpy array shape (h, w, c)
    img = tf.keras.preprocessing.image.img_to_array(img)
    
    # to shape (1, h, w, c)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.vgg19.preprocess_input(img)
    
    return tf.convert_to_tensor(img)

In [5]:
def deprocess_image(tensor_img):
    img = np.array(tensor_img.numpy()[0], dtype=np.float64)
    img[:, :, 0] += 103.939
    img[:, :, 1] += 116.779
    img[:, :, 2] += 123.68
    img = img[:, :, ::-1]
    img = np.clip(img, 0, 255).astype("uint8")

    return img

In [6]:
img_path = "./img/2valk.jpg"
img_tensor = preprocess_image(img_path)

In [7]:
img_tensor

<tf.Tensor: shape=(1, 500, 500, 3), dtype=float32, numpy=
array([[[[  26.060997 ,   -0.7789993,  -13.68     ],
         [  42.060997 ,   14.221001 ,    4.3199997],
         [  70.061    ,   42.221    ,   33.32     ],
         ...,
         [ -57.939003 ,  -89.779    , -104.68     ],
         [ -53.939003 ,  -85.779    , -100.68     ],
         [ -57.939003 ,  -89.779    , -104.68     ]],

        [[  44.060997 ,   21.221    ,   -2.6800003],
         [  73.061    ,   50.221    ,   26.32     ],
         [  36.060997 ,   13.221001 ,  -10.68     ],
         ...,
         [ -47.939003 ,  -83.779    ,  -98.68     ],
         [ -49.939003 ,  -85.779    , -100.68     ],
         [ -48.939003 ,  -84.779    ,  -99.68     ]],

        [[  80.061    ,   55.221    ,   42.32     ],
         [  59.060997 ,   33.221    ,   12.32     ],
         [  61.060997 ,   38.221    ,   14.32     ],
         ...,
         [ -44.939003 ,  -84.779    , -101.68     ],
         [ -50.939003 ,  -90.779    , -107.68   

In [8]:
img_np = deprocess_image(img_tensor)

In [9]:
img_np

array([[[109, 116, 129],
        [127, 131, 145],
        [156, 159, 173],
        ...,
        [ 18,  27,  45],
        [ 22,  31,  49],
        [ 18,  27,  45]],

       [[120, 138, 147],
        [149, 167, 176],
        [112, 130, 139],
        ...,
        [ 24,  33,  55],
        [ 22,  31,  53],
        [ 23,  32,  54]],

       [[165, 172, 183],
        [135, 150, 162],
        [137, 155, 164],
        ...,
        [ 21,  32,  58],
        [ 15,  26,  52],
        [ 20,  31,  57]],

       ...,

       [[ 59,  58,  60],
        [ 65,  64,  66],
        [ 69,  68,  70],
        ...,
        [217, 183, 152],
        [217, 187, 155],
        [213, 184, 149]],

       [[ 66,  65,  67],
        [ 60,  59,  61],
        [ 53,  52,  54],
        ...,
        [207, 167, 134],
        [199, 165, 132],
        [218, 185, 149]],

       [[ 71,  70,  72],
        [ 64,  63,  65],
        [ 72,  71,  73],
        ...,
        [169, 122,  83],
        [168, 125,  87],
        [198, 156, 120]]

# Generated Image Tensor

In [10]:
def load_random_noise_tensor(img_tensor):
    random_noise = np.array(np.random.randint(0, 255, img_tensor.shape))
    gen_tensor = tf.Variable(random_noise, dtype=tf.float32)
    return gen_tensor

In [11]:
gen_tensor = load_random_noise_tensor(img_tensor)

# Compute Loss

## Content loss

In [12]:
def compute_content_loss(content_feature, gen_feature, loss_weight=1e-6):
    return loss_weight*tf.reduce_sum(tf.square(content_feature - gen_feature))

## Style loss

In [13]:
def gram_matrix(x):
    x = tf.transpose(x, (2, 0, 1))
    features = tf.reshape(x, (tf.shape(x)[0], -1))
    gram = tf.matmul(features, tf.transpose(features))
    return gram

In [14]:
def style_loss(style_feature, gen_feature, img_size, loss_weight=1e-6):
    S = gram_matrix(style_feature)
    G = gram_matrix(gen_feature)
    channels = 3
    size = img_size[0] * img_size[1]
    return loss_weight*tf.reduce_sum(tf.square(S - G))/(4.0*(channels**2)*(size**2))

## Total Loss

In [15]:
def compute_total_loss(content_tensor, style_tensor, gen_tensor, content_layers, style_layers, img_size, a=0.8, b=0.2):
    input_tensor = tf.concat(
        [content_tensor, style_tensor, gen_tensor], axis=0
    )
    features = model(input_tensor)
    
    content_features = features[content_layers[0]]
    
    loss = tf.zeros(shape=())
    
    content_loss = compute_content_loss(content_features[0], content_features[2])
    loss += a*content_loss
    
    total_style_loss = tf.zeros(shape=())
    for layer in style_layers:
        layer_feature = features[layer]
        style_feature = layer_feature[1]
        gen_style_feature = layer_feature[2]
        layer_style_loss = style_loss(style_feature, gen_style_feature, img_size)
        loss +=  (b/len(style_layers)) * layer_style_loss
        total_style_loss += layer_style_loss
        
    return loss, content_loss, total_style_loss

# Gradient

In [16]:
@tf.function
def compute_loss_and_grads(content_image_tensor, style_image_tensor, gen_image_tensor, content_layers, style_layers, img_size):
    with tf.GradientTape() as tape:
        loss, content_loss, total_style_loss = compute_total_loss(content_image_tensor, style_image_tensor, gen_image_tensor, content_layers, style_layers, img_size)
    grads = tape.gradient(loss, gen_image_tensor)
    return loss, content_loss, total_style_loss, grads

# Training Loop

In [17]:
def training_loop(content_tensor, style_tensor, gen_tensor, content_layers, style_layers, img_size, optimizer, max_iterations=10000):
    for i in range(1, max_iterations+1):
        if (i % 100 == 0) or (i == 1) or (i == max_iterations):
            loss, content_loss, total_style_loss, grads = compute_loss_and_grads(content_tensor, style_tensor, gen_tensor, content_layers, style_layers, img_size)
            optimizer.apply_gradients([(grads, gen_tensor)])
            print(f"iteration {i:8}, loss {loss:.2f}, content loss {content_loss:.2f}, style loss {total_style_loss:.2f}")
            
        # show image
        if (i % 1000 == 0) or (i == 1) or (i == max_iterations):
            fig = plt.figure(figsize=(5, 5))
            plt.title(f"iteration {i}")
            plt.imshow(deprocess_image(gen_tensor))
            plt.show()
        
        # save image
        if (i % 200 == 0) or (i == 1) or (i == max_iterations):
            save_img_arr = deprocess_image(gen_tensor)
            tf.keras.preprocessing.image.save_img(f"./regenerate_output_img/{i}.jpg", save_img_arr)
    
    fig = plt.figure(figsize=(5, 5))
    plt.title("final result")
    plt.imshow(deprocess_image(gen_tensor))
    plt.show()

# Train Model

In [18]:
# neural style transfer output

content_layers = ['block5_conv2']
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]

In [19]:
model = load_vgg()

optimizer = tf.keras.optimizers.SGD(
    tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1000.0, decay_steps=500, decay_rate=0.9
    )
)

In [20]:
# load image
content_img_path = "./img/remember.jpg"
style_image_path = "./img/best_nzk.jpg"
img_size = (500, 500)

# base image/target image tensor
content_img_tensor = preprocess_image(content_img_path, img_size)
style_img_tensor = preprocess_image(style_image_path, img_size)
# random image
gen_tensor = load_random_noise_tensor(img_tensor)

In [21]:
training_loop(content_img_tensor, style_img_tensor, gen_tensor, 
              content_layers, style_layers, img_size, optimizer, 15000)

ResourceExhaustedError:  OOM when allocating tensor with shape[256,256,3,3] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node model/block3_conv2/Conv2D (defined at <ipython-input-15-9b763fffab19>:5) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_compute_loss_and_grads_1260]

Function call stack:
compute_loss_and_grads


In [None]:
fig = plt.figure(figsize=(5, 5))
plt.imshow(deprocess_image(img_tensor))
plt.show()

In [None]:
fig = plt.figure(figsize=(5, 5))
plt.imshow(deprocess_image(gen_tensor))
plt.show()