## Neural style transfer

### Neural style transfer in TF/Keras

**Getting the style and content images**

In [17]:

import os
import tensorflow as tf
from tensorflow import keras
import numpy as np


In [18]:
print("TF version    =", tf.__version__)
print("Keras version =", keras.__version__)
print("Numpu version =", np.__version__)

TF version    = 2.19.0
Keras version = 3.9.0
Numpu version = 2.1.3


In [19]:
#base_image_path = keras.utils.get_file(
#    "sf.jpg", origin="https://img-datasets.s3.amazonaws.com/sf.jpg")

localFilePath = "/drv3/hm3/code/python/tf2.19/local/GenAI/Angel_CDMX.png"
file_name = os.path.basename( localFilePath )


#base_image_path = keras.utils.get_file(
#    "Angel_CDMX.png", origin="/drv3/hm3/code/python/tf2.19/local/GenAI/Angel_CDMX.png")

try:
    base_image_path = tf.keras.utils.get_file(
        fname=file_name,
        origin="file://" + localFilePath,
        extract=False,
    )
    print(f"File accessed: {base_image_path}")

    # Now you can work with the local_file path.
except Exception as e:
    print(f"Error accessing file: {e}")


style_reference_image_path = keras.utils.get_file(
    "starry_night.jpg", origin="https://img-datasets.s3.amazonaws.com/starry_night.jpg")

#original_width, original_height = keras.utils.load_img(base_image_path).size
#img_height = 400
#img_width = round(original_width * img_height / original_height)

original_width, original_height = keras.utils.load_img(file_name).size
img_height = 400
img_width = round(original_width * img_height / original_height)


File accessed: /home/juan/.keras/datasets/Angel_CDMX.png


**Auxiliary functions**

In [20]:

def preprocess_image(image_path):
    img = keras.utils.load_img(
        image_path, target_size=(img_height, img_width))
    img = keras.utils.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = keras.applications.vgg19.preprocess_input(img)
    return img

def deprocess_image(img):
    img = img.reshape((img_height, img_width, 3))
    img[:, :, 0] += 103.939
    img[:, :, 1] += 116.779
    img[:, :, 2] += 123.68
    img = img[:, :, ::-1]
    img = np.clip(img, 0, 255).astype("uint8")
    return img

**Use a pretrained VGG19 model to create the feature extractor**

In [21]:
model = keras.applications.vgg19.VGG19(weights="imagenet", include_top=False)

outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
feature_extractor = keras.Model(inputs=model.inputs, outputs=outputs_dict)

**Content loss**

In [22]:
def content_loss(base_img, combination_img):
    return tf.reduce_sum(tf.square(combination_img - base_img))

**Style loss**

It uses a Gam Matrix. A good reference is at [Wikipedia](https://en.wikipedia.org/wiki/Gram_matrix)



In [23]:
def gram_matrix(x):
    x = tf.transpose(x, (2, 0, 1))
    features = tf.reshape(x, (tf.shape(x)[0], -1))
    gram = tf.matmul(features, tf.transpose(features))
    return gram

def style_loss(style_img, combination_img):
    S = gram_matrix(style_img)
    C = gram_matrix(combination_img)
    channels = 3
    size = img_height * img_width
    return tf.reduce_sum(tf.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))

**Total variation loss**

loss = distance(style(ref_img) - style(gen_img)) + distance(content(orig_img) - content(gen_img))

+ Distance is a norm function such as the L2 norm.

+ Content is a function that takes an image and computes a representation of its content

+ Style is a function that takes an image and computes a representation of its style.

+ Minimizing this loss causes 

   - style(generated_image)   to be close to the style(reference_image)
   - content(generated_image) to be close to the content(generated_image)


In [24]:
def total_variation_loss(x):
    a = tf.square(
        x[:, : img_height - 1, : img_width - 1, :] - x[:, 1:, : img_width - 1, :]
    )
    b = tf.square(
        x[:, : img_height - 1, : img_width - 1, :] - x[:, : img_height - 1, 1:, :]
    )
    return tf.reduce_sum(tf.pow(a + b, 1.25))

**Defining the final loss to minimize**

In [25]:
style_layer_names = [
    "block1_conv1",
    "block2_conv1",
    "block3_conv1",
    "block4_conv1",
    "block5_conv1",
]
content_layer_name = "block5_conv2"
total_variation_weight = 1e-6
style_weight = 1e-6
content_weight = 2.5e-8

def compute_loss(combination_image, base_image, style_reference_image):
    input_tensor = tf.concat(
        [base_image, style_reference_image, combination_image], axis=0
    )
    features = feature_extractor(input_tensor)
    loss = tf.zeros(shape=())
    layer_features = features[content_layer_name]
    base_image_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss = loss + content_weight * content_loss(
        base_image_features, combination_features
    )
    for layer_name in style_layer_names:
        layer_features = features[layer_name]
        style_reference_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        style_loss_value = style_loss(
          style_reference_features, combination_features)
        loss += (style_weight / len(style_layer_names)) * style_loss_value

    loss += total_variation_weight * total_variation_loss(combination_image)
    return loss

###Setting up the gradient-descent process**

Note that we use @tf.function

The @tf.function is a decorator that converts standard Python functions into TensorFlow graphs. 

This conversion offers several benefits, particularly for performance and deployment.   

#### Graph Compilation:

+ TF graphs are optimized representations of computations. When we apply @tf.function, TF traces the function execution and constructs an equivalent graph.
   
+ The graph can then be optimized by TensorFlow's runtime, leading to faster execution.
   
#### Performance Improvement:

+ Python is an interpreter with significant overhead that can slow down TF operations, especially in loops and complex computations.

+ @tf.function reduces that overhead by executing the graph directly in TF C++ runtime, bypassing the Python interpreter for each operation.

+ This is especially important in training loops, where many operations are performed repeatedly.


#### Portability and Deployment:

+ TF graphs can be saved and deployed to various platforms (servers, mobile, embedded) without requiring the Python interpreter.
   
+ @tf.function makes it easier to export models for those deployment scenarios.


#### Automatic Differentiation:

+ TF automatic differentiation works seamlessly with graphs.

+ @tf.function ensures that the operations within the Python function are captured within the graph, allowing TF to compute gradients efficiently.

#### Optimization:

+ TF can perform optimizations such as constant folding, and other graph optimizations.   

+ This can lead to significant improvements of model execution.

+ Tracing: @tf.function traces the Python function based on the input argument's types and shapes. If the input types or shapes change, TF retraces the function, which can introduce overhead.   

+ Python Side Effects: Avoid Python side effects (e.g., printing, file I/O) within @tf.function as they may not behave as expected or may only execute during tracing. Prefer Tensorflow operations.

+  When possible stick to TF operations within @tf.function for optimal performance.

In [16]:


@tf.function
def compute_loss_and_grads(combination_image, base_image, style_reference_image):
    with tf.GradientTape() as tape:
        loss = compute_loss(combination_image, base_image, style_reference_image)
    grads = tape.gradient(loss, combination_image)
    return loss, grads

optimizer = keras.optimizers.SGD(
    keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=100.0, decay_steps=100, decay_rate=0.96
    )
)

base_image = preprocess_image(base_image_path)
style_reference_image = preprocess_image(style_reference_image_path)
combination_image = tf.Variable(preprocess_image(base_image_path))

iterations = 4000
for i in range(1, iterations + 1):
    loss, grads = compute_loss_and_grads(
        combination_image, base_image, style_reference_image
    )
    optimizer.apply_gradients([(grads, combination_image)])
    if i % 100 == 0:
        print(f"Iteration {i}: loss={loss:.2f}")
        img = deprocess_image(combination_image.numpy())
        fname = f"combination_image_at_iteration_{i}.png"
        keras.utils.save_img(fname, img)

Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(3, 400, 555, 3))
I0000 00:00:1743260537.729531   12769 cuda_dnn.cc:529] Loaded cuDNN version 90501


Iteration 100: loss=9233.24
Iteration 200: loss=7688.99
Iteration 300: loss=7009.55
Iteration 400: loss=6607.16
Iteration 500: loss=6331.62
Iteration 600: loss=6127.18
Iteration 700: loss=5967.48
Iteration 800: loss=5838.44
Iteration 900: loss=5731.58
Iteration 1000: loss=5641.18
Iteration 1100: loss=5563.62
Iteration 1200: loss=5496.23
Iteration 1300: loss=5437.28
Iteration 1400: loss=5385.33
Iteration 1500: loss=5339.27
Iteration 1600: loss=5298.25
Iteration 1700: loss=5261.47
Iteration 1800: loss=5228.17
Iteration 1900: loss=5197.96
Iteration 2000: loss=5170.43
Iteration 2100: loss=5145.31
Iteration 2200: loss=5122.27
Iteration 2300: loss=5101.14
Iteration 2400: loss=5081.70
Iteration 2500: loss=5063.78
Iteration 2600: loss=5047.17
Iteration 2700: loss=5031.79
Iteration 2800: loss=5017.52
Iteration 2900: loss=5004.27
Iteration 3000: loss=4991.90
Iteration 3100: loss=4980.36
Iteration 3200: loss=4969.55
Iteration 3300: loss=4959.43
Iteration 3400: loss=4949.97
Iteration 3500: loss=49

### Conclusions