In [1]:
import os
import numpy as np
from scipy.optimize import minimize

from tensorflow import keras
import tensorflow as tf
from keras.applications.vgg19 import VGG19

2024-06-06 11:21:58.014717: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-06 11:21:58.538805: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def get_model():
    """Create a feature extractor that exposes every VGG19 layer output.

    Returns:
        A keras.Model that takes the VGG19 inputs and returns a dict mapping
        each layer name to that layer's output.
    """
    # VGG19 with pre-trained ImageNet weights, built with include_top = False
    backbone = VGG19(include_top = False, weights = 'imagenet')

    # Symbolic output of every layer, keyed by the layer's name.
    layer_outputs = {layer.name: layer.output for layer in backbone.layers}

    # Wrap inputs and the per-layer outputs into a single model.
    return keras.Model(inputs = backbone.inputs, outputs = layer_outputs)

In [3]:
# Height of the generated image; the width is derived from it by
# get_result_image_size so that the content image keeps its aspect ratio.
RESIZE_HEIGHT = 500

# Passed to the optimizer as its `maxiter` option and embedded in the name of
# the final result file.
NUM_ITER = 3000

# Weights of the different loss components (combined in compute_loss).
# NOTE(review): the trailing comments look like previously tried values — confirm.
CONTENT_WEIGHT = 8e-4 # 8e-4
STYLE_WEIGHT = 8e-3 # 8e-4

# The layer to use for the content loss (a key of the feature dict returned by
# the model from get_model).
# NOTE(review): the trailing comment looks like an alternative layer — confirm.
CONTENT_LAYER_NAME = "block5_conv2" # "block2_conv2"

# List of layers to use for the style loss; compute_style_loss averages the
# per-layer style losses over this list.
STYLE_LAYER_NAMES = [
    "block1_conv1",
    "block2_conv1",
    "block3_conv1",
    "block4_conv1",
    "block5_conv1",
]

In [4]:
def get_result_image_size(image_path, result_height):
    """Compute the output size that keeps the aspect ratio of an image file.

    Args:
        image_path: Path of the image whose proportions are used.
        result_height: Desired height of the result.

    Returns:
        A (height, width) tuple: `result_height` unchanged, and the width
        scaled proportionally and truncated to an int.
    """
    source_image = keras.preprocessing.image.load_img(image_path)
    # `.size` is unpacked as (width, height).
    source_width, source_height = source_image.size
    scaled_width = source_width * result_height / source_height
    return result_height, int(scaled_width)

In [5]:
def gram_matrix(x):
    """Return the Gram matrix of a rank-3 feature tensor.

    Args:
        x: Tensor whose last axis (axis 2) is moved to the front before
            flattening, so each row of the flattened matrix is one slice
            along that axis.

    Returns:
        A square tensor holding the inner products between all pairs of rows
        of the flattened matrix.
    """
    leading = tf.transpose(x, (2, 0, 1))
    # One row per slice along the (former) last axis, everything else flattened.
    rows = tf.reshape(leading, (tf.shape(leading)[0], -1))
    return tf.matmul(rows, tf.transpose(rows))

def style_loss(style_features, combination_features, combination_size):
    """Style loss between two feature tensors, based on their Gram matrices.

    Args:
        style_features: Features of the style image for one layer.
        combination_features: Features of the generated image for the same layer.
        combination_size: Size factor used in the normalisation (squared in
            the denominator).

    Returns:
        The sum of squared differences between the two Gram matrices divided
        by 4 * channels^2 * combination_size^2, where `channels` is the size
        of axis 2 of `style_features`.
    """
    gram_difference = gram_matrix(style_features) - gram_matrix(combination_features)
    channels = style_features.shape[2]
    normaliser = 4.0 * (channels ** 2) * (combination_size ** 2)
    return tf.reduce_sum(tf.square(gram_difference)) / normaliser

def compute_content_loss(content_features, combination_features):
    """Content loss at the layer named by CONTENT_LAYER_NAME.

    Args:
        content_features: Mapping from layer name to features of the content image.
        combination_features: Mapping from layer name to features of the
            generated image.

    Returns:
        Half of the summed squared difference between the two feature tensors.
    """
    target = content_features[CONTENT_LAYER_NAME]
    current = combination_features[CONTENT_LAYER_NAME]
    squared_error = tf.square(current - target)
    return tf.reduce_sum(squared_error) / 2

def compute_style_loss(style_features, combination_features, combination_size):
    """Average the per-layer style losses over STYLE_LAYER_NAMES.

    Args:
        style_features: Mapping from layer name to batched features of the style image.
        combination_features: Mapping from layer name to batched features of
            the generated image.
        combination_size: Size factor forwarded to style_loss.

    Returns:
        The sum over all style layers of that layer's style loss divided by
        the number of style layers.
    """
    layer_count = len(STYLE_LAYER_NAMES)
    # Index [0] selects the first entry of the batch axis for both feature tensors.
    return sum(
        style_loss(
            style_features[layer_name][0],
            combination_features[layer_name][0],
            combination_size,
        ) / layer_count
        for layer_name in STYLE_LAYER_NAMES
    )

def compute_loss(feature_extractor, combination_image, content_features, style_features):
    """Compute the weighted total loss and its two components.

    Args:
        feature_extractor: Callable mapping an image tensor to a dict of
            per-layer features.
        combination_image: The generated image being optimised.
        content_features: Per-layer features of the content image.
        style_features: Per-layer features of the style image.

    Returns:
        A tuple (total, content, style) where
        total = CONTENT_WEIGHT * content + STYLE_WEIGHT * style.
    """
    features = feature_extractor(combination_image)

    # Size factor for the style normalisation: product of axes 1 and 2 of the image.
    image_area = combination_image.shape[1] * combination_image.shape[2]

    content_term = compute_content_loss(content_features, features)
    style_term = compute_style_loss(style_features, features, image_area)

    weighted_total = CONTENT_WEIGHT * content_term + STYLE_WEIGHT * style_term
    return weighted_total, content_term, style_term

In [6]:
def preprocess_image(image_path, target_height, target_width):
    """Load an image file and convert it into an input tensor for the VGG19 model.

    Args:
        image_path: Path of the image file to load.
        target_height: Height the image is resized to on load.
        target_width: Width the image is resized to on load.

    Returns:
        A tensor with a leading batch axis of size 1 holding the preprocessed image.
    """
    img = keras.preprocessing.image.load_img(image_path, target_size = (target_height, target_width))
    arr = keras.preprocessing.image.img_to_array(img)
    # Add the batch axis in front.
    arr = np.expand_dims(arr, axis = 0)
    # Use the preprocessing that belongs to the network built in get_model
    # (VGG19) rather than the VGG16 variant, so model and preprocessing match.
    arr = keras.applications.vgg19.preprocess_input(arr)
    return tf.convert_to_tensor(arr)

In [7]:
# Absolute path of the current working directory (not referenced by the other
# cells visible in this notebook).
path = os.path.abspath(os.getcwd())
# Input images, given relative to the working directory.
content_image_path = "./content-image-pusgiwa.jpeg"
style_image_path = "./great-wave.jpg"

# Output resolution: fixed height RESIZE_HEIGHT, width scaled from the content image.
result_height, result_width = get_result_image_size(content_image_path, RESIZE_HEIGHT)
print("result resolution: (%d, %d)" % (result_height, result_width))

result resolution: (500, 500)


In [9]:
# Both images are loaded at the same (result_height, result_width), which was
# derived from the content image; the style image is resized to that resolution
# regardless of its own proportions.
content_tensor = preprocess_image(content_image_path, result_height, result_width)
style_tensor = preprocess_image(style_image_path, result_height, result_width)

# Starting point of the optimisation: values drawn by tf.random.uniform with the
# shape of the style tensor. No seed is passed here.
generated_image = tf.Variable(tf.random.uniform(style_tensor.shape, dtype=tf.dtypes.float32))

In [10]:
# Feature extractor returning the outputs of all VGG19 layers keyed by layer name.
vgg19 = get_model()
# Display the model's layer summary.
vgg19.summary()

In [11]:
# The features of the content and style images do not depend on the generated
# image, so they are computed once here and reused by every loss evaluation.
content_features = vgg19(content_tensor)
style_features = vgg19(style_tensor)

In [12]:
def save_result(generated_image, result_height, result_width, name):
    """Convert the generated image tensor to pixels and write it to a file.

    Args:
        generated_image: Tensor holding the generated image.
        result_height: Height of the image.
        result_width: Width of the image.
        name: Path of the file to write.
    """
    keras.preprocessing.image.save_img(
        name,
        deprocess_image(generated_image, result_height, result_width),
    )

# Util function to convert a tensor into a valid image
def deprocess_image(tensor, result_height, result_width):
    """Turn a preprocessed image tensor back into displayable uint8 pixels.

    Args:
        tensor: Object with a `.numpy()` method whose array holds
            result_height * result_width * 3 values.
        result_height: Height of the image.
        result_width: Width of the image.

    Returns:
        A uint8 array of shape (result_height, result_width, 3) with the
        channel order reversed and values clipped to [0, 255].
    """
    pixels = tensor.numpy().reshape((result_height, result_width, 3))

    # Undo the zero-centering by adding the mean pixel value back, channel by channel.
    for channel, mean_value in enumerate((103.939, 116.779, 123.680)):
        pixels[:, :, channel] += mean_value

    # 'BGR'->'RGB'
    rgb = pixels[:, :, ::-1]
    return np.clip(rgb, 0, 255).astype("uint8")

In [None]:
# continue_generated_image_path = "./generated_rektorat_starry_result_5000_0.000800_0.008000.png"
# continue_generated_tensor = preprocess_image(continue_generated_image_path, result_height, result_width)
# generated_image.assign(continue_generated_tensor)

In [15]:
def compute_loss_and_grads(image):
    """Evaluate the losses for `image` and the gradient of the total loss.

    Args:
        image: Image tensor to evaluate.

    Returns:
        A tuple (total loss, gradient of the total loss with respect to
        `image`, content loss, style loss).
    """
    with tf.GradientTape() as grad_tape:
        # Watch explicitly: callers pass a plain tensor built with
        # tf.convert_to_tensor, not a tf.Variable.
        grad_tape.watch(image)
        losses = compute_loss(vgg19, image, content_features, style_features)
    total, content_part, style_part = losses
    gradient = grad_tape.gradient(total, image)
    return total, gradient, content_part, style_part

# Flatten the generated image into a 1-D NumPy array; it is used as the
# starting point `x0` of scipy.optimize.minimize.
generated_image_np = generated_image.numpy().flatten()

def objective_function(image_flat):
    """Evaluate losses and gradient for a flattened image, as NumPy values.

    Args:
        image_flat: Flat NumPy array holding the image; it is reshaped to the
            shape of `style_tensor`.

    Returns:
        A tuple (total loss, flattened gradient, content loss, style loss),
        each converted with `.numpy()`.
    """
    # Restore the image shape and hand it to TensorFlow as float32.
    image_tensor = tf.convert_to_tensor(
        image_flat.reshape(style_tensor.shape), dtype=tf.float32
    )
    # One call yields all four TensorFlow results; convert each to NumPy.
    tf_results = compute_loss_and_grads(image_tensor)
    total_np, grads_np, content_np, style_np = (value.numpy() for value in tf_results)

    return total_np, grads_np.flatten(), content_np, style_np

# Progress callback for the optimizer: prints the losses and saves snapshots.
iteration = [0]  # One-element list so callback can update the count without `global`
def callback(xk):
    """Report the losses at the current iterate and save a snapshot every 100 calls.

    Args:
        xk: Current flattened image, as handed over by the optimizer.
    """
    # Build the tensor once; it serves both the loss report and the snapshot.
    current_image = tf.convert_to_tensor(xk.reshape(style_tensor.shape), dtype=tf.float32)
    # Only loss values are printed, so a forward pass through compute_loss is
    # enough; no gradient (and hence no backward pass) is computed here.
    loss, content_loss, style_loss_value = (
        value.numpy()
        for value in compute_loss(vgg19, current_image, content_features, style_features)
    )
    iteration[0] += 1
    print(f"iter: {iteration[0]}, loss: {loss:.6f}, content loss: {content_loss:.6f}, style_loss: {style_loss_value:.6f}")
    if iteration[0] % 100 == 0:
        # Save the current state of the image
        name = f"generated_pusgiwa_wave_at_iteration_{iteration[0]}.png"
        save_result(current_image, result_height, result_width, name)

In [16]:
# Optimize using L-BFGS-B
def _loss_and_grad(image_flat):
    """Evaluate the objective once and return (loss, gradient) for SciPy."""
    loss_value, grad_flat = objective_function(image_flat)[:2]
    # Cast explicitly to double precision instead of relying on SciPy to
    # convert whatever dtype the TensorFlow computation produced.
    return float(loss_value), grad_flat.astype(np.float64)

# With jac=True, `fun` returns the loss and its gradient together, so every
# evaluation runs the network once instead of once for `fun` and once for `jac`.
result = minimize(
    fun=_loss_and_grad,
    x0=generated_image_np,
    jac=True,
    method='L-BFGS-B',
    options={
        'maxiter': NUM_ITER,    # NUM_ITER should match the intended maximum iterations
        'maxcor': 20,           # maximum number of variable metric corrections
        'ftol': 1e-9,           # function value change tolerance
        'gtol': 1e-5            # gradient norm tolerance
        },
    callback=callback
)

# Convert the optimized result back to a TensorFlow tensor
optimized_image_np = result.x.reshape(style_tensor.shape)
optimized_image = tf.convert_to_tensor(optimized_image_np, dtype=tf.float32)

# Update the TensorFlow variable with the optimized values
generated_image.assign(optimized_image)

# Save the final result (the file name records the configured NUM_ITER, not the
# number of iterations the optimizer actually performed)
name = "generated_pusgiwa_wave_result_%d_%f_%f.png" % (NUM_ITER, CONTENT_WEIGHT, STYLE_WEIGHT)
save_result(generated_image, result_height, result_width, name)

iter: 1, loss: 613046.750000, content loss: 753214912.000000, style_loss: 1309351.500000
iter: 2, loss: 611952.750000, content loss: 751848256.000000, style_loss: 1309270.875000
iter: 3, loss: 610488.500000, content loss: 750018944.000000, style_loss: 1309172.875000
iter: 4, loss: 608799.375000, content loss: 747908928.000000, style_loss: 1309033.500000
iter: 5, loss: 606667.000000, content loss: 745245440.000000, style_loss: 1308839.000000
iter: 6, loss: 603918.062500, content loss: 741811712.000000, style_loss: 1308586.500000
iter: 7, loss: 600146.625000, content loss: 737101376.000000, style_loss: 1308195.500000
iter: 8, loss: 595102.000000, content loss: 730803328.000000, style_loss: 1307422.750000
iter: 9, loss: 587288.562500, content loss: 721054464.000000, style_loss: 1305622.000000
iter: 10, loss: 568846.437500, content loss: 698081664.000000, style_loss: 1297643.250000
iter: 11, loss: 515243.218750, content loss: 631900928.000000, style_loss: 1215313.750000
iter: 12, loss: 470

In [16]:
# for iter in range(NUM_ITER):
#     with tf.GradientTape() as tape:
#         loss = compute_loss(vgg16, generated_image, content_features, style_features)

#     grads = tape.gradient(loss, generated_image)

#     print("iter: %4d, loss: %8.f" % (iter, loss))
#     optimizer.apply_gradients([(grads, generated_image)])

#     if (iter + 1) % 100 == 0:
#         name = "generated_at_iteration_%d.png" % (iter + 1)
#         save_result(generated_image, result_height, result_width, name)

# name = "result_%d_%f_%f.png" % (NUM_ITER, CONTENT_WEIGHT, STYLE_WEIGHT)
# save_result(generated_image, result_height, result_width, name)

In [17]:
# Shape of the style tensor; the flat optimisation vector is reshaped to this shape.
style_tensor.shape

TensorShape([1, 500, 500, 3])