<a href="https://colab.research.google.com/github/bensivo/CS445-Project4/blob/main/style-transfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install keras tensorflow
!pip install tqdm

In [None]:
# The paper uses the VGG model as a feature encoder.
# Because VGG was trained for object recognition, its internal layers are a good representation of an image's shape / content.
#
# Source: https://franky07724-57962.medium.com/using-keras-pre-trained-models-for-feature-extraction-in-image-clustering-a142c6cdf5b1

from tensorflow.keras.applications.vgg19 import VGG19

# Setting include_top=False removes the final prediction layers, running this model as an encoder, not a classifier.
model = VGG19(weights='imagenet', include_top=False)

# summary() prints the layer table itself and returns None, so it must not be wrapped in print()
# (doing so emits a stray "None" after the table).
model.summary()


# Paper authors say they replaced "MaxPooling" with "AveragePooling" to get better results. This might be a future improvement.


In [None]:
from tensorflow.keras.models import Model

# Build a second model on top of VGG that returns intermediate activations.
# It shares VGG's input; each layer name in the tuple below contributes one entry to the
# output list. Layer names can be looked up in the model summary printed earlier.
feature_extraction_model = Model(
    inputs=model.input,
    outputs=[model.get_layer(name=layer_name).output for layer_name in ('block4_conv1',)],
)

In [None]:
from tensorflow.keras.applications.vgg19 import preprocess_input
from PIL import Image
import numpy as np

# Original image.
# convert('RGB') guarantees exactly three channels regardless of how the file is stored
# (RGBA, greyscale and palette images would otherwise produce an array VGG cannot consume).
# The context manager closes the file handle once the pixel data has been copied out.
with Image.open('./cup.jpg') as source_img:
    original_img = source_img.convert('RGB')

# Features of original image. Reshaping input data, then running it through our model.
original_img_input = np.array(original_img)                       # (height, width, 3) pixel array
original_img_input = np.expand_dims(original_img_input, axis=0)   # add the leading batch axis
original_img_input = preprocess_input(original_img_input)         # VGG19 preprocessing of the pixel values
original_img_features = feature_extraction_model.predict(original_img_input)

In [None]:
# Image recreation from features taken from one or more layers of the original network
#
# The general process for this is:
#   1. Generate an image of all random values (plain pixel values in the 0-255 range)
#   2. N times:
#       a. Apply the VGG19 preprocessing to our image and run it through the feature-extractor
#       b. Calculate the loss between the original image's features and the new image's features
#       c. Use tf's GradientTape to calculate the gradients of the loss
#       d. Use tf's optimizer to apply the gradients to the image, nudging it closer to the original image

import tensorflow as tf
from tqdm import tqdm


def save_generated_image(image_tensor, path):
    """Clip ``image_tensor`` to the valid 0-255 pixel range and save it to ``path``.

    Returns the saved pixels as a uint8 NumPy array with size-1 axes squeezed out.
    """
    pixels = tf.clip_by_value(image_tensor, 0, 255)
    pixels = pixels.numpy().squeeze().astype(np.uint8)
    tf.keras.preprocessing.image.save_img(path, pixels)
    return pixels


# Random generated image input
generated_img_input_tensor = tf.Variable(tf.random.uniform(original_img_input.shape, 0, 255))

optimizer = tf.optimizers.Adam(learning_rate=0.1)
for i in tqdm(range(1000)):
    with tf.GradientTape() as tape:
        # The target features were computed from a preprocess_input()-ed image, so the generated
        # image must go through the same preprocessing before feature extraction. Without it the
        # optimised tensor ends up in the network's preprocessed space rather than in displayable
        # pixel space, and the saved images come out with wrong colours.
        generated_img_preprocessed = tf.keras.applications.vgg19.preprocess_input(
            tf.convert_to_tensor(generated_img_input_tensor)
        )
        # The model is called as a function rather than via .predict(): .predict() returns NumPy
        # arrays, which the tape cannot trace, so no gradient would reach the image.
        generated_img_features = feature_extraction_model(generated_img_preprocessed)
        loss = tf.reduce_mean(tf.square(original_img_features - generated_img_features))

    gradients = tape.gradient(loss, generated_img_input_tensor)
    optimizer.apply_gradients([(gradients, generated_img_input_tensor)])

    if i % 100 == 0:
        print(f'Loss @ iteration {i} = {loss}')
        output = save_generated_image(generated_img_input_tensor, f'generated_image_{i}.jpg')

# Clip generated image to valid pixel range and save the final result
generated_image = save_generated_image(generated_img_input_tensor, 'generated_image.jpg')