<a href="https://colab.research.google.com/github/anna-dang/neural_style_transfer_implementation/blob/main/neural_style_transfer_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Implementing a Neural Style Transfer Paper with Tensorflow and Keras

Link to paper:  
https://arxiv.org/pdf/1508.06576v2.pdf

In this notebook, I translate the paper's equations into code.

In [None]:
%%shell
# Download the example images from the GitHub repo and unzip them (:
# The cell magic has to be the very first line of the cell, so this comment
# lives below it. -O / -o make the cell safe to re-run: the archive is
# overwritten instead of becoming main.zip.1, and unzip does not stop to ask
# before replacing files that were already extracted.

wget -O main.zip https://github.com/kathleenisrad/style-transfer-implementation/archive/main.zip
unzip -o main.zip

### Import things

In [None]:
import os

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from tensorflow import concat, convert_to_tensor, GradientTape, transpose, reshape, shape, matmul, Variable, zeros
from tensorflow.math import reduce_mean, reduce_sum
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
# Pre-trained VGG19 used purely as a feature extractor for 500x500 RGB images.
model = VGG19(
    weights="imagenet",
    include_top=False,  # drop the fully connected classifier head
    input_shape=(500, 500, 3),
    # NOTE: pooling='avg' adds a global-average-pooling step on top of the last
    # convolutional block; it does NOT swap the network's internal max-pooling
    # layers for average pooling (the substitution the paper suggests).
    pooling='avg',
)

In [None]:
# Freeze every VGG19 layer: the network only extracts features here, and the
# thing being optimised is the generated image, never the weights.
for layer in model.layers:
    layer.trainable = False

### Setting up image preprocessing

In [None]:
def image_to_tensor(img_path):
    """Load an image file and return it as a single-image batch tensor.

    The image is resized to 500x500 (the input size the VGG19 model in this
    notebook was built with), converted to an array and given a leading batch
    axis of length 1.

    NOTE(review): pixel values are passed on exactly as loaded; no VGG-specific
    preprocessing is applied in this function.

    Args:
        img_path: path of the image file to load.

    Returns:
        A tensor holding the image with a leading batch dimension of 1.
    """
    pil_image = load_img(img_path, target_size=(500, 500))
    pixels = img_to_array(pil_image)
    # Add the batch axis the model expects in front of the image axes.
    batched = np.expand_dims(pixels, axis=0)
    return convert_to_tensor(batched)

### Setting up the feature extractor

In [None]:
# Map each layer's name to its symbolic output so activations can be looked up by name.
outputs_dict = {layer.name: layer.output for layer in model.layers}
# A model sharing VGG19's input that returns the activations of every layer at once.
feature_extractor = Model(inputs=model.inputs, outputs=outputs_dict)

### Defining "style" and "content" layers

> "Higher layers in the network capture the high-level content in terms of objects and their
arrangement in the input image but do not constrain the exact pixel values of the reconstruction ... We therefore refer to the **feature responses in higher layers of the network as the content
representation.**"

> "**Style** can also be defined more locally by including only a **smaller number of lower layers**, leading to different visual experiences"

> "For the images shown in Fig 2 we matched the **content representation on layer ‘conv4_2’** and the
**style representations on layers ‘conv1_1’, ‘conv2_1’, ‘conv3_1’, ‘conv4_1’ and ‘conv5_1’** ($w_l = 1/5$ in those layers, $w_l = 0$ in all other layers)."

In [None]:
layer_names = [layer.name for layer in model.layers]

# The paper's content layer 'conv4_2' is called 'block4_conv2' in Keras' VGG19.
# It is selected by name rather than by its position in layer_names so the
# choice is readable and does not silently change if the layer list shifts.
content_layer = 'block4_conv2'
assert content_layer in layer_names, f'{content_layer} is not a layer of the model'
content_layer

In [None]:
# The paper uses conv1_1, conv2_1, conv3_1, conv4_1 and conv5_1 for style.
# block2_conv2 is used here instead of block2_conv1 because it gave better results.
# The layers are listed by name (rather than sliced out of layer_names by
# position) so the selection is explicit.
style_layers = [
    'block1_conv1',
    'block2_conv2',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]
missing_style_layers = [name for name in style_layers if name not in layer_names]
assert not missing_style_layers, f'layers not found in the model: {missing_style_layers}'
style_layers

### Defining Content Loss

> "So let $\vec{p}$ and $\vec{x}$ be the original image and the image that is generated and $P^l$ and $F^l$ their
respective feature representation in layer $l$. We then define the squared-error loss between the
two feature representations:"

![](https://drive.google.com/uc?export=view&id=1YoJpKgYnYy3W7s7DYE3CaIERt3HcFJUu)

In [None]:
def calc_content_loss(F, P):
    """Content loss between two feature representations of the same layer.

    Computes 0.5 * mean((F - P)**2). The paper sums the squared differences;
    the mean is used here instead because the sum produces a huge number.

    The result is kept as a tensor computed only with tensor operations
    (no conversion to NumPy), so a GradientTape recording the call can
    propagate gradients of the content term back to the generated image.

    Args:
        F: feature tensor of one image.
        P: feature tensor of the other image, same shape as F.

    Returns:
        Scalar tensor holding the content loss.
    """
    squared_error = (F - P) ** 2
    return 0.5 * reduce_mean(squared_error)

### Defining Style Loss
> "These feature correlations are given by the Gram matrix..."

![](https://drive.google.com/uc?export=view&id=1_EU7qvqPtN4JdySQqE324VtDa3bO0NNy)


> "...generate a texture that matches the style of a given image ... by minimising the mean-squared distance between the entries of the Gram matrix from the original image and the Gram matrix of the
image to be generated..."

![](https://drive.google.com/uc?export=view&id=11pCUnGZmtTbbjZiGUDLEODymJh6jc3jd)

> "... and the total loss is:"

![](https://drive.google.com/uc?export=view&id=1pSYHMNPOVhF21ydfFQtwb7ZyvgrRhyh5)


> "where $w_l$ are weighting factors of the contribution of each layer to the total loss ... ($w_l = 1/5$ in those layers)"

In [None]:
#first define gram matrix
def calc_gram_matrix(x):
    x = transpose(x, (2, 0, 1))
    features = reshape(x, (shape(x)[0], -1))
    gram = matmul(features, transpose(features))
    return gram

In [None]:
#next, define style loss:

def calc_style_loss(G, A):
  #calculate the mean squared distance between the two matrices:
  # I kinda just chose N and M randomly
  N = 32
  M = 5000

  #style loss equation
  E = (1/(4*(N**2)*(M**2)))*sum((G-A)**2) 
  style_loss = reduce_mean((1/5)*E)
  return style_loss

### Define Total Loss

> "The loss function we minimize is:"

![](https://drive.google.com/uc?export=view&id=1p_Dp3rVrorH0CymOIqD7cknBW806K6td)

> "...where α and β are the weighting factors for content and style reconstruction respectively."

In [None]:
#putting it all together:

def calc_total_loss(content_image, style_image, generated_image, alpha=0.2, beta=0.8):
    """Total style-transfer loss: alpha * content loss + beta * style loss.

    The three images are stacked into one batch and pushed through the feature
    extractor together, so batch index 0 is the content image, 1 the style
    image and 2 the generated image.

    Args:
        content_image: content image as a single-image batch tensor.
        style_image: style image as a single-image batch tensor.
        generated_image: image being optimised, as a single-image batch.
        alpha: weighting factor of the content loss.
        beta: weighting factor of the style loss.

    Returns:
        Scalar tensor holding the weighted total loss.
    """
    batch = concat([content_image, style_image, generated_image], axis=0)
    activations = feature_extractor(batch)

    # Content term: content image (index 0) against generated image (index 2).
    content_activations = activations[content_layer]
    content_loss = calc_content_loss(
        content_activations[0, :, :, :],
        content_activations[2, :, :, :],
    )

    # Style term: accumulate the Gram-matrix distance of every style layer,
    # style image (index 1) against generated image (index 2).
    style_loss = zeros(shape=())
    for name in style_layers:
        layer_activations = activations[name]
        style_gram = calc_gram_matrix(layer_activations[1, :, :, :])
        generated_gram = calc_gram_matrix(layer_activations[2, :, :, :])
        style_loss += calc_style_loss(style_gram, generated_gram)

    return zeros(shape=()) + alpha * content_loss + beta * style_loss

In [None]:
#calculate gradients
def compute_grads(content_array, style_array, generated_array):
    with GradientTape() as tape:
        loss = calc_total_loss(content_array, style_array, generated_array)
        grad = tape.gradient(loss, generated_array)
    return loss, grad

### Putting it all together into a function!

In [None]:
def style_transfer(content_path, style_path, iterations=1000, learning_rate=10, beta_1=0.9, epsilon=0.01):
    """Transfer the style of one image onto another and save snapshots.

    The generated image starts as a copy of the content image and is updated
    with Adam for iterations + 1 steps. Every 100th step the loss is printed
    and the current image is written to
    ./images/{style}_{content}/{style}_{content}_{step}.jpg, where {style} and
    {content} are the file names of the two images without their extension.

    Args:
        content_path: path of the content image.
        style_path: path of the style image.
        iterations: index of the last optimisation step (steps 0..iterations run).
        learning_rate: Adam learning rate.
        beta_1: Adam beta_1.
        epsilon: Adam epsilon.

    Raises:
        OSError: if the output directory cannot be created or a snapshot
            cannot be written.
    """
    # Take the names from the file names themselves rather than from fixed
    # character offsets, so any directory layout and extension length works.
    style_name = os.path.splitext(os.path.basename(style_path))[0]
    content_name = os.path.splitext(os.path.basename(content_path))[0]
    run_name = f'{style_name}_{content_name}'
    output_dir = f'./images/{run_name}'
    # Creates ./images as well; an existing directory is fine, real failures surface.
    os.makedirs(output_dir, exist_ok=True)

    #turn images into tensors
    style = image_to_tensor(style_path)
    content = image_to_tensor(content_path)
    generated = Variable(content)

    #create an optimizer
    optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1, epsilon=epsilon, amsgrad=True, name='Adam')

    #transfer the style over iterations + 1 steps (step 0 through step `iterations`)
    for i in range(iterations+1):
        loss, grads = compute_grads(content, style, generated)
        optimizer.apply_gradients([(grads, generated)])

        if i%100 == 0:
            print(f'-------------------------\nEpoch: {i} \nTotal Loss: {loss.numpy()}')
            img = generated.numpy().squeeze()
            img = np.clip(img, 0, 255)
            # The image is RGB (as loaded) while cv2.imwrite expects BGR, so the
            # channels are swapped before saving.
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            snapshot_path = f'{output_dir}/{run_name}_{i}.jpg'
            # cv2.imwrite signals failure through its return value only.
            if not cv2.imwrite(snapshot_path, img):
                raise OSError(f'could not write snapshot to {snapshot_path}')

In [None]:
# os.listdir returns entries in arbitrary order; sorting makes the order in
# which the styles are processed the same on every run.
style_images = sorted(os.listdir('./style-transfer-implementation-main/images/style_images'))
style_images

In [None]:
# Apply every style image to the cat photo.
# Feel free to play around with the number of iterations.
# To view the results, click on the little folder icon on the very left-hand side;
# the images are saved under ./images/{style_name}_{content_name}

iterations, learning_rate = 500, 10

for style in style_images:
    content_path = './style-transfer-implementation-main/images/content_images/cat.jpg'
    style_path = os.path.join('./style-transfer-implementation-main/images/style_images', style)
    style_transfer(
        content_path,
        style_path,
        iterations=iterations,
        learning_rate=learning_rate,
    )

In [None]:
# Apply every style image to the zion photo.

iterations, learning_rate = 500, 10

for style in style_images:
    content_path = './style-transfer-implementation-main/images/content_images/zion.jpg'
    style_path = os.path.join('./style-transfer-implementation-main/images/style_images', style)
    style_transfer(
        content_path,
        style_path,
        iterations=iterations,
        learning_rate=learning_rate,
    )