# Neural Style Transfer

This project implements basic one-to-one Neural Style Transfer: a single content image is restyled to match a single style image.

In [6]:
import torch
import torch.nn  as nn

import torch.optim as optim

from PIL import Image
import torchvision.transforms as transforms
import torchvision.models as models

from torchvision.utils import save_image

Importing the pretrained VGG19 model, from which we will select five convolutional layers (all with 3x3 kernels) whose outputs serve as the source for calculating the style and content losses.

In [7]:
# Keep only the convolutional part ("features") of the ImageNet-pretrained VGG19.
model = models.vgg19(pretrained=True).features

# Print the layer stack so that the index of every layer can be read off.
print(model)

# In the original paper the network is organised into conv blocks separated by
# MaxPool layers. In the printed Sequential, the first Conv2d of each block
# sits at indices 0, 5, 10, 19 and 28.
# The indices are stored as strings because they are later compared against
# str(layer_num).
featureLayers = [str(index) for index in (0, 5, 10, 19, 28)]



Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): MaxPoo

Creating a new module that runs VGG19 only up to the last chosen layer and returns just the activations of the chosen feature layers, for simplicity

In [8]:
class VGG(nn.Module):
    """Truncated VGG19 that returns the activations of selected layers.

    Args:
        chosen_features: Iterable of layer indices (ints or strings) into
            ``vgg19().features`` whose outputs should be returned by
            ``forward``. When omitted, the notebook-level ``featureLayers``
            list is used, so ``VGG()`` behaves as before.

    Raises:
        ValueError: If no feature layer is selected.
    """

    def __init__(self, chosen_features=None):
        super().__init__()

        if chosen_features is None:
            chosen_features = featureLayers

        # Normalise to strings so both 5 and '5' are accepted.
        self.chosen_features = [str(index) for index in chosen_features]
        if not self.chosen_features:
            raise ValueError("chosen_features must contain at least one layer index")

        # Nothing after the deepest requested layer is ever used, so drop it.
        # For the default layers (deepest index 28) this is features[:29].
        last_layer = max(int(index) for index in self.chosen_features)
        self.model = models.vgg19(pretrained=True).features[: last_layer + 1]

    def forward(self, x):
        """Run ``x`` through the truncated network.

        Returns:
            A list with one activation tensor per chosen layer, in network
            order.
        """
        features = []

        for layer_num, layer in enumerate(self.model):
            x = layer(x)

            if str(layer_num) in self.chosen_features:
                # NOTE: VGG's ReLU layers are in-place, so a ReLU that follows
                # a chosen conv layer overwrites this very tensor. The stored
                # activation is therefore the post-ReLU one, except for a
                # chosen layer that is the last layer of the truncated model.
                features.append(x)

        return features

Loading the Images:
- Note that we are starting with a clone of the content image, rather than pure noise.

In [12]:
def load_image(image_name):
    """Load an image file as a batched 3-channel tensor on ``device``.

    Args:
        image_name: Path of the image file to open.

    Returns:
        The image after the ``loader`` transform pipeline, with a leading
        batch dimension of size 1, moved to ``device``.
    """
    # PNG files are frequently RGBA, palette-based or grayscale, while VGG's
    # first convolution expects exactly 3 input channels, so force RGB.
    # convert() also reads the pixel data, so the file can be closed right away.
    with Image.open(image_name) as image_file:
        image = image_file.convert("RGB")

    image = loader(image).unsqueeze(0)  # add an extra dimension at index 0 for batch size
    return image.to(device)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# `is_available` has to be *called*: the bare function object is always truthy
# and would select "cuda" even on machines without CUDA support (e.g. Macs).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both images are resized to image_size x image_size so that their feature
# maps have matching shapes.
image_size = 356


# Defining transforms
loader = transforms.Compose(
    [
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[], std=[]) is intentionally not applied;
        # if it is added, remember to undo the normalization before saving.
    ]
)

original_img = load_image("./content_image/stairsScenery.png")
style_img = load_image("./style_image/ghibliStyle.png")

# The optimisation starts from a copy of the content image (not from noise);
# its pixels are the only values that receive gradient updates.
generated = original_img.clone().requires_grad_(True)

Defining the Hyperparameters:

In [13]:
total_steps = 6000  # number of optimiser updates applied to the generated image
learning_rate = 0.001
alpha = 1  # weight of the content loss; different from original paper
beta = 0.01  # weight of the style loss
# Only the pixels of the generated image are optimised.
optimizer = optim.Adam([generated], lr=learning_rate)

# eval() only switches layers to inference mode; it does not freeze anything.
# requires_grad_(False) is what actually freezes the VGG weights, so no
# gradients are computed or accumulated for them during the optimisation.
model = VGG().to(device).eval().requires_grad_(False)



Training the model:
- Here, `total_steps` is the number of optimization steps, i.e. how many times the generated image is updated.

In [None]:
def _gram_matrix(feature):
    """Return the (channel x channel) Gram matrix of a feature map.

    The feature map is flattened to (channel, height * width), which is only
    valid for a batch containing a single image.
    """
    _, channel, height, width = feature.shape
    flattened = feature.view(channel, height * width)
    return flattened.mm(flattened.t())


# The content and style images never change, so their features (and the style
# Gram matrices) are computed once, without autograd bookkeeping, instead of
# being recomputed on every step.
with torch.no_grad():
    original_image_features = model(original_img)
    style_features = model(style_img)
    style_grams = [_gram_matrix(style_feature) for style_feature in style_features]

for step in range(total_steps):
    generated_features = model(generated)

    style_loss = original_loss = 0

    for gen_feature, orig_feature, style_gram in zip(
        generated_features, original_image_features, style_grams
    ):
        # Content loss: mean squared difference between the feature maps.
        original_loss += torch.mean((gen_feature - orig_feature) ** 2)

        # Style loss: mean squared difference between the Gram matrices.
        gen_gram = _gram_matrix(gen_feature)
        style_loss += torch.mean((gen_gram - style_gram) ** 2)

    total_loss = alpha * original_loss + beta * style_loss

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    if step % 300 == 0:
        # .item() prints a plain number instead of a tensor repr with grad_fn.
        print(f"step {step}: total_loss={total_loss.item():.4f}")
        save_image(generated, "generated.png")

# The periodic checkpoint above never includes the updates made after the last
# multiple of 300, so write the final result explicitly.
save_image(generated, "generated.png")