In [1]:
# Make Google Drive visible inside the Colab VM; the image paths used later in
# this notebook live under this mount point.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
import os

# Directory on the mounted Google Drive that contains kaggle.json.
os.environ.update({"KAGGLE_CONFIG_DIR": "/content/drive/My Drive/Kaggle"})

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.utils import save_image

In [4]:
class VGG(nn.Module):
    """Truncated, pretrained VGG-19 used as a fixed feature extractor.

    ``forward`` pushes the input through the kept layers one at a time and
    returns the intermediate activations of the layers whose index (as a
    string) is listed in ``chosen_features``.
    """

    def __init__(self):
        super().__init__()

        # Taps are spread over shallow and deep layers: early layers respond to
        # low-level features such as edges, later ones to high-level features,
        # so the style computation sees correlations at both levels.
        # The commonly suggested tap set is ['0','5','10','19','28']; the set
        # below gave the author better results.
        self.chosen_features = ['3', '8', '17', '26']

        # Keep only the first 29 modules of the convolutional part: the
        # classification head and the last few layers are dropped.
        self.model = models.vgg19(pretrained=True).features[:29]

    def forward(self, x):
        """Return the list of activations captured at the chosen layers, in layer order."""
        tapped = []
        activation = x

        for index, module in enumerate(self.model):
            activation = module(activation)

            if str(index) in self.chosen_features:
                tapped.append(activation)

        return tapped

In [5]:
def load_image(image_name):
    """Load an image file as a batched tensor on the active device.

    Args:
        image_name: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The tensor produced by the notebook-level ``loader`` transform with a
        leading batch dimension of size 1, moved to the notebook-level
        ``device``.
    """
    # The context manager closes the underlying file as soon as the pixels are
    # decoded. Converting to RGB guarantees three channels, so grayscale, RGBA
    # or CMYK files do not break the 3-channel first convolution of VGG.
    with Image.open(image_name) as img:
        image = img.convert("RGB")

    image = loader(image).unsqueeze(0)  # add a leading dimension for the batch size
    return image.to(device)

In [6]:
# Use the GPU when one is actually available, otherwise fall back to the CPU.
# `is_available` must be *called*: the bare function object is always truthy,
# which would select "cuda" even on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size (height, width) every input image is resized to before entering the network.
image_size_h = 300
image_size_w = 450

In [7]:
# Preprocessing applied to every image: resize to the configured
# (height, width), then convert the PIL image to a tensor.
loader = transforms.Compose([
    transforms.Resize(size=(image_size_h, image_size_w)),
    transforms.ToTensor(),
])

In [15]:
# Content image: supplies the scene the generated image should keep.
original_img = load_image(
    "/content/drive/MyDrive/Kaggle/content images/best bird.jpeg"
)
# Style image: supplies the channel correlations the generated image should imitate.
style_img = load_image(
    "/content/drive/MyDrive/Kaggle/cstyle images/Duchamp_-_Nude_Descending_a_Staircase.jpg"
)

In [16]:
# Build the feature extractor on the target device, switch it to evaluation
# mode and freeze every parameter: the network itself is never trained here.
model = VGG()
model = model.to(device)
model.eval()
model.requires_grad_(False)

In [17]:
# Show the layer structure of the truncated VGG feature extractor.
print(model)

VGG(
  (model): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding

In [18]:
## the model is frozen, so the generated image is the only thing the optimizer updates;
## it starts out as a copy of the content image

generated = original_img.clone()
generated.requires_grad_(True)

In [19]:
## starting from a random-noise image requires more computation, as well as better hardware, to give a satisfactory result

#generated = torch.randn(original_img.shape, device=device, requires_grad=True)

In [20]:
# Hyperparameters
total_steps = 60_000    # number of optimisation steps
learning_rate = 1e-4    # Adam step size
alpha = 1               # weight of the content (original image) loss
beta = 0.01             # weight of the style loss

# Only the generated image is optimised.
optimizer = optim.Adam(params=[generated], lr=learning_rate)

### Style is defined as the correlation between activations across channels.

Correlation tells us which of the high-level features tend to occur together and which never occur together. <br> Here, we use the degree of correlation between channels as a measure of style. We construct a style loss that minimizes the difference between the Gram matrices (which act as correlation matrices) of the style image and the generated image, so that, in each chosen layer, the generated image learns correlations between channel activations similar to those of the style image.
<br>
Here, the Gram matrix computes the unnormalized, uncentered cross-covariance between channels, which is used as a proxy for correlation.

In [None]:
def _gram_matrix(feature_map):
    """Return the (channel x channel) Gram matrix of one feature map.

    The reshape drops the batch axis, so this only works for a batch of size 1,
    which is how the images in this notebook are loaded.
    """
    _, channel, height, width = feature_map.shape
    flattened = feature_map.view(channel, height * width)
    return flattened.mm(flattened.t())


# The network is frozen and the content/style images never change, so their
# features (and the style Gram matrices) are identical at every step. Compute
# them once, without tracking gradients, instead of on every iteration.
with torch.no_grad():
    original_img_features = model(original_img)
    style_features = model(style_img)
    style_grams = [_gram_matrix(style_feature) for style_feature in style_features]

for step in range(total_steps):
    # Only the generated image changes between steps, so only it is passed
    # through the network inside the loop.
    generated_features = model(generated)

    style_loss = 0
    original_loss = 0

    for gen_feature, orig_feature, style_gram in zip(generated_features, original_img_features, style_grams):
        # Content loss: mean squared difference between activations
        # (also works with Mean Absolute Error loss).
        original_loss += torch.mean((gen_feature - orig_feature) ** 2)

        # Style loss: mean squared difference between the Gram matrices of the
        # generated image and the style image at this layer.
        style_loss += torch.mean((_gram_matrix(gen_feature) - style_gram) ** 2)

    total_loss = alpha * original_loss + beta * style_loss
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # Every 200 steps, report the loss and save a snapshot of the image.
    if step % 200 == 0:
        print(total_loss)
        save_image(generated, f"generated{step}.jpeg")

tensor(615628., device='cuda:0', grad_fn=<AddBackward0>)
tensor(251591.9844, device='cuda:0', grad_fn=<AddBackward0>)
tensor(134070.3125, device='cuda:0', grad_fn=<AddBackward0>)
tensor(84691.3750, device='cuda:0', grad_fn=<AddBackward0>)
tensor(61132.1992, device='cuda:0', grad_fn=<AddBackward0>)
tensor(46942.5898, device='cuda:0', grad_fn=<AddBackward0>)
tensor(37134.8320, device='cuda:0', grad_fn=<AddBackward0>)
tensor(29935.0723, device='cuda:0', grad_fn=<AddBackward0>)
tensor(24484.3945, device='cuda:0', grad_fn=<AddBackward0>)
tensor(20278.4336, device='cuda:0', grad_fn=<AddBackward0>)
tensor(17007.6523, device='cuda:0', grad_fn=<AddBackward0>)
tensor(14437.0859, device='cuda:0', grad_fn=<AddBackward0>)
tensor(12392.3945, device='cuda:0', grad_fn=<AddBackward0>)
tensor(10754.6094, device='cuda:0', grad_fn=<AddBackward0>)
tensor(9430.2617, device='cuda:0', grad_fn=<AddBackward0>)
tensor(8352.6338, device='cuda:0', grad_fn=<AddBackward0>)
tensor(7469.4585, device='cuda:0', grad_fn=