In [None]:
! pip install torch_snippets

# TODO: Import relevant packages
from torch_snippets import *
from torchvision import transforms
from torch.nn import functional as F
from torchvision.models import vgg19

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Pre-processing and post-processing pipelines for the images.
#
# pre_process: PIL image -> float tensor, per-channel normalisation
# (x - mean) / std, then scaling by 255.
pre_process = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # In-place scaling is safe here: the tensor was freshly created from the
    # PIL image by the steps above, so no caller-owned data is modified.
    transforms.Lambda(lambda x: x.mul_(255))
])

# post_process: inverse of pre_process's scaling and normalisation.
# Normalize computes (x - mean) / std, so mean = -m/s and std = 1/s
# gives x * s + m, which undoes the normalisation above.
post_process = transforms.Compose([
    # Out-of-place multiply on purpose: the argument is typically a view of a
    # tensor the caller still needs (e.g. the image being optimised). An
    # in-place mul_ would rescale that tensor every time this pipeline runs.
    transforms.Lambda(lambda x: x.mul(1./255)),
    transforms.Normalize(mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225], std=[1/0.229, 1/0.224, 1/0.225])
])

In [None]:
# Gram matrix of a batch of feature maps
class GramMatrix(nn.Module):
    """Compute the Gram matrix of each feature map in a batch.

    Input:  tensor of shape (b, c, h, w).
    Output: tensor of shape (b, c, c) whose entry [n, i, j] is the inner
            product of channels i and j of sample n over all h*w spatial
            positions, divided by h*w.
    """
    def forward(self, input):
        # Unpacking also enforces that the input is 4-dimensional.
        b, c, h, w = input.size()
        # reshape (unlike view) also accepts non-contiguous inputs, e.g.
        # transposed or channels-last tensors; it only copies when it has to.
        feat = input.reshape(b, c, h * w)
        # Batched (c, h*w) @ (h*w, c): channel-by-channel inner products.
        G = feat @ feat.transpose(1, 2)
        # Normalise by the number of spatial positions.
        G.div_(h * w)
        return G

In [None]:
# Style loss: MSE between the Gram matrix of the input and a target Gram matrix
class GramMSELoss(nn.Module):
    """Mean-squared error between Gram(input) and a precomputed target.

    `input` is a batch of feature maps; its Gram matrix is computed here.
    `target` is used as-is, so it must already be a Gram matrix.
    """
    def forward(self, input, target):
        gram = GramMatrix()(input)
        return F.mse_loss(gram, target)
    

Once we have the Gram matrices for both sets of features, we want them to
match as closely as possible, which is why the loss is the `mse_loss` between them.

In [None]:
# Feature extractor built on the `features` part of a pretrained VGG19
class vgg19_modified(nn.Module):
    """VGG19 feature stack that can return the outputs of chosen layers.

    The layers of `vgg19(pretrained=True).features` are stored one by one in
    `self.features`, so the output of any layer can be tapped by its index.
    """
    def __init__(self):
        super().__init__()
        # Keep the individual layers so forward() can capture per-layer outputs.
        features = list(vgg19(pretrained=True).features)
        self.features = nn.ModuleList(features).eval()
        # The network is only a fixed feature extractor. Freezing its
        # parameters stops autograd from computing weight gradients that
        # nothing uses; gradients still flow to the input image.
        self.features.requires_grad_(False)

    def forward(self, x, layers=()):
        """Run `x` through every layer and collect the requested outputs.

        Args:
            x: input tensor fed to the first layer.
            layers: indices into `self.features` whose outputs to return.
                Any order is allowed and duplicates are allowed.

        Returns:
            If `layers` is empty, the output of the last layer. Otherwise a
            list with one tensor per entry of `layers`, in the same order.

        Raises:
            IndexError: if an entry of `layers` is not a valid layer index.
        """
        wanted = set(layers)
        captured = {}
        # NOTE(review): the loop runs through all layers even after the last
        # requested one. If the following layers modify tensors in place
        # (torchvision's VGG ReLUs presumably do), stopping early would change
        # the captured values; confirm before adding an early exit.
        for idx, layer in enumerate(self.features):
            x = layer(x)
            if idx in wanted:
                captured[idx] = x
        if not wanted:
            return x
        missing = wanted.difference(captured)
        if missing:
            raise IndexError(
                f'layer indices {sorted(missing)} are out of range for '
                f'{len(self.features)} layers'
            )
        # Index by layer id so the result follows the caller's order exactly.
        return [captured[idx] for idx in layers]

In [None]:
# Instantiate the feature extractor, then place it on the selected device.
vgg = vgg19_modified()
vgg = vgg.to(device)

#### Import the content and style images:

In [None]:
# Download the content image (chicago.jpg) and the style image
# (the_shipwreck_of_the_minotaur.jpg) into the working directory.
# Earlier image sources, kept for reference (disabled):
# !wget https://www.dropbox.com/s/z1y0fy2r6z6m6py/60.jpg
# !wget https://www.dropbox.com/s/1svdliljyo0a98v/style_image.png
!wget https://raw.githubusercontent.com/bensains1/fast-style-transfer-master/master/examples/content/chicago.jpg
!wget https://raw.githubusercontent.com/bensains1/fast-style-transfer-master/master/examples/style/the_shipwreck_of_the_minotaur.jpg


In [None]:
# Load the style and content images at one common size, in RGB.
# PIL's resize takes (width, height), so both become 512 px wide, 412 px high.
# NOTE(review): the earlier comment asked for 512 x 512 x 3 but the code uses
# (512, 412); confirm which size is intended.
imgs = [
    Image.open(path).resize((512, 412)).convert('RGB')
    for path in ['./the_shipwreck_of_the_minotaur.jpg', './chicago.jpg']
]
# Pre-process each image, move it to the device and add a leading batch axis.
style_image = pre_process(imgs[0]).to(device).unsqueeze(0)
content_image = pre_process(imgs[1]).to(device).unsqueeze(0)

In [None]:
# Start the optimisation from a copy of the content image. The copy is the
# tensor that requires gradients, so it is the one the optimiser will modify.
opt_img = content_image.detach().clone().requires_grad_(True)

**Specify the layers that define the content loss and the style loss, that is, which intermediate VGG layers we use to compare Gram matrices (for style) and raw feature maps (for content):**

In [None]:
# Indices into the VGG feature stack whose outputs feed the loss.
style_layers = [0, 5, 10, 19, 28]
content_layers = [21]
# Style entries first, then content; losses, weights and targets use this order.
loss_layers = [*style_layers, *content_layers]

In [None]:
# One loss function per entry of loss_layers, in the same order:
# Gram-matrix MSE for each style layer, plain MSE for each content layer.
loss_fns = [
    loss_fn.to(device)
    for loss_fn in [GramMSELoss()] * len(style_layers) + [nn.MSELoss()] * len(content_layers)
]

In [None]:
# Per-layer loss weights, listed style first and then content.
# Each style weight is 1000 / n**2; n is presumably the channel count of the
# corresponding style layer (TODO confirm). The content loss has weight 1.
style_weights = [1000 / (n * n) for n in (64, 128, 256, 512, 512)]
content_weights = [1]
weights = [*style_weights, *content_weights]

***We need to manipulate our image so that its style resembles that of
`style_image` as much as possible. Hence we compute the `style_targets`
values of `style_image` by computing the GramMatrix of the features obtained
from a few chosen layers of VGG. Since the overall content should be
preserved, we use the layers in `content_layers` to compute the raw VGG
features of `content_image`, which serve as `content_targets`:***

In [None]:
# Fixed optimisation targets, detached so no gradient flows into them:
# Gram matrices of the style image's features and raw features of the content image.
style_targets = [GramMatrix()(feat).detach() for feat in vgg(style_image, style_layers)]
content_targets = [feat.detach() for feat in vgg(content_image, content_layers)]
# Style targets first, then content targets.
targets = [*style_targets, *content_targets]

Define the optimizer and the number of iterations ***(max_iters).*** Even
though we could have used Adam or any other optimizer, **LBFGS** is an
optimizer that has been observed to work best in deterministic scenarios.
Additionally, since we are dealing with exactly one image, there is nothing
random. Many experiments have revealed that **LBFGS** converges faster and
to lower losses in neural style transfer settings, so we will use this optimizer:

In [None]:
# Budget of loss evaluations for the optimisation loop.
max_iters = 6000
# L-BFGS updates the image tensor itself; it is the only parameter.
optimizer = optim.LBFGS(params=[opt_img], lr=0.1)
# Progress/loss logger sized for max_iters records.
log = Report(max_iters)

*Perform the optimization. In deterministic scenarios where we are iterating
on the same tensor again and again, we can wrap the loss computation in a
function with zero arguments (a closure) and repeatedly pass it to the optimizer's `step`, as shown here:*

In [None]:
# Number of loss evaluations so far; incremented inside closure().
iters = 0

def closure():
    """Evaluate the weighted style + content loss of opt_img and backpropagate.

    Called by the optimizer's step(). L-BFGS may evaluate it more than once
    per step, so `iters` counts loss evaluations and can end slightly above
    max_iters.
    """
    global iters
    iters += 1
    optimizer.zero_grad()
    feats = vgg(opt_img, loss_layers)
    # feats, loss_fns, weights and targets all follow the order of loss_layers.
    loss = sum(
        weights[i] * loss_fns[i](feat, targets[i])
        for i, feat in enumerate(feats)
    )
    loss.backward()
    log.record(pos=iters, loss=loss, end='\r')
    return loss

# Keep stepping until the evaluation budget is used up.
while iters < max_iters:
    optimizer.step(closure)

In [None]:
# Plot how the recorded loss evolved over the optimisation.
# log=True presumably selects a logarithmic scale; confirm against
# torch_snippets' Report.plot.
log.plot(log=True)

In [None]:
# Show the stylised result: undo the pre-processing on the first (and only)
# image of the batch, then reorder channels-first (C, H, W) to (H, W, C)
# for display.
# no_grad: this is display-only work, so autograd should not track it.
with torch.no_grad():
    out_img = post_process(opt_img[0]).permute(1,2,0)
show(out_img)