In [1]:
# Mount Google Drive so the project files under /content/drive are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
# Switch into the helper directory so the local modules (utility.py, video_utility.py) can be imported by the next cell.
%cd /content/drive/My Drive/CS 577 Project/helper functions/
%ls

/content/drive/My Drive/CS 577 Project/helper functions
data_loader.py  models.py  [0m[01;34m__pycache__[0m/  utility.py  vgg_nets.py  video_utility.py


In [3]:
import utility as utils
from video_utility import create_video_from_intermediate_results

import torch
from torch.optim import Adam, LBFGS
from torch.autograd import Variable
import numpy as np
import os
import argparse

# Return to the project root and list its contents.
%cd /content/drive/My Drive/CS 577 Project/
%ls

/content/drive/My Drive/CS 577 Project
'Code Reference.txt'    [0m[01;34m'helper functions'[0m/   [01;34m__pycache__[0m/          StyleTransfer.py   U2Net.py
'CS 577 Project.ipynb'   [01;34moutput_experiment[0m/   [01;34msaved_models[0m/         [01;34mtest_data[0m/
 Grabcut.ipynb           [01;34mPhotoWakeUp[0m/         StyleTransfer.ipynb   U2Net.ipynb


In [4]:
def build_loss(neural_net, optimizing_img, target_representations, content_feature_maps_index, style_feature_maps_indices, config):
    """Compute the weighted style-transfer objective for the image being optimized.

    Args:
        neural_net: callable mapping an image tensor to an indexable, iterable
            collection of feature maps.
        optimizing_img: image tensor whose pixels are being tuned.
        target_representations: sequence whose element 0 is the target content
            representation and element 1 is the sequence of target style
            (Gram) representations.
        content_feature_maps_index: index of the feature map used for content.
        style_feature_maps_indices: container of feature-map positions used for style.
        config: mapping providing 'content_weight', 'style_weight' and 'tv_weight'.

    Returns:
        Tuple ``(total_loss, content_loss, style_loss, tv_loss)``; the last
        three are the unweighted terms, the first is their weighted sum.
    """
    mse = torch.nn.functional.mse_loss

    content_target = target_representations[0]
    style_targets = target_representations[1]

    feature_maps = neural_net(optimizing_img)

    # Content term: mean squared error against the target content features
    # (leading axis squeezed away, matching how the target was built by the caller).
    content_now = feature_maps[content_feature_maps_index].squeeze(0)
    content_loss = mse(content_target, content_now, reduction='mean')

    # Style term: summed squared error between Gram matrices, averaged over layers.
    style_now = [
        utils.gram_matrix(fmap)
        for position, fmap in enumerate(feature_maps)
        if position in style_feature_maps_indices
    ]
    style_loss = sum(
        (mse(gram_target[0], gram_current[0], reduction='sum')
         for gram_target, gram_current in zip(style_targets, style_now)),
        0.0,
    )
    style_loss /= len(style_targets)

    # Total-variation term computed by the project helper on the image itself.
    tv_loss = utils.total_variation(optimizing_img)

    weighted_content = config['content_weight'] * content_loss
    weighted_style = config['style_weight'] * style_loss
    weighted_tv = config['tv_weight'] * tv_loss
    total_loss = weighted_content + weighted_style + weighted_tv

    return total_loss, content_loss, style_loss, tv_loss

In [5]:
def make_tuning_step(neural_net, optimizer, target_representations, content_feature_maps_index, style_feature_maps_indices, config):
    """Create a closure that runs one optimization step on a given image.

    All arguments are captured and forwarded to ``build_loss`` on every call,
    except ``optimizer``, which is stepped and then has its gradients cleared.

    Returns:
        A function ``tuning_step(optimizing_img)`` returning the tuple
        ``(total_loss, content_loss, style_loss, tv_loss)`` for that step.
    """
    def tuning_step(optimizing_img):
        objective, content_term, style_term, tv_term = build_loss(
            neural_net,
            optimizing_img,
            target_representations,
            content_feature_maps_index,
            style_feature_maps_indices,
            config,
        )
        # Backpropagate, apply the update, then clear gradients for the next call.
        objective.backward()
        optimizer.step()
        optimizer.zero_grad()
        return objective, content_term, style_term, tv_term

    # The caller invokes this once per iteration of its tuning loop.
    return tuning_step

In [12]:
def neural_style_transfer(config):
    """Run optimization-based neural style transfer and return the output directory.

    The pixels of a single image tensor are optimized so that its feature maps
    match the content image's content representation and the style image's
    Gram-matrix style representation (see ``build_loss``).

    Args:
        config (dict): settings read here: 'content_images_dir',
            'content_img_name', 'style_images_dir', 'style_img_name',
            'output_img_dir', 'height', 'init_method', 'model', 'optimizer',
            'content_weight', 'style_weight', 'tv_weight'. The whole dict is
            also forwarded to ``utils.save_and_maybe_display``, which may read
            further keys.

            * 'init_method': 'random' starts from Gaussian noise, 'content'
              from the content image, and any other value from the style
              image resized to the content image's spatial size.
            * 'optimizer': must be 'adam' or 'lbfgs'.

    Returns:
        str: path of the directory (created if missing) that is handed to
        ``utils.save_and_maybe_display`` for writing results.

    Raises:
        ValueError: if ``config['optimizer']`` is not a supported optimizer.
            Previously such a value silently skipped optimization.
    """
    # magic numbers in general are a big no no - some things in this code are left like this by design to avoid clutter
    num_of_iterations = {
        "lbfgs": 100,
        "adam": 100,
    }

    # Fail fast, before any image/model loading, instead of silently doing nothing.
    optimizer_name = config['optimizer']
    if optimizer_name not in num_of_iterations:
        raise ValueError(
            f"Unsupported optimizer {optimizer_name!r}; expected one of {sorted(num_of_iterations)}."
        )
    iterations = num_of_iterations[optimizer_name]

    content_img_path = os.path.join(config['content_images_dir'], config['content_img_name'])
    style_img_path = os.path.join(config['style_images_dir'], config['style_img_name'])

    # Output folder is named after the two input files (text before the first dot of each name).
    out_dir_name = 'combined_' + os.path.split(content_img_path)[1].split('.')[0] + '_' + os.path.split(style_img_path)[1].split('.')[0]
    dump_path = os.path.join(config['output_img_dir'], out_dir_name)
    os.makedirs(dump_path, exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    content_img = utils.prepare_img(content_img_path, config['height'], device)
    style_img = utils.prepare_img(style_img_path, config['height'], device)

    if config['init_method'] == 'random':
        # white_noise_img = np.random.uniform(-90., 90., content_img.shape).astype(np.float32)
        gaussian_noise_img = np.random.normal(loc=0, scale=90., size=content_img.shape).astype(np.float32)
        init_img = torch.from_numpy(gaussian_noise_img).float().to(device)
    elif config['init_method'] == 'content':
        init_img = content_img
    else:
        # init image has same dimension as content image - this is a hard constraint
        # feature maps need to be of same size for content image and init image
        style_img_resized = utils.prepare_img(style_img_path, np.asarray(content_img.shape[2:]), device)
        init_img = style_img_resized

    # we are tuning optimizing_img's pixels! (that's why requires_grad=True)
    # A detached clone replaces the deprecated Variable wrapper so that the
    # optimizer's in-place updates never alias content_img / style_img storage.
    optimizing_img = init_img.clone().detach().requires_grad_(True)

    neural_net, content_feature_maps_index_name, style_feature_maps_indices_names = utils.prepare_model(config['model'], device)
    print(f'Using {config["model"]} in the optimization procedure.')

    content_index = content_feature_maps_index_name[0]
    style_indices = style_feature_maps_indices_names[0]

    # The targets are constants of the optimization, so no autograd graph is needed for them.
    with torch.no_grad():
        content_img_set_of_feature_maps = neural_net(content_img)
        style_img_set_of_feature_maps = neural_net(style_img)

        target_content_representation = content_img_set_of_feature_maps[content_index].squeeze(axis=0)
        target_style_representation = [utils.gram_matrix(x) for cnt, x in enumerate(style_img_set_of_feature_maps) if cnt in style_indices]
    target_representations = [target_content_representation, target_style_representation]

    def report(prefix, step, total_loss, content_loss, style_loss, tv_loss, should_display):
        # Shared logging/saving for both optimizers; messages are identical to the per-branch originals.
        with torch.no_grad():
            print(f'{prefix} | iteration: {step:03}, total loss={total_loss.item():12.4f}, content_loss={config["content_weight"] * content_loss.item():12.4f}, style loss={config["style_weight"] * style_loss.item():12.4f}, tv loss={config["tv_weight"] * tv_loss.item():12.4f}')
            utils.save_and_maybe_display(optimizing_img, dump_path, config, step, iterations, should_display=should_display)

    #
    # Start of optimization procedure
    #
    if optimizer_name == 'adam':
        optimizer = Adam((optimizing_img,), lr=1e1)
        tuning_step = make_tuning_step(neural_net, optimizer, target_representations, content_index, style_indices, config)
        for cnt in range(iterations):
            total_loss, content_loss, style_loss, tv_loss = tuning_step(optimizing_img)
            report('Adam', cnt, total_loss, content_loss, style_loss, tv_loss, should_display=True)
    else:  # 'lbfgs' - the only other value accepted by the validation above
        # line_search_fn does not seem to have significant impact on result
        optimizer = LBFGS((optimizing_img,), max_iter=iterations, line_search_fn='strong_wolfe')
        cnt = 0

        def closure():
            # NOTE(review): cnt counts closure evaluations; L-BFGS may evaluate the
            # closure several times per iteration, so the logged "iteration" number
            # may not equal the optimizer's own iteration count - confirm if it matters.
            nonlocal cnt
            if torch.is_grad_enabled():
                optimizer.zero_grad()
            total_loss, content_loss, style_loss, tv_loss = build_loss(neural_net, optimizing_img, target_representations, content_index, style_indices, config)
            if total_loss.requires_grad:
                total_loss.backward()
            report('L-BFGS', cnt, total_loss, content_loss, style_loss, tv_loss, should_display=False)

            cnt += 1
            return total_loss

        optimizer.step(closure)

    return dump_path

In [13]:
# Settings consumed by neural_style_transfer(config).
config = {
    # Directory and file name of the content image.
    'content_images_dir': '/content/drive/My Drive/CS 577 Project/test_data/images/content/',
    'content_img_name': 'figures.jpg',
    # Directory and file name of the style image.
    'style_images_dir': '/content/drive/My Drive/CS 577 Project/test_data/images/style/',
    'style_img_name': 'candy.jpg',
    # Root directory for results; a 'combined_<content>_<style>' subfolder is created inside it.
    'output_img_dir': '/content/drive/My Drive/CS 577 Project/images/output/',
    # Size argument handed to utils.prepare_img for both input images.
    'height': 512,
    # Starting image for the optimization: 'random' (Gaussian noise), 'content',
    # or any other value to start from the style image resized to the content size.
    'init_method': 'content',
    # Which NN model utils.prepare_model should build.
    'model': 'vgg16',
    # Which optimizer to use: 'adam' or 'lbfgs'.
    'optimizer': 'lbfgs',
    # Loss weights: influence of the content image, of the style image, and of the
    # total-variation (smoothness) term on the final image.
    'content_weight': 1e4,
    'style_weight': 1e4,
    'tv_weight': 5e0,
    # How often to save intermediate output (interpreted by utils.save_and_maybe_display).
    'saving_freq': -1,
    # Passed through to the saving helper - presumably (name prefix, file extension); confirm in utility.py.
    'img_format': ('combined', '.jpg'),
}


In [14]:
# Run style transfer with the settings above; the call returns the output directory path.
neural_style_transfer(config)

Using vgg16 in the optimization procedure.
L-BFGS | iteration: 000, total loss=1067104731136.0000, content_loss=      0.0000, style loss=1067066240000.0000, tv loss=38456435.0000
L-BFGS | iteration: 001, total loss=1067104534528.0000, content_loss=      0.0000, style loss=1067066080000.0000, tv loss=38456435.0000
L-BFGS | iteration: 002, total loss=1067103354880.0000, content_loss=      0.0008, style loss=1067064880000.0000, tv loss=38456492.5000
L-BFGS | iteration: 003, total loss=1067102765056.0000, content_loss=      0.0016, style loss=1067064320000.0000, tv loss=38456515.0000
L-BFGS | iteration: 004, total loss=1067102044160.0000, content_loss=      0.0033, style loss=1067063600000.0000, tv loss=38456555.0000
L-BFGS | iteration: 005, total loss=1067090444288.0000, content_loss=      0.1000, style loss=1067052000000.0000, tv loss=38457095.0000
L-BFGS | iteration: 006, total loss=1066963042304.0000, content_loss=      9.9934, style loss=1066924560000.0000, tv loss=38463065.0000
L-BFG

KeyboardInterrupt: ignored