In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.models import vgg19, VGG19_Weights
from torchvision.utils import save_image

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# model = models.vgg19(pretrained=True).features
model = vgg19(init_weights=VGG19_Weights.IMAGENET1K_V1).features
print(model)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): MaxPoo

In [4]:


class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        # The first number x in convx_y gets added by 1 after it has gone
        # through a maxpool, and the second y if we have several conv layers
        # in between a max pool. These strings (0, 5, 10, ..) then correspond
        # to conv1_1, conv2_1, conv3_1, conv4_1, conv5_1 mentioned in NST paper
        self.chosen_features = ["0", "5", "10", "19", "28"]

        # We don't need to run anything further than conv5_1 (the 28th module in vgg)
        # Since remember, we dont actually care about the output of VGG: the only thing
        # that is modified is the generated image (i.e, the input).


        
        # self.model = models.vgg19(pretrained=True).features[:29]
        self.model = vgg19(init_weights=VGG19_Weights.IMAGENET1K_V1).features[:29]

    def forward(self, x):
        # Store relevant features
        features = []

        # Go through each layer in model, if the layer is in the chosen_features,
        # store it in features. At the end we'll just return all the activations
        # for the specific layers we have in chosen_features
        for layer_num, layer in enumerate(self.model):
            x = layer(x)

            if str(layer_num) in self.chosen_features:
                features.append(x)

        return features



In [15]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
imsize = 356

# Here we may want to use the Normalization constants used in the original
# VGG network (to get similar values net was originally trained on), but
# I found it didn't matter too much so I didn't end of using it. If you
# use it make sure to normalize back so the images don't look weird.

loader = transforms.Compose(
    [
        transforms.Resize((imsize, imsize)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)


def load_image(image_name):
    # 這邊會強制轉RGB，不然沒有加convert 的話有可能 chaanel = 4 
    image = Image.open(image_name).convert('RGB')
    image = loader(image).unsqueeze(0)
    return image.to(device)

In [22]:


original_img = load_image("/content/drive/MyDrive/Colab Notebooks/ithome/NST/peaceful_building.png")
style_img = load_image("/content/drive/MyDrive/Colab Notebooks/ithome/NST/styles/style3.jpg")

# initialized generated as white noise or clone of original image.
# Clone seemed to work better for me.

# generated = torch.randn(original_img.data.shape, device=device, requires_grad=True)
generated = original_img.clone().requires_grad_(True)
model = VGG().to(device).eval()

In [23]:

# Hyperparameters
total_steps = 8000
learning_rate = 0.001
alpha = 1
beta = 0.01
optimizer = optim.Adam([generated], lr=learning_rate)

In [21]:

for step in range(total_steps):
    # Obtain the convolution features in specifically chosen layers
    generated_features = model(generated)
    original_img_features = model(original_img)
    style_features = model(style_img)

    # Loss is 0 initially
    style_loss = original_loss = 0

    # iterate through all the features for the chosen layers
    for gen_feature, orig_feature, style_feature in zip(
        generated_features, original_img_features, style_features
    ):

        # batch_size will just be 1
        batch_size, channel, height, width = gen_feature.shape
        original_loss += torch.mean((gen_feature - orig_feature) ** 2)
        # Compute Gram Matrix of generated
        G = gen_feature.view(channel, height * width).mm(
            gen_feature.view(channel, height * width).t()
        )
        # Compute Gram Matrix of Style
        A = style_feature.view(channel, height * width).mm(
            style_feature.view(channel, height * width).t()
        )
        style_loss += torch.mean((G - A) ** 2)

    total_loss = alpha * original_loss + beta * style_loss
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    if step % 200 == 0:
        print(total_loss)
        save_image(generated, "/content/drive/MyDrive/Colab Notebooks/ithome/NST/generated_3.png")

tensor(104074.7266, device='cuda:0', grad_fn=<AddBackward0>)


KeyboardInterrupt: ignored

In [24]:

for i in ['2','3','4','5','6','7']:
  original_img = load_image("/content/drive/MyDrive/Colab Notebooks/ithome/NST/peaceful_building.png")
  style_img = load_image(f"/content/drive/MyDrive/Colab Notebooks/ithome/NST/styles/style{i}.jpg")

  # initialized generated as white noise or clone of original image.
  # Clone seemed to work better for me.

  # generated = torch.randn(original_img.data.shape, device=device, requires_grad=True)
  generated = original_img.clone().requires_grad_(True)
  model = VGG().to(device).eval()


  # Hyperparameters
  total_steps = 8000
  learning_rate = 0.001
  alpha = 1
  beta = 0.01
  optimizer = optim.Adam([generated], lr=learning_rate)


  for step in range(total_steps):
      # Obtain the convolution features in specifically chosen layers
      generated_features = model(generated)
      original_img_features = model(original_img)
      style_features = model(style_img)

      # Loss is 0 initially
      style_loss = original_loss = 0

      # iterate through all the features for the chosen layers
      for gen_feature, orig_feature, style_feature in zip(
          generated_features, original_img_features, style_features
      ):

          # batch_size will just be 1
          batch_size, channel, height, width = gen_feature.shape
          original_loss += torch.mean((gen_feature - orig_feature) ** 2)
          # Compute Gram Matrix of generated
          G = gen_feature.view(channel, height * width).mm(
              gen_feature.view(channel, height * width).t()
          )
          # Compute Gram Matrix of Style
          A = style_feature.view(channel, height * width).mm(
              style_feature.view(channel, height * width).t()
          )
          style_loss += torch.mean((G - A) ** 2)

      total_loss = alpha * original_loss + beta * style_loss
      optimizer.zero_grad()
      total_loss.backward()
      optimizer.step()

      if step % 200 == 0:
          print(total_loss)
          save_image(generated, f"/content/drive/MyDrive/Colab Notebooks/ithome/NST/generated_{i}.png")

tensor(481050.4375, device='cuda:0', grad_fn=<AddBackward0>)
tensor(295434.6250, device='cuda:0', grad_fn=<AddBackward0>)
tensor(189277.7969, device='cuda:0', grad_fn=<AddBackward0>)
tensor(127481.6328, device='cuda:0', grad_fn=<AddBackward0>)
tensor(89043.2422, device='cuda:0', grad_fn=<AddBackward0>)
tensor(64258.3477, device='cuda:0', grad_fn=<AddBackward0>)
tensor(47701.5820, device='cuda:0', grad_fn=<AddBackward0>)
tensor(36145.7656, device='cuda:0', grad_fn=<AddBackward0>)
tensor(27770.0273, device='cuda:0', grad_fn=<AddBackward0>)
tensor(21537.5664, device='cuda:0', grad_fn=<AddBackward0>)
tensor(16819.7969, device='cuda:0', grad_fn=<AddBackward0>)
tensor(13206.0801, device='cuda:0', grad_fn=<AddBackward0>)
tensor(10413.3467, device='cuda:0', grad_fn=<AddBackward0>)
tensor(8239.7461, device='cuda:0', grad_fn=<AddBackward0>)
tensor(6537.9360, device='cuda:0', grad_fn=<AddBackward0>)
tensor(5198.9028, device='cuda:0', grad_fn=<AddBackward0>)
tensor(4140.9717, device='cuda:0', grad