<a href="https://colab.research.google.com/github/amyxjhuang/amyxjhuang.github.io/blob/master/nst_painter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Using the 19-layer VGG network (VGG-19) to extract the style from an image.

Sources:
* https://www.kaggle.com/code/just4jcgeorge/styletransfer-using-vgg19-pytorch

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import torchvision.models as models


In [None]:
from google.colab import drive
import os
drive.mount('/content/drive', force_remount=True)

# Folder on the mounted Drive that holds the candidate content/style images.
folder_path = "/content/drive/My Drive/NST Painter/images/"

# os.listdir() returns entries in arbitrary order, and later cells pick images
# by position (image_tensors[0]), so sort the names to keep indices stable
# between runs. The extension test is case-insensitive so that files such as
# "PHOTO.JPG" are not silently skipped.
image_files = [
    os.path.join(folder_path, f)
    for f in sorted(os.listdir(folder_path))
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
]



In [None]:
# Sanity check: list every image path discovered in the Drive folder.
print("Found images:", image_files, sep=" ")

In [None]:
# Per-channel (R, G, B) ImageNet statistics used to normalize inputs and to
# undo that normalization before plotting.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image


# Preprocessing pipeline: resize to a fixed 512x512 square, convert to a
# tensor, then normalize with the shared `mean` / `std` constants so the
# statistics are defined in exactly one place.
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Load every discovered image as a (1, 3, 512, 512) tensor. Each file is
# opened in a `with` block so its handle is released as soon as the pixel
# data has been converted to RGB.
image_tensors = []
for image_path in image_files:
    with Image.open(image_path) as pil_image:
        image_tensors.append(transform(pil_image.convert("RGB")).unsqueeze(0))

print(f"Loaded {len(image_tensors)} images as tensors.")

In [None]:
import matplotlib.pyplot as plt

def show_image(tensor):
    """Plot a normalized image tensor with matplotlib.

    The tensor is copied to the CPU, its leading batch dimension (if of size
    one) is dropped, the hard-coded per-channel normalization is reversed, and
    the result is clamped to [0, 1] before being drawn without axes.

    Args:
        tensor: Image tensor in (1, C, H, W) or (C, H, W) layout with C == 3.
    """
    channel_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    channel_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)

    # Work on a detached CPU copy so the caller's tensor is never modified.
    chw = tensor.cpu().clone().detach().squeeze(0)
    chw = (chw * channel_std + channel_mean).clamp(0, 1)

    # matplotlib expects channels last: (C, H, W) -> (H, W, C).
    plt.imshow(chw.permute(1, 2, 0))
    plt.axis("off")
    plt.show()



In [None]:
import numpy as np
def load_image(image_path, max_size=512):
    """Load an image file as a normalized, batched tensor.

    Args:
        image_path: Path of the image file to open.
        max_size: Edge length in pixels. The image is resized to exactly
            (max_size, max_size), so the original aspect ratio is not kept.

    Returns:
        A tensor of shape (1, 3, max_size, max_size), normalized with the
        per-channel mean/std hard-coded below.
    """
    # Resize to a square, convert to a tensor, then normalize per channel.
    transform = transforms.Compose([
        transforms.Resize((max_size, max_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Open inside a context manager so the file handle is always released;
    # convert() returns an independent RGB image that stays valid afterwards.
    with Image.open(image_path) as source:
        rgb_image = source.convert('RGB')

    # Add the leading batch dimension.
    return transform(rgb_image).unsqueeze(0)


def im_convert(tensor: torch.Tensor) -> np.ndarray:
    """Turn a normalized image tensor into an (H, W, C) NumPy array in [0, 1].

    All size-one dimensions are squeezed away, the remaining (C, H, W) array is
    transposed to channels-last, the hard-coded per-channel normalization is
    reversed, and the values are clipped to the displayable range.
    """
    channel_std = np.array((0.229, 0.224, 0.225))
    channel_mean = np.array((0.485, 0.456, 0.406))

    # Detached CPU copy -> NumPy, without any size-one dimensions.
    chw = tensor.to("cpu").clone().detach().numpy().squeeze()
    hwc = chw.transpose(1, 2, 0)

    return (hwc * channel_std + channel_mean).clip(0, 1)

# content_image = load_image('content_image.png')
# style_image = load_image('style_image.png')


In [None]:
# Preview every loaded image to confirm that loading and un-normalization work.
for tensor in image_tensors:
    show_image(tensor)

In [None]:
# Keep only the `.features` part of the pretrained VGG-19 model.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# a `weights=` argument - confirm against the installed version.
vgg = models.vgg19(pretrained=True).features

# Freeze the network: the optimizer only ever updates the target image, never
# the VGG weights.
for param in vgg.parameters():
    param.requires_grad = False


If a GPU is available, we move the VGG model to that device; otherwise it stays on the CPU.

In [None]:
# Prefer CUDA when it is available and fall back to the CPU otherwise.
use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using GPU")
device = torch.device("cuda" if use_gpu else "cpu")

# Move the frozen feature extractor to the selected device.
vgg.to(device)


In [None]:
def tensor_to_image(tensor, channel_mean=None, channel_std=None):
    """Convert a normalized image tensor into an (H, W, C) array for matplotlib.

    Args:
        tensor: Image tensor in (1, C, H, W) or (C, H, W) layout. All size-one
            dimensions are squeezed away, so exactly three dimensions must
            remain for the transpose to succeed.
        channel_mean: Per-channel mean used during normalization. Defaults to
            the notebook-level `mean` constant.
        channel_std: Per-channel standard deviation used during normalization.
            Defaults to the notebook-level `std` constant.

    Returns:
        A NumPy array of shape (H, W, C) with values clipped to [0, 1].
    """
    if channel_mean is None:
        channel_mean = mean
    if channel_std is None:
        channel_std = std

    # Detach from the autograd graph and work on a CPU copy so the caller's
    # tensor is left untouched; squeeze() drops the size-one batch dimension.
    image = tensor.cpu().clone().detach().numpy().squeeze()

    # matplotlib expects channels last: (C, H, W) -> (H, W, C).
    image = image.transpose(1, 2, 0)

    # Undo the normalization: x * std + mean. The previous in-place form
    # `image *= std + mean` multiplied by (std + mean) because `+` binds
    # tighter than `*=`, which produced wrongly scaled images.
    image = image * np.array(channel_std) + np.array(channel_mean)

    # Clip to the displayable range.
    return image.clip(0, 1)


In [None]:
# Pick the content and style images by position in `image_tensors`.
# NOTE(review): both names currently refer to the same image (index 0);
# confirm whether a different index was intended for the style image.
content_image = style_image = image_tensors[0]

In [None]:
# Style image preview, drawn in its own titled figure.
fig = plt.figure()
fig.suptitle('Style Image')
img = tensor_to_image(style_image)
plt.imshow(img)

# Content image preview, drawn in a second titled figure.
fig = plt.figure()
fig.suptitle('Content Image')
img = tensor_to_image(content_image)
plt.imshow(img)


Layers of interest: the VGG-19 layers whose intermediate activations we record and use as content and style features.


In [None]:
# Maps a sub-module name inside the feature extractor to a readable layer
# label. Only activations of the layers listed here are recorded.
LAYERS_OF_INTEREST = {
    # '0': 'conv1_1',
    # '5': 'conv2_1',
    '10': 'conv3_1',
    '19': 'conv4_1',
    '21': 'conv4_2',
    '28': 'conv5_1',
}


def apply_model_and_extract_features(image, model):
    """Run `image` through `model` layer by layer and collect selected activations.

    Args:
        image: Input tensor passed to the first sub-module of `model`.
        model: Module whose direct sub-modules are applied in registration
            order, each one receiving the previous one's output.

    Returns:
        A dict mapping each label in LAYERS_OF_INTEREST whose sub-module name
        exists in `model` to the activation produced by that sub-module.
    """
    captured = {}
    activation = image

    for layer_name, layer in model._modules.items():
        activation = layer(activation)

        # Record the activation only for the layers we care about.
        label = LAYERS_OF_INTEREST.get(layer_name)
        if label is not None:
            captured[label] = activation

    return captured


In [None]:
# Content branch: move the image to the model's device, then record its
# activations at the layers of interest.
content_image = content_image.to(device)
content_img_features = apply_model_and_extract_features(content_image, vgg)

# Style branch: same procedure for the style image.
style_image = style_image.to(device)
style_img_features = apply_model_and_extract_features(style_image, vgg)


In [None]:
def transformation(img):
    """Convert a PIL image into a normalized, batched tensor.

    Uses the `transforms` module imported at the top of the notebook; the
    previous `tf` alias is only imported in a later cell, so calling this
    function before that cell ran raised a NameError.

    Args:
        img: Image to convert.

    Returns:
        A tensor in (1, C, H, W) layout, normalized with the notebook-level
        `mean` and `std` constants.
    """
    # Resize(512) is given a single int rather than an (h, w) pair, which
    # torchvision documents as scaling the smaller edge to 512 while keeping
    # the aspect ratio - unlike the square resize used by the other loaders.
    tasks = transforms.Compose([transforms.Resize(512),
                                transforms.ToTensor(),
                                transforms.Normalize(mean, std)])

    img = tasks(img)

    # Add the leading batch dimension.
    return img.unsqueeze(0)



In [None]:
def calculate_gram_matrix(tensor):
    """Return the normalized Gram matrix of a single feature map.

    The Gram matrix G = F @ F.T holds the correlations between feature
    channels, which describe textures and patterns rather than exact pixel
    positions.

    Args:
        tensor: Feature map in (1, C, H, W) layout. The reshape below only
            succeeds when the batch dimension is one.

    Returns:
        A (C, C) tensor equal to F @ F.T / (C * H * W), where F is the feature
        map flattened to (C, H * W).
    """
    channels, height, width = tensor.size()[1:]

    # One row per channel, one column per spatial position.
    flattened = tensor.view(channels, height * width)
    normalizer = channels * height * width

    return torch.mm(flattened, flattened.t()).div(normalizer)


In [None]:
# Pre-compute one Gram matrix per recorded style layer. The style image never
# changes, so these serve as fixed targets during optimization.
style_features_gram_matrix = {
    layer_name: calculate_gram_matrix(activation)
    for layer_name, activation in style_img_features.items()
}

style_features_gram_matrix


In [None]:
import torch
import torch.optim as optim

# Relative weight of each layer in the style loss.
# Currently disabled: conv1_1 (1.0) and conv2_1 (0.75).
weights = dict(conv3_1=0.35, conv4_1=0.25, conv5_1=0.15)

# The image being optimized starts as a copy of the content image and is the
# only trainable parameter handed to the optimizer.
target = torch.nn.Parameter(content_image.clone().to(device))

optimizer = optim.Adam(params=[target], lr=0.003)


In [None]:
# Show the target image before any optimization step has been taken.
plt.figure()
plt.imshow(tensor_to_image(tensor=target))


In [None]:

from torchvision import transforms as tf
import torch.nn.functional as F

# 1000000 - too
# NOTE(review): the note above is truncated in the original; it appears to
# record an earlier value tried for `style_weight`.
style_weight = 10000000

# Optimize the target image for iterations 1..999.
for i in range(1, 1000):

    # Activations of the current target image at the layers of interest.
    target_features = apply_model_and_extract_features(target, vgg)

    # Content loss: the target should match the content image at conv4_2.
    content_loss = F.mse_loss(target_features['conv4_2'], content_img_features['conv4_2'])

    # Style loss: weighted sum of Gram-matrix mismatches over the style layers.
    style_loss = 0
    for layer in weights:

        target_feature = target_features[layer]

        target_gram_matrix = calculate_gram_matrix(target_feature)
        style_gram_matrix = style_features_gram_matrix[layer]

        layer_loss = F.mse_loss(target_gram_matrix, style_gram_matrix)
        layer_loss *= weights[layer]

        style_loss += layer_loss

    total_loss = style_weight * style_loss + content_loss

    # Report progress every 50 iterations.
    if i % 50 == 0:
        print ('Epoch {}:, Style Loss : {:4f}, Content Loss : {:4f}'.format( i, style_loss, content_loss))

    # Standard step: clear old gradients, backpropagate, update the target.
    optimizer.zero_grad()

    total_loss.backward()

    optimizer.step()

In [None]:
import torch
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Define the ImageNet mean and std

def denormalize(tensor):
    """Convert a normalized tensor back to the [0, 1] range for visualization.

    Args:
        tensor: Image tensor whose channel dimension is third from last, e.g.
            (3, H, W) or (1, 3, H, W), normalized with the mean/std below.

    Returns:
        A new, detached CPU tensor of the same shape with the normalization
        reversed and values clamped to [0, 1]. The input is not modified.
    """
    # Shaped (3, 1, 1) so they broadcast over the height and width dimensions.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    tensor = tensor.cpu().clone().detach()  # Avoid modifying the original tensor
    tensor = tensor * std + mean  # Reverse normalization
    tensor = torch.clamp(tensor, 0, 1)  # Ensure values are in [0,1]
    return tensor

# Take the first loaded image, stored as (1, 3, H, W), and drop its batch
# dimension before reversing the normalization.
image_tensor = torch.squeeze(image_tensors[0], 0)
denorm_image = denormalize(image_tensor)

# matplotlib wants channels last, so reorder (C, H, W) -> (H, W, C).
plt.imshow(denorm_image.permute(1, 2, 0))
plt.axis("off")
plt.show()


In [None]:
# fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
# ax1.imshow(tensor_to_image(content_image))
# ax2.imshow(tensor_to_image(target))

# Show the target image as it stands after the optimization loop.
plt.figure()
plt.imshow(tensor_to_image(tensor=target))


In [None]:
# fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
# ax1.imshow(tensor_to_image(denormalize(content_image)))
# ax2.imshow(tensor_to_image(denormalize(target)))
plt.figure()

# denormalize() already reverses the normalization and clamps to [0, 1], so
# its result is displayed directly. Routing it through tensor_to_image() as
# well applied mean/std a second time and distorted the colours.
plt.imshow(denormalize(target).squeeze(0).permute(1, 2, 0))


In [None]:
def apply_style_on_image(content_image, style_image):
    """Unfinished helper; presumably meant to stylize `content_image` using `style_image`.

    NOTE(review): as written, neither parameter is used and nothing is
    returned. Confirm the intended behavior before relying on this function.
    """
    # NOTE(review): `img` is not a parameter - this reads whatever module-level
    # `img` exists when the function is called; verify which path was intended.
    tensor_image = transform(Image.open(img).convert("RGB")).unsqueeze(0)
