<a href="https://colab.research.google.com/github/bhavya6701/comp473-project/blob/main/comp473_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Implementation of Artistic Style Transfer Using Convolutional Neural Networks
**Authors:** Shibin Koshy [40295019], Ruturajsinh Vihol [40154693], Bhavya Manjibhai Ruparelia [40164863]

## Imports

In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.optim as optim
from torchvision import transforms, models
from tqdm import tqdm
import warnings

In [None]:
# Silence every Python warning for the rest of the session
warnings.filterwarnings('ignore')

# Directory the notebook was started from; the image paths below are built on it
HOME = os.getcwd()

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Load Models

**VGG-16**: A convolutional neural network with 16 layers, popular for style transfer due to its ability to capture detailed hierarchical features across layers. Pre-trained on ImageNet, it offers a balance between depth and computational efficiency.

**VGG-19**: An extended version of VGG-16 with 19 layers, providing deeper feature representations. This model can capture more complex details, enhancing style extraction for artistic image synthesis.

<!-- **ResNet-50**: A 50-layer residual network with skip connections, which helps retain both high- and low-level features. This architecture is well-suited for extracting intricate textures and patterns in style transfer tasks. -->

In [None]:
# Load the pre-trained VGG networks from torchvision, keyed by the model
# names used throughout the notebook
model_dict = {
    "vgg-16": models.vgg16(weights=models.VGG16_Weights.DEFAULT),
    "vgg-19": models.vgg19(weights=models.VGG19_Weights.DEFAULT),
}

for model in model_dict.values():
    # Only the generated image is optimized, never the network: freeze all weights
    model.requires_grad_(False)

    # The networks are used purely as fixed feature extractors, so keep them in
    # inference mode (this disables the dropout layers in the VGG classifier head)
    model.eval()

    # Move the model to the specified device (GPU or CPU)
    model.to(device)

## Load Images

In [None]:
def load_image(img_path, max_size=400, shape=None):
    """Load an image file as a normalized tensor with a leading batch dimension.

    Args:
        img_path: Path of the image file to open.
        max_size: Upper bound, in pixels, for the longer edge of the resized
            image. Ignored when ``shape`` is given. Smaller images are kept at
            their original size.
        shape: Optional explicit target size handed to ``transforms.Resize``
            unchanged (e.g. the ``(height, width)`` of another image).

    Returns:
        A float tensor of shape ``(1, 3, H, W)`` normalized with the ImageNet
        channel means and standard deviations.
    """
    # Open inside a context manager so the file handle is released right away;
    # convert() returns an independent, fully loaded RGB copy
    with Image.open(img_path) as source:
        image = source.convert("RGB")

    if shape is not None:
        target_size = shape
    else:
        # Resize() with a single int matches the *shorter* edge, which lets the
        # longer edge exceed max_size and upscales small images. Compute an
        # explicit (height, width) so the longer edge is capped at max_size.
        width, height = image.size
        scale = min(1.0, max_size / max(width, height))
        target_size = (max(1, round(height * scale)), max(1, round(width * scale)))

    # Define the transformation pipeline
    in_transform = transforms.Compose(
        [
            transforms.Resize(target_size),  # Resize to target size
            transforms.ToTensor(),  # Convert image to a [0, 1] float tensor
            transforms.Normalize(
                mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
            ),  # Normalize with the ImageNet statistics
        ]
    )

    # The image is already RGB, so only the batch dimension needs to be added
    return in_transform(image).unsqueeze(0)


# The content image is loaded first; the style image is then resized to the
# content image's height and width so both tensors have matching spatial size
content_path = HOME + "/data/input_images/content.jpg"
style_path = HOME + "/data/input_images/style.jpg"

content = load_image(content_path).to(device)
style = load_image(style_path, shape=content.shape[-2:]).to(device)

In [None]:
# Convert the images to numpy arrays
def tensor_to_image(tensor):
    """Turn an ImageNet-normalized image tensor into a displayable numpy array.

    Args:
        tensor: Float tensor of shape ``(1, 3, H, W)`` or ``(3, H, W)``, on any
            device. It may require gradients; it is detached before conversion.

    Returns:
        A numpy array of shape ``(H, W, 3)`` with values clipped to ``[0, 1]``.
    """
    # Detach first: .numpy() raises on tensors that are part of an autograd graph
    image = tensor.detach().cpu()

    # Drop only the batch dimension; a bare squeeze() would also remove a
    # height or width of size 1
    if image.dim() == 4:
        image = image.squeeze(0)

    # Undo the normalization applied at load time: x * std + mean, per channel
    mean = torch.tensor([0.485, 0.456, 0.406], dtype=image.dtype).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225], dtype=image.dtype).view(3, 1, 1)
    image = image * std + mean

    # C x H x W -> H x W x C, then clip values to stay within the [0, 1] range
    return np.clip(image.permute(1, 2, 0).numpy(), 0, 1)


# Show the content and style images side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))

for axis, heading, picture, tag in (
    (ax1, "Content Image", content, "Content"),
    (ax2, "Style Image", style, "Style"),
):
    axis.title.set_text(heading)
    axis.imshow(tensor_to_image(picture), label=tag)
plt.show()

## Feature Extraction and Gram Matrix

In [None]:
def extract_features(image, model, layers):
    """Run ``image`` through ``model`` and collect selected layer outputs.

    Args:
        image: Input passed to the first child module of ``model``.
        model: Module whose direct children are applied one after another, in
            ``named_children()`` order.
        layers: Mapping from child-module name to the key under which that
            module's output is stored in the result.

    Returns:
        Dict mapping each feature key from ``layers`` to the output of the
        corresponding child module. Names in ``layers`` that do not occur in
        ``model`` are simply absent from the result.
    """
    features = {}
    remaining = set(layers)
    x = image

    for name, layer in model.named_children():
        # Every requested output has been captured: the rest of the network
        # cannot contribute, so skip it (this runs once per optimization step)
        if not remaining:
            break

        x = layer(x)
        if name in remaining:
            features[layers[name]] = x
            remaining.discard(name)

    return features


def gram_matrix(tensor):
    """Return the Gram matrix of a single feature map.

    ``tensor`` is a 4-D ``(batch, depth, height, width)`` tensor. The batch
    dimension is dropped by the reshape, so only a batch of one is supported.
    The result has shape ``(depth, depth)``.
    """
    _, channels, height, width = tensor.size()
    # One row per channel, holding that channel's flattened spatial activations
    flattened = tensor.view(channels, height * width)
    # Inner products between every pair of channel rows
    return flattened.mm(flattened.t())

## Loss Functions and Optimization

In [None]:
# Read the experiment configuration from model_config.json
with open("model_config.json", "r") as json_file:
    data = json.load(json_file)

# Pull the individual settings out of the parsed configuration
layers, style_weights, content_weight, style_weight = (
    data[key]
    for key in ("layers", "style_weights", "content_weight", "style_weight")
)

## Training and Evaluation of Style Transfer Models

In [None]:
# Content loss: mean squared difference between target and content features
def calculate_content_loss(target_features, content_features, layer):
    """Return the mean squared error between the two feature maps at ``layer``."""
    difference = target_features[layer] - content_features[layer]
    return difference.pow(2).mean()


# Style loss: weighted sum of per-layer Gram-matrix differences
def calculate_style_loss(target_features, style_grams, style_weights):
    """Accumulate the weighted Gram-matrix loss over every layer in ``style_weights``.

    For each layer, the mean squared difference between the target's Gram
    matrix and the stored style Gram matrix is scaled by ``0.25`` and the
    layer weight, then divided by ``depth * height * width`` of the target
    feature map. Returns ``0`` when ``style_weights`` is empty.
    """
    accumulated = 0
    for layer_name, layer_weight in style_weights.items():
        activation = target_features[layer_name]
        activation_gram = gram_matrix(activation)
        _, channels, height, width = activation.shape
        gram_difference = activation_gram - style_grams[layer_name]
        weighted = layer_weight * (0.25 * torch.mean(gram_difference ** 2))
        accumulated += weighted / (channels * height * width)
    return accumulated


# Function to show the images and total loss
def plot_images_graph(images, total_losses, steps, checkpoints):
    """Show the snapshot images in a grid, then plot the total-loss curve.

    Args:
        images: Sequence of images accepted by ``imshow``. The first is titled
            "Initial Content", the last "Final Image", and every image in
            between "Iteration <n>".
        total_losses: Sequence of total-loss values, one per optimization step.
        steps: Number of optimization steps of the run that produced ``images``.
        checkpoints: Number of checkpoints taken during that run; together with
            ``steps`` it determines the iteration shown in the titles.
    """
    n_images = len(images)
    n_cols = 6
    # Ceiling division, with at least one row so the grid is always valid;
    # 12 images give the same 2 x 6 / 18 x 6 inch layout as before
    n_rows = max(1, -(-n_images // n_cols))
    # squeeze=False keeps `axes` a 2-D array even for a single row
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(18, 3 * n_rows), squeeze=False
    )

    # Flatten axes for easy iteration
    axes = axes.flatten()

    interval = steps // max(checkpoints, 1)
    last_index = n_images - 1
    for i, image in enumerate(images):
        if i == 0:
            title = "Initial Content"
        elif i == last_index:
            title = "Final Image"
        else:
            title = f"Iteration {i * interval:,}"
        axes[i].imshow(image)
        axes[i].axis("off")
        axes[i].set_title(title)

    # Blank out grid slots that have no image instead of showing empty frames
    for unused_axis in axes[n_images:]:
        unused_axis.axis("off")

    # Show the plot
    plt.tight_layout()
    plt.show()

    # Plot the total loss values
    plt.figure(figsize=(10, 5))
    plt.plot(total_losses, label="Total Loss")
    plt.xlabel("Steps")
    plt.ylabel("Total Loss")  # the y-axis carries the loss, not the iteration
    plt.title("Total Loss vs Iterations")
    plt.legend()
    plt.show()

In [None]:
# Per-model caches, keyed by the same names as model_dict
content_features = {}
style_features = {}
style_grams = {}

# Activations of the content image at each model's configured layers
content_features.update(
    {
        name: extract_features(content, model_dict[name].features, layers[name])
        for name in ("vgg-16", "vgg-19")
    }
)

# Activations of the style image at each model's configured layers
style_features.update(
    {
        name: extract_features(style, model_dict[name].features, layers[name])
        for name in ("vgg-19", "vgg-16")
    }
)

# Gram matrix of every style activation, per model
style_grams.update(
    {
        name: {
            layer: gram_matrix(feature)
            for layer, feature in style_features[name].items()
        }
        for name in ("vgg-16", "vgg-19")
    }
)

In [None]:
#  Function for the style transfer algorithm
def style_transfer_algorithm(
    model_name,
    content_loss_layer,
    iterations=2000,
    lr=0.003,
    checkpoints=10,
    alpha=1,
    beta=1e-3,
):
    """Optimize a copy of the content image towards the style image.

    Reads the module-level ``model_dict``, ``content_features``,
    ``style_grams``, ``style_weights``, ``layers``, ``content`` and ``device``.

    Args:
        model_name: Key selecting the model and its precomputed features.
        content_loss_layer: Feature key at which the content loss is computed.
        iterations: Number of Adam steps to run.
        lr: Learning rate of the Adam optimizer.
        checkpoints: Number of evenly spaced snapshots to record and print.
        alpha: Weight of the content loss in the total loss.
        beta: Weight of the style loss in the total loss.

    Returns:
        ``(images, total_losses)``: ``images`` holds the original content
        image, one snapshot per checkpoint and the final image, as numpy
        arrays; ``total_losses`` holds the total loss of every step.
    """
    model = model_dict[model_name]
    model_content_features = content_features[model_name]
    model_style_grams = style_grams[model_name]
    model_style_weights = style_weights[model_name]
    model_layers = layers[model_name]

    # Move to the device *before* enabling gradients: calling .to() on a tensor
    # that already requires grad can return a non-leaf copy, which the
    # optimizer cannot update
    target = content.detach().clone().to(device).requires_grad_(True)

    # Define the optimizer
    optimizer = optim.Adam([target], lr=lr)

    # Create a list to store the images and the total loss values
    images = []
    total_losses = []

    # Guard against checkpoints > iterations (or 0), which would otherwise
    # produce a modulo-by-zero below
    checkpoint_interval = max(1, iterations // max(1, checkpoints))

    # Iterate through the steps
    for step in tqdm(range(1, iterations + 1)):
        # Extract the features from the target image
        target_features = extract_features(target, model.features, model_layers)

        # Calculate the content and style loss
        content_loss = calculate_content_loss(
            target_features, model_content_features, content_loss_layer
        )
        style_loss = calculate_style_loss(
            target_features, model_style_grams, model_style_weights
        )

        # Weight the two terms with the alpha / beta passed by the caller; the
        # module-level content_weight / style_weight are not consulted here, so
        # the arguments actually control the content/style trade-off
        total_loss = alpha * content_loss + beta * style_loss

        # Update the target image
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # Append the total loss to the list
        total_losses.append(total_loss.item())

        # Display the images and total loss at the checkpoint steps
        if step % checkpoint_interval == 0:
            images.append(tensor_to_image(target.detach()))
            print(
                f"Step {step}/{iterations} - Total loss: {total_loss.item():.4f}, "
                f"Content loss: {content_loss.item():.4f}, Style loss: {style_loss.item():.4f}"
            )

    # Include the original content image at the start and the final target image at the end
    images.insert(0, tensor_to_image(content))
    images.append(tensor_to_image(target.detach()))

    return images, total_losses

In [None]:
# Shared settings for the style-transfer runs below
iterations, checkpoints = 2000, 10  # optimization steps / snapshots per run
alpha, beta = 1, 1e-3  # passed as alpha / beta to style_transfer_algorithm
lr = 0.003  # learning rate handed to the optimizer

## VGG-16 Style Transfer

In [None]:
# Run style transfer with VGG-16, taking the content loss at "conv4_2"
saved_images, total_losses = style_transfer_algorithm(
    model_name="vgg-16",
    content_loss_layer="conv4_2",
    iterations=iterations,
    lr=lr,
    checkpoints=checkpoints,
    alpha=alpha,
    beta=beta,
)

# Show the snapshot grid followed by the loss curve
plot_images_graph(
    images=saved_images,
    total_losses=total_losses,
    steps=iterations,
    checkpoints=checkpoints,
)

## VGG-19 Style Transfer

In [None]:
# Call the style transfer function for the VGG-19 model, passing the same
# alpha / beta as the VGG-16 run so both runs are configured alike
saved_images, total_losses = style_transfer_algorithm(
    "vgg-19",
    "conv4_3",
    iterations,
    lr,
    checkpoints,
    alpha,
    beta,
)

# Create a plot with the images and total loss values
plot_images_graph(saved_images, total_losses, iterations, checkpoints)

## Hyperparameter Tuning (VGG-19)

In [None]:
# Search space for the grid search below; each run is shortened to 1000 steps
iterations = 1000
learning_rates = [1e-4, 5e-4, 1e-3, 5e-3, 1e-2]
alphas = [1, 0.1, 0.01, 0.001, 0.0001]
betas = [0.001, 0.0001, 0.00001, 0.000001, 0.0000001]

In [None]:
best_loss = float("inf")
best_hyperparameters = {}

# Grid search over every (learning rate, alpha, beta) combination. The loop
# variables use a trial_ prefix so they do not overwrite the notebook-level
# lr / alpha / beta used by the earlier cells.
for trial_lr in learning_rates:
    for trial_alpha in alphas:
        for trial_beta in betas:
            print(
                f"-> Learning Rate: {trial_lr} | Alpha: {trial_alpha} | Beta: {trial_beta}"
            )

            # Tune on VGG-19, the model the best setting is applied to in the
            # next cell, and pass alpha / beta so each trial differs
            _, total_losses = style_transfer_algorithm(
                "vgg-19",
                "conv4_3",
                iterations,
                trial_lr,
                1,
                trial_alpha,
                trial_beta,
            )

            # NOTE(review): if the total loss is scaled by alpha / beta, final
            # losses of trials with different weights are not on the same
            # scale — confirm that raw total loss is the intended criterion.
            # Check if the total loss is the best so far
            if total_losses[-1] < best_loss:
                best_loss = total_losses[-1]
                best_hyperparameters = {
                    "lr": trial_lr,
                    "alpha": trial_alpha,
                    "beta": trial_beta,
                }

# Print the best hyperparameters
print("Best Hyperparameters:")
print(best_hyperparameters)

In [None]:
# Call the style transfer function for the VGG-19 model with the best hyperparameters
# A single checkpoint is recorded; the same count is given to the plot so the
# intermediate image is titled with the iteration it was actually taken at
final_checkpoints = 1
saved_images, total_losses = style_transfer_algorithm(
    "vgg-19",
    "conv4_3",
    iterations,
    best_hyperparameters["lr"],
    final_checkpoints,
    best_hyperparameters["alpha"],
    best_hyperparameters["beta"],
)

# Create a plot with the images and total loss values
plot_images_graph(saved_images, total_losses, iterations, final_checkpoints)