In [1]:
import os
import random

import numpy as np
import pandas as pd
from optuna import trial
from sklearn.model_selection import train_test_split
from sympy.physics.units import length
from torchvision import transforms, models
from torchvision.transforms.functional import to_pil_image
from torch.utils.data import Dataset, DataLoader, Subset, ConcatDataset

import torch
import torch.nn as nn
import torch.optim as optim
import optuna
import wandb
# Project utilities
import utils
from train import train_model_with_hyperparams

# Set seed
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

np.random.seed(SEED)
random.seed(SEED)
# torch.backends.cudnn.deterministic = True
# torch.use_deterministic_algorithms = True

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Check if you're working locally or not
if not (os.path.exists(utils.ROOT) and os.path.exists(utils.OPTIMIZED_DIR)):
    print(f"[!] You are NOT on the project's directory [!]\n"
          f"Please run the following command (in either CMD or anaconda prompt): \n"
          f"jupyter notebook --notebook-dir PROJECT_DIR\n"
          r"Where PROJECT_DIR is the project's directory in your computer e.g: C:\Users\amitr5\PycharmProjects\deep_van_gogh")

### Loading our data
We will load the optimized datasets from our custom dataset object


In [3]:
class NumPyDataset(Dataset):
    def __init__(self, file_path):
        data = np.load(file_path)
        self.images = data["images"]
        self.labels = data["labels"]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        x = torch.tensor(self.images[idx], dtype=torch.float32)
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y

dataset = NumPyDataset(utils.OPTIMIZED_DIR + '/dataset.npz')

You can find the optimized dataset files <a href="https://drive.google.com/drive/folders/1TBlNcRsRHJ7_rxh_h7_yn_-Ak66Uj_mp?usp=sharing">HERE</a><br/>
Loading the train and test datasets:

In [4]:
classes = pd.read_csv(utils.CSV_PATH)
train_indices, val_indices = train_test_split(classes[classes['subset'] == 'train'].index.tolist(), test_size=0.2, random_state=SEED)
train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, classes[classes['subset'] == 'test'].index.tolist())

### Data Augmentation

In [38]:
# torch.manual_seed(SEED)
# n_times = 25
# dropout_transform = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     *([transforms.RandomErasing(p=0.5, scale=(0.01, 0.01), ratio=(1, 1))]*n_times),
#     transforms.Grayscale(num_output_channels=3),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229,0.224, 0.225])
# ])
# dropout_dataset = Subset(preprocessing.ImageFolderForBinaryClassification(root=ROOT, transform=dropout_transform, target='is_van_gogh'), train_indices)
# augmented_train_dataset = ConcatDataset([dropout_dataset, train_dataset])
# augmented_train_dataset = train_dataset
# train_loader = DataLoader(augmented_train_dataset, batch_size=128, shuffle=True, num_workers=4, pin_memory=True, prefetch_factor=8)
# val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=4)

# Fine tuning VGG19

In [5]:
# Load pre-trained VGG19 model
vgg_model = models.vgg19(weights=models.VGG19_Weights.DEFAULT).to(device)
for param in vgg_model.features.parameters():
        param.requires_grad = False

# Modify the classifier to fit our problem (2 classes)
vgg_model.classifier[-1] = nn.Linear(4096, 2).to(device) # Replaces the final layer of the VGG16 classifier with a new fully connected layer.
vgg_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [6]:
# Optuna objective function
def objective_vgg(trial):
    # Hyperparameter suggestions
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True) # same as suggest_loguniform
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-4, log=True)
    batch_size = trial.suggest_int("batch_size", 32, 128, step=16)
    patience = 10
    epochs = 5

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # Load the train DataLoader with the chosen batch_size
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) # Load the val DataLoader with the chosen batch_size

    # Define optimizer and loss function
    criterion = nn.CrossEntropyLoss() # Classification.
    optimizer = optim.Adam(vgg_model.parameters(), lr=learning_rate, weight_decay=weight_decay) # Adam, like always, with the chosen parameters from Optuna

    # Initialize Weights & Biases - the values in the config are the properties of each trial.
    wandb.init(project="deep_van_gogh",
               config={
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "patience": patience,
        "batch_size": batch_size,
        "epochs": epochs,
        "architecture": "VGG19",
        "dataset": "Post_Impressionism"
    },
    name=f"trial_{trial.number}") # The name that will be saved in the W&B platform

    # Train the model and get the best validation loss
    best_val_loss = train_model_with_hyperparams(vgg_model, train_loader, val_loader, optimizer, criterion, epochs=epochs, patience=patience, trial=trial, device=device)

    # Finish the Weights & Biases run
    wandb.finish()

    # Return best validation loss as the objective to minimize
    return best_val_loss

# TODO:
# make an objective with best accuracy

In [7]:
study = optuna.create_study(direction='minimize')
study.optimize(objective_vgg, n_trials=3)

[I 2025-01-21 20:13:12,818] A new study created in memory with name: no-name-37ba53f2-8534-4a81-af06-1c10d0469c7a
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: amitr5 (amitr5-tel-aviv-university). Use `wandb login --relogin` to force relogin


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▇██
Train Loss,█▃▂▁▁
Validation Accuracy,█▃▆▁▁
Validation Loss,▁▄▆█▇

0,1
Epoch,5.0
Train Accuracy,0.99111
Train Loss,0.05113
Validation Accuracy,0.95486
Validation Loss,0.51677


[I 2025-01-21 20:14:46,671] Trial 0 finished with value: 0.14874800785907152 and parameters: {'learning_rate': 0.000407379705417458, 'weight_decay': 2.3231833057759346e-06, 'batch_size': 32}. Best is trial 0 with value: 0.14874800785907152.


0,1
Epoch,▁▃▅▆█
Train Accuracy,█▃▁▆▆
Train Loss,▁▃█▅▄
Validation Accuracy,▅▁▆▇█
Validation Loss,▁█▇▂▆

0,1
Epoch,5.0
Train Accuracy,0.99384
Train Loss,0.04594
Validation Accuracy,0.96306
Validation Loss,1.16989


[I 2025-01-21 20:17:18,284] Trial 1 finished with value: 0.6112649130899963 and parameters: {'learning_rate': 0.00028294468154355996, 'weight_decay': 4.243060529516283e-06, 'batch_size': 48}. Best is trial 0 with value: 0.14874800785907152.


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁██▅▅
Train Loss,▇▁▁█▄
Validation Accuracy,▆▆▆█▁
Validation Loss,▄█▆▆▁

0,1
Epoch,5.0
Train Accuracy,0.99932
Train Loss,0.00425
Validation Accuracy,0.96033
Validation Loss,0.90429


[I 2025-01-21 20:18:56,843] Trial 2 finished with value: 0.904289045953351 and parameters: {'learning_rate': 1.274243947801123e-05, 'weight_decay': 1.3021623342915633e-06, 'batch_size': 32}. Best is trial 0 with value: 0.14874800785907152.


# Fine tuning AlexNet

In [5]:
# Load the AlexNet model 
alexnet = models.alexnet(weights=models.AlexNet_Weights.DEFAULT).to(device)
for param in alexnet.features.parameters():
        param.requires_grad = False

# Modify the classifier to fit our problem (2 classes)
alexnet.classifier[-1] = nn.Linear(4096, 2).to(device) # Replaces the final layer of the VGG16 classifier with a new fully connected layer.
alexnet

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [10]:
def objective_alexnet(trial):
    # Hyperparameter suggestions
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True) # same as suggest_loguniform
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-4, log=True)
    batch_size = trial.suggest_int("batch_size", 32, 128, step=16)
    patience = 10
    epochs = 5

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # Load the train DataLoader with the chosen batch_size
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) # Load the val DataLoader with the chosen batch_size

    # Define optimizer and loss function
    criterion = nn.CrossEntropyLoss() # Classification.
    optimizer = optim.Adam(alexnet.parameters(), lr=learning_rate, weight_decay=weight_decay) # Adam, like always, with the chosen parameters from Optuna

    # Initialize Weights & Biases - the values in the config are the properties of each trial.
    wandb.init(project="deep_van_gogh",
               config={
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "patience": patience,
        "batch_size": batch_size,
        "epochs": epochs,
        "architecture": "AlexNet",
        "dataset": "Post_Impressionism"
    },
    name=f"trial_{trial.number}") # The name that will be saved in the W&B platform

    # Train the model and get the best validation loss
    best_val_loss = train_model_with_hyperparams(alexnet, train_loader, val_loader, optimizer, criterion, epochs=epochs, patience=patience, trial=trial, device=device)

    # Finish the Weights & Biases run
    wandb.finish()

    # Return best validation loss as the objective to minimize
    return best_val_loss

In [11]:
study = optuna.create_study(direction='minimize')
study.optimize(objective_alexnet, n_trials=3)

[I 2025-01-21 20:45:37,200] A new study created in memory with name: no-name-5ee93e63-640f-4393-921e-74ad86a050fc
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: amitr5 (amitr5-tel-aviv-university). Use `wandb login --relogin` to force relogin


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▄▃▂▁
Validation Accuracy,▁████
Validation Loss,█▂▂▁▂

0,1
Epoch,5.0
Train Accuracy,0.99316
Train Loss,0.02918
Validation Accuracy,0.96717
Validation Loss,0.10635


[I 2025-01-21 20:45:58,907] Trial 0 finished with value: 0.10180227587154968 and parameters: {'learning_rate': 3.5392983321532025e-05, 'weight_decay': 2.7288121399769764e-06, 'batch_size': 80}. Best is trial 0 with value: 0.10180227587154968.


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▆▆██
Train Loss,█▅▃▂▁
Validation Accuracy,█▁▁▁█
Validation Loss,▁▂▅█▄

0,1
Epoch,5.0
Train Accuracy,0.99932
Train Loss,0.00633
Validation Accuracy,0.97127
Validation Loss,0.11309


[I 2025-01-21 20:46:14,757] Trial 1 finished with value: 0.10745318776548225 and parameters: {'learning_rate': 1.9875104143045633e-05, 'weight_decay': 3.0896973704863426e-05, 'batch_size': 80}. Best is trial 0 with value: 0.10180227587154968.


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▇██
Train Loss,█▂▁▁▁
Validation Accuracy,▁█▂▁▅
Validation Loss,▁▁█▄▃

0,1
Epoch,5.0
Train Accuracy,0.98222
Train Loss,0.05821
Validation Accuracy,0.96033
Validation Loss,0.19284


[I 2025-01-21 20:46:33,206] Trial 2 finished with value: 0.11496180523742403 and parameters: {'learning_rate': 0.0007753536839104351, 'weight_decay': 6.0926614960472714e-06, 'batch_size': 48}. Best is trial 0 with value: 0.10180227587154968.


analysing results

# Style transfer function

In [4]:
from PIL.Image import Image


#define a function to load an image and pre-process it
def load_image(img_path, shape=(224, 224)):
    image = Image.open(img_path).convert('RGB')
    # Define transformation to resize, normalize, and convert to tensor
    in_transform = transforms.Compose([
        transforms.Resize(shape),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    # Apply transformations, remove alpha channel, and add batch dimension
    image = in_transform(image)[:3, :, :].unsqueeze(0)
    return image.to(device)
#define a function to extract features from the network
def get_features(image, model, layers):
    features = {}
    x = image
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features

In [None]:
def style_transfer(model, style_img_path, content_img_path, content_weight=1, style_weight=1e3, num_steps=5001, model_name='vgg19_pretrained'):
    model = model.features #Gives us access to the layers of features

    layers = {
         '0': 'conv1_1', '5': 'conv2_1', '10': 'conv3_1', '19': 'conv4_1',
         '21': 'conv4_2'
    }

    style_weights = {
        'conv1_1': 0.5, 'conv2_1': 0.5, 'conv3_1': 0.5, 'conv4_1': 0.3
    }

    content_layer = 'conv4_2'
    # Prepare model for evaluation, disabling gradient computation
    model.to(device).eval()
    for param in model.parameters():
         param.requires_grad_(False)
        # Load and preprocess the content and style images
    content = load_image(content_img_path).to(device)
    style = load_image(style_img_path).to(device)
         # Extract features from content and style images
    content_features = get_features(content, model, layers)
    style_features = get_features(style, model, layers)
    target = content.clone().requires_grad_(True).to(device)