In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from transformers import AutoImageProcessor, AutoModel
from tqdm.notebook import tqdm
from PIL import Image
import numpy as np

In [2]:
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-small")
model = AutoModel.from_pretrained("facebook/dinov2-small")
model = model.to("mps")

In [5]:
target_img = Image.open("./grant-headshot.png")

In [6]:
def get_img_vects(imgs):
    inputs = processor(images=imgs, return_tensors="pt")
    inputs = {k: v.to("mps") for k, v in inputs.items()}
    outputs = model(**inputs)
    # vects = outputs.last_hidden_state.mean(axis=1).detach().cpu().numpy()
    vects = outputs.last_hidden_state[:, 1:, :].detach().cpu().numpy()
    # normalize
    # vects /= np.linalg.norm(vects, axis=1, keepdims=True)
    return vects

In [7]:
target_vect = get_img_vects(target_img)[0]

  return torch._C._nn.upsample_bicubic2d(input, output_size, align_corners, scale_factors)


In [10]:
class Generator(nn.Module):
    def __init__(self, input_size, hidden_dim, output_size):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim * 2),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim * 4),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim * 4, output_size),
            nn.Tanh()
        )

    def forward(self, x):
        return self.net(x)

In [14]:
# Hyperparameters
input_size = 100  # Size of the noise vector
hidden_dim = 128
img_size = 32
output_size = img_size * img_size  # Assuming we're generating images of size 32x32
learning_rate = 0.0002
batch_size = 32
epochs = 500

# Initialize models
generator = Generator(input_size, hidden_dim, output_size)
# discriminator = Discriminator(output_size, hidden_dim)

# Loss and optimizer
# criterion = nn.BCELoss()
g_optimizer = optim.Adam(generator.parameters(), lr=learning_rate)
# d_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate)

def criterion(x):
    # Convert individual to image
    x = x.detach().cpu().numpy().astype(np.uint8).reshape(-1, img_size, img_size).astype(np.uint8)

    imgs = []
    for i in range(len(x)):
        img = Image.fromarray(x[i] * 255).convert("RGB")
        # img.save(f"gen_img_{i}.png")
        imgs.append(img)

    # Calculate similarity to target image (placeholder function)
    img_vects = get_img_vects(imgs)
    errs = ((img_vects - target_vect) ** 2).mean(axis=-1).mean(axis=-1)
    return torch.tensor(errs)


pbar = tqdm(range(epochs))
# Training loop
for epoch in pbar:
    # for i in range(0, len(train_data), batch_size):
        # Load a batch & transform to vectors
        # batch = train_data[i:i+batch_size]
        # batch = get_img_vects(batch)
        # Train generator
    g_optimizer.zero_grad()
    g_loss = criterion(generator(torch.randn(batch_size, input_size)))
    g_loss.backward()
    g_optimizer.step()

    # Train discriminator
    # d_optimizer.zero_grad()
    # d_loss = criterion(discriminator(batch))
    # d_loss.backward()
    # d_optimizer.step()

    # Show progress
    pbar.update(1)
    pbar.set_description(f"Epoch [{epoch}/{epochs}], g_loss: {g_loss.item():.4f}")


# Save the model
# torch.save(generator.state_dict(), "generator.ckpt")

  0%|          | 0/500 [00:00<?, ?it/s]

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [None]:
# Define the generator
class Generator(nn.Module):
    def __init__(self, input_size, hidden_dim, output_size):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim * 2),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim * 4),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim * 4, output_size),
            nn.Tanh()
        )

    def forward(self, x):
        return self.net(x)

# Define the discriminator
class Discriminator(nn.Module):
    def __init__(self, input_size, hidden_dim):
        super(Discriminator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_dim * 4),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim * 4, hidden_dim * 2),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

# Hyperparameters
input_size = 100  # Size of the noise vector
hidden_dim = 128
output_size = 32 * 32  # Assuming we're generating images of size 32x32
learning_rate = 0.0002
batch_size = 64
epochs = 200

# Initialize models
generator = Generator(input_size, hidden_dim, output_size)
discriminator = Discriminator(output_size, hidden_dim)

# Loss and optimizer
criterion = nn.BCELoss()
g_optimizer = optim.Adam(generator.parameters(), lr=learning_rate)
d_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    for batch in DataLoader:  # Assuming 'DataLoader' is defined and loaded with your image dataset
        # Train discriminator
        real_images = batch
        real_labels = torch.ones(batch_size, 1)
        fake_images = generator(torch.randn(batch_size, input_size))
        fake_labels = torch.zeros(batch_size, 1)
        
        d_optimizer.zero_grad()
        real_loss = criterion(discriminator(real_images), real_labels)
        fake_loss = criterion(discriminator(fake_images.detach()), fake_labels)
        d_loss = real_loss + fake_loss
        d_loss.backward()
        d_optimizer.step()

        # Train generator
        g_optimizer.zero_grad()
        fake_images = generator(torch.randn(batch_size, input_size))
        g_loss = criterion(discriminator(fake_images), real_labels)  # Trick discriminator
        g_loss.backward()
        g_optimizer.step()
    
    print(f"Epoch {epoch+1}/{epochs} | D Loss: {d_loss.item()} | G Loss: {g_loss.item()}")