In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import legacy  # provided in the official StyleGAN2 repo
import dnnlib  # provided in the official StyleGAN2 repo

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------------------
# Load the pretrained StyleGAN2 generator
# -------------------------------
# This URL points to NVIDIA's pretrained FFHQ model.
network_pkl = "https://nvlabs-fi-cdn.nvidia.com/stylegan2/networks/stylegan2-ffhq-config-f.pkl"
with dnnlib.util.open_url(network_pkl) as f:
    # G_ema is the exponential moving average version of the generator
    G = legacy.load_network_pkl(f)['G_ema'].to(device)

# The generator is only used as a fixed decoder by the training loop below.
# Put it in eval mode and freeze its weights explicitly, so that
# backpropagating through it never stores gradients for its parameters,
# independent of the state the checkpoint was saved in.
G = G.eval().requires_grad_(False)

# -------------------------------
# Define a simple latent generator
# -------------------------------
class LatentGenerator(nn.Module):
    """Learnable map from a sampled latent ``z`` to a latent code ``w``.

    The module consists of a single fully connected layer, ``fc``.

    Args:
        input_dim: Width of the incoming ``z`` vectors.
        output_dim: Width of the produced ``w`` vectors.
    """

    def __init__(self, input_dim=512, output_dim=512):
        super().__init__()
        # One affine layer; swap in a deeper stack or non-linearities if needed.
        self.fc = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, z):
        """Return ``fc(z)``, i.e. the ``w`` code for each row of ``z``."""
        return self.fc(z)

# Build the trainable z -> w mapping and move it to the selected device.
latent_gen = LatentGenerator()
latent_gen = latent_gen.to(device)

# Adam is handed the latent generator's parameters only.
optimizer = optim.Adam(params=latent_gen.parameters(), lr=1e-4)

# -------------------------------
# Define an attribute loss function
# -------------------------------
# For demonstration we use MSE loss comparing the generated image with a target image.
# In practice, this might be replaced by a perceptual or attribute-specific loss.
def attribute_loss(generated_image, target_image):
    """Return the mean squared error between two image tensors.

    Args:
        generated_image: Tensor produced by the generator.
        target_image: Reference tensor to compare against.

    Returns:
        A scalar tensor holding the element-wise squared difference,
        averaged over all elements.
    """
    # Functional form of nn.MSELoss() with its default 'mean' reduction.
    return nn.functional.mse_loss(generated_image, target_image)

# -------------------------------
# Prepare a dummy target image
# -------------------------------
# Placeholder target: Gaussian noise with one sample and three channels at the
# generator's output resolution. Swap in a real image (or features) that
# captures the desired attributes, keeping the same dimensions.
target_shape = (1, 3, G.img_resolution, G.img_resolution)
target_image = torch.randn(target_shape, device=device)

# -------------------------------
# Training Loop
# -------------------------------
num_steps = 1000

# Loop-invariant values, looked up once instead of on every step.
# StyleGAN2 expects one w vector per synthesis layer (e.g. 18 for FFHQ models).
num_ws = G.mapping.num_ws
# Width of the sampled z follows the latent generator's input layer rather
# than a hard-coded 512, so the two cannot drift apart.
z_dim = latent_gen.fc.in_features

for step in range(num_steps):
    optimizer.zero_grad()

    # 1. Sample a random latent vector z (shape: [1, z_dim])
    z = torch.randn(1, z_dim, device=device)

    # 2. Map z to w using the latent generator
    w = latent_gen(z)

    # 3. Broadcast the single w to every synthesis layer: [1, num_ws, w_dim]
    w = w.unsqueeze(1).repeat(1, num_ws, 1)

    # 4. Generate image using pretrained StyleGAN2 generator.
    #    'noise_mode' can be set to 'const' for deterministic output.
    generated_image = G.synthesis(w, noise_mode='const')

    # 5. Compute the loss comparing the generated image with the target image
    loss = attribute_loss(generated_image, target_image)

    # 6. Backpropagate the loss and update the latent generator
    loss.backward()
    optimizer.step()

    if step % 100 == 0:
        print(f"Step {step}, Loss: {loss.item()}")


Setting up PyTorch plugin "bias_act_plugin"... 

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


Done.
Setting up PyTorch plugin "upfirdn2d_plugin"... 

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


Done.
Step 0, Loss: 1.6445707082748413
Step 100, Loss: 1.1666024923324585
Step 200, Loss: 1.3411588668823242
Step 300, Loss: 1.1835885047912598
Step 400, Loss: 1.278956651687622
Step 500, Loss: 1.1310561895370483
Step 600, Loss: 1.5273606777191162


KeyboardInterrupt: 

In [1]:
import torch
import legacy  # from the official StyleGAN2 repo
import dnnlib  # from the official StyleGAN2 repo
import PIL.Image

# Pick the compute device: CUDA when present, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# -------------------------------
# Load the pretrained StyleGAN2 generator
# -------------------------------
# Checkpoint location of the pretrained FFHQ model.
network_pkl = "https://nvlabs-fi-cdn.nvidia.com/stylegan2/networks/stylegan2-ffhq-config-f.pkl"
with dnnlib.util.open_url(network_pkl) as pkl_file:
    # Keep only the 'G_ema' entry, the generator used at inference.
    G = legacy.load_network_pkl(pkl_file)['G_ema']
G = G.to(device)

# -------------------------------
# Generate an image from a latent vector
# -------------------------------
# 1. Sample a random latent vector z. Its width is read from the generator
#    (G.z_dim) instead of being hard-coded to 512.
z = torch.randn(1, G.z_dim, device=device)
w = G.mapping(z, None)  # This might return either [1, w_dim] or [1, num_ws, w_dim]

# Number of style vectors the synthesis network consumes (e.g. 18 for FFHQ).
num_ws = G.mapping.num_ws

if w.ndim == 2:
    # A single w per sample: replicate it for each synthesis layer.
    w = w.unsqueeze(1).repeat(1, num_ws, 1)
elif w.shape[1] != num_ws:
    # Already 3D but with a different count: keep the first num_ws vectors.
    w = w[:, :num_ws, :]


# # 2. Map z to the intermediate latent space w.
# #    Note: The mapping network usually takes a second argument (labels); here it's set to None.
# w = G.mapping(z, None)

# # 3. StyleGAN2's synthesis network expects a separate w for each synthesis layer.
# #    For example, the FFHQ model uses 18 layers. We replicate w accordingly.
# num_ws = G.mapping.num_ws  # typically 18 for FFHQ
# w = w.unsqueeze(1).repeat(1, num_ws, 1)

# 4. Generate the image using the synthesis network.
#    Setting noise_mode='const' makes the output deterministic.
#    This is inference only, so autograd tracking is switched off.
with torch.no_grad():
    generated_image = G.synthesis(w, noise_mode='const')

# 5. Post-process the image:
#    The generator outputs images in the range [-1, 1]. Rescale them to
#    [0, 255] and round to the nearest integer; a bare cast to uint8 would
#    truncate toward zero and darken pixels by up to one level.
img = ((generated_image.clamp(-1, 1) + 1) * 127.5).round()
# Channels-last layout, first sample of the batch, as a uint8 NumPy array.
img = img.permute(0, 2, 3, 1).to(torch.uint8)[0].cpu().numpy()

import matplotlib.pyplot as plt
plt.imshow(img)
plt.show()



KeyboardInterrupt: 

In [None]:
# Print the shape of `img` as a quick sanity check.
# NOTE(review): presumably this is the (height, width, channels) array built
# by the plotting cell; another cell also rebinds `img`, so the result depends
# on which cell ran last. Confirm the intended execution order.
print(img.shape)

(1024, 1024, 3)


In [None]:
import os
import torch
from torchvision.utils import save_image
import dnnlib
import legacy

# Pick the compute device: CUDA when present, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained StyleGAN2 generator for cars.
# NOTE(review): this checkpoint URL uses plain HTTP and the file is then
# deserialized by the loader; consider an HTTPS source if one exists (confirm).
network_pkl = "http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-e.pkl"
with dnnlib.util.open_url(network_pkl) as pkl_file:
    G = legacy.load_network_pkl(pkl_file)['G_ema']
G = G.to(device)

# How many images to render and where to put them.
num_images = 1000
output_dir = '../styleGanImages'

# Make sure the destination exists; an existing directory is not an error.
os.makedirs(name=output_dir, exist_ok=True)

# Show the width of the latent vectors the generator expects.
print(G.z_dim)

# Generate and save `num_images` random images.
# A dedicated, seeded RNG makes a repeated (or interrupted and restarted) run
# draw the same latent for each filename, without touching the global RNG.
latent_rng = torch.Generator().manual_seed(0)

# Inference only: switch off autograd tracking for the whole loop.
with torch.no_grad():
    for i in range(num_images):
        # Sample a latent vector z of shape [1, G.z_dim] on the CPU generator,
        # then move it to the compute device.
        z = torch.randn([1, G.z_dim], generator=latent_rng).to(device)

        # Obtain intermediate latent code w using the mapping network.
        # The second argument is conditioning; here it is None.
        w = G.mapping(z, None)

        # Generate the image using the synthesis network with constant noise.
        img = G.synthesis(w, noise_mode='const')

        # The generated image is usually in the range [-1, 1]; convert it to [0, 1]
        img = (img.clamp(-1, 1) + 1) / 2

        # Save the image; adjust the filename formatting as needed.
        filename = os.path.join(output_dir, f'image_{i:04d}.png')
        save_image(img, filename)

print(f"Generated {num_images} images in {output_dir}")


512


KeyboardInterrupt: 