In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the CelebA dataset and remember where it was placed.
# NOTE(review): the returned path is not used by the rest of this notebook;
# the later cells read from hard-coded '/kaggle/input/celeba-dataset/...' paths.
jessicali9530_celeba_dataset_path = kagglehub.dataset_download('jessicali9530/celeba-dataset')

print('Data source import complete.')


Member 1 ID: 2020205177
Could not find another member as I finished the project really early and quickly. I am sorry for that.

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the per-image attribute table. Later cells rely on its 'image_id' and
# 'Smiling' columns. info() prints the column list, dtypes and non-null counts.
df_attr = pd.read_csv('/kaggle/input/celeba-dataset/list_attr_celeba.csv')
df_attr.info()

In [None]:
# Load the table that assigns each image to a split. Later cells join it on
# 'image_id' and filter on its 'partition' column (0 / 1 / 2).
df_eval = pd.read_csv('/kaggle/input/celeba-dataset/list_eval_partition.csv')
df_eval.describe()

In [None]:
#cloning STYLEGAN2 ADA repository
!git clone https://github.com/NVlabs/stylegan2-ada-pytorch.git

#change the current working directory to the cloned repository
import os
os.chdir('stylegan2-ada-pytorch')

In [None]:
import requests

#define the pretrained model URL and the local filename
ffhq_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl'
pkl_file_name = 'ffhq.pkl'

#download the file
print(f"Downloading {pkl_file_name}...")

# stream=True only helps if the body is consumed in chunks; reading
# response.content would pull the whole checkpoint into memory first.
with requests.get(ffhq_url, stream=True, timeout=60) as response:
    # Fail loudly on 4xx/5xx instead of writing an error page into ffhq.pkl.
    response.raise_for_status()
    with open(pkl_file_name, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1 << 20):
            f.write(chunk)

print("Download complete.")

In [None]:
import torch
import pickle
from PIL import Image

#loading G_ema. This is the moving average generator, instead of a generator that
#is stopped somewhere along the training steps
# NOTE(review): pickle.load can execute arbitrary code from the file, so only load
# checkpoints from a trusted source (here: the file downloaded in the previous cell).
# Presumably unpickling also needs the cloned repository's modules to be importable,
# which is why the working directory was changed earlier -- TODO confirm.
print("Loading model...")
with open(pkl_file_name, 'rb') as f:
    G = pickle.load(f)['G_ema'].cuda()  # Move to GPU

In [None]:
#displaying 10 random images, generated by random sampling vectors from z-space
from IPython.display import display

NUM_PREVIEW_IMAGES = 10

# The seed used to be assigned inside the loop but never applied, so every run
# produced different faces. Seed once, before the loop, so the 10 images differ
# from each other but are the same on every run.
seed = 123
torch.manual_seed(seed)

# Pure inference: no autograd bookkeeping is needed.
with torch.no_grad():
    for i in range(1, NUM_PREVIEW_IMAGES + 1):
        #generating a random Z-space vector
        #the tensor shape is important
        z = torch.randn([1, G.z_dim], device='cuda')

        #generating the image
        #G() is a shortcut for G.synthesis(G.mapping(z, c))
        #so we can just feed G with a z-vector and it will give us an image
        img_tensor = G(z, None)

        #to use PIL, we need to move the image to cpu
        img_numpy = img_tensor.cpu().numpy()[0]

        #the image that is generated by the model has pixel values of range -1 to 1.
        #we want the range 0,255 as normal.
        #also the tensor that we got is structured like C x H x W.
        #we want H x W x C. so we transpose.
        img_numpy = (img_numpy.transpose(1, 2, 0) * 127.5 + 128).clip(0, 255).astype(np.uint8)

        #creating the usable image
        img = Image.fromarray(img_numpy, 'RGB')
        # NOTE: the same file is overwritten on every iteration; only the last
        # image survives on disk, the others are only shown inline.
        img.save('generated_test_image.png')

        print("Image generated successfully! (generated_test_image.png)")
        display(img)

In [None]:
#creating the target column (I chose smiling attribute):
#positive attribute values become 1, everything else becomes 0.
is_smiling = df_attr['Smiling'].gt(0)
df_attr['Smiling'] = is_smiling.astype(int)

In [None]:
# Keep only the image id and the target column. A single column selection
# replaces the previous loop, which copied the whole frame once per dropped
# column. Original column order is preserved, and a missing column is simply
# not selected (as before) rather than raising.
KEPT_COLUMNS = ('image_id', 'Smiling')
df_attr = df_attr.loc[:, [col for col in df_attr.columns if col in KEPT_COLUMNS]].copy()

In [None]:
# y is another name for the same df_attr object (no copy is made here).
y = df_attr
y

In [None]:
# Inspect the partition table before merging it with the target column.
df_eval

In [None]:
#joining the target table y with the partition table on their shared
#image_id key (default join type, i.e. only ids present in both are kept).
df_merged = y.merge(df_eval, on='image_id')

In [None]:
# Inspect the merged table (image_id, Smiling, partition).
df_merged

In [None]:
#list_eval_partition.csv encodes the split in its 'partition' column:
#0 = train, 1 = validation, 2 = test. Each subset gets a fresh 0..n-1 index.
_partition_codes = df_merged['partition']

df_train = df_merged.loc[_partition_codes.eq(0)].reset_index(drop=True)
df_val = df_merged.loc[_partition_codes.eq(1)].reset_index(drop=True)
df_test = df_merged.loc[_partition_codes.eq(2)].reset_index(drop=True)

In [None]:
# Report the size of each split (the third line used to mislabel the test
# split as "Validation Samples").
print(f"Training Samples: {len(df_train)}")
print(f"Validation Samples: {len(df_val)}")
print(f"Test Samples: {len(df_test)}")

In [None]:
#here I will be creating my custom pytorch dataset.
import torch
from torch.utils.data import Dataset
from torchvision import transforms

class CelebA_Dataset(Dataset):
    """Map-style dataset yielding (image tensor, smiling label) pairs.

    Args:
        dataframe: table with an 'image_id' column (file name relative to
            ``img_dir``) and a 'Smiling' column (numeric label).
        img_dir: directory that contains the image files.
        transform: callable applied to the loaded PIL image. When ``None``,
            a resize-to-224 / ToTensor / ImageNet-normalize pipeline is used.
    """

    def __init__(self, dataframe, img_dir, transform = None):
        self.img_dir = img_dir
        self.dataframe = dataframe

        if transform is None:
            transform = transforms.Compose([
                #standard transforms, resizing is for ResNet18.
                transforms.Resize((224,224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        self.transform = transform

    def __len__(self):
        """Number of rows (= samples) in the backing dataframe."""
        return len(self.dataframe)

    def __getitem__(self,idx):
        """Load the idx-th image and return ``(transformed image, float32 label)``."""
        row = self.dataframe.iloc[idx]
        img_path = os.path.join(self.img_dir, row['image_id'])

        # The context manager closes the file handle right away (important with
        # several DataLoader workers). convert('RGB') forces the pixel data to be
        # read while the file is open and guarantees 3 channels for Normalize.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        # self.transform is always set by __init__, so it is applied
        # unconditionally; the former `if self.transform:` guard left
        # `image_tensor` undefined whenever it evaluated to False.
        image_tensor = self.transform(image)

        return image_tensor, torch.tensor(row['Smiling'], dtype = torch.float32)

In [None]:
#wrapping the train / validation frames in datasets and batching them
from torch.utils.data import DataLoader

IMG_DIR = '/kaggle/input/celeba-dataset/img_align_celeba/img_align_celeba'
BATCH_SIZE = 64
NUM_WORKERS = 4

train_dataset = CelebA_Dataset(df_train, IMG_DIR)
val_dataset = CelebA_Dataset(df_val, IMG_DIR)

# Settings shared by both loaders; only the shuffling differs
# (training data is reshuffled every epoch, validation order is fixed).
_loader_options = {'batch_size': BATCH_SIZE, 'num_workers': NUM_WORKERS}

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

print("DataLoaders created. Ready for model setup.")

In [None]:
#building the smile classifier
import torch.nn as nn
from torchvision import models

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Device: {device}')

#ResNet18 initialised with its ImageNet weights
pretrained_weights = models.ResNet18_Weights.IMAGENET1K_V1
model = models.resnet18(weights=pretrained_weights)

#swap the final fully connected layer for a single-output layer; that one
#output is the logit of the binary decision
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1)

model = model.to(device)
print(f"ResNet18 model loaded and modified. Final layer output size: {model.fc.out_features}")


In [None]:
import torch.optim as optim
import torch.nn.functional as F

#training setup: binary cross-entropy on raw logits as the loss function
#(the model's single output is not passed through a sigmoid before the loss),
#Adam as the optimizer over all model parameters, and 5 training epochs.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3)
NUM_EPOCHS = 5

In [None]:
#here is the training function. since the model is already in Pytorch, it is
#very high level and usually simple to implement
def train_model(model, loader, criterion, optimizer, device):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        model: network producing one logit per sample (output shape [B, 1]).
        loader: iterable of ``(inputs, labels)`` batches.
        criterion: loss taking ``(logits, labels)`` with matching shapes.
        optimizer: optimizer over ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the number
        of samples actually processed.

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    """
    model.train()
    total_loss = 0.0
    samples_seen = 0

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # squeeze(-1) drops only the trailing logit dimension ([B, 1] -> [B]).
        # A bare squeeze() would also drop the batch dimension of a size-1
        # batch, and the loss would then reject the shape mismatch with labels.
        logits = model(inputs).squeeze(-1)

        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        total_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Average over the samples seen in this epoch, which stays correct even
    # when the loader drops or sub-samples part of its dataset.
    return total_loss / samples_seen

In [None]:
#evaluate function. will be using this on val_loader
def evaluate_model(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model's logits are turned into probabilities with a sigmoid and rounded
    to a hard 0/1 prediction, which is compared with the labels. Runs in eval
    mode with gradient tracking disabled.
    """
    model.eval()
    n_correct, n_seen = 0, 0

    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            probs = torch.sigmoid(model(batch_inputs)).squeeze()
            hard_preds = probs.round()

            n_correct += hard_preds.eq(batch_labels).sum().item()
            n_seen += batch_labels.size(0)

    return n_correct / n_seen

In [None]:
best_accuracy = 0.0
BEST_CHECKPOINT_PATH = 'best_classifier.pth'

#main loop for training and validation.
for epoch in range(NUM_EPOCHS):
    train_loss = train_model(model, train_loader, criterion, optimizer, device)
    val_accuracy = evaluate_model(model, val_loader, device)

    print(f"Epoch {epoch+1}/{NUM_EPOCHS}")
    print(f"Training Loss: {train_loss:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")

    # Save the model with the best validation accuracy
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        #saving the weights because kaggle was disconnecting frequently and
        #training took some time. so I did not want to lose a high-acc model if I had one
        torch.save(model.state_dict(), BEST_CHECKPOINT_PATH)

# After the loop `model` holds the LAST epoch's weights, which are not
# necessarily the best ones. Restore the best checkpoint so that the accuracy
# reported below describes the model that the following cells actually use.
# (A checkpoint exists only if some epoch improved on the initial 0.0.)
if best_accuracy > 0.0:
    model.load_state_dict(torch.load(BEST_CHECKPOINT_PATH, map_location=device))

print(f"Final Validation Accuracy for 'Smiling' Classifier: {best_accuracy:.4f}")

In [None]:
#helper that builds the preprocessing applied to images before the classifier
def get_resnet_transforms():
    """Return the resize -> tensor -> normalize pipeline used for ResNet inputs."""
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)

#one shared pipeline instance for the cells below
resnet_transforms = get_resnet_transforms()


#generating 5000 random images for data collection.
#For every sample we keep its z vector, its w vector and the label that the
#smile classifier assigns to the generated image.

NUM_SAMPLES = 5000
SAMPLING_SEED = 0

z_vectors = []
w_vectors = []
labels = []

model.eval()
G.eval() #models should be in evaluation mode

# A dedicated, seeded generator makes the sampled latents (and therefore the
# directions fitted on them later) reproducible across runs without touching
# the global RNG state.
sampling_rng = torch.Generator(device=device).manual_seed(SAMPLING_SEED)

with torch.no_grad():
    for i in range(NUM_SAMPLES):
        #generating z-space and w-space vectors, similar to before
        z = torch.randn([1, G.z_dim], generator=sampling_rng, device=device)
        w = G.mapping(z, None)

        #generating image (StyleGAN output: [1, C, H, W], range [-1, 1])
        img_tensor_gan_out = G.synthesis(w, noise_mode='const')

        #moving the image to cpu/numpy, then transposing to H x W x C and
        #rescaling to 0..255 like in the preview cell.
        img_np = (img_tensor_gan_out.cpu().numpy()[0].transpose(1, 2, 0) * 127.5 + 128).clip(0, 255).astype(np.uint8)

        #pil image:
        image_pil = Image.fromarray(img_np, 'RGB')

        #applying transformations to get the image ready to go in ResNet18.
        classifier_input = resnet_transforms(image_pil).unsqueeze(0).to(device)
        classifier_output = model(classifier_input)

        #getting probabilities and predictions (hard 0/1 label)
        probability = torch.sigmoid(classifier_output).squeeze()
        prediction = torch.round(probability).item()

        #saving them
        z_vectors.append(z.cpu().numpy().flatten())

        # Reduce w to a single vector: average over the layer axis when the
        # mapping output has one (w is 3-D with more than one layer).
        if w.dim() == 3 and w.shape[1] > 1:
            w_avg = w.mean(dim=1).cpu().numpy().flatten()
        else:
            w_avg = w.cpu().numpy().flatten()

        w_vectors.append(w_avg)
        labels.append(prediction)

#converting to numpy arrays (one row per sample)
Z = np.array(z_vectors)
W = np.array(w_vectors)
Y = np.array(labels)

In [None]:
#boolean masks selecting the samples labelled 1 (positive) and 0 (negative)
pos_indices = (Y == 1)
neg_indices = (Y == 0)

#z-space direction: mean of the positive samples minus mean of the negative ones
mu_z_pos, mu_z_neg = (Z[mask].mean(axis=0) for mask in (pos_indices, neg_indices))
n_Z_Mean = np.subtract(mu_z_pos, mu_z_neg)

#w-space direction, computed the same way
mu_w_pos, mu_w_neg = (W[mask].mean(axis=0) for mask in (pos_indices, neg_indices))
n_W_Mean = np.subtract(mu_w_pos, mu_w_neg)

print("Mean Difference directions calculated.")

In [None]:
from sklearn.linear_model import LogisticRegression

#small C (= strong regularisation) to prevent overfitting
C_VALUE = 0.01

def _fit_boundary(latents, targets):
    """Fit the logistic-regression classifier used for both latent spaces."""
    classifier = LogisticRegression(solver='liblinear', random_state=0, C=C_VALUE)
    return classifier.fit(latents, targets)

#Z-space: the coefficient vector (coef_) is the normal of the decision boundary,
#which is taken as the direction of the smiling attribute.
logreg_Z = _fit_boundary(Z, Y)
n_Z_LogReg = logreg_Z.coef_[0]

#W-space: same procedure.
logreg_W = _fit_boundary(W, Y)
n_W_LogReg = logreg_W.coef_[0]

print("Logistic Regression directions calculated.")

In [None]:
#helper function to manipulate an image constrained to the given parameters.
def _generator_output_to_pil(img_tensor):
    """Convert a [1, C, H, W] generator output into an 8-bit RGB PIL image."""
    # C x H x W -> H x W x C, then rescale to 0..255 and clip.
    img_np = (img_tensor.cpu().numpy()[0].transpose(1, 2, 0) * 127.5 + 128).clip(0, 255).astype(np.uint8)
    return Image.fromarray(img_np, 'RGB')


def generate_manipulated_image(v_old, n, alpha, is_w_space, G, device):
    """Generate the image for the latent ``v_old + alpha * n``.

    Args:
        v_old: 1-D starting latent (array-like), in Z- or W-space.
        n: 1-D direction (array-like) of the same length as ``v_old``.
        alpha: step size along ``n``.
        is_w_space: ``False`` if the vectors live in Z-space (the result is
            passed through ``G.mapping`` first); ``True`` if they already are
            W vectors (the vector is repeated for every synthesis layer).
        G: generator exposing ``mapping``, ``synthesis`` and ``num_ws``
            (on ``G`` itself or on ``G.mapping``).
        device: device on which the generator runs.

    Returns:
        The generated RGB PIL image.
    """
    #this is the given formula for manipulation. np.asarray lets callers pass
    #lists or CPU tensors as well as numpy arrays.
    v_new_np = np.asarray(v_old) + alpha * np.asarray(n)

    # float32 tensor with a leading batch dimension: [1, latent_dim]
    v_new = torch.from_numpy(v_new_np).float().unsqueeze(0).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        if not is_w_space:
            #starting in Z-space: let the mapping network produce the W tensor
            w_new = G.mapping(v_new, None)
        else:
            #starting in W-space: the synthesis network expects one W vector
            #per layer, so repeat the single vector num_ws times
            #-> [1, num_layers, latent_dim]
            num_layers = G.num_ws if hasattr(G, 'num_ws') else G.mapping.num_ws
            w_new = v_new.unsqueeze(1).repeat(1, num_layers, 1)

        #generating the image using w_new
        img_tensor = G.synthesis(w_new, noise_mode='const')

    return _generator_output_to_pil(img_tensor)

In [None]:
import torch
import numpy as np

#as instructed, choosing a seed here.
# NOTE(review): this value differs in its last digit from the member ID stated
# at the top of the notebook (2020205177) -- confirm which one is intended.
# It is left unchanged because the results discussed below depend on it.
SEED = 2020205172
torch.manual_seed(SEED)
np.random.seed(SEED)

#generating initial latent vectors (z and w)
with torch.no_grad():
    #z_old: The starting vector in Z-space
    z_old_tensor = torch.randn([1, G.z_dim], device=device)

    #w_old: The corresponding vector in W-space (output of the mapping network)
    w_old_tensor = G.mapping(z_old_tensor, None)

#convert to flat NumPy arrays for the manipulation formula
z_old_np = z_old_tensor.cpu().numpy().flatten()
#use the average W vector if it's layered, otherwise flatten the single vector
if w_old_tensor.dim() == 3 and w_old_tensor.shape[1] > 1:
    w_old_np = w_old_tensor.mean(dim=1).cpu().numpy().flatten()
else:
    w_old_np = w_old_tensor.cpu().numpy().flatten()

#generating and displaying the "original" image for this seed
#(zero direction and alpha = 0, i.e. the unmodified z vector).
original_image = generate_manipulated_image(z_old_np, np.zeros_like(z_old_np), 0, False, G, device)
original_image.save('0_Original_Image.png')
# The message now names the file that is actually written.
print("Original Image Generated (0_Original_Image.png)")
display(original_image)

In [None]:
#normalization helper
def normalize_vector(n):
    """Scale ``n`` to unit Euclidean length.

    Args:
        n: array-like direction vector (NumPy array, list, ...).

    Returns:
        NumPy array pointing in the same direction as ``n`` with norm 1.

    Raises:
        ValueError: if the norm of ``n`` is zero or not finite. Dividing anyway
            would silently produce a NaN/inf direction and garbage images.
    """
    vec = np.asarray(n)
    norm = np.linalg.norm(vec)
    if norm == 0 or not np.isfinite(norm):
        raise ValueError(f"cannot normalize a vector whose norm is {norm}")
    return vec / norm

# Bring all four direction vectors to unit length so that the same alpha
# means the same step size in every experiment.
n_Z_LogReg, n_W_LogReg, n_Z_Mean, n_W_Mean = [
    normalize_vector(direction)
    for direction in (n_Z_LogReg, n_W_LogReg, n_Z_Mean, n_W_Mean)
]


In [None]:
ALPHA = 3

print("Experiment 1: Z-space vs W-space")

# Arguments that are identical for both manipulations of this experiment.
_shared_args = dict(alpha=ALPHA, G=G, device=device)

#z-space: move the starting z vector along the z-space logreg direction
z_image = generate_manipulated_image(z_old_np, n_Z_LogReg, is_w_space=False, **_shared_args)
z_image.save('1_Z_LogReg_Manipulated.png')
print(f"Z-Space Image (LogReg, alpha={ALPHA}) saved.")

#w-space: move the starting w vector along the w-space logreg direction
w_image = generate_manipulated_image(w_old_np, n_W_LogReg, is_w_space=True, **_shared_args)
w_image.save('2_W_LogReg_Manipulated.png')
print(f"W-Space Image (LogReg, alpha={ALPHA}) saved.")


In [None]:
# Show the original, the Z-space result and the W-space result in that order.
# display() renders the images itself and returns None, so wrapping it in
# print() only added a stray "None" line after every image.
display(original_image, z_image, w_image)

As we can see, the W-space manipulation preserved the attributes other than smiling better than the Z-space manipulation. The smile itself is also more apparent in the W-space result.

One important note: at first I ran this comparison without normalizing the vectors n_Z_LogReg and n_W_LogReg. The results were the other way around, and the W-space manipulations looked like mud. That is probably because the W-space vector is numerically larger than the Z-space vector, so even a small alpha ruins the image.

In [None]:
ALPHA = 3

print("\n--- Experiment 2: MEAN VS SVM(LOGREG) ---")

#manipulation using w-space mean-diff direction
w_image1 = generate_manipulated_image(
    v_old = w_old_np,
    n = n_W_Mean,
    alpha = ALPHA,
    is_w_space = True,
    G = G,
    device = device
)
w_image1.save('1_W_Mean_Manipulated.png')
# This image comes from the W-space mean-difference direction; the message
# used to describe it as "Z-Space Image (LogReg, ...)".
print(f"W-Space Image (Mean-Diff, alpha={ALPHA}) saved.")

#manipulation using w-space logreg direction
w_image2 = generate_manipulated_image(
    v_old = w_old_np,
    n = n_W_LogReg,
    alpha = ALPHA,
    is_w_space = True,
    G = G,
    device = device
)
w_image2.save('22_W_LogReg_Manipulated.png')
print(f"W-Space Image (LogReg, alpha={ALPHA}) saved.")

In [None]:
# Show the original, the mean-difference result and the logreg result in that
# order. display() returns None, so wrapping it in print() only added a stray
# "None" line after every image.
display(original_image, w_image1, w_image2)

In [None]:
ALPHA_VALUES = [-8.0, -5.0, -2.0, 0.0, 2.0, 5.0, 8]


print("Experiment 3: Alpha Scaling")

# Sweep the step size along the normalized w-space logreg direction; one image
# is saved and shown per alpha.
for alpha in ALPHA_VALUES:
    alpha_tag = f'{alpha:.1f}'
    filename = f'4_W_LogReg_Alpha_{alpha_tag}.png'

    image = generate_manipulated_image(w_old_np, n_W_LogReg, alpha, True, G, device)
    image.save(filename)

    print(f"Image for alpha={alpha_tag} saved as {filename}")
    display(image)