# __Project 3a: Advanced GAN Crystal Ball__

In [1]:
import scipy.io
import pandas as pd
import numpy as np
import datetime
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchgan.trainer import Trainer
import torchgan.models as models
print(dir(models))
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())
else:
    print('cpu')

['ACGANDiscriminator', 'ACGANGenerator', 'AutoEncodingDiscriminator', 'AutoEncodingGenerator', 'ConditionalGANDiscriminator', 'ConditionalGANGenerator', 'DCGANDiscriminator', 'DCGANGenerator', 'Discriminator', 'Generator', 'InfoGANDiscriminator', 'InfoGANGenerator', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'acgan', 'autoencoding', 'conditional', 'dcgan', 'infogan', 'model']
NVIDIA GeForce RTX 4060 Laptop GPU


### __Download__ and __Extract__ the CelebA Dataset

In [2]:
#!curl -L https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/static/wiki_crop.tar -o wiki_crop.tar

In [3]:
#!tar -xvzf wiki_crop.tar -C .

### __Load the Celeb-WIKI Dataset__

In [4]:
mat_file_path = './wiki_crop/wiki.mat'
mat = scipy.io.loadmat(mat_file_path)

mat.keys()

dict_keys(['__header__', '__version__', '__globals__', 'wiki'])

In [5]:
mat.items()

dict_items([('__header__', b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sat Jan 16 16:25:20 2016'), ('__version__', '1.0'), ('__globals__', []), ('wiki', array([[(array([[723671, 703186, 711677, ..., 720620, 723893, 713846]]), array([[2009, 1964, 2008, ..., 2013, 2011, 2008]], dtype=uint16), array([[array(['17/10000217_1981-05-05_2009.jpg'], dtype='<U31'),
                array(['48/10000548_1925-04-04_1964.jpg'], dtype='<U31'),
                array(['12/100012_1948-07-03_2008.jpg'], dtype='<U29'), ...,
                array(['09/9998109_1972-12-27_2013.jpg'], dtype='<U30'),
                array(['00/9999400_1981-12-13_2011.jpg'], dtype='<U30'),
                array(['80/999980_1954-06-11_2008.jpg'], dtype='<U29')]],
              dtype=object), array([[1., 1., 1., ..., 1., 1., 0.]]), array([[array(['Sami Jauhojärvi'], dtype='<U15'),
                array(['Dettmar Cramer'], dtype='<U14'),
                array(['Marc Okrand'], dtype='<U11'), ...,
                array(['Mi

In [6]:
# Extract from .mat file
wiki = mat['wiki']

full_path = wiki['full_path'][0][0][0]
gender = wiki['gender'][0][0][0]
dob = wiki['dob'][0][0][0]
photo_taken = wiki['photo_taken'][0][0][0]
face_location = wiki['face_location'][0][0][0]
name = wiki['name'][0][0][0]
face_score = wiki['face_score'][0][0][0]
second_face_score = wiki['second_face_score'][0][0][0]

df = pd.DataFrame({
    'full_path': full_path,
    'gender': gender.flatten(),
    'dob': dob.flatten(),
    'photo_taken': photo_taken.flatten(),
    'face_location': face_location.tolist(),
    'name': name.flatten(),
    'face_score': face_score.flatten(),
    'second_face_score': second_face_score.flatten()
})

### __Data Cleaning__

In [7]:
# -inf for face score means that the confidence of a face being detected in the image is virtually NONEXISTENT!
num_neg_inf = (df['face_score'] == -np.inf).sum()
print(num_neg_inf)
df_filtered = df[df['face_score'] != -np.inf]
df_filtered

18016


Unnamed: 0,full_path,gender,dob,photo_taken,face_location,name,face_score,second_face_score
0,[17/10000217_1981-05-05_2009.jpg],1.0,723671,2009,"[[111.29109473290997, 111.29109473290997, 252....",[Sami Jauhojärvi],4.300962,
1,[48/10000548_1925-04-04_1964.jpg],1.0,703186,1964,"[[252.48330229530742, 126.68165114765371, 354....",[Dettmar Cramer],2.645639,1.949248
2,[12/100012_1948-07-03_2008.jpg],1.0,711677,2008,"[[113.52, 169.83999999999997, 366.08, 422.4]]",[Marc Okrand],4.329329,
4,[16/10002116_1971-05-31_2012.jpg],0.0,720044,2012,"[[171.61031405173117, 75.57451239763239, 266.7...",[Diana Damrau],3.408442,
5,[02/10002702_1960-11-09_2012.jpg],0.0,716189,2012,"[[274.76563240288175, 57.7700900839337, 376.88...",[Krista Tippett],4.748056,
...,...,...,...,...,...,...,...,...
62321,[38/9996938_1937-02-15_1968.jpg],1.0,707521,1968,"[[83.1122019227873, 83.1122019227873, 329.2488...",[Coen Moulijn],3.868307,
62322,[46/9996946_1943-11-01_1968.jpg],1.0,709971,1968,"[[123.45351931209424, 196.99763089935078, 343....",[Theo van Duivenbode],3.735573,
62323,[49/9996949_1937-04-17_1963.jpg],1.0,707582,1963,"[[128.92773553879837, 128.92773553879837, 320....",[Guus Haak],4.029268,
62325,[09/9998109_1972-12-27_2013.jpg],1.0,720620,2013,"[[169.88839785556354, 74.31669471981023, 235.2...",[Michael Wiesinger],3.494303,


In [8]:
num_nans = df_filtered['second_face_score'].isna().sum()
df_filtered = df_filtered[df_filtered['second_face_score'].isna()]
df_filtered

Unnamed: 0,full_path,gender,dob,photo_taken,face_location,name,face_score,second_face_score
0,[17/10000217_1981-05-05_2009.jpg],1.0,723671,2009,"[[111.29109473290997, 111.29109473290997, 252....",[Sami Jauhojärvi],4.300962,
2,[12/100012_1948-07-03_2008.jpg],1.0,711677,2008,"[[113.52, 169.83999999999997, 366.08, 422.4]]",[Marc Okrand],4.329329,
4,[16/10002116_1971-05-31_2012.jpg],0.0,720044,2012,"[[171.61031405173117, 75.57451239763239, 266.7...",[Diana Damrau],3.408442,
5,[02/10002702_1960-11-09_2012.jpg],0.0,716189,2012,"[[274.76563240288175, 57.7700900839337, 376.88...",[Krista Tippett],4.748056,
6,[41/10003541_1937-09-27_1971.jpg],1.0,707745,1971,"[[79.35580188693945, 26.65993396231315, 197.60...",[Bernie Whitebear],4.184828,
...,...,...,...,...,...,...,...,...
62321,[38/9996938_1937-02-15_1968.jpg],1.0,707521,1968,"[[83.1122019227873, 83.1122019227873, 329.2488...",[Coen Moulijn],3.868307,
62322,[46/9996946_1943-11-01_1968.jpg],1.0,709971,1968,"[[123.45351931209424, 196.99763089935078, 343....",[Theo van Duivenbode],3.735573,
62323,[49/9996949_1937-04-17_1963.jpg],1.0,707582,1963,"[[128.92773553879837, 128.92773553879837, 320....",[Guus Haak],4.029268,
62325,[09/9998109_1972-12-27_2013.jpg],1.0,720620,2013,"[[169.88839785556354, 74.31669471981023, 235.2...",[Michael Wiesinger],3.494303,


In [9]:
num_nans = df_filtered['gender'].isna().sum()
df_filtered = df_filtered[df_filtered['gender'].notna()]
num_nans = df_filtered['gender'].isna().sum()
print(num_nans)
df_filtered

0


Unnamed: 0,full_path,gender,dob,photo_taken,face_location,name,face_score,second_face_score
0,[17/10000217_1981-05-05_2009.jpg],1.0,723671,2009,"[[111.29109473290997, 111.29109473290997, 252....",[Sami Jauhojärvi],4.300962,
2,[12/100012_1948-07-03_2008.jpg],1.0,711677,2008,"[[113.52, 169.83999999999997, 366.08, 422.4]]",[Marc Okrand],4.329329,
4,[16/10002116_1971-05-31_2012.jpg],0.0,720044,2012,"[[171.61031405173117, 75.57451239763239, 266.7...",[Diana Damrau],3.408442,
5,[02/10002702_1960-11-09_2012.jpg],0.0,716189,2012,"[[274.76563240288175, 57.7700900839337, 376.88...",[Krista Tippett],4.748056,
6,[41/10003541_1937-09-27_1971.jpg],1.0,707745,1971,"[[79.35580188693945, 26.65993396231315, 197.60...",[Bernie Whitebear],4.184828,
...,...,...,...,...,...,...,...,...
62321,[38/9996938_1937-02-15_1968.jpg],1.0,707521,1968,"[[83.1122019227873, 83.1122019227873, 329.2488...",[Coen Moulijn],3.868307,
62322,[46/9996946_1943-11-01_1968.jpg],1.0,709971,1968,"[[123.45351931209424, 196.99763089935078, 343....",[Theo van Duivenbode],3.735573,
62323,[49/9996949_1937-04-17_1963.jpg],1.0,707582,1963,"[[128.92773553879837, 128.92773553879837, 320....",[Guus Haak],4.029268,
62325,[09/9998109_1972-12-27_2013.jpg],1.0,720620,2013,"[[169.88839785556354, 74.31669471981023, 235.2...",[Michael Wiesinger],3.494303,


In [10]:
df_filtered = df_filtered.drop(columns=['second_face_score'])
df_filtered

Unnamed: 0,full_path,gender,dob,photo_taken,face_location,name,face_score
0,[17/10000217_1981-05-05_2009.jpg],1.0,723671,2009,"[[111.29109473290997, 111.29109473290997, 252....",[Sami Jauhojärvi],4.300962
2,[12/100012_1948-07-03_2008.jpg],1.0,711677,2008,"[[113.52, 169.83999999999997, 366.08, 422.4]]",[Marc Okrand],4.329329
4,[16/10002116_1971-05-31_2012.jpg],0.0,720044,2012,"[[171.61031405173117, 75.57451239763239, 266.7...",[Diana Damrau],3.408442
5,[02/10002702_1960-11-09_2012.jpg],0.0,716189,2012,"[[274.76563240288175, 57.7700900839337, 376.88...",[Krista Tippett],4.748056
6,[41/10003541_1937-09-27_1971.jpg],1.0,707745,1971,"[[79.35580188693945, 26.65993396231315, 197.60...",[Bernie Whitebear],4.184828
...,...,...,...,...,...,...,...
62321,[38/9996938_1937-02-15_1968.jpg],1.0,707521,1968,"[[83.1122019227873, 83.1122019227873, 329.2488...",[Coen Moulijn],3.868307
62322,[46/9996946_1943-11-01_1968.jpg],1.0,709971,1968,"[[123.45351931209424, 196.99763089935078, 343....",[Theo van Duivenbode],3.735573
62323,[49/9996949_1937-04-17_1963.jpg],1.0,707582,1963,"[[128.92773553879837, 128.92773553879837, 320....",[Guus Haak],4.029268
62325,[09/9998109_1972-12-27_2013.jpg],1.0,720620,2013,"[[169.88839785556354, 74.31669471981023, 235.2...",[Michael Wiesinger],3.494303


In [11]:
def matlab_serial_to_year(serial_date):
    # MATLAB's serial dates start from 0000-01-01, Python starts from 0001-01-01
    origin = datetime.datetime(1, 1, 1)  # Using year 1
    delta = datetime.timedelta(days=int(serial_date) - 366)  # Subtract 366 to adjust MATLAB's start year (0)
    return (origin + delta).year

# Assuming your cleaned DataFrame is named 'df'
def get_age_bucket(age):
    if age <= 18:
        return 0
    elif 19 <= age <= 29:
        return 1
    elif 30 <= age <= 39:
        return 2
    elif 40 <= age <= 49:
        return 3
    elif 50 <= age <= 59:
        return 4
    else:
        return 5

final_df = df_filtered.copy()
final_df['dob'] = df_filtered['dob'].apply(matlab_serial_to_year)

# Add another feature
final_df['age'] = final_df['photo_taken'] - final_df['dob']
final_df = final_df.drop(columns=['dob', 'photo_taken'])

# Assign age bucket to each row
final_df['age_bucket'] = final_df['age'].apply(get_age_bucket)

# Add './wiki_crop/' prefix to the full_path column to get the correct paths
final_df['full_path'] = final_df['full_path'].apply(lambda x: f"./wiki_crop/{x[0]}")

# Convert gender to int
final_df['gender'] = final_df['gender'].astype(int)

# Check the updated DataFrame
final_df

Unnamed: 0,full_path,gender,face_location,name,face_score,age,age_bucket
0,./wiki_crop/17/10000217_1981-05-05_2009.jpg,1,"[[111.29109473290997, 111.29109473290997, 252....",[Sami Jauhojärvi],4.300962,28,1
2,./wiki_crop/12/100012_1948-07-03_2008.jpg,1,"[[113.52, 169.83999999999997, 366.08, 422.4]]",[Marc Okrand],4.329329,60,5
4,./wiki_crop/16/10002116_1971-05-31_2012.jpg,0,"[[171.61031405173117, 75.57451239763239, 266.7...",[Diana Damrau],3.408442,41,3
5,./wiki_crop/02/10002702_1960-11-09_2012.jpg,0,"[[274.76563240288175, 57.7700900839337, 376.88...",[Krista Tippett],4.748056,52,4
6,./wiki_crop/41/10003541_1937-09-27_1971.jpg,1,"[[79.35580188693945, 26.65993396231315, 197.60...",[Bernie Whitebear],4.184828,34,2
...,...,...,...,...,...,...,...
62321,./wiki_crop/38/9996938_1937-02-15_1968.jpg,1,"[[83.1122019227873, 83.1122019227873, 329.2488...",[Coen Moulijn],3.868307,31,2
62322,./wiki_crop/46/9996946_1943-11-01_1968.jpg,1,"[[123.45351931209424, 196.99763089935078, 343....",[Theo van Duivenbode],3.735573,25,1
62323,./wiki_crop/49/9996949_1937-04-17_1963.jpg,1,"[[128.92773553879837, 128.92773553879837, 320....",[Guus Haak],4.029268,26,1
62325,./wiki_crop/09/9998109_1972-12-27_2013.jpg,1,"[[169.88839785556354, 74.31669471981023, 235.2...",[Michael Wiesinger],3.494303,41,3


In [12]:
final_df['face_location'] = final_df['face_location'].apply(lambda x: x[0].tolist() if isinstance(x, np.ndarray) and x.ndim == 2 else x)
final_df['name'] = final_df['name'].apply(lambda x: x[0] if isinstance(x, np.ndarray) and x.ndim == 1 else x)

print(final_df.dtypes)

final_df

full_path         object
gender             int32
face_location     object
name              object
face_score       float64
age                int64
age_bucket         int64
dtype: object


Unnamed: 0,full_path,gender,face_location,name,face_score,age,age_bucket
0,./wiki_crop/17/10000217_1981-05-05_2009.jpg,1,"[111.29109473290997, 111.29109473290997, 252.6...",Sami Jauhojärvi,4.300962,28,1
2,./wiki_crop/12/100012_1948-07-03_2008.jpg,1,"[113.52, 169.83999999999997, 366.08, 422.4]",Marc Okrand,4.329329,60,5
4,./wiki_crop/16/10002116_1971-05-31_2012.jpg,0,"[171.61031405173117, 75.57451239763239, 266.76...",Diana Damrau,3.408442,41,3
5,./wiki_crop/02/10002702_1960-11-09_2012.jpg,0,"[274.76563240288175, 57.7700900839337, 376.886...",Krista Tippett,4.748056,52,4
6,./wiki_crop/41/10003541_1937-09-27_1971.jpg,1,"[79.35580188693945, 26.65993396231315, 197.609...",Bernie Whitebear,4.184828,34,2
...,...,...,...,...,...,...,...
62321,./wiki_crop/38/9996938_1937-02-15_1968.jpg,1,"[83.1122019227873, 83.1122019227873, 329.24880...",Coen Moulijn,3.868307,31,2
62322,./wiki_crop/46/9996946_1943-11-01_1968.jpg,1,"[123.45351931209424, 196.99763089935078, 343.2...",Theo van Duivenbode,3.735573,25,1
62323,./wiki_crop/49/9996949_1937-04-17_1963.jpg,1,"[128.92773553879837, 128.92773553879837, 320.1...",Guus Haak,4.029268,26,1
62325,./wiki_crop/09/9998109_1972-12-27_2013.jpg,1,"[169.88839785556354, 74.31669471981023, 235.25...",Michael Wiesinger,3.494303,41,3


### __Data Preprocessing__

In [13]:
# Define transformations
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize cropped faces to 64x64
    transforms.ToTensor(),  # Convert PIL Image to Tensor
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

In [14]:
class FaceAgingDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        """
        Args:
            dataframe (pd.DataFrame): DataFrame containing image paths, face locations, and labels.
            transform (callable, optional): A function/transform to apply to the images.
        """
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Get the row corresponding to the index
        row = self.dataframe.iloc[idx]

        # Load the image
        img = Image.open(row['full_path']).convert("RGB")

        # Crop the face using face_location
        x_min, y_min, x_max, y_max = row['face_location']
        img = img.crop((x_min, y_min, x_max, y_max))

        # Apply transformations
        if self.transform:
            img = self.transform(img)

        # Get the age bucket (1-based, e.g., 1 for 0-18, 2 for 19-29, etc.)
        age_bucket = row['age_bucket'] - 1  # Convert to zero-indexed for PyTorch

        return img, age_bucket

In [15]:
# Create dataset and dataloader
dataset = FaceAgingDataset(final_df, transform=transform)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=0, drop_last=True)

### __Modeling__

In [16]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1, bias=False),  # Output: [batch_size, 64, 32, 32]
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(64, 64 * 2, 4, 2, 1, bias=False),  # Output: [batch_size, 128, 16, 16]
            nn.BatchNorm2d(64 * 2),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(64 * 2, 64 * 4, 4, 2, 1, bias=False),  # Output: [batch_size, 256, 8, 8]
            nn.BatchNorm2d(64 * 4),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(64 * 4, 64 * 8, 4, 2, 1, bias=False),  # Output: [batch_size, 512, 4, 4]
            nn.BatchNorm2d(64 * 8),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(64 * 8, 1, 4, 1, 0, bias=False),  # Output: [batch_size, 1, 1, 1]
            nn.Sigmoid()  # Scalar probability
        )

    def forward(self, input):
        return self.model(input).view(-1)  # Flatten to [batch_size]

In [17]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.ConvTranspose2d(100, 64 * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(64 * 8),
            nn.ReLU(True),

            nn.ConvTranspose2d(64 * 8, 64 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 4),
            nn.ReLU(True),

            nn.ConvTranspose2d(64 * 4, 64 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 2),
            nn.ReLU(True),

            nn.ConvTranspose2d(64 * 2, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),

            nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, input):
        return self.model(input)

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize models
discriminator = Discriminator().to(device)
generator = Generator().to(device)

# Initialize optimizers
d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))

# Loss function
loss_fn = nn.BCELoss()

In [19]:
def discriminator_train_step(real_data, fake_data):
    d_optimizer.zero_grad()

    # Train on real data
    prediction_real = discriminator(real_data)
    real_labels = torch.ones(real_data.size(0)).to(device)
    error_real = loss_fn(prediction_real, real_labels)
    error_real.backward()

    # Train on fake data
    prediction_fake = discriminator(fake_data.detach())  # Detach to avoid updating generator
    fake_labels = torch.zeros(fake_data.size(0)).to(device)
    error_fake = loss_fn(prediction_fake, fake_labels)
    error_fake.backward()

    d_optimizer.step()

    return error_real + error_fake

In [20]:
def generator_train_step(fake_data):
    g_optimizer.zero_grad()

    # Generate predictions
    prediction = discriminator(fake_data)
    real_labels = torch.ones(fake_data.size(0)).to(device)
    error = loss_fn(prediction, real_labels)  # Want discriminator to think fake is real
    error.backward()

    g_optimizer.step()

    return error

In [21]:
from torchvision.utils import save_image

epochs = 50
batch_size = 64
noise_dim = 100

# Dataset and DataLoader
dataloader = DataLoader(FaceAgingDataset(final_df, transform=transform), batch_size=batch_size, shuffle=True)

# Training loop
for epoch in range(epochs):
    for real_images, _ in dataloader:
        real_images = real_images.to(device)

        # Generate fake images
        noise = torch.randn(real_images.size(0), noise_dim, 1, 1).to(device)
        fake_images = generator(noise)

        # Train Discriminator
        d_loss = discriminator_train_step(real_images, fake_images)

        # Train Generator
        noise = torch.randn(real_images.size(0), noise_dim, 1, 1).to(device)
        fake_images = generator(noise)
        g_loss = generator_train_step(fake_images)

    print(f"Epoch [{epoch + 1}/{epochs}] | D Loss: {d_loss.item():.4f} | G Loss: {g_loss.item():.4f}")

    # Save sample images
    if (epoch + 1) % 10 == 0:
        save_image(fake_images.data[:25], f"output/epoch_{epoch + 1}.png", nrow=5, normalize=True)

Epoch [1/50] | D Loss: 0.7131 | G Loss: 2.0342
Epoch [2/50] | D Loss: 1.5533 | G Loss: 4.4452
Epoch [3/50] | D Loss: 0.9053 | G Loss: 5.7816
Epoch [4/50] | D Loss: 0.3141 | G Loss: 2.4069
Epoch [5/50] | D Loss: 0.1773 | G Loss: 3.6675
Epoch [6/50] | D Loss: 0.9434 | G Loss: 0.5895
Epoch [7/50] | D Loss: 0.9103 | G Loss: 1.7546
Epoch [8/50] | D Loss: 0.2490 | G Loss: 3.4542
Epoch [9/50] | D Loss: 0.6180 | G Loss: 5.4548
Epoch [10/50] | D Loss: 0.4244 | G Loss: 3.1929


FileNotFoundError: [Errno 2] No such file or directory: 'output/epoch_10.png'