In [None]:
# VAE Emotion Image Generator Script
import os
import torch
import torchvision
import torchvision.transforms as T
import matplotlib.pyplot as plt
import numpy as np
from torch import nn
import torch.nn.functional as F
from torchvision.utils import save_image
from tqdm import tqdm


In [None]:

# Configuration
# Hyper-parameters shared by the model, the training loop and the sampler.
latent_dims = 100
batch_size = 32
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Dataset and output locations. The defaults are the original machine-specific
# paths; set FER_DATA_ROOT / FER_OUTPUT_ROOT in the environment to run the
# notebook on another machine without editing this cell.
data_root = os.environ.get(
    "FER_DATA_ROOT",
    r"C:\Users\Victor Cardenas\Documents\msc\semestre-4\idi-4\fer-2025\data\processed\fer_autoencoders",
)
output_root = os.environ.get(
    "FER_OUTPUT_ROOT",
    r"C:\Users\Victor Cardenas\Documents\msc\semestre-4\idi-4\fer-2025\data\processed\fer_generated",
)
# Number of synthetic images written per emotion class.
num_images_to_generate = 3000


In [None]:

# Transforms
# Preprocessing pipeline applied to every loaded image: resize, take the
# central 256x256 crop, then convert the image to a tensor.
transform = T.Compose(
    [
        T.Resize(size=256),
        T.CenterCrop(size=256),
        T.ToTensor(),
    ]
)

# Encoder
class Encoder(nn.Module):
    """Convolutional encoder producing a diagonal-Gaussian latent sample.

    Three stride-2 convolutions feed a 1024-unit hidden layer, from which two
    separate linear heads predict the mean and the log of the standard
    deviation. The first linear layer expects 31 * 31 * 32 flattened features,
    which is what the conv stack yields for 256x256 inputs.

    Args:
        latent_dims: Size of the latent vector.
    """

    def __init__(self, latent_dims=100):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1)
        self.batch2 = nn.BatchNorm2d(num_features=16)
        # The last convolution is unpadded, so 64x64 feature maps shrink to 31x31.
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=0)
        self.linear1 = nn.Linear(in_features=31 * 31 * 32, out_features=1024)
        self.linear2 = nn.Linear(in_features=1024, out_features=latent_dims)  # mean head
        self.linear3 = nn.Linear(in_features=1024, out_features=latent_dims)  # log-std head

    def forward(self, x):
        """Encode a batch and draw one reparameterised latent sample per item.

        Returns:
            Tuple ``(mu, std, z)`` where ``z = mu + std * eps`` with
            ``eps`` drawn from a standard normal.
        """
        hidden = self.conv1(x).relu()
        hidden = self.batch2(self.conv2(hidden)).relu()
        hidden = self.conv3(hidden).relu()
        hidden = self.linear1(hidden.flatten(start_dim=1)).relu()

        mu = self.linear2(hidden)
        # Exponentiating the raw head output keeps the standard deviation positive.
        std = self.linear3(hidden).exp()
        noise = torch.randn_like(std)
        return mu, std, mu + std * noise

# Decoder
class Decoder(nn.Module):
    """Decoder that maps a latent vector back to a 3-channel 256x256 image.

    A two-layer MLP expands the latent code to 31 * 31 * 32 features, which are
    reshaped to (32, 31, 31) and upsampled by three stride-2 transposed
    convolutions (31 -> 64 -> 128 -> 256). A final sigmoid bounds the output
    to the [0, 1] range.

    Args:
        latent_dims: Size of the latent vector.
    """

    def __init__(self, latent_dims=100):
        super().__init__()
        dense_layers = [
            nn.Linear(latent_dims, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 31 * 31 * 32),
            nn.ReLU(inplace=True),
        ]
        self.decoder_lin = nn.Sequential(*dense_layers)

        self.unflatten = nn.Unflatten(dim=1, unflattened_size=(32, 31, 31))

        upsampling_layers = [
            # No padding on the first stage: 31x31 grows to 64x64.
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, output_padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 3, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]
        self.decoder_conv = nn.Sequential(*upsampling_layers)

    def forward(self, x):
        """Decode a batch of latent vectors of shape (batch, latent_dims)."""
        feature_maps = self.unflatten(self.decoder_lin(x))
        return self.decoder_conv(feature_maps)

# VAE
class VAE(nn.Module):
    """Variational autoencoder pairing the notebook's Encoder and Decoder.

    Args:
        latent_dims: Size of the latent vector shared by both halves.
    """

    def __init__(self, latent_dims=100):
        super().__init__()
        self.encoder = Encoder(latent_dims)
        self.decoder = Decoder(latent_dims)

    def forward(self, x):
        """Return ``(mu, std, reconstruction)`` for a batch of images."""
        mean, sigma, latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return mean, sigma, reconstruction

# Training loop
def train_vae_for_emotion(emotion, num_epochs=10):
    """Train a fresh VAE on the images of a single emotion class.

    The whole ``data_root`` folder is indexed with ``ImageFolder`` and then
    restricted to the requested class through a ``Subset``, so the dataset's
    own ``samples`` / ``targets`` / ``imgs`` attributes stay consistent.

    Args:
        emotion: Name of the class sub-folder under ``data_root``.
        num_epochs: Number of passes over the class images (default 10, the
            previously hard-coded value).

    Returns:
        The trained ``VAE`` instance, located on ``device``.

    Raises:
        KeyError: If ``emotion`` is not one of the class folders found.

    Side effects:
        Prints the summed loss per epoch and writes the weights to
        ``models/vae_<emotion>.pth`` relative to the working directory.
    """
    print(f"Training VAE for: {emotion}")
    dataset = torchvision.datasets.ImageFolder(root=data_root, transform=transform)
    class_index = dataset.class_to_idx[emotion]
    class_indices = [idx for idx, target in enumerate(dataset.targets) if target == class_index]
    emotion_dataset = torch.utils.data.Subset(dataset, class_indices)

    loader = torch.utils.data.DataLoader(emotion_dataset, batch_size=batch_size, shuffle=True)
    vae = VAE(latent_dims).to(device)
    optimizer = torch.optim.Adam(vae.parameters(), lr=1e-4, weight_decay=1e-5)

    for epoch in range(1, num_epochs + 1):
        vae.train()
        epoch_loss = 0.0
        for imgs, _ in loader:
            imgs = imgs.to(device)
            mu, std, out = vae(imgs)
            # Reconstruction term: squared error summed over pixels and batch.
            rec_loss = ((imgs - out) ** 2).sum()
            # Closed-form KL(N(mu, std^2) || N(0, 1)), summed over latent dims and batch.
            kl = ((std ** 2) / 2 + (mu ** 2) / 2 - torch.log(std) - 0.5).sum()
            loss = rec_loss + kl
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Reported value is the sum of the batch losses, not a per-image average.
        print(f"Epoch {epoch}, Loss: {epoch_loss:.2f}")

    os.makedirs("models", exist_ok=True)
    torch.save(vae.state_dict(), f"models/vae_{emotion}.pth")
    return vae

# Generate images
def generate_images(vae, emotion):
    """Sample images from a trained VAE's decoder and save them as PNG files.

    Exactly ``num_images_to_generate`` files are written to
    ``output_root/<emotion>/``, named by zero-padded running index. The final
    batch is shrunk when the total is not a multiple of ``batch_size``, so the
    function does not write more images than requested.

    Args:
        vae: Trained model exposing a ``decoder`` sub-module.
        emotion: Class name, used for the log message and the output folder.
    """
    print(f"Generating images for: {emotion}")
    vae.eval()
    emotion_dir = os.path.join(output_root, emotion)
    os.makedirs(emotion_dir, exist_ok=True)
    with torch.no_grad():
        for start in tqdm(range(0, num_images_to_generate, batch_size)):
            # Clamp the last batch so the total never exceeds the requested count.
            count = min(batch_size, num_images_to_generate - start)
            # Sample latent codes from the standard-normal prior, directly on the device.
            z = torch.randn(count, latent_dims, device=device)
            generated = vae.decoder(z).cpu()
            for offset in range(generated.size(0)):
                save_image(generated[offset], os.path.join(emotion_dir, f"{start + offset:04d}.png"))

# Emotions list
# Class names processed by the main loop; each is looked up as a class folder
# of the dataset and reused as the output sub-folder name.
emotions = [
    "Anger",
    "Happy",
    "Sad",
    "Surprise",
    "Neutral",
    "Disgust",
    "Fear",
]


In [None]:

# Main loop
# For every emotion class: train a dedicated VAE, then sample synthetic images from it.
for emotion in emotions:
    # Trains from scratch and also saves the weights to models/vae_<emotion>.pth.
    vae_model = train_vae_for_emotion(emotion)
    # Writes the sampled PNG files under output_root/<emotion>/.
    generate_images(vae_model, emotion)
    print(f"Done: {emotion}\n")


Trial run with a single emotion (_Fear_): train one VAE and generate images for that class only.

In [4]:
# VAE Single Emotion Generator (Test with 'Fear')
import os
import torch
import torchvision
import torchvision.transforms as T
import matplotlib.pyplot as plt
import numpy as np
from torch import nn
import torch.nn.functional as F
from torchvision.utils import save_image
from tqdm import tqdm

# Configuration
# Hyper-parameters shared by the model, the training loop and the sampler.
latent_dims = 100
batch_size = 32
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Dataset and output locations. The defaults are the original machine-specific
# paths; set FER_DATA_ROOT / FER_OUTPUT_ROOT in the environment to run the
# notebook on another machine without editing this cell.
data_root = os.environ.get(
    "FER_DATA_ROOT",
    r"C:\Users\Victor Cardenas\Documents\msc\semestre-4\idi-4\fer-2025\data\processed\fer_autoencoders",
)
output_root = os.environ.get(
    "FER_OUTPUT_ROOT",
    r"C:\Users\Victor Cardenas\Documents\msc\semestre-4\idi-4\fer-2025\data\processed\fer_generated",
)
# Number of synthetic images written for the tested emotion.
num_images_to_generate = 2000

target_emotion = "Fear"  # Change this for testing other emotions

# Transforms
# Preprocessing pipeline applied to every loaded image: resize, take the
# central 256x256 crop, then convert the image to a tensor.
transform = T.Compose(
    [
        T.Resize(size=256),
        T.CenterCrop(size=256),
        T.ToTensor(),
    ]
)

# Encoder
class Encoder(nn.Module):
    """Convolutional encoder producing a diagonal-Gaussian latent sample.

    Three stride-2 convolutions feed a 1024-unit hidden layer, from which two
    separate linear heads predict the mean and the log of the standard
    deviation. The first linear layer expects 31 * 31 * 32 flattened features,
    which is what the conv stack yields for 256x256 inputs.

    Args:
        latent_dims: Size of the latent vector.
    """

    def __init__(self, latent_dims=100):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1)
        self.batch2 = nn.BatchNorm2d(num_features=16)
        # The last convolution is unpadded, so 64x64 feature maps shrink to 31x31.
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=0)
        self.linear1 = nn.Linear(in_features=31 * 31 * 32, out_features=1024)
        self.linear2 = nn.Linear(in_features=1024, out_features=latent_dims)  # mean head
        self.linear3 = nn.Linear(in_features=1024, out_features=latent_dims)  # log-std head

    def forward(self, x):
        """Encode a batch and draw one reparameterised latent sample per item.

        Returns:
            Tuple ``(mu, std, z)`` where ``z = mu + std * eps`` with
            ``eps`` drawn from a standard normal.
        """
        hidden = self.conv1(x).relu()
        hidden = self.batch2(self.conv2(hidden)).relu()
        hidden = self.conv3(hidden).relu()
        hidden = self.linear1(hidden.flatten(start_dim=1)).relu()

        mu = self.linear2(hidden)
        # Exponentiating the raw head output keeps the standard deviation positive.
        std = self.linear3(hidden).exp()
        noise = torch.randn_like(std)
        return mu, std, mu + std * noise

# Decoder
class Decoder(nn.Module):
    """Decoder that maps a latent vector back to a 3-channel 256x256 image.

    A two-layer MLP expands the latent code to 31 * 31 * 32 features, which are
    reshaped to (32, 31, 31) and upsampled by three stride-2 transposed
    convolutions (31 -> 64 -> 128 -> 256). A final sigmoid bounds the output
    to the [0, 1] range.

    Args:
        latent_dims: Size of the latent vector.
    """

    def __init__(self, latent_dims=100):
        super().__init__()
        dense_layers = [
            nn.Linear(latent_dims, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 31 * 31 * 32),
            nn.ReLU(inplace=True),
        ]
        self.decoder_lin = nn.Sequential(*dense_layers)

        self.unflatten = nn.Unflatten(dim=1, unflattened_size=(32, 31, 31))

        upsampling_layers = [
            # No padding on the first stage: 31x31 grows to 64x64.
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, output_padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 3, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]
        self.decoder_conv = nn.Sequential(*upsampling_layers)

    def forward(self, x):
        """Decode a batch of latent vectors of shape (batch, latent_dims)."""
        feature_maps = self.unflatten(self.decoder_lin(x))
        return self.decoder_conv(feature_maps)

# VAE
class VAE(nn.Module):
    """Variational autoencoder pairing the notebook's Encoder and Decoder.

    Args:
        latent_dims: Size of the latent vector shared by both halves.
    """

    def __init__(self, latent_dims=100):
        super().__init__()
        self.encoder = Encoder(latent_dims)
        self.decoder = Decoder(latent_dims)

    def forward(self, x):
        """Return ``(mu, std, reconstruction)`` for a batch of images."""
        mean, sigma, latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return mean, sigma, reconstruction

# Training loop
def train_vae_for_emotion(emotion, num_epochs=79):
    """Train a fresh VAE on the images of a single emotion class.

    The whole ``data_root`` folder is indexed with ``ImageFolder`` and then
    restricted to the requested class through a ``Subset``, so the dataset's
    own ``samples`` / ``targets`` / ``imgs`` attributes stay consistent.

    Args:
        emotion: Name of the class sub-folder under ``data_root``.
        num_epochs: Number of passes over the class images. The default of 79
            reproduces the previously hard-coded ``range(1, 80)``; pass 80 if
            a round number of epochs was intended.

    Returns:
        The trained ``VAE`` instance, located on ``device``.

    Raises:
        KeyError: If ``emotion`` is not one of the class folders found.

    Side effects:
        Prints the summed loss per epoch and writes the weights to
        ``models/vae_<emotion>.pth`` relative to the working directory.
    """
    print(f"Training VAE for: {emotion}")
    dataset = torchvision.datasets.ImageFolder(root=data_root, transform=transform)
    class_index = dataset.class_to_idx[emotion]
    class_indices = [idx for idx, target in enumerate(dataset.targets) if target == class_index]
    emotion_dataset = torch.utils.data.Subset(dataset, class_indices)

    loader = torch.utils.data.DataLoader(emotion_dataset, batch_size=batch_size, shuffle=True)
    vae = VAE(latent_dims).to(device)
    optimizer = torch.optim.Adam(vae.parameters(), lr=1e-4, weight_decay=1e-5)

    for epoch in range(1, num_epochs + 1):
        vae.train()
        epoch_loss = 0.0
        for imgs, _ in loader:
            imgs = imgs.to(device)
            mu, std, out = vae(imgs)
            # Reconstruction term: squared error summed over pixels and batch.
            rec_loss = ((imgs - out) ** 2).sum()
            # Closed-form KL(N(mu, std^2) || N(0, 1)), summed over latent dims and batch.
            kl = ((std ** 2) / 2 + (mu ** 2) / 2 - torch.log(std) - 0.5).sum()
            loss = rec_loss + kl
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Reported value is the sum of the batch losses, not a per-image average.
        print(f"Epoch {epoch}, Loss: {epoch_loss:.2f}")

    os.makedirs("models", exist_ok=True)
    torch.save(vae.state_dict(), f"models/vae_{emotion}.pth")
    return vae

# Generate images
def generate_images(vae, emotion):
    """Sample images from a trained VAE's decoder and save them as PNG files.

    Exactly ``num_images_to_generate`` files are written to
    ``output_root/<emotion>/``, named by zero-padded running index. The final
    batch is shrunk when the total is not a multiple of ``batch_size``, so the
    function does not write more images than requested.

    Args:
        vae: Trained model exposing a ``decoder`` sub-module.
        emotion: Class name, used for the log message and the output folder.
    """
    print(f"Generating images for: {emotion}")
    vae.eval()
    emotion_dir = os.path.join(output_root, emotion)
    os.makedirs(emotion_dir, exist_ok=True)
    with torch.no_grad():
        for start in tqdm(range(0, num_images_to_generate, batch_size)):
            # Clamp the last batch so the total never exceeds the requested count.
            count = min(batch_size, num_images_to_generate - start)
            # Sample latent codes from the standard-normal prior, directly on the device.
            z = torch.randn(count, latent_dims, device=device)
            generated = vae.decoder(z).cpu()
            for offset in range(generated.size(0)):
                save_image(generated[offset], os.path.join(emotion_dir, f"{start + offset:04d}.png"))



In [5]:
# Run for a single emotion (test)
# Trains on the `target_emotion` class only; the fitted model is kept in
# `vae_model` so the generation cell can reuse it without retraining.
vae_model = train_vae_for_emotion(target_emotion)


Training VAE for: Fear
Epoch 1, Loss: 43909978.69
Epoch 2, Loss: 39517661.53
Epoch 3, Loss: 35949067.97
Epoch 4, Loss: 32612090.97
Epoch 5, Loss: 29368879.94
Epoch 6, Loss: 26547473.91
Epoch 7, Loss: 24085570.69
Epoch 8, Loss: 21923393.64
Epoch 9, Loss: 20104470.81
Epoch 10, Loss: 18478773.23
Epoch 11, Loss: 17037677.09
Epoch 12, Loss: 15712652.55
Epoch 13, Loss: 14573882.23
Epoch 14, Loss: 13576467.98
Epoch 15, Loss: 12638787.72
Epoch 16, Loss: 11849483.41
Epoch 17, Loss: 11170244.58
Epoch 18, Loss: 10516727.88
Epoch 19, Loss: 9842630.26
Epoch 20, Loss: 9370505.88
Epoch 21, Loss: 8869245.07
Epoch 22, Loss: 8455140.12
Epoch 23, Loss: 8040388.59
Epoch 24, Loss: 7614258.03
Epoch 25, Loss: 7499335.70
Epoch 26, Loss: 7052718.71
Epoch 27, Loss: 6794452.37
Epoch 28, Loss: 6526904.88
Epoch 29, Loss: 6274749.57
Epoch 30, Loss: 6146979.41
Epoch 31, Loss: 5907245.72
Epoch 32, Loss: 5760553.76
Epoch 33, Loss: 5615540.45
Epoch 34, Loss: 5488532.94
Epoch 35, Loss: 5271616.02
Epoch 36, Loss: 5174046

In [6]:
# Sample images from the model trained in the previous cell and write them
# under output_root/<target_emotion>/.
generate_images(vae_model, target_emotion)
print(f"Done generating images for: {target_emotion}")


Generating images for: Fear


100%|██████████| 63/63 [00:39<00:00,  1.61it/s]

Done generating images for: Fear



