# Ethical AI Image Generator using Variational Autoencoder

Connect to Google Drive

* Run this cell first in every session if your images are stored in Google Drive

In [2]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset folder configured later in
# this notebook lives under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


Imports

In [3]:
from PIL import Image
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt

Configuration Setup
* Change the `drive_folder` path below to point at your own training images

In [1]:
# Central settings read by main(): dataset location, model size, and the
# optimisation hyper-parameters.
CONFIG = {
    'drive_folder': '/content/drive/MyDrive/Ethical_AI_Image/Images/Training',  # Folder searched (recursively) for training images; change this path
    'image_size': 256,  # Images are resized to image_size x image_size; the VAE layers below are sized for 256
    'latent_dim': 256,  # Size of latent space
    'batch_size': 16,  # Images per DataLoader batch
    'epochs': 50,  # Number of passes over the dataset
    'learning_rate': 1e-4,  # Adam learning rate
    'beta': 1.0,  # Weight for KL divergence loss
}

Dataset Class for Loading Training Images

In [4]:
class ArtDataset(Dataset):
    """Dataset for loading donated art images.

    Recursively collects every image file under ``folder_path`` and serves
    each one as an RGB tensor resized to ``image_size`` x ``image_size`` and
    normalized with mean 0.5 / std 0.5 per channel.

    Args:
        folder_path: Root folder that is searched recursively for images.
        image_size: Side length (pixels) every image is resized to.

    Attributes:
        image_paths: Sorted list of image file paths (as strings).
        transform: torchvision pipeline applied to each loaded image.
    """

    # Lower-case file suffixes that are treated as images.
    VALID_EXTENSIONS = frozenset({'.jpg', '.jpeg', '.png', '.bmp', '.gif'})

    def __init__(self, folder_path, image_size=256):
        self.folder_path = folder_path
        self.image_size = image_size

        # Walk the tree once and compare suffixes case-insensitively. This
        # finds mixed-case names such as "photo.Jpeg", never lists a file
        # twice on case-insensitive filesystems, and skips directories whose
        # name happens to end in an image extension. Sorting makes the index
        # -> file mapping reproducible between runs.
        self.image_paths = sorted(
            str(path)
            for path in Path(folder_path).rglob('*')
            if path.is_file() and path.suffix.lower() in self.VALID_EXTENSIONS
        )
        print(f"Found {len(self.image_paths)} images in {folder_path}")

        # Image transformations
        self.transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            # (x - 0.5) / 0.5 per channel
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])

    def __len__(self):
        """Return the number of images that were found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx``, convert it to RGB and return the transformed tensor."""
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made, instead of leaving it to garbage collection.
        with Image.open(self.image_paths[idx]) as raw:
            image = raw.convert('RGB')
        return self.transform(image)

Define Class for Variational Autoencoder

In [26]:
class VAE(nn.Module):
    """Convolutional variational autoencoder for 3x256x256 images.

    The encoder halves the spatial size six times (256 -> 4) and feeds two
    linear heads that produce the mean and log-variance of the latent
    distribution. The decoder mirrors it with transposed convolutions and a
    final tanh.

    Args:
        latent_dim: Number of latent dimensions.
    """

    def __init__(self, latent_dim=256):
        super().__init__()
        self.latent_dim = latent_dim

        # Encoder: each conv (kernel 4, stride 2, padding 1) halves H and W.
        # 256 -> 128 -> 64 -> 32 -> 16 -> 8 -> 4
        encoder_channels = [3, 32, 32, 64, 128, 256, 512]
        encoder_layers = []
        for c_in, c_out in zip(encoder_channels[:-1], encoder_channels[1:]):
            encoder_layers.append(nn.Conv2d(c_in, c_out, 4, stride=2, padding=1))
            encoder_layers.append(nn.ReLU())
        self.encoder = nn.Sequential(*encoder_layers)

        # Flattened size of the 512 x 4 x 4 encoder output.
        flat_features = 512 * 4 * 4

        # Latent space heads
        self.fc_mu = nn.Linear(flat_features, latent_dim)
        self.fc_logvar = nn.Linear(flat_features, latent_dim)

        # Projects a latent vector back to the 512 x 4 x 4 decoder input
        self.decoder_input = nn.Linear(latent_dim, flat_features)

        # Decoder: each transposed conv doubles H and W.
        # 4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 256
        decoder_channels = [512, 256, 128, 64, 32, 16, 3]
        final_stage = len(decoder_channels) - 2
        decoder_layers = []
        for stage, (c_in, c_out) in enumerate(
            zip(decoder_channels[:-1], decoder_channels[1:])
        ):
            decoder_layers.append(
                nn.ConvTranspose2d(c_in, c_out, 4, stride=2, padding=1)
            )
            # tanh on the last stage bounds the output to [-1, 1]
            decoder_layers.append(nn.Tanh() if stage == final_stage else nn.ReLU())
        self.decoder = nn.Sequential(*decoder_layers)

    def encode(self, x):
        """Return ``(mu, logvar)`` of the latent distribution for batch ``x``."""
        features = self.encoder(x)
        flat = features.view(features.size(0), -1)
        return self.fc_mu(flat), self.fc_logvar(flat)

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(logvar / 2)``."""
        sigma = torch.exp(0.5 * logvar)
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def decode(self, z):
        """Map latent vectors ``z`` to images via the linear projection and decoder."""
        hidden = self.decoder_input(z)
        hidden = hidden.view(hidden.size(0), 512, 4, 4)
        return self.decoder(hidden)

    def forward(self, x):
        """Encode, sample, decode. Returns ``(reconstruction, mu, logvar)``."""
        mu, logvar = self.encode(x)
        latent = self.reparameterize(mu, logvar)
        return self.decode(latent), mu, logvar

Loss Function

In [6]:
def vae_loss(recon_x, x, mu, logvar, beta=1.0):
    """Return the summed VAE objective: reconstruction error + beta * KL.

    Args:
        recon_x: Reconstructed batch.
        x: Target batch, same shape as ``recon_x``.
        mu: Latent means.
        logvar: Latent log-variances.
        beta: Multiplier applied to the KL term.

    Returns:
        Scalar tensor. Both terms are summed (not averaged) over all elements.
    """
    # Squared error summed over every element of the batch
    reconstruction_term = F.mse_loss(recon_x, x, reduction='sum')

    # KL(N(mu, sigma^2) || N(0, 1)) summed over batch and latent dimensions
    per_element = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_term = -0.5 * torch.sum(per_element)

    return reconstruction_term + beta * kl_term

Training Function

In [7]:
def train_vae(model, dataloader, optimizer, device, beta=1.0):
    """Run one training epoch and return the average loss per sample.

    Args:
        model: Module whose forward pass returns ``(reconstruction, mu, logvar)``.
        dataloader: Iterable of image batches; must expose ``.dataset``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device each batch is moved to.
        beta: KL weight forwarded to ``vae_loss``.

    Returns:
        Sum of the batch losses divided by the number of dataset items.
    """
    model.train()
    num_batches = len(dataloader)
    running_loss = 0

    for step, batch in enumerate(dataloader):
        batch = batch.to(device)
        optimizer.zero_grad()

        reconstruction, mean, log_variance = model(batch)
        batch_loss = vae_loss(reconstruction, batch, mean, log_variance, beta)

        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value

        # Progress line every 10th batch, reported per sample
        if step % 10 == 0:
            print(f'Batch {step}/{num_batches}, Loss: {loss_value/len(batch):.4f}')

    return running_loss / len(dataloader.dataset)

Function to Generate Output

In [8]:
def generate_artwork(model, device, num_samples=1):
    """Decode random latent vectors into images.

    Puts ``model`` in eval mode, draws ``num_samples`` vectors of length
    ``model.latent_dim`` from a standard normal, decodes them on ``device``
    without tracking gradients, and returns the result on the CPU.
    """
    model.eval()
    latent_shape = (num_samples, model.latent_dim)
    with torch.no_grad():
        # Sampled first, then moved to the target device
        noise = torch.randn(*latent_shape)
        return model.decode(noise.to(device)).cpu()

Function to Save Output

In [9]:
def save_image(tensor, filepath):
    """Save a tensor with values in [-1, 1] as an image file.

    Args:
        tensor: Image tensor normalized to [-1, 1]; it is not modified.
        filepath: Destination path. Missing parent directories are created.
    """
    # Work on a detached CPU copy so tensors that still carry gradients or
    # live on the GPU can be converted safely.
    tensor = tensor.detach().cpu()

    # Denormalize from [-1, 1] to [0, 1]
    tensor = (tensor + 1) / 2
    tensor = torch.clamp(tensor, 0, 1)

    # Create the output folder up front; otherwise saving raises when the
    # folder does not exist yet.
    Path(filepath).parent.mkdir(parents=True, exist_ok=True)

    # Convert to PIL Image
    img = transforms.ToPILImage()(tensor)
    img.save(filepath)
    print(f"Saved image to {filepath}")

Function to Visualize Original, Reconstructed, and Generated Images

In [10]:
def visualize_results(original, reconstructed, generated):
    """Show original, reconstructed, and generated images side by side.

    Args:
        original: Image tensor in channel-first layout, values in [-1, 1].
        reconstructed: Model reconstruction of ``original``, same layout.
        generated: Image decoded from a random latent vector, same layout.

    The inputs are not modified: denormalization is done on copies, so the
    same tensors can be reused or plotted again afterwards.
    """
    panels = (
        ('Original', original),
        ('Reconstructed', reconstructed),
        ('Generated', generated),
    )
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    for ax, (title, img) in zip(axes, panels):
        # Denormalize from [-1, 1] to [0, 1] on a detached copy rather than
        # rewriting the caller's tensor in place.
        shown = torch.clamp((img.detach() + 1) / 2, 0, 1)
        # imshow expects channels last
        ax.imshow(shown.permute(1, 2, 0).cpu())
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

Training Pipeline

In [11]:
def main():
    """Train the VAE on the configured image folder and save the results.

    Steps: pick the device, load the dataset from ``CONFIG['drive_folder']``,
    train for ``CONFIG['epochs']`` epochs, save and display a generated sample
    every 10 epochs (and after the last one), plot the loss curve, save the
    model weights, and save one final generated image.

    Outputs are written below ``CONFIG['output_root']`` when that optional key
    is present, otherwise below ``/content/drive/MyDrive/Ethical_AI_Image``.
    Returns early, without training, when no images are found.
    """
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Load dataset
    print("\nLoading dataset...")
    dataset = ArtDataset(CONFIG['drive_folder'], CONFIG['image_size'])

    if len(dataset) == 0:
        print("ERROR: No images found! Please check your folder path.")
        print(f"Looking in: {CONFIG['drive_folder']}")
        return

    # Resolve every output location and create it before training starts, so
    # a missing folder cannot make a save fail after the epochs have run.
    output_root = Path(CONFIG.get('output_root', '/content/drive/MyDrive/Ethical_AI_Image'))
    temp_dir = output_root / 'Images' / 'Output' / 'Temp'
    final_dir = output_root / 'Images' / 'Output' / 'Final'
    model_dir = output_root / 'Code' / 'Model'
    for directory in (temp_dir, final_dir, model_dir):
        directory.mkdir(parents=True, exist_ok=True)

    dataloader = DataLoader(dataset, batch_size=CONFIG['batch_size'],
                          shuffle=True, num_workers=2)

    # Initialize model
    print("\nInitializing VAE model...")
    model = VAE(latent_dim=CONFIG['latent_dim']).to(device)
    optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])

    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Training loop
    print("\nStarting training...")
    losses = []
    total_epochs = CONFIG['epochs']

    for epoch in range(total_epochs):
        print(f"\n{'='*60}")
        print(f"Epoch {epoch+1}/{total_epochs}")
        print('='*60)

        avg_loss = train_vae(model, dataloader, optimizer, device, CONFIG['beta'])
        losses.append(avg_loss)
        print(f"Average Loss: {avg_loss:.4f}")

        # Generate and save artwork every 10 epochs and after the last epoch
        if (epoch + 1) % 10 == 0 or epoch == total_epochs - 1:
            print("\nGenerating new artwork...")
            generated = generate_artwork(model, device, num_samples=1)[0]

            # Save the generated artwork
            output_path = str(temp_dir / f'generated_artwork_epoch_{epoch+1}.png')
            save_image(generated, output_path)

            # Visualize results on the first image of a freshly drawn batch
            sample_data = next(iter(dataloader))[0:1].to(device)
            with torch.no_grad():
                recon, _, _ = model(sample_data)
            visualize_results(sample_data[0], recon[0], generated)

    # Plot training loss
    plt.figure(figsize=(10, 5))
    plt.plot(losses)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('VAE Training Loss')
    plt.grid(True)
    plt.show()

    # Save final model
    model_path = str(model_dir / 'vae_ethical_art_model.pth')
    torch.save(model.state_dict(), model_path)
    print(f"\nModel saved to {model_path}")

    # Generate final artwork
    print("\nGenerating final artwork...")
    final_artwork = generate_artwork(model, device, num_samples=1)[0]
    final_path = str(final_dir / 'final_generated_artwork.png')
    save_image(final_artwork, final_path)

    print("\n" + "="*60)
    print("Training complete")
    print("="*60)

Train Model

In [27]:
# Run the full training pipeline when this cell (or the exported script) is
# executed directly.
if __name__ == "__main__":
    main()

Output hidden; open in https://colab.research.google.com to view.