# Import Required Libraries
Import PyTorch, torchvision, FASTGAN, EfficientNetV2, ShuffleNetV2, and utility libraries

In [None]:
# Import PyTorch and torchvision
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

# Import FASTGAN
from fastgan import FastGAN

# Import EfficientNetV2 and ShuffleNetV2
from efficientnet_pytorch import EfficientNet
from torchvision.models import shufflenet_v2_x1_0

# Import utility libraries
import pandas as pd
import numpy as np
import os
from PIL import Image

# Load and Prepare HAM10000 Dataset
Load the HAM10000 dataset, create data loaders, and organize images by class

In [None]:
# Dataset Class
class HAM10000Dataset(Dataset):
    """Map-style dataset over a HAM10000 metadata CSV and its image folders.

    Each item is an ``(image, label)`` pair: ``image`` is the RGB image stored
    as ``<image_id>.jpg`` (after ``transform`` when one was supplied) and
    ``label`` is the raw value of the row's ``dx`` column.
    """

    def __init__(self, csv_file, img_dirs, transform=None):
        """Read the metadata table and remember where images are searched.

        Args:
            csv_file: Path of a CSV providing the ``image_id`` and ``dx``
                columns used by ``__getitem__``.
            img_dirs: One directory, or an iterable of directories, searched
                in order for every image file.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.data = pd.read_csv(csv_file)
        # A bare path would otherwise be iterated character by character in
        # __getitem__, so wrap it into a one-element list.
        if isinstance(img_dirs, (str, os.PathLike)):
            img_dirs = [img_dirs]
        self.img_dirs = img_dirs
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the image and label for positional row ``idx``.

        Returns:
            Tuple ``(image, label)`` as described in the class docstring.

        Raises:
            FileNotFoundError: If the image is in none of ``img_dirs``.
        """
        # Look the row up once instead of once per column.
        row = self.data.iloc[idx]
        img_name = row['image_id'] + '.jpg'
        for img_dir in self.img_dirs:
            img_path = os.path.join(img_dir, img_name)
            if not os.path.exists(img_path):
                continue
            # The context manager releases the file handle deterministically;
            # convert() returns an independent, fully loaded image.
            with Image.open(img_path) as source:
                image = source.convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, row['dx']
        raise FileNotFoundError(f"Image {img_name} not found in directories {self.img_dirs}")

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalize each channel with the mean/std values listed below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Metadata table plus the two folders the images are split across
csv_file = 'HAM10000_metadata.csv'
img_dirs = ['HAM10000_images_part_1', 'HAM10000_images_part_2']
dataset = HAM10000Dataset(csv_file, img_dirs, transform=transform)

# Shuffled mini-batch loader over the full dataset
batch_size = 32
data_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

# Group every transformed image under its label.
# NOTE(review): this walks the whole dataset and keeps every image tensor in
# memory at once.
class_images = {}
for img, label in dataset:
    class_images.setdefault(label, []).append(img)

# Report how many images each class holds
for label, images in class_images.items():
    print(f"Class {label}: {len(images)} images")

# Setup FASTGAN Configuration
Configure the FASTGAN parameters and set up the generator and discriminator models.

In [None]:
# Setup FASTGAN Configuration

# Sizes handed to the generator / discriminator constructors
latent_dim, image_size, channels = 128, 224, 3

# Build both networks from the sizes above
generator = FastGAN.Generator(latent_dim, image_size, channels)
discriminator = FastGAN.Discriminator(image_size, channels)

# Show the constructed networks
print(generator)
print(discriminator)

# Adam hyper-parameters shared by both optimizers
lr = 0.0002
beta1, beta2 = 0.5, 0.999

# One Adam optimizer per network, same learning rate and betas
optimizer_G = torch.optim.Adam(
    generator.parameters(),
    lr=lr,
    betas=(beta1, beta2),
)
optimizer_D = torch.optim.Adam(
    discriminator.parameters(),
    lr=lr,
    betas=(beta1, beta2),
)

# Show the optimizer configuration
print(optimizer_G)
print(optimizer_D)

# Generate Synthetic Images
Generate 1000 synthetic images per class using FASTGAN and save the intermediate results.

In [None]:
# Generate Synthetic Images

# Function to generate synthetic images
def generate_synthetic_images(generator, num_images, latent_dim, device, batch_size=64):
    """Sample images from ``generator`` without tracking gradients.

    The generator is switched to evaluation mode (and left in it). Latent
    vectors of shape ``(num_images, latent_dim, 1, 1)`` are drawn from a
    standard normal distribution on ``device`` and fed through the generator
    in chunks, so no single forward pass sees more than ``batch_size``
    samples.

    Args:
        generator: Module mapping a batch of latent vectors to a batch of
            outputs.
        num_images: Number of latent vectors to sample.
        latent_dim: Channel size of every latent vector.
        device: Device on which the noise is created.
        batch_size: Largest number of latent vectors per forward pass;
            ``None`` runs everything in a single pass.

    Returns:
        The generator outputs concatenated along dimension 0.

    Raises:
        ValueError: If ``batch_size`` is given and smaller than 1.
    """
    if batch_size is not None and batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    generator.eval()
    # Draw all noise up front so the sampled latents do not depend on the
    # chunk size.
    noise = torch.randn(num_images, latent_dim, 1, 1, device=device)
    with torch.no_grad():
        if batch_size is None or num_images <= batch_size:
            return generator(noise)
        chunks = [generator(chunk) for chunk in noise.split(batch_size)]
    return torch.cat(chunks, dim=0)

# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator.to(device)

# Generate 1000 synthetic images per class
# NOTE(review): no training or checkpoint-loading step for `generator` is
# visible in this notebook — confirm its weights are ready before sampling.
num_synthetic_images_per_class = 1000
synthetic_images = {}

for label in class_images:
    synthetic_images[label] = generate_synthetic_images(generator, num_synthetic_images_per_class, latent_dim, device)

# Save synthetic images to disk, one sub-directory per class label
output_dir = '/kaggle/working/synthetic_images'
os.makedirs(output_dir, exist_ok=True)

for label, images in synthetic_images.items():
    label_dir = os.path.join(output_dir, label)
    os.makedirs(label_dir, exist_ok=True)
    for i, img in enumerate(images):
        # Map [-1, 1] to [0, 255] (assumes a tanh-style generator output —
        # TODO confirm). Clamp first: casting an out-of-range float to uint8
        # wraps around instead of saturating.
        img = ((img * 0.5 + 0.5) * 255).clamp(0, 255)
        # CHW tensor -> HWC uint8 array, the layout PIL expects
        img = img.permute(1, 2, 0).cpu().numpy().astype(np.uint8)
        img_pil = Image.fromarray(img)
        img_pil.save(os.path.join(label_dir, f"{label}_{i}.png"))

print("Synthetic images generated and saved successfully.")

# Load Pre-trained Models
Initialize and load pre-trained EfficientNetV2 and ShuffleNetV2 models

In [None]:
# Load Pre-trained Models

# Initialize and load pre-trained EfficientNetV2 model.
# The efficientnet_pytorch package only provides the original EfficientNet
# family, so the name 'efficientnet-v2-s' is rejected by
# EfficientNet.from_pretrained. torchvision ships EfficientNetV2-S, so it is
# loaded from there instead.
efficientnet_model = torchvision.models.efficientnet_v2_s(
    weights=torchvision.models.EfficientNet_V2_S_Weights.DEFAULT
)
efficientnet_model.eval()  # Set the model to evaluation mode

# Initialize and load pre-trained ShuffleNetV2 model (same weights that
# `pretrained=True` selected, requested through the non-deprecated argument)
shufflenet_model = shufflenet_v2_x1_0(
    weights=torchvision.models.ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1
)
shufflenet_model.eval()  # Set the model to evaluation mode

# NOTE(review): both networks carry ImageNet-pretrained classification heads,
# while later cells compare their predictions with HAM10000 `dx` labels —
# confirm the heads are replaced and fine-tuned before using them as a filter.

# Move models to the appropriate device (GPU if available)
efficientnet_model.to(device)
shufflenet_model.to(device)

# Print the models to verify they are loaded correctly
print(efficientnet_model)
print(shufflenet_model)

# Classify Images
Run inference on synthetic images using both classification models

In [None]:
# Classify Images

# Function to classify images using a given model
def classify_images(model, images, device):
    """Predict one class index per image with ``model``.

    The model is put into evaluation mode, ``images`` is moved to ``device``
    and the forward pass runs without gradient tracking.

    Args:
        model: Module producing one row of class scores per input image.
        images: Batch tensor passed to the model.
        device: Device the batch is moved to before inference.

    Returns:
        Tensor holding, for every row of the model output, the index of the
        largest score along dimension 1.
    """
    model.eval()
    batch = images.to(device)
    with torch.no_grad():
        scores = model(batch)
    return scores.argmax(dim=1)

# Create a DataLoader for synthetic images

# The labels are strings, which can neither be moved to a device nor compared
# with predicted class indices, so give every label an integer id.
# NOTE(review): assumes both classifiers emit indices following the sorted
# label names (i.e. they were fine-tuned on these classes) — confirm.
class_names = sorted(synthetic_images)
class_to_idx = {name: idx for idx, name in enumerate(class_names)}

synthetic_dataset = []
for label, images in synthetic_images.items():
    for img in images:
        synthetic_dataset.append((img, class_to_idx[label]))

# num_workers=0: the samples already sit in memory, and when they were
# generated on the GPU they are CUDA tensors, which should not be handed to
# DataLoader worker processes.
synthetic_loader = DataLoader(synthetic_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# Classify synthetic images using both models
correctly_classified_images = []

for images, labels in synthetic_loader:
    images = images.to(device)
    labels = labels.to(device)

    # Classify with EfficientNetV2
    efficientnet_preds = classify_images(efficientnet_model, images, device)

    # Classify with ShuffleNetV2
    shufflenet_preds = classify_images(shufflenet_model, images, device)

    # Filter images that both models classify correctly; store the label name
    # again so the file names below stay readable
    both_correct = (efficientnet_preds == labels) & (shufflenet_preds == labels)
    for img, target, keep in zip(images, labels.tolist(), both_correct.tolist()):
        if keep:
            correctly_classified_images.append((img, class_names[target]))

# Save filtered synthetic images to disk
filtered_output_dir = '/kaggle/working/filtered_synthetic_images'
os.makedirs(filtered_output_dir, exist_ok=True)

for i, (img, label) in enumerate(correctly_classified_images):
    # Map [-1, 1] to [0, 255] (assumes a tanh-style generator output — TODO
    # confirm); clamp so out-of-range values saturate instead of wrapping in
    # the uint8 cast
    img = ((img * 0.5 + 0.5) * 255).clamp(0, 255)
    img = img.permute(1, 2, 0).cpu().numpy().astype(np.uint8)
    img_pil = Image.fromarray(img)
    img_pil.save(os.path.join(filtered_output_dir, f"{label}_{i}.png"))

print("Filtered synthetic images saved successfully.")

# Filter Images
Filter and keep only the synthetic images that both models classify correctly

In [None]:
# Filter Images

# Function to classify images using a given model
def classify_images(model, images, device):
    """Predict one class index per image with ``model``.

    The model is put into evaluation mode, ``images`` is moved to ``device``
    and the forward pass runs without gradient tracking.

    Args:
        model: Module producing one row of class scores per input image.
        images: Batch tensor passed to the model.
        device: Device the batch is moved to before inference.

    Returns:
        Tensor holding, for every row of the model output, the index of the
        largest score along dimension 1.
    """
    model.eval()
    batch = images.to(device)
    with torch.no_grad():
        scores = model(batch)
    return scores.argmax(dim=1)

# Create a DataLoader for synthetic images

# The labels are strings, which can neither be moved to a device nor compared
# with predicted class indices, so give every label an integer id.
# NOTE(review): assumes both classifiers emit indices following the sorted
# label names (i.e. they were fine-tuned on these classes) — confirm.
class_names = sorted(synthetic_images)
class_to_idx = {name: idx for idx, name in enumerate(class_names)}

synthetic_dataset = []
for label, images in synthetic_images.items():
    for img in images:
        synthetic_dataset.append((img, class_to_idx[label]))

# num_workers=0: the samples already sit in memory, and when they were
# generated on the GPU they are CUDA tensors, which should not be handed to
# DataLoader worker processes.
synthetic_loader = DataLoader(synthetic_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# Classify synthetic images using both models
correctly_classified_images = []

for images, labels in synthetic_loader:
    images = images.to(device)
    labels = labels.to(device)

    # Classify with EfficientNetV2
    efficientnet_preds = classify_images(efficientnet_model, images, device)

    # Classify with ShuffleNetV2
    shufflenet_preds = classify_images(shufflenet_model, images, device)

    # Filter images that both models classify correctly; store the label name
    # again so the file names below stay readable
    both_correct = (efficientnet_preds == labels) & (shufflenet_preds == labels)
    for img, target, keep in zip(images, labels.tolist(), both_correct.tolist()):
        if keep:
            correctly_classified_images.append((img, class_names[target]))

# Save filtered synthetic images to disk
filtered_output_dir = '/kaggle/working/filtered_synthetic_images'
os.makedirs(filtered_output_dir, exist_ok=True)

for i, (img, label) in enumerate(correctly_classified_images):
    # Map [-1, 1] to [0, 255] (assumes a tanh-style generator output — TODO
    # confirm); clamp so out-of-range values saturate instead of wrapping in
    # the uint8 cast
    img = ((img * 0.5 + 0.5) * 255).clamp(0, 255)
    img = img.permute(1, 2, 0).cpu().numpy().astype(np.uint8)
    img_pil = Image.fromarray(img)
    img_pil.save(os.path.join(filtered_output_dir, f"{label}_{i}.png"))

print("Filtered synthetic images saved successfully.")

# Save Filtered Data
Save the filtered synthetic images, training images, and test images to the specified directories.

In [None]:
# Save Filtered Data

# Save filtered synthetic images to disk
filtered_output_dir = '/kaggle/working/filtered_synthetic_images'
os.makedirs(filtered_output_dir, exist_ok=True)

for i, (img, label) in enumerate(correctly_classified_images):
    # Map [-1, 1] to [0, 255] (assumes a tanh-style generator output — TODO
    # confirm). Clamp first: casting an out-of-range float to uint8 wraps
    # around instead of saturating.
    img = ((img * 0.5 + 0.5) * 255).clamp(0, 255)
    # CHW tensor -> HWC uint8 array, the layout PIL expects
    img = img.permute(1, 2, 0).cpu().numpy().astype(np.uint8)
    img_pil = Image.fromarray(img)
    img_pil.save(os.path.join(filtered_output_dir, f"{label}_{i}.png"))

print("Filtered synthetic images saved successfully.")

# Save training images to disk
train_output_dir = '/kaggle/working/train_images'
os.makedirs(train_output_dir, exist_ok=True)

# `dataset` applies transforms.Normalize with these per-channel statistics, so
# undoing it must use the same values; the 0.5/0.5 inverse used for the GAN
# outputs would give wrong colors here.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

for i, (img, label) in enumerate(dataset):
    # Invert the normalization, scale to [0, 255], round to the nearest pixel
    # value and clamp so the uint8 cast cannot wrap around
    img = ((img * norm_std + norm_mean) * 255).round().clamp(0, 255)
    # CHW tensor -> HWC uint8 array, the layout PIL expects
    img = img.permute(1, 2, 0).cpu().numpy().astype(np.uint8)
    img_pil = Image.fromarray(img)
    img_pil.save(os.path.join(train_output_dir, f"{label}_{i}.png"))

print("Training images saved successfully.")

# Save test images to disk
test_output_dir = '/kaggle/working/test_images'
os.makedirs(test_output_dir, exist_ok=True)

# NOTE(review): `test_dataset` is not defined in any visible cell; it must be
# created before this code runs.
# Assuming test_dataset is defined and loaded similarly to dataset, i.e. with
# the same transforms.Normalize statistics — TODO confirm. Undoing that
# normalization needs these values rather than the 0.5/0.5 inverse used for
# the GAN outputs.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

for i, (img, label) in enumerate(test_dataset):
    # Invert the normalization, scale to [0, 255], round to the nearest pixel
    # value and clamp so the uint8 cast cannot wrap around
    img = ((img * norm_std + norm_mean) * 255).round().clamp(0, 255)
    # CHW tensor -> HWC uint8 array, the layout PIL expects
    img = img.permute(1, 2, 0).cpu().numpy().astype(np.uint8)
    img_pil = Image.fromarray(img)
    img_pil.save(os.path.join(test_output_dir, f"{label}_{i}.png"))

print("Test images saved successfully.")