In [None]:
# Execute the shared notebook at ../../supportvectors-common.ipynb via the IPython %run magic.
# NOTE(review): its contents are not visible from this notebook — presumably common setup; confirm.
%run ../../supportvectors-common.ipynb

We walk through a vanilla AutoEncoder on the Trees dataset consisting of Oak and Weeping Willow trees.

First, the imports.

In [2]:

import gc

import torch
from sklearn.mixture import GaussianMixture
from torch.utils.data import DataLoader
from torchvision.transforms import v2

from svlearn.config.configuration import ConfigurationMixin
from svlearn.trees.preprocess import Preprocessor
from svlearn.trees.tree_dataset import TreeDataset
from svlearn.auto_encoders.auto_encoder_multi_channel_util import (train_autoencoder,
                                                        visualize_reconstruction,
                                                        get_latent_representations
)
from svlearn.auto_encoders.vanilla_resnet_auto_encoder import AutoencoderResnet
from svlearn.auto_encoders.auto_encoder_util import (   sample_from_gmm,
                                                        generate_images_from_latent,
                                                        visualize_generated_images,
                                                        visualize_interpolations)

In [3]:
# Start from a clean slate: run a full Python garbage collection first so that
# unreferenced tensors are actually destroyed, then ask PyTorch to hand back
# the cached-but-unused GPU memory held by its CUDA allocator.
# (`gc` is imported in the imports cell at the top of the notebook.)
gc.collect()
torch.cuda.empty_cache()

Load the config and paths for the tree images

In [None]:
# Load the project configuration through ConfigurationMixin.
config = ConfigurationMixin().load_config()
# Directory of the tree images, read from the 'tree-classification' -> 'data' entry of the config.
data_dir = config['tree-classification']['data']

Preprocess the images as before

In [5]:
# Run the project's Preprocessor over the image directory.
preprocessor = Preprocessor()
# The result is unpacked into a training frame, a validation frame and a label encoder.
# NOTE(review): label_encoder is not used in the cells visible below.
train_df, val_df, label_encoder = preprocessor.preprocess(data_dir)

Take them through the same tensor transforms as before

In [6]:
# Preprocessing constants shared by both pipelines. The mean/std values are the
# per-channel statistics conventionally used with ImageNet-pretrained torchvision models.
IMAGE_SIZE = 224
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation, then float conversion and normalization.
train_transform = v2.Compose(
    [
        v2.ToImage(),
        # Randomly crop between 50% and 100% of the image area and resize to 224x224.
        v2.RandomResizedCrop(IMAGE_SIZE, scale=(0.5, 1)),
        # Flip the image horizontally with a 50% chance.
        v2.RandomHorizontalFlip(p=0.5),
        # Randomly perturb the brightness, contrast and saturation.
        v2.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        # Ensure the tensor is of float datatype (values are rescaled).
        v2.ToDtype(torch.float32, scale=True),
        # Normalize each channel.
        v2.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# Evaluation pipeline: deterministic resize only, with the same float conversion and normalization.
test_transform = v2.Compose(
    [
        v2.ToImage(),
        # Resize all images to a standard size suitable for the CNN model.
        v2.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
        # Ensure the tensor is of float datatype (values are rescaled).
        v2.ToDtype(torch.float32, scale=True),
        # Normalize each channel.
        v2.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)


Create the dataset and dataloaders

In [7]:
# Wrap each split in a TreeDataset: the training split gets the augmenting
# transform, the validation split gets the deterministic one.
train_dataset = TreeDataset(train_df, transform=train_transform)
val_dataset = TreeDataset(val_df, transform=test_transform)

In [8]:
# Training batches: small (32) and reshuffled on every pass over the data.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)

# Validation batches: larger (256) and served in a fixed order.
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=256,
    shuffle=False,
)

Identify the device (either cuda or cpu) and create the autoencoder model on it

In [9]:
# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Instantiate AutoencoderResnet with its default arguments and move it to the chosen device.
model = AutoencoderResnet().to(device)

In [None]:
# Print the model's textual representation to inspect its structure.
print(model)

Run training on the vanilla autoencoder

In [None]:
# Train the autoencoder for 50 epochs on `device`, passing both the training
# and the validation loader to the project's training helper.
train_autoencoder(model, train_loader, val_loader, num_epochs=50, device=device)

Visualize the reconstruction (top row: original images; bottom row: their reconstructions)

In [None]:
# Build a separate, shuffled loader over the validation set so that the batches
# come in a different order each time this cell is run. It gets its own name on
# purpose: rebinding `val_loader` here would change what the training cell
# validates on if that cell were re-run afterwards.
recon_loader = DataLoader(val_dataset, batch_size=128, shuffle=True)
# Call this to monitor reconstruction quality.
visualize_reconstruction(model, recon_loader, device=device)

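Generate new images by sampling hidden (latent) vectors from a Gaussian mixture fitted to the latent representations of the training data, and passing them through the decoder of the autoencoder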

In [None]:

# Collect latent representations from the training data.
# NOTE(review): train_loader shuffles and applies random augmentations, so these
# latents are not identical between runs — confirm whether a non-augmented
# loader should be used here instead.
latent_data = get_latent_representations(model, train_loader, device=device)

# Fit a Gaussian Mixture Model with 2 components. GaussianMixture initialises
# randomly, so its RNG is pinned to make the fit repeatable for a given
# `latent_data`. Set GMM_SEED = None to get the previous, unseeded behaviour.
GMM_SEED = 42
gmm = GaussianMixture(n_components=2, random_state=GMM_SEED)
gmm.fit(latent_data)

# Sample 10 latent vectors from the fitted GMM.
# NOTE(review): if sample_from_gmm delegates to gmm.sample, an integer seed makes
# it return the same samples on every call — confirm against the helper.
latent_samples = sample_from_gmm(gmm, num_samples=10)

# Generate images from the latent samples.
generated_images = generate_images_from_latent(model, latent_samples, device=device)

# Visualize the generated images (three-channel colour output).
visualize_generated_images(generated_images, is_color=True)


Visualize interpolations between the reconstruction of two images

In [None]:

# Draw two random training images each time this cell is run. A dedicated
# loader with batch_size=2 is used so that (a) `train_loader` keeps the
# settings the training cell relies on, and (b) only the two images that are
# actually needed get loaded and transformed.
pair_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
image_pair, _ = next(iter(pair_loader))

# The two endpoints of the interpolation (already transformed to tensors by the dataset).
image1 = image_pair[0]  # First image
image2 = image_pair[1]  # Second image

# Visualize a 10-step interpolation between the two images.
visualize_interpolations(model, image1, image2, num_steps=10, is_color=True, device=device)