# AutoEncoder Library Demonstration with Geological Datasets

This notebook demonstrates how to use the `autoencoder_lib` package for experimenting with different autoencoder architectures, focusing on geological layered datasets for investigating latent space representation capabilities.

In [None]:
# Import necessary libraries
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

# Seed NumPy and PyTorch from a single constant so repeated runs are reproducible
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

## 1. Import Components from autoencoder_lib

In [None]:
# Import key components from our package
from autoencoder_lib.models import (
    SimpleLinearAutoencoder,
    DeeperLinearAutoencoder,
    ConvAutoencoder,
    DeeperConvAutoencoder,
    VariationalAutoencoder,
    DenoisingAutoencoder
)

from autoencoder_lib.data import (
    ShapeDataset,
    load_or_create_data_split
)

from autoencoder_lib.experiment import (
    AutoEncoderWrapper,
    ExperimentManager,
    MultiArchitectureManager
)

from autoencoder_lib.visualization import (
    visualize_reconstructions,
    compare_reconstructions,
    plot_with_labels,
    plot_training_loss_curves,
    enhanced_training_visualizations
)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Using device: {device}")

## 2. Loading the Geologic Layer Datasets

Let's load the geological layer datasets that come with the project. We have two sets:
1. Consistent layers: Geologic patterns with consistent layer thickness
2. Variable layers: Geologic patterns with variable layer thickness

In [None]:
# Imports needed by this cell, gathered in one place
from torch.utils.data import ConcatDataset, DataLoader
from autoencoder_lib.data import load_or_create_data_split
from autoencoder_lib.data.datasets import ImageFolderDataset


def _load_layer_images(root_dir):
    """Create an ImageFolderDataset for `root_dir` with img_size=64 and grayscale=True."""
    return ImageFolderDataset(
        root_dir=root_dir,
        img_size=64,
        grayscale=True
    )


def _split_layers(dataset, split_file_path):
    """Return the (train, val, test) parts produced by load_or_create_data_split.

    Every call uses test_ratio=0.2, val_ratio=0.1 and seed=42; only the dataset
    and the split file path vary.
    NOTE(review): the split file presumably caches the split between runs —
    confirm against the library.
    """
    return load_or_create_data_split(
        dataset,
        test_ratio=0.2,
        val_ratio=0.1,
        split_file_path=split_file_path,
        seed=42
    )


# Load the two geological layer datasets
consistent_layers_dataset = _load_layer_images('layered_geologic_patterns_dataset/consistent_layers')
variable_layers_dataset = _load_layer_images('layered_geologic_patterns_dataset/variable_layers')

# A combined view of both datasets, available for later examples
geo_combined_dataset = ConcatDataset([consistent_layers_dataset, variable_layers_dataset])

print(f"Consistent layers dataset: {len(consistent_layers_dataset)} images")
print(f"Variable layers dataset: {len(variable_layers_dataset)} images")
print(f"Combined dataset: {len(geo_combined_dataset)} images")

# Train/validation/test splits, one set per dataset
consistent_train, consistent_val, consistent_test = _split_layers(
    consistent_layers_dataset, 'consistent_layers_split.json'
)
variable_train, variable_val, variable_test = _split_layers(
    variable_layers_dataset, 'variable_layers_split.json'
)

# Loaders for the consistent layers dataset (the one used for most of the notebook);
# only the training loader shuffles
batch_size = 32
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=should_shuffle)
    for split, should_shuffle in (
        (consistent_train, True),
        (consistent_val, False),
        (consistent_test, False),
    )
)

In [None]:
# Preview a few samples from each dataset: consistent layers on the top row,
# variable layers on the bottom row.
datasets_to_preview = [
    ("Consistent", consistent_layers_dataset),
    ("Variable", variable_layers_dataset),
]

# Show at most 5 columns, but never index past the end of the smaller dataset
n_preview = min(5, *(len(dataset) for _, dataset in datasets_to_preview))
if n_preview == 0:
    raise ValueError("Cannot preview samples: at least one geological dataset is empty")

# squeeze=False keeps `axes` two-dimensional even when only one column is drawn
preview_fig, axes = plt.subplots(
    len(datasets_to_preview), n_preview, figsize=(12, 4), squeeze=False
)
for col in range(n_preview):
    for row, (label, dataset) in enumerate(datasets_to_preview):
        # Each sample unpacks into the image and a second item that is not needed here
        img, _ = dataset[col]
        ax = axes[row, col]
        ax.imshow(img.squeeze(), cmap='gray')
        ax.set_title(f"{label} {col+1}")
        ax.axis('off')

preview_fig.tight_layout()
plt.show()

## 3. Training a Simple Linear Autoencoder

Let's train a simple linear autoencoder on the consistent-layers geological dataset using the `AutoEncoderWrapper` class.

In [None]:
# The first sample's image determines the input shape handed to the model
first_image, _ = consistent_layers_dataset[0]
input_shape = first_image.shape
print(f"Input shape: {input_shape}")

# Settings for the simple linear autoencoder wrapper
linear_ae_settings = {
    "model_class": SimpleLinearAutoencoder,
    "model_name": "GeoLinearAE",
    "input_shape": input_shape,
    "latent_dim": 10,  # Using 10 dimensions in the latent space
    "batch_size": batch_size,
    "device": device,
    "results_dir": 'results/geo_linear_ae',
}
linear_ae_wrapper = AutoEncoderWrapper(**linear_ae_settings)

# Attach the loaders built earlier and flag the wrapper's data as ready
for loader_attr, loader in (
    ("train_loader", train_loader),
    ("val_loader", val_loader),
    ("test_loader", test_loader),
):
    setattr(linear_ae_wrapper, loader_attr, loader)
linear_ae_wrapper.dataloader_setup = True

# Initialize the model
linear_ae_wrapper.setup_model()

In [None]:
# Training settings for the linear autoencoder
linear_train_settings = {
    "epochs": 20,                    # Train for 20 epochs
    "learning_rate": 0.001,          # Learning rate for the optimizer
    "early_stopping_patience": 5,    # Stop if no improvement for 5 epochs
    "verbose": True,                 # Print training progress
    "visualize_training": True,      # Create visualizations of training metrics
}

# train() returns two values, kept here as the model and its training metrics
model, metrics = linear_ae_wrapper.train(**linear_train_settings)

In [None]:
# Score the linear autoencoder on the test loader and show 10 reconstructions
evaluation_settings = {"visualize": True, "num_samples": 10}
test_loss, test_metrics = linear_ae_wrapper.evaluate(
    data_loader=test_loader,
    **evaluation_settings
)

# Plot the latent space of the same test loader using t-SNE
latent_plot_settings = {
    "method": 'tsne',
    "max_samples": 200,
    "title": "Geological Layers - Linear Autoencoder Latent Space",
    "class_names": ['Consistent Layers'],  # Only one class for geological layers
}
linear_ae_wrapper.visualize_latent_space(
    data_loader=test_loader,
    **latent_plot_settings
)

## 4. Using the ExperimentManager for Multiple Experiments

The `ExperimentManager` class helps manage multiple experiments with different parameters.

In [None]:
# Latent sizes to explore with the simple linear autoencoder
latent_dims = [2, 5, 10, 20]

# One manager runs the whole sweep and writes under a single results directory
exp_manager = ExperimentManager(
    model_class=SimpleLinearAutoencoder,
    results_dir='results/geo_latent_dim_exploration'
)

# Settings shared by every run in the sweep
sweep_settings = {
    "base_experiment_name": "GeoLinearAE_LatentDim",
    "epochs": 15,
    "batch_size": 32,
    "device": device,
}
results_df = exp_manager.run_latent_dim_experiment(
    dataset=consistent_layers_dataset,
    dimensions=latent_dims,
    **sweep_settings
)

# Display results
print("Results from latent dimension exploration:")
print(results_df)

## 5. Comparing Different Autoencoder Architectures

The `MultiArchitectureManager` class helps compare different autoencoder architectures on the same dataset.

In [None]:
# Manager that collects the architectures to compare
multi_arch_manager = MultiArchitectureManager(
    results_dir='results/geo_architecture_comparison'
)

# Architectures to compare, registered in this order
architectures_to_compare = [
    ("SimpleLinear", SimpleLinearAutoencoder),
    ("DeeperLinear", DeeperLinearAutoencoder),
    ("Convolutional", ConvAutoencoder),
    ("Variational", VariationalAutoencoder),
]
for arch_name, arch_class in architectures_to_compare:
    multi_arch_manager.add_architecture(
        name=arch_name,
        model_class=arch_class
    )

# All architectures share the consistent layers dataset
multi_arch_manager.setup_common_dataset(consistent_layers_dataset)

# Run the comparison
comparison_settings = {
    "latent_dims": [10],  # Using 10 dimensions for all models
    "epochs": 15,         # Train each model for 15 epochs
    "batch_size": 32,
    "verbose": True,
}
comparison_results = multi_arch_manager.run_comparison(**comparison_settings)

# Generate and save a summary report
multi_arch_manager.save_summary_report()

## 6. Loading Pre-trained Models

You can save and load trained models for later use.

In [None]:
# Save the trained linear autoencoder; the returned value is used below as the
# location to load the model from
saved_model_path = linear_ae_wrapper.save_model()
print(f"Model saved to: {saved_model_path}")

# Create a new wrapper to load the saved model into.
# It mirrors the input shape and latent size used for training, like every other
# wrapper in this notebook.
# NOTE(review): this may be redundant if load_model() restores these settings
# from the saved file — confirm against the library.
new_wrapper = AutoEncoderWrapper(
    model_class=SimpleLinearAutoencoder,
    model_name="LoadedGeoLinearAE",
    input_shape=input_shape,
    latent_dim=linear_ae_wrapper.latent_dim,
    device=device,
    results_dir='results/loaded_geo_models'
)

# Load the saved model
loaded_model = new_wrapper.load_model(saved_model_path)

# Test the loaded model.
# `dataloader_setup` is never set on this wrapper, so pass the test loader
# explicitly (as the earlier evaluation cell does) instead of relying on the
# wrapper's internal loader state.
new_wrapper.test_loader = test_loader
test_loss, test_metrics = new_wrapper.evaluate(
    data_loader=test_loader,
    visualize=True,
    num_samples=5
)

print(f"Test loss with loaded model: {test_loss:.6f}")

## 7. Comparing Consistent vs. Variable Layers

Let's compare how autoencoders perform on the consistent vs. variable geological layer datasets.

In [None]:
# Imports used by this cell
import torch
from autoencoder_lib.visualization.reconstructions import compare_reconstructions


def _train_conv_autoencoder(model_name, results_dir, loaders, epochs=10):
    """Build, initialise and train a ConvAutoencoder wrapper on the given loaders.

    Args:
        model_name: Forwarded to AutoEncoderWrapper as `model_name`.
        results_dir: Forwarded to AutoEncoderWrapper as `results_dir`.
        loaders: (train, val, test) DataLoader triple attached to the wrapper.
        epochs: Number of training epochs (kept small for demonstration).

    Returns:
        (wrapper, model, metrics) — the wrapper plus the two values returned by
        `wrapper.train()`.
    """
    wrapper = AutoEncoderWrapper(
        model_class=ConvAutoencoder,
        model_name=model_name,
        input_shape=input_shape,
        latent_dim=10,
        batch_size=batch_size,
        device=device,
        results_dir=results_dir
    )

    # Attach the pre-built loaders and flag the wrapper's data as ready
    wrapper.train_loader, wrapper.val_loader, wrapper.test_loader = loaders
    wrapper.dataloader_setup = True

    # Initialise the model explicitly before training, as the linear
    # autoencoder cell does
    wrapper.setup_model()

    trained_model, training_metrics = wrapper.train(
        epochs=epochs,
        learning_rate=0.001,
        early_stopping_patience=5,
        verbose=True
    )
    return wrapper, trained_model, training_metrics


# Create data loaders for the variable layers dataset
variable_train_loader = DataLoader(variable_train, batch_size=batch_size, shuffle=True)
variable_val_loader = DataLoader(variable_val, batch_size=batch_size, shuffle=False)
variable_test_loader = DataLoader(variable_test, batch_size=batch_size, shuffle=False)

# Train a convolutional autoencoder on consistent layers
consistent_conv_ae, consistent_model, consistent_metrics = _train_conv_autoencoder(
    model_name="ConsistentLayersConvAE",
    results_dir='results/consistent_layers_conv_ae',
    loaders=(train_loader, val_loader, test_loader),
)

# Train a convolutional autoencoder on variable layers
variable_conv_ae, variable_model, variable_metrics = _train_conv_autoencoder(
    model_name="VariableLayersConvAE",
    results_dir='results/variable_layers_conv_ae',
    loaders=(variable_train_loader, variable_val_loader, variable_test_loader),
)

# Build a small mixed batch that alternates consistent (label 0) and
# variable (label 1) test samples. Take up to 5 of each, bounded by the size
# of the smaller test split so indexing never runs past the end.
samples_per_type = min(5, len(consistent_test), len(variable_test))
if samples_per_type == 0:
    raise ValueError("Cannot build a mixed test batch: at least one test split is empty")

mixed_test_data = []
mixed_test_labels = []
for i in range(samples_per_type):
    for label, test_split in enumerate((consistent_test, variable_test)):
        data, _ = test_split[i]
        mixed_test_data.append(data)
        mixed_test_labels.append(torch.tensor(label))

mixed_test_data = torch.stack(mixed_test_data)
mixed_test_labels = torch.stack(mixed_test_labels)

# A single batch holding every mixed sample (10 when both splits have >= 5 samples)
num_mixed_samples = len(mixed_test_data)
mixed_test_loader = DataLoader(
    torch.utils.data.TensorDataset(mixed_test_data, mixed_test_labels),
    batch_size=num_mixed_samples
)

# Compare the reconstructions of both models
fig = compare_reconstructions(
    [consistent_model, variable_model],
    ["Consistent Layers Model", "Variable Layers Model"],
    mixed_test_loader,
    device,
    num_samples=num_mixed_samples,
    title="Comparing Models Trained on Different Geological Patterns"
)

plt.show()

## 8. Finding Minimum Latent Dimension for Geological Data

Use the `find_minimum_latent_dim` method of `ExperimentManager` to determine the minimum number of latent dimensions needed to achieve acceptable reconstruction quality for geological patterns.

In [None]:
# One experiment manager serves both geological datasets
geo_exp_manager = ExperimentManager(
    model_class=ConvAutoencoder,  # Using convolutional AE for the geologic data
    results_dir='results/geologic_min_latent_dim'
)


def _search_min_latent_dim(dataset, experiment_name):
    """Run geo_exp_manager.find_minimum_latent_dim on `dataset` with the shared settings.

    Returns whatever find_minimum_latent_dim returns; the callers below unpack
    it into a minimum dimension and a results table.
    """
    return geo_exp_manager.find_minimum_latent_dim(
        dataset=dataset,
        dimensions_to_try=[2, 4, 8, 16, 32],
        base_experiment_name=experiment_name,
        epochs=10,
        threshold=0.1,  # Allow 10% relative loss increase
        batch_size=32,
        device=device
    )


# Consistent layers
min_dim_consistent, results_df_consistent = _search_min_latent_dim(
    consistent_layers_dataset, "ConsistentGeoConvAE"
)

print(f"Minimum latent dimension for consistent layers: {min_dim_consistent}")
print("Results for consistent layers:")
print(results_df_consistent)

# Variable layers
min_dim_variable, results_df_variable = _search_min_latent_dim(
    variable_layers_dataset, "VariableGeoConvAE"
)

print(f"Minimum latent dimension for variable layers: {min_dim_variable}")
print("Results for variable layers:")
print(results_df_variable)

# Compare the results
import matplotlib.pyplot as plt
from autoencoder_lib.visualization.training import plot_metric_vs_latent_dim

# One curve per dataset: test loss as a function of latent dimension
loss_curves = (
    (results_df_consistent, 'o-', 'Consistent Layers'),
    (results_df_variable, 's-', 'Variable Layers'),
)

fig, ax = plt.subplots(figsize=(10, 6))
for curve_df, line_style, curve_label in loss_curves:
    ax.plot(curve_df['latent_dim'], curve_df['test_loss'], line_style, label=curve_label)
ax.set_xlabel('Latent Dimension')
ax.set_ylabel('Test Loss')
ax.set_title('Reconstruction Loss vs Latent Dimension by Dataset Type')
ax.legend()
ax.grid(True)
plt.show()

## 9. Advanced: Using a Variational Autoencoder with Geological Data

Let's train a Variational Autoencoder (VAE) on the geological data to enable smoother latent space representation and generative capabilities.

In [None]:
# Create a wrapper for a Variational Autoencoder
geo_vae_wrapper = AutoEncoderWrapper(
    model_class=VariationalAutoencoder,
    model_name="GeologyVAE",
    input_shape=input_shape,
    latent_dim=10,
    batch_size=batch_size,
    device=device,
    results_dir='results/geo_vae'
)

# Attach the consistent layers loaders and flag the wrapper's data as ready
geo_vae_wrapper.train_loader = train_loader
geo_vae_wrapper.val_loader = val_loader
geo_vae_wrapper.test_loader = test_loader
geo_vae_wrapper.dataloader_setup = True

# Initialise the model explicitly before training, as the linear
# autoencoder cell does
geo_vae_wrapper.setup_model()

# Train the VAE model
model, metrics = geo_vae_wrapper.train(
    epochs=20,
    learning_rate=0.001,
    early_stopping_patience=5,
    verbose=True,
    visualize_training=True
)

# Evaluate and visualize; with no data_loader argument these calls rely on the
# loaders attached to the wrapper above
test_loss, test_metrics = geo_vae_wrapper.evaluate(visualize=True, num_samples=10)
geo_vae_wrapper.visualize_latent_space(method='tsne', class_names=['Consistent Layers'])

## 10. Latent Space Traversal with Geological VAE

With a trained Variational Autoencoder, we can traverse the latent space to generate new geological layer patterns.

In [None]:
# Import the latent traversal visualization function
from autoencoder_lib.visualization.latent_space import plot_latent_traversal

# Put the VAE in evaluation mode before generating images, so any layers that
# behave differently during training (e.g. dropout or batch norm, if the model
# has them) run in inference mode.
geo_vae_wrapper.model.eval()

# Generate latent space traversals
fig = plot_latent_traversal(
    model=geo_vae_wrapper.model,
    latent_dim=5,  # Visualize the first 5 dimensions
    steps=8,       # 8 steps from -2 to 2 in each dimension
    device=device,
    figsize=(15, 10)
)

plt.show()

# Create some synthetic geological patterns by sampling the latent space.
# Only the generation step needs gradients disabled; plotting happens afterwards.
n_generated = 16
with torch.no_grad():
    # Sample random points in latent space
    z = torch.randn(n_generated, geo_vae_wrapper.latent_dim).to(device)

    # Generate images
    generated = geo_vae_wrapper.model.decode(z).cpu()

# Plot the generated images on a 4x4 grid; a separate name keeps `fig`
# pointing at the traversal figure
samples_fig, sample_axes = plt.subplots(4, 4, figsize=(12, 8))
for i, ax in enumerate(sample_axes.flat):
    ax.imshow(generated[i].squeeze(), cmap='gray')
    ax.set_title(f"Sample {i+1}")
    ax.axis('off')

samples_fig.tight_layout()
samples_fig.suptitle("Synthetic Geological Patterns Generated by VAE", y=1.02)
plt.show()

## Conclusion

In this notebook, we've demonstrated how to use the `autoencoder_lib` package with geological layer datasets to:

1. Load and visualize geological layer data
2. Train different autoencoder architectures on geological patterns
3. Compare performance between consistent and variable layer datasets
4. Find the minimum latent dimensions needed for geological patterns
5. Generate synthetic geological patterns using variational autoencoders
6. Visualize latent space representations of geological features
7. Compare different architectures for geological pattern analysis

This geological application demonstrates how autoencoders can be used to analyze and generate complex spatial patterns, with potential applications in geology, remote sensing, and earth science.