In [None]:
# Import required libraries
# Standard library imports
import os
import sys
from pathlib import Path

# Third-party imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Add the parent of the working directory to sys.path so the `src` package is importable
sys.path.append(str(Path.cwd().parent))

# Local imports
from src.data_utils import CIFAR10_CLASSES, load_cifar10
from src.metrics import calculate_psnr, compute_metrics_summary
from src.model_utils import build_compression_ae
from src.visualization import plot_image_grid, plot_reconstruction_comparison, plot_training_history

# Seed NumPy and TensorFlow with the same value for reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Report the TensorFlow build and any GPUs it can see
gpu_devices = tf.config.list_physical_devices("GPU")
print(f'TensorFlow version: {tf.__version__}')
print(f'GPU Available: {gpu_devices}')

## Load and Prepare CIFAR-10 Dataset

CIFAR-10 consists of 60,000 32x32 color images in 10 classes. We'll use it to train our compression autoencoders.

In [None]:
# Load CIFAR-10 through the project loader, asking it for normalized images
train_split, test_split = load_cifar10(normalize=True)
x_train, y_train = train_split
x_test, y_test = test_split

# Summarize what was loaded, one "label: value" line per fact
dataset_facts = {
    "Training samples": len(x_train),
    "Test samples": len(x_test),
    "Image shape": x_train.shape[1:],
    "Classes": CIFAR10_CLASSES,
}
for fact_label, fact_value in dataset_facts.items():
    print(f"{fact_label}: {fact_value}")

In [None]:
# Visualize some samples: draw 10 distinct random test images and title each
# with its class name.
sample_indices = np.random.choice(len(x_test), 10, replace=False)
sample_images = x_test[sample_indices]

# Flatten the labels before indexing. The label shape returned by load_cifar10
# is not visible here; Keras' raw CIFAR-10 labels are (N, 1), in which case each
# `label` would be a 1-element array and could not index CIFAR10_CLASSES.
# np.ravel is a no-op for labels that are already 1-D.
sample_labels = np.ravel(y_test)[sample_indices]
titles = [CIFAR10_CLASSES[int(label)] for label in sample_labels]

fig = plot_image_grid(sample_images, titles=titles)
plt.suptitle('Sample CIFAR-10 Images', fontsize=16, y=1.02)
plt.show()

## Train Compression Autoencoders

We'll train separate models with different latent dimensions to explore the compression-quality trade-off:

- **Latent dim 32**: Highest compression (96x, ~99% size reduction)
- **Latent dim 64**: High compression (48x, ~98% size reduction)
- **Latent dim 128**: Moderate compression (24x, ~96% size reduction)
- **Latent dim 256**: Lower compression (12x, ~92% size reduction)

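Original image size: 32 × 32 × 3 = 3,072 values (1,024 pixels × 3 color channels). Compression ratios above are 3,072 divided by the latent dimension.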

In [None]:
# Training configuration shared by every model trained below
LATENT_DIMS = [32, 64, 128, 256]  # one autoencoder is trained per entry
EPOCHS = 50
BATCH_SIZE = 128
LEARNING_RATE = 0.001

# Output locations: model checkpoints and TensorBoard logs
models_dir = Path('../models')
logs_dir = Path('../logs')

# Create both directories up front; exist_ok keeps re-running this cell harmless
for output_dir in (models_dir, logs_dir):
    output_dir.mkdir(exist_ok=True)

In [None]:
# Train one compression autoencoder per latent dimension.
#
# Validation data is held out from the *training* set via `validation_split`
# rather than taken from the test set. The checkpoint, early-stopping and
# LR-schedule callbacks all select on `val_loss`, so validating on x_test would
# tune the models on the same data the evaluation cells later report as test
# performance.
VALIDATION_SPLIT = 0.1  # fraction of x_train reserved for validation

trained_models = {}  # latent_dim -> trained autoencoder
histories = {}       # latent_dim -> History object returned by fit()

for latent_dim in LATENT_DIMS:
    print(f"\n{'='*60}")
    print(f"Training Autoencoder with Latent Dimension: {latent_dim}")
    print(f"{'='*60}\n")

    # Build model (only the full autoencoder is used below; encoder/decoder
    # are returned by the builder alongside it)
    autoencoder, encoder, decoder = build_compression_ae(latent_dim=latent_dim)

    # Compile: MSE reconstruction loss, MAE tracked as an extra metric
    autoencoder.compile(
        optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss='mse',
        metrics=['mae']
    )

    print("\nModel Summary:")
    print(f"Total parameters: {autoencoder.count_params():,}")
    # 3072 = 32 * 32 * 3 input values per image
    print(f"Compression ratio: {3072/latent_dim:.2f}x\n")

    # Single source of truth for where the best checkpoint is written, so the
    # final message reports the real location.
    checkpoint_path = models_dir / f'compression_ae_latent{latent_dim}.keras'

    # Callbacks
    callbacks = [
        # Keep only the weights with the lowest validation loss on disk
        keras.callbacks.ModelCheckpoint(
            filepath=str(checkpoint_path),
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        ),
        # Stop after 10 epochs without val_loss improvement
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            verbose=1
        ),
        keras.callbacks.TensorBoard(
            log_dir=str(logs_dir / f'compression_latent{latent_dim}'),
            histogram_freq=1
        ),
        # Halve the learning rate after 5 stagnant epochs, down to 1e-6
        keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            verbose=1
        )
    ]

    # Train
    history = autoencoder.fit(
        x_train, x_train,  # Input and target are the same
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        callbacks=callbacks,
        verbose=1
    )

    # Store results
    trained_models[latent_dim] = autoencoder
    histories[latent_dim] = history

    print(f"\n✅ Model saved to: {checkpoint_path}")

## Visualize Training History

Let's examine how the models learned over time.

In [None]:
# Plot training history for all models: one panel per tracked quantity,
# with a solid train curve and a dashed validation curve for every model.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (axis, history key, y-axis label, panel title)
history_panels = [
    (axes[0], 'loss', 'Loss (MSE)', 'Training and Validation Loss'),
    (axes[1], 'mae', 'MAE', 'Mean Absolute Error'),
]

for ax, metric_key, y_label, panel_title in history_panels:
    for latent_dim, history in histories.items():
        curves = history.history
        ax.plot(curves[metric_key], label=f'Latent {latent_dim} (train)', alpha=0.7)
        ax.plot(curves[f'val_{metric_key}'], label=f'Latent {latent_dim} (val)',
                linestyle='--', alpha=0.7)

    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Evaluate Reconstruction Quality

Now let's evaluate the quality of reconstructions using various metrics:
- **MSE** (Mean Squared Error): Lower is better
- **PSNR** (Peak Signal-to-Noise Ratio): Higher is better (typically 20-40 dB)
- **SSIM** (Structural Similarity Index): Higher is better (0-1 scale)

In [None]:
# Evaluate every trained model on the same fixed slice of the test set
# (first 1000 test images) and collect the metric summaries by latent dimension.
eval_samples = x_test[:1000]
evaluation_results = {}

for latent_dim, model in trained_models.items():
    print(f"\nEvaluating Latent Dim {latent_dim}...")

    # Reconstruct the evaluation images, then summarize reconstruction quality
    reconstructions = model.predict(eval_samples, verbose=0)
    metrics = compute_metrics_summary(eval_samples, reconstructions, latent_dim)
    evaluation_results[latent_dim] = metrics

    # Emit the four headline numbers as one indented report
    report_lines = [
        f"  MSE: {metrics['mse']:.6f}",
        f"  PSNR: {metrics['psnr']:.2f} dB",
        f"  SSIM: {metrics['ssim']:.4f}",
        f"  Compression Ratio: {metrics['compression_ratio']:.2f}x",
    ]
    print("\n".join(report_lines))

In [None]:
# Create comparison table: one row per latent dimension, one column per metric.
#
# from_dict(orient='index') builds the rows directly, so each metric column
# keeps its own dtype. Building column-wise and transposing would collapse
# mixed-type metrics to `object` dtype, on which .round() silently does nothing.
# (pandas is imported as `pd` in the top import cell.)
df = pd.DataFrame.from_dict(evaluation_results, orient='index')
df.index.name = 'Latent Dim'
df = df.round(4)

print("\n" + "="*70)
print("COMPRESSION VS QUALITY COMPARISON")
print("="*70)
print(df.to_string())
print("="*70)

In [None]:
# Visualize compression-quality trade-off: three panels (PSNR, SSIM, MSE),
# each plotted against compression ratio.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Pull one series per metric out of the evaluation results, in latent-dim order
latent_dims = list(evaluation_results.keys())
compression_ratios, psnr_values, ssim_values, mse_values = (
    [evaluation_results[ld][metric_key] for ld in latent_dims]
    for metric_key in ('compression_ratio', 'psnr', 'ssim', 'mse')
)

# (y values, y-axis label, panel title, per-panel line styling)
tradeoff_panels = [
    (psnr_values, 'PSNR (dB)', 'PSNR vs Compression Ratio', {'marker': 'o'}),
    (ssim_values, 'SSIM', 'SSIM vs Compression Ratio', {'marker': 's', 'color': 'green'}),
    (mse_values, 'MSE', 'MSE vs Compression Ratio', {'marker': '^', 'color': 'red'}),
]

for ax, (y_values, y_label, panel_title, line_style) in zip(axes, tradeoff_panels):
    ax.plot(compression_ratios, y_values, linewidth=2, markersize=8, **line_style)
    ax.set_xlabel('Compression Ratio')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.grid(True, alpha=0.3)
    # Flip the x-axis so compression decreases from left to right
    ax.invert_xaxis()

plt.tight_layout()
plt.show()

## Visual Comparison of Reconstructions

Let's visually compare the reconstruction quality across different latent dimensions.

In [None]:
# Select random test images
n_samples = 5
sample_indices = np.random.choice(len(x_test), n_samples, replace=False)
test_samples = x_test[sample_indices]

# Grid layout: first row shows the originals, then one row per latent dimension
n_rows = len(LATENT_DIMS) + 1
fig, axes = plt.subplots(n_rows, n_samples, figsize=(n_samples * 3, n_rows * 3))


def _show_image_row(row_axes, images, row_title):
    """Draw `images` across one row of axes, hiding ticks; title the first cell only."""
    for ax, image in zip(row_axes, images):
        ax.imshow(image)
        ax.axis('off')
    row_axes[0].set_title(row_title, fontweight='bold', fontsize=14)


# Display originals
_show_image_row(axes[0], test_samples, 'Original')

# Display reconstructions for each latent dimension
for row_idx, latent_dim in enumerate(LATENT_DIMS, start=1):
    model = trained_models[latent_dim]
    reconstructions = model.predict(test_samples, verbose=0)
    _show_image_row(
        axes[row_idx],
        reconstructions,
        f'Latent {latent_dim}\n(Ratio: {3072/latent_dim:.1f}x)',
    )

plt.suptitle('Reconstruction Quality Comparison', fontsize=18, y=0.995)
plt.tight_layout()
plt.show()

## Per-Class Performance Analysis

Let's see how well the autoencoder performs on different object classes.

In [None]:
# Per-class reconstruction quality for a single model.
# (calculate_psnr is imported from src.metrics in the top import cell.)

# Model under analysis: latent_dim=128 is used as a balance between
# compression and quality. Change it here and the plot title follows.
BEST_LATENT_DIM = 128
SAMPLES_PER_CLASS = 100  # at most this many test images are scored per class
best_model = trained_models[BEST_LATENT_DIM]

# Flatten the labels once. The label shape returned by load_cifar10 is not
# visible here; Keras' raw CIFAR-10 labels are (N, 1), and an (N, 1) boolean
# mask cannot index the (N, 32, 32, 3) image array. No-op for 1-D labels.
test_labels = np.ravel(y_test)

# Calculate PSNR for each class
class_psnr = {}

for class_idx, class_name in enumerate(CIFAR10_CLASSES):
    # Get the first SAMPLES_PER_CLASS test images of this class
    class_mask = test_labels == class_idx
    class_images = x_test[class_mask][:SAMPLES_PER_CLASS]

    # Reconstruct
    reconstructions = best_model.predict(class_images, verbose=0)

    # Average whatever calculate_psnr returns into one number per class
    psnr = np.mean(calculate_psnr(class_images, reconstructions))
    class_psnr[class_name] = psnr

# Plot per-class performance
fig, ax = plt.subplots(figsize=(12, 6))
classes = list(class_psnr.keys())
psnr_values = list(class_psnr.values())
mean_psnr = np.mean(psnr_values)  # computed once for both the line and its label

ax.bar(classes, psnr_values, color='steelblue', alpha=0.8)
ax.set_xlabel('Class', fontsize=12)
ax.set_ylabel('PSNR (dB)', fontsize=12)
ax.set_title(f'Reconstruction Quality by Class (Latent Dim: {BEST_LATENT_DIM})',
             fontsize=14, fontweight='bold')
ax.axhline(y=mean_psnr, color='red', linestyle='--', label=f'Mean: {mean_psnr:.2f} dB')
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

print("\nPer-Class PSNR:")
for class_name, psnr in class_psnr.items():
    print(f"  {class_name:12s}: {psnr:.2f} dB")

## Conclusions

### Key Findings:

1. **Compression-Quality Trade-off**: Higher compression ratios lead to lower reconstruction quality, as expected.

2. **Optimal Balance**: Latent dimension of 128 provides a good balance between compression (~24x) and quality.

3. **Class Variability**: Some classes (e.g., simpler objects) compress better than others.

### Applications:

- **Image storage**: Reduce storage requirements with acceptable quality loss
- **Bandwidth optimization**: Transmit compressed latent representations
- **Feature extraction**: Use latent representations for downstream tasks

### Next Steps:

1. Explore the trained models in the Streamlit app for interactive demos
2. Try the anomaly detection notebook to see autoencoders in a different context
3. Learn about VAEs for generative modeling in notebook 03