Sure! Below is a comprehensive implementation of a convolutional autoencoder using PyTorch on the CIFAR-10 dataset. The code includes GPU and MPS support, uses TensorBoard for visualization, provides a model summary with `torchinfo`, and incorporates k-fold cross-validation for model validation. Additionally, custom functions are type-annotated for static type checking with `mypy`.



In [None]:
from typing import Any, List, Tuple

import torch
import torch.optim as optim
import torchinfo
import torchvision
from sklearn.model_selection import KFold
from torch import nn
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

2024-11-04 20:59:13.562246: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-04 20:59:13.601474: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Set up device
# Preference order: CUDA GPU first, then Apple-Silicon MPS, and CPU as the
# fallback. The MPS probe only runs when CUDA is not available.
if torch.cuda.is_available():
    backend, label = "cuda", "CUDA"
elif torch.backends.mps.is_available():
    backend, label = "mps", "MPS"
else:
    backend, label = "cpu", "CPU"

device = torch.device(backend)
print(f"Using {label}")

Using CUDA


In [None]:
# Define dataset transformations
# The pipeline has a single step (image -> tensor); no normalization or
# augmentation is applied.
transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# Load CIFAR-10 dataset
# Training split (train=True), stored under ./data and downloaded on first use.
# Every sample is passed through `transform` when it is read.
dataset = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform
)

Files already downloaded and verified


In [None]:
# Held-out split (train=False) from the same ./data directory, read through the
# same `transform` as the training split.
test_dataset = torchvision.datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform
)

Files already downloaded and verified


In [None]:
# Cross-validation setup
k_folds = 5  # number of train/validation splits
num_epochs = 10  # epochs trained per fold
batch_size = 128
learning_rate = 1e-3

# A fixed random_state makes the shuffled fold assignment identical on every
# run, so per-fold losses can be compared between runs. Without it, each run
# validates on different samples.
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)

In [None]:
# Define the autoencoder model
class Autoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel images.

    The encoder applies three stride-2 convolutions and flattens the result;
    the decoder unflattens to (256, 4, 4) and mirrors the encoder with three
    stride-2 transposed convolutions, ending in a sigmoid. The shape comments
    below are for 32x32 inputs, for which the flattened code has
    256 * 4 * 4 = 4096 values per sample.
    """

    def __init__(self) -> None:
        super().__init__()

        # Encoder: each convolution halves the spatial size.
        down_layers = [
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),  # N, 64, 16, 16
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),  # N, 128, 8, 8
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),  # N, 256, 4, 4
            nn.ReLU(inplace=True),
            nn.Flatten(),  # N, 256*4*4
        ]
        self.encoder = nn.Sequential(*down_layers)

        # Decoder: each transposed convolution doubles the spatial size.
        up_layers = [
            nn.Unflatten(1, (256, 4, 4)),  # N, 256, 4, 4
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),  # N, 128, 8, 8
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),  # N, 64, 16, 16
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),  # N, 3, 32, 32
            nn.Sigmoid(),  # squash reconstructions into (0, 1)
        ]
        self.decoder = nn.Sequential(*up_layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` to the flat code and return its reconstruction."""
        code = self.encoder(x)
        return self.decoder(code)

In [None]:
# Instantiate the autoencoder on the selected device. This instance is the one
# passed to torchinfo.summary in the next cell.
model = Autoencoder().to(device)

In [None]:
# Print per-layer output shapes and parameter counts for one batch of
# `batch_size` RGB 32x32 inputs.
torchinfo.summary(model, input_size=(batch_size, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
Autoencoder                              [128, 3, 32, 32]          --
├─Sequential: 1-1                        [128, 4096]               --
│    └─Conv2d: 2-1                       [128, 64, 16, 16]         3,136
│    └─ReLU: 2-2                         [128, 64, 16, 16]         --
│    └─Conv2d: 2-3                       [128, 128, 8, 8]          131,200
│    └─ReLU: 2-4                         [128, 128, 8, 8]          --
│    └─Conv2d: 2-5                       [128, 256, 4, 4]          524,544
│    └─ReLU: 2-6                         [128, 256, 4, 4]          --
│    └─Flatten: 2-7                      [128, 4096]               --
├─Sequential: 1-2                        [128, 3, 32, 32]          --
│    └─Unflatten: 2-8                    [128, 256, 4, 4]          --
│    └─ConvTranspose2d: 2-9              [128, 128, 8, 8]          524,416
│    └─ReLU: 2-10                        [128, 128, 8, 8]          

In [None]:
# Define custom functions with type annotations
def train_epoch(
    model: nn.Module,
    dataloader: DataLoader,
    criterion: nn.Module,
    optimizer: optim.Optimizer,
    device: torch.device,
) -> float:
    """Train the model for one epoch and return the mean per-sample loss.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataloader: Yields ``(inputs, target)`` batches. The target is ignored
            because the inputs are also the reconstruction target.
        criterion: Loss called as ``criterion(outputs, inputs)``. The
            per-batch weighting below assumes it returns the batch mean.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the input batches are moved to.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the number
        of samples actually iterated. Returns ``0.0`` if the loader yields no
        batches.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for inputs, _ in dataloader:
        inputs = inputs.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, inputs)

        loss.backward()
        optimizer.step()

        n_batch = inputs.size(0)
        running_loss += loss.item() * n_batch
        samples_seen += n_batch

    # Divide by the samples actually drawn, not len(dataloader.dataset): with
    # a sampler (e.g. SubsetRandomSampler) the loader visits only part of the
    # dataset, and the full length would under-report the loss.
    if samples_seen == 0:
        return 0.0
    return running_loss / samples_seen

In [None]:
def validate_epoch(
    model: nn.Module, dataloader: DataLoader, criterion: nn.Module, device: torch.device
) -> float:
    """Evaluate the model for one pass and return the mean per-sample loss.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Yields ``(inputs, target)`` batches. The target is ignored
            because the inputs are also the reconstruction target.
        criterion: Loss called as ``criterion(outputs, inputs)``. The
            per-batch weighting below assumes it returns the batch mean.
        device: Device the input batches are moved to.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the number
        of samples actually iterated. Returns ``0.0`` if the loader yields no
        batches.
    """
    model.eval()
    running_loss = 0.0
    samples_seen = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, _ in dataloader:
            inputs = inputs.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, inputs)

            n_batch = inputs.size(0)
            running_loss += loss.item() * n_batch
            samples_seen += n_batch

    # Divide by the samples actually drawn, not len(dataloader.dataset): with
    # a sampler (e.g. SubsetRandomSampler) the loader visits only part of the
    # dataset, and the full length would under-report the loss.
    if samples_seen == 0:
        return 0.0
    return running_loss / samples_seen

In [None]:
# Training and validation with k-fold cross-validation
# Reconstruction loss: mean squared error between the output and the input.
criterion = nn.MSELoss()
# Initialize model and optimizer
# NOTE(review): cross-validation needs a freshly initialised model and
# optimizer for every fold; confirm the fold loop re-creates them rather than
# reusing these instances across folds.
model = Autoencoder().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Initialize TensorBoard writer
writer = SummaryWriter("runs/autoencoder_experiment")

# Begin k-fold cross-validation
try:
    for fold, (train_ids, val_ids) in enumerate(kfold.split(dataset)):

        print(f"Fold {fold + 1}/{k_folds}")

        # Start every fold from a freshly initialised model and optimizer.
        # Reusing one model across folds would let it train on samples that a
        # later fold uses for validation, so that fold's validation loss would
        # no longer measure performance on unseen data.
        model = Autoencoder().to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Sample elements randomly from a given list of ids
        train_subsampler = SubsetRandomSampler(train_ids)
        val_subsampler = SubsetRandomSampler(val_ids)

        # Define data loaders for training and validation in this fold
        train_loader = DataLoader(
            dataset, batch_size=batch_size, sampler=train_subsampler
        )
        val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_subsampler)

        # Training loop for this fold
        for epoch in range(num_epochs):
            train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
            val_loss = validate_epoch(model, val_loader, criterion, device)

            # Log the losses to TensorBoard
            writer.add_scalars(
                f"Fold_{fold + 1}/Losses",
                {
                    "Training": train_loss,
                    "Validation": val_loss,
                },
                epoch,
            )

            print(
                f"Epoch {epoch + 1}/{num_epochs}: Training Loss={train_loss:.6f}, Validation Loss={val_loss:.6f}"
            )

        # Optionally, save the model for each fold
        torch.save(model.state_dict(), f"autoencoder_fold_{fold + 1}.pth")
finally:
    # Close the TensorBoard writer even if training is interrupted, so the
    # events logged so far are flushed to disk.
    writer.close()



### Explanation

1. **Device Configuration**: The code detects if a GPU or MPS device is available and sets the `device` accordingly.
   
2. **Data Loading**: The CIFAR-10 dataset is loaded with transformations converting images to tensors.

3. **Model Definition**: A Convolutional Autoencoder (`Autoencoder` class) is defined with an encoder and decoder built using `nn.Conv2d` and `nn.ConvTranspose2d` layers.

4. **Model Summary**: `torchinfo.summary` provides a detailed summary of the model architecture, including the output shapes and number of parameters at each layer.

5. **Cross-Validation Setup**: K-fold cross-validation is set up using `KFold` from `sklearn.model_selection`, shuffling the dataset indices.

6. **Custom Functions**: Functions `train_epoch` and `validate_epoch` are defined with type annotations on their inputs and outputs, so they can be checked statically with `mypy`.

7. **Training Loop**: For each fold, the model is trained and validated per epoch. Losses are computed using Mean Squared Error (`nn.MSELoss`), and the optimizer is Adam.

8. **TensorBoard Logging**: Training and validation losses are logged to TensorBoard for visualization. Run `tensorboard --logdir=runs` in the terminal to start the TensorBoard server.

9. **Model Saving**: After each fold, the model's state dictionary is saved for later analysis or inference.

10. **Type Annotations**: All custom functions and the `Autoencoder` class methods are annotated with input and output types for static type checking with `mypy`.

### Visualizing with TensorBoard

To visualize the training progress and other metrics:

1. Open a terminal and run:

   ```bash
   tensorboard --logdir=runs
   ```

2. Open the provided URL in a web browser.

3. Navigate through the Scalars tab to view training and validation losses per fold and epoch.

### Running Static Type Checking with `mypy`

To check the code for type correctness:

1. Install `mypy` if not already installed:

   ```bash
   pip install mypy
   ```

2. Run `mypy` on your script:

   ```bash
   mypy your_script.py
   ```

   Replace `your_script.py` with the filename containing the code.

### Notes

- **GPU/MPS Usage**: Ensure that the appropriate device drivers and libraries are installed for GPU (CUDA) or MPS (Apple Silicon) support.

- **Dataset Download**: The CIFAR-10 dataset will be downloaded automatically to the `./data` directory if not already present.

- **Batch Size and Epochs**: The `batch_size` and `num_epochs` are set to reasonable defaults but can be adjusted based on available hardware resources.

- **Autoencoder Architecture**: The architecture can be modified or deepened to experiment with reconstruction quality.

- **Loss Function**: Mean Squared Error is commonly used for autoencoders, but others like Binary Cross-Entropy can be applied depending on the activation functions and data normalization.

### Dependencies

Make sure you have the following Python packages installed:

- `torch`
- `torchvision`
- `torchinfo`
- `tensorboard`
- `scikit-learn`
- `typing` (part of the standard library in Python 3.5+)

If any of these are not installed, you can install them using `pip`:

```bash
pip install torch torchvision torchinfo tensorboard scikit-learn
```

### Conclusion

This implementation provides a solid foundation for experimenting with convolutional autoencoders on the CIFAR-10 dataset using PyTorch. It leverages GPU acceleration when available, provides insights through TensorBoard visualizations, and ensures code reliability with type annotations checked by `mypy`. Feel free to modify and extend the code to suit your specific needs or to experiment with different architectures and hyperparameters.

user: So for this implementation we iterate over 5 folds and save the model for each fold. How are the 5 folds combined into a final model?

Great question!

In the provided implementation, we perform 5-fold cross-validation to evaluate the performance of the autoencoder across different splits of the dataset. For each fold, we train a separate model and save its state dictionary. However, these models are not directly combined into a single final model because:

1. **Independent Models**: Each model is trained independently on different subsets of the data. The parameters (weights and biases) of these models are not directly compatible for combination since they may have converged to different points in the parameter space.

2. **Purpose of Cross-Validation**: The primary goal of cross-validation is to assess the model's performance and generalization ability on unseen data, helping us detect overfitting and select the best hyperparameters. It is a model evaluation technique, not a model combining technique.

### Combining the Models

Since the models from each fold are independently trained, there isn't a straightforward or statistically valid method to combine their learned parameters into a single final model. The parameters learned in each fold are specific to the data seen during that fold's training process.

### Recommended Approach

After completing cross-validation and ensuring that the model performs well across different data splits, the typical next steps are:

1. **Retrain the Final Model on the Entire Dataset**:

   - **Objective**: Utilize all available data to train a single model that can generalize well.
   - **Method**: Train the autoencoder on the entire dataset without any train-validation split.
   - **Benefit**: Training on more data generally improves the model's ability to learn robust features.

2. **Use the Best Performing Fold Model**:

   - **Objective**: Select the model that showed the best performance during cross-validation.
   - **Method**: Compare the validation losses from each fold and pick the model with the lowest validation loss.
   - **Limitations**: This model has not seen all the data, potentially limiting its generalization.

### Implementing the Final Training

In [None]:
# Load the full dataset (if not already loaded)
full_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# The held-out test split is only evaluated, so it is read in a fixed order.
test_loader = DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=2
)

# Initialize a new model and optimizer
final_model = Autoencoder().to(device)
final_optimizer = optim.Adam(final_model.parameters(), lr=learning_rate)

# Initialize the loss function
criterion = nn.MSELoss()

# TensorBoard writer for the final model
final_writer = SummaryWriter("runs/autoencoder_final")

# Number of epochs for final training
final_num_epochs = 10  # You may increase this number

try:
    # Training loop
    for epoch in range(final_num_epochs):
        train_loss = train_epoch(
            final_model, full_loader, criterion, final_optimizer, device
        )
        # No training data is held out here, so the test split is evaluated
        # after each epoch purely to monitor the loss on unseen images.
        val_loss = validate_epoch(final_model, test_loader, criterion, device)

        # Log the losses to TensorBoard
        final_writer.add_scalars(
            "Final",
            {
                "Training": train_loss,
                "Test": val_loss,
            },
            epoch,
        )

        # Report progress against final_num_epochs (the length of this loop),
        # not the cross-validation num_epochs.
        print(
            f"Epoch {epoch + 1}/{final_num_epochs}: Training Loss={train_loss:.6f}, Validation Loss={val_loss:.6f}"
        )

    # Save the final model
    torch.save(final_model.state_dict(), "autoencoder_final.pth")
finally:
    # Close the TensorBoard writer even if training is interrupted, so the
    # events logged so far are flushed to disk.
    final_writer.close()

Epoch 1/10: Training Loss=0.012106, Validation Loss=0.005012
Epoch 2/10: Training Loss=0.004111, Validation Loss=0.003602
Epoch 3/10: Training Loss=0.003040, Validation Loss=0.002914
Epoch 4/10: Training Loss=0.002414, Validation Loss=0.002003
Epoch 5/10: Training Loss=0.001993, Validation Loss=0.001728
Epoch 6/10: Training Loss=0.001767, Validation Loss=0.001743
Epoch 7/10: Training Loss=0.001529, Validation Loss=0.001336
Epoch 8/10: Training Loss=0.001399, Validation Loss=0.001233
Epoch 9/10: Training Loss=0.001292, Validation Loss=0.001169
Epoch 10/10: Training Loss=0.001183, Validation Loss=0.001136


#### Explanation:

- **Data Loader**: We create a `DataLoader` for the entire dataset using `shuffle=True` to ensure that data is presented in a random order.
- **Model and Optimizer**: We initialize a new instance of the `Autoencoder` and the optimizer.
- **Training Loop**: We train the model using the `train_epoch` function defined earlier. Since no part of the training set is held out here, the CIFAR-10 test set is evaluated after each epoch with `validate_epoch` to monitor performance on unseen images.
- **TensorBoard Logging**: We log the training and test losses for each epoch to TensorBoard for monitoring.
- **Saving the Model**: After training, we save the final model's state dictionary.

### Why Retrain on the Entire Dataset?

- **More Data Equals Better Learning**: Training on the entire dataset allows the model to learn from all available information, potentially capturing patterns that weren't present in the subsets used during cross-validation.
- **Generalization**: A model trained on more data is generally better at generalizing to new, unseen data.

### Addressing Overfitting

Even when training on the entire dataset, it's important to monitor for overfitting. Since no part of the training data is held out as a validation set in this scenario (only the test set is monitored), you might consider:

- **Using a Small Hold-Out Set**: Set aside a small portion of the data (e.g., 5%) as a validation set to monitor the model's performance.
- **Regularization Techniques**: Implement regularization methods like weight decay, dropout layers, or early stopping to prevent overfitting.
  
### Alternative: Model Ensembling (Not Common for Autoencoders)

While ensembling is a powerful technique in supervised learning tasks (like classification or regression), it is less common and more complex for autoencoders due to the unsupervised nature of the task. Ensembling autoencoders would involve averaging or combining their reconstructed outputs, which might not yield meaningful improvements and could introduce artifacts.

### Summary

- **Cross-Validation Purpose**: Used to estimate model performance and ensure it generalizes well across different data splits.
- **Final Model Training**: After cross-validation, retrain a new model on the entire dataset to serve as the final model.
- **Model Combination**: Directly combining models from different folds isn't practical or advisable for autoencoders.

### Next Steps

- **Retrain the Final Model**: Use the entire dataset to train your final autoencoder.
- **Evaluate the Final Model**: If possible, evaluate the final model on a separate test set to gauge its performance.
- **Deploy and Use the Model**: The trained autoencoder can now be used for tasks like data reconstruction, denoising, anomaly detection, etc.

### Additional Considerations

- **Hyperparameter Tuning**: Use the results from cross-validation to adjust hyperparameters (learning rate, batch size, architecture depth) for the final training.
- **Data Augmentation**: Consider applying data augmentation techniques if appropriate, to further improve the model's robustness.

### Closing Remarks

The provided implementation gives you a solid foundation for training and validating an autoencoder using cross-validation. By retraining the final model on the entire dataset, you leverage all available data to build a model ready for practical applications.

If you have further questions or need assistance with modifications, feel free to ask!

user: 
So if the 5 fold produces similar training and validation losses we can conclude that model will generalize to new, unseen data or otherwise if we observed very different results for the 5 folds then something is wrong with the model. Is this a correct interpretation?

Yes, your interpretation is correct.

---

### Consistent Results Across Folds

If the 5-fold cross-validation yields **similar training and validation losses across all folds**, it indicates that:

1. **Model Stability**: Your model's performance is consistent, regardless of the specific train-validation split. This suggests that the model has learned generalizable patterns from the data rather than memorizing specific examples.

2. **Data Representativeness**: Each fold is a representative subset of the overall dataset. The data distribution is relatively uniform across folds, and there are no significant anomalies or biases in any subset.

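3. **Generalization Capability**: Since the model performs similarly on unseen validation data in each fold, it's likely to generalize well to new, unseen data drawn from the same distribution as your dataset. Cross-validation cannot tell you how the model behaves on data that differs systematically from it.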

---

### Varied Results Across Folds

Conversely, if you observe **significantly different training and validation losses across the folds**, it suggests:

1. **Model Instability**: The model's performance is sensitive to the specific data it's trained on. This could be due to overfitting, underfitting, or the model not being robust enough to capture the underlying data patterns consistently.

2. **Data Issues**:

    - **Non-Uniform Distribution**: The data may not be uniformly distributed across the folds. Some folds might contain more challenging samples, outliers, or a different class distribution.
    - **Dataset Imbalance**: If certain classes or features are overrepresented in some folds and underrepresented in others, this can lead to varying performance.
    - **Data Leakage**: Unintended overlap of information between training and validation sets could artificially inflate performance in some folds.

3. **Model Architecture Problems**: The current model architecture might not be appropriate for the complexity of the dataset. It might be too simple to capture important features or too complex, leading to overfitting.

---

### Implications

- **Consistent Losses**: You can be more confident that your model will perform well on new, unseen data. It indicates good generalization and reliability.

- **Varied Losses**: Suggests a need to:

    - **Investigate the Data**: Check for inconsistencies, biases, or anomalies in the dataset. Ensure that data preprocessing steps are applied uniformly.

    - **Adjust the Model**: Consider modifying the model architecture to better suit the data. This might involve adding layers, changing activation functions, or altering hyperparameters.

    - **Hyperparameter Tuning**: The learning rate, batch size, regularization parameters, and other hyperparameters may need adjustment.

    - **Increase Data Diversity**: If possible, augment the data or collect more samples to cover the variability in the dataset better.

---

### Additional Considerations

#### Overfitting and Underfitting

- **Overfitting**: If training loss is low but validation loss is high and varies across folds, the model may be overfitting to the training data.

- **Underfitting**: If both training and validation losses are high, the model may be underfitting, failing to capture the underlying structure of the data.

#### Data Quality

- **Outliers and Noise**: Examine whether certain folds contain outliers or noisy data significantly affecting performance.

- **Feature Distribution**: Ensure that features have similar distributions across folds. Feature scaling and normalization should be applied consistently.

#### Cross-Validation Settings

- **Number of Folds**: Sometimes, increasing the number of folds can provide a more robust estimate of model performance, especially with smaller datasets.

- **Shuffle Data**: Ensure that the data is shuffled before splitting into folds to prevent any order effects.

---

### Next Steps

If you observe varied results across folds:

1. **Analyze Fold Performances**:

   - Identify which folds are performing poorly.
   - Investigate the data in those specific folds for irregularities.

2. **Adjust Model Complexity**:

   - **Simplify** the model if overfitting is suspected.
   - **Increase** complexity if underfitting is the issue.

3. **Regularization Techniques**:

   - Apply **dropout**, **batch normalization**, or **weight decay** to prevent overfitting.

4. **Hyperparameter Optimization**:

   - Use techniques like grid search or random search to find optimal hyperparameters.

5. **Data Augmentation**:

   - Enhance the dataset by applying transformations to increase its size and diversity.

6. **Collect More Data**:

   - If feasible, collect additional data to provide the model with more information.

---

### Conclusion

Your understanding aligns with best practices in machine learning model evaluation:

- **Consistent Cross-Validation Performance**: Generally indicates that the model is reliable and likely to generalize well to unseen data.

- **Inconsistent Cross-Validation Performance**: Signals potential issues with the model or data that need to be addressed to improve generalization.

Monitoring cross-validation results is a critical step in developing robust machine learning models. It helps ensure that the model's performance is not an artifact of a particular data split but rather reflects its true predictive capabilities.

---

If you have further questions or need assistance in interpreting specific results from your cross-validation process, feel free to ask!