In [1]:
import sys
import os

# Add particle_detection to sys.path
sys.path.append(os.path.abspath(".."))

In [2]:
import os
import torch
from sklearn.model_selection import train_test_split
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import numpy as np
import cv2
from particle_detection.autoencoder.model import create_autoencoder
#from particle_detection.data.dataset import ImageDataset, get_transforms
from particle_detection.data.dataset import get_transforms
from particle_detection.data.dataset import create_dataloaders

<class 'torch.utils.data.dataloader.DataLoader'>


In [25]:
import os
import torch
from sklearn.model_selection import train_test_split
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import numpy as np
import cv2
from particle_detection.autoencoder.model import create_autoencoder

class ImageDataset(Dataset):
    def __init__(self, image_files, data_dir, transform=None):
        """
        Simple Dataset class for loading images.

        :param image_files: List of image file names.
        :param data_dir: Path to the directory containing images.
        :param transform: Transform to apply to the images.
        """
        self.image_files = image_files
        self.data_dir = data_dir
        self.transform = transform

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_path = os.path.join(self.data_dir, self.image_files[idx])
        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image


def preprocess_image(image):
    """
    Normalize and convert the image to RGB format.

    :param image: Input PIL image.
    :return: Preprocessed PIL image.
    """
    sample = np.array(image)
    sample = cv2.normalize(sample, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    processed_image = Image.fromarray(sample).convert('RGB')
    return processed_image


def get_transforms(image_size=(224, 224), is_train=True):
    """
    Simple function to get transformations for training or testing.

    :param image_size: Tuple of desired image dimensions (height, width).
    :param is_train: Whether to include augmentation (True for training, False for testing).
    :return: A torchvision.transforms.Compose object.
    """
    transform_list = [
        transforms.Lambda(preprocess_image),  # Integrate preprocessing here
        transforms.Resize(image_size),
        transforms.ToTensor()
    ]

    if is_train:
        # Add augmentations only for training
        transform_list.extend([
            transforms.RandomRotation(degrees=(-30, 30)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ColorJitter(brightness=0.2, contrast=0.2)
        ])

    return transforms.Compose(transform_list)


def load_image_file_paths(data_dir, extensions=(".tif", ".TIF")):
    """
    List all image file paths in the directory with the given extensions.

    :param data_dir: Path to the directory containing images.
    :param extensions: Tuple of allowed image file extensions.
    :return: List of image file names.
    """
    return [f for f in os.listdir(data_dir) if f.endswith(extensions)]


def split_dataset(image_files, test_size=0.2, random_seed=42):
    """
    Split image file names into training and testing sets.

    :param image_files: List of image file names.
    :param test_size: Proportion of the dataset to use for testing.
    :param random_seed: Random seed for reproducibility.
    :return: Tuple of (train_files, test_files).
    """
    return train_test_split(image_files, test_size=test_size, random_state=random_seed)


def create_dataloaders(data_dir, transform, batch_size, test_size=0.2):
    """
    Create PyTorch DataLoader objects for training and testing.

    :param data_dir: Path to the directory containing images.
    :param transform: Transform to apply to the images.
    :param batch_size: Batch size for DataLoaders.
    :param test_size: Proportion of the dataset to use for testing.
    :return: Tuple of (train_loader, test_loader).
    """
    # Step 1: Load all image file paths
    image_files = load_image_file_paths(data_dir)

    # Step 2: Split into train and test sets
    train_files, test_files = split_dataset(image_files, test_size=test_size)

    # Step 3: Create Dataset objects
    train_dataset = ImageDataset(train_files, data_dir, transform=transform)
    test_dataset = ImageDataset(test_files, data_dir, transform=transform)

    # Step 4: Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

In [16]:
data_dir = "/Users/blah_m4/Desktop/nanoparticle/images"
transform = get_transforms(image_size=(224, 224), is_train=True)
batch_size = 16

train_loader, test_loader = create_dataloaders(data_dir, transform, batch_size)

print(f"[INFO] Number of training batches: {len(train_loader)}")
print(f"[INFO] Number of testing batches: {len(test_loader)}")

[INFO] Number of training batches: 1
[INFO] Number of testing batches: 1


In [17]:
for images in train_loader:
    print(images.shape)  # Prints the shape of the first batch of images
    break

torch.Size([8, 3, 224, 224])


In [29]:
num_epochs=1
# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Using device: {device}")

# Get transforms
transform = get_transforms(image_size=(1024, 1024), is_train=True)

# Create dataloaders
train_loader, test_loader = create_dataloaders(data_dir, transform, batch_size)

# Validate batch size
if batch_size > len(train_loader.dataset):
    print(f"[WARNING] Batch size ({batch_size}) exceeds dataset size ({len(train_loader.dataset)}). Adjusting batch size.")
    batch_size = len(train_loader.dataset)

model = create_autoencoder()
model = nn.DataParallel(model)
#model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Training loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for batch in train_loader:
        batch = batch.to(device)

        # Forward pass
        outputs = model(batch)
        loss = criterion(outputs, batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    print(f"[INFO] Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss / len(train_loader):.4f}")

[INFO] Using device: cpu


AttributeError: 'tuple' object has no attribute 'size'

In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class VAE(nn.Module):
    def __init__(self, in_channels=3, latent_dim=128):
        super(VAE, self).__init__()
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=4, stride=2, padding=1),  # 1024 -> 512
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),  # 512 -> 256
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),  # 256 -> 128
            nn.ReLU()
        )

        # Latent space
        self.fc_mu = nn.Linear(256 * 128 * 128, latent_dim)
        self.fc_logvar = nn.Linear(256 * 128 * 128, latent_dim)

        # Decoder
        self.fc_decoder = nn.Linear(latent_dim, 256 * 128 * 128)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),  # 128 -> 256
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),  # 256 -> 512
            nn.ReLU(),
            nn.ConvTranspose2d(64, in_channels, kernel_size=4, stride=2, padding=1),  # 512 -> 1024
            nn.Sigmoid()  # Output normalized to [0, 1]
        )

    def reparameterize(self, mu, logvar):
        """
        Reparameterization trick: z = mu + std * epsilon
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        # Encode
        encoded = self.encoder(x)
        encoded_flat = encoded.view(encoded.size(0), -1)
        
        # Latent variables
        mu = self.fc_mu(encoded_flat)
        logvar = self.fc_logvar(encoded_flat)
        z = self.reparameterize(mu, logvar)

        # Decode
        decoded_flat = self.fc_decoder(z)
        decoded = decoded_flat.view(-1, 256, 128, 128)
        x_reconstructed = self.decoder(decoded)

        return x_reconstructed, mu, logvar

# Loss function for VAE
def vae_loss(recon_x, x, mu, logvar):
    """
    Computes the VAE loss as the sum of reconstruction loss and KL divergence.
    :param recon_x: Reconstructed input.
    :param x: Original input.
    :param mu: Mean of the latent distribution.
    :param logvar: Log variance of the latent distribution.
    """
    #recon_loss = F.mse_loss(recon_x, x, reduction='sum')
    recon_loss = nn.functional.mse_loss(recon_x, x, reduction='sum')
    kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kl_div

def create_vae():
    """
    Creates and returns a Variational Autoencoder (VAE) instance.
    """
    return VAE()

In [23]:
#optimized version 

import torch
import torch.nn as nn
import torch.nn.functional as F

class VAE(nn.Module):
    def __init__(self, in_channels=3, latent_dim=128):
        super(VAE, self).__init__()
        self.latent_dim = latent_dim
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=4, stride=2, padding=1),  # 1024 -> 512
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),          # 512 -> 256
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),         # 256 -> 128
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((8, 8))  # Reduce spatial size to 8x8
        )
        
        # Latent space
        self.flattened_dim = 256 * 8 * 8  # Adjust based on encoder output
        self.fc_mu = nn.Linear(self.flattened_dim, latent_dim)
        self.fc_logvar = nn.Linear(self.flattened_dim, latent_dim)

        # Decoder
        self.fc_decoder = nn.Linear(latent_dim, self.flattened_dim)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),  # 128 -> 256
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),   # 256 -> 512
            nn.ReLU(),
            nn.ConvTranspose2d(64, in_channels, kernel_size=4, stride=2, padding=1),  # 512 -> 1024
            nn.Sigmoid()  # Output normalized to [0, 1]
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        # Encode
        encoded = self.encoder(x)
        encoded_flat = encoded.view(encoded.size(0), -1)
        
        # Latent variables
        mu = self.fc_mu(encoded_flat)
        logvar = self.fc_logvar(encoded_flat)
        z = self.reparameterize(mu, logvar)

        # Decode
        decoded_flat = self.fc_decoder(z)
        decoded = decoded_flat.view(-1, 256, 8, 8)  # Adjust based on encoder output
        x_reconstructed = self.decoder(decoded)

        return x_reconstructed, mu, logvar


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class VAE(nn.Module):
    def __init__(self, in_channels=3, latent_dim=128):
        super(VAE, self).__init__()
        self.latent_dim = latent_dim
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=4, stride=2, padding=1),  # 1024 -> 512
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),          # 512 -> 256
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),         # 256 -> 128
            nn.ReLU()
        )
        
        # Latent space
        self.flattened_dim = 256 * 128 * 128  # Adjusted for input size 1024x1024
        self.fc_mu = nn.Linear(self.flattened_dim, latent_dim)
        self.fc_logvar = nn.Linear(self.flattened_dim, latent_dim)

        # Decoder
        self.fc_decoder = nn.Linear(latent_dim, self.flattened_dim)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),  # 128 -> 256
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),   # 256 -> 512
            nn.ReLU(),
            nn.ConvTranspose2d(64, in_channels, kernel_size=4, stride=2, padding=1),  # 512 -> 1024
            nn.Sigmoid()  # Normalize output to [0, 1]
        )

    def reparameterize(self, mu, logvar):
        """
        Reparameterization trick: z = mu + std * epsilon
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        print(f"Input shape: {x.shape}")  # Debug input shape
        
        # Encode
        encoded = self.encoder(x)
        print(f"Encoded shape: {encoded.shape}")  # Debug encoded shape
        encoded_flat = encoded.view(encoded.size(0), -1)
        
        # Latent variables
        mu = self.fc_mu(encoded_flat)
        logvar = self.fc_logvar(encoded_flat)
        z = self.reparameterize(mu, logvar)

        # Decode
        decoded_flat = self.fc_decoder(z)
        decoded = decoded_flat.view(-1, 256, 128, 128)  # Reshape to match encoder output
        print(f"Decoded (before upsampling) shape: {decoded.shape}")  # Debug decoded shape
        x_reconstructed = self.decoder(decoded)
        print(f"Reconstructed shape: {x_reconstructed.shape}")  # Debug reconstructed shape

        # Ensure reconstruction matches input size
        x_reconstructed = F.interpolate(x_reconstructed, size=x.shape[2:], mode="bilinear", align_corners=False)
        print(f"Final reconstructed shape (after interpolation): {x_reconstructed.shape}")  # Debug final shape

        return x_reconstructed, mu, logvar

# Loss function for VAE
def vae_loss(recon_x, x, mu, logvar):
    """
    Computes the VAE loss as the sum of reconstruction loss and KL divergence.
    :param recon_x: Reconstructed input.
    :param x: Original input.
    :param mu: Mean of the latent distribution.
    :param logvar: Log variance of the latent distribution.
    """
    # Reconstruction loss
    recon_loss = F.mse_loss(recon_x, x, reduction='sum')
    
    # KL divergence
    kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kl_div

def create_vae():
    """
    Creates and returns a Variational Autoencoder (VAE) instance.
    """
    return VAE()

In [6]:
import torch
import torch.nn as nn

# Parameters
num_epochs = 20
batch_size = 1
dataset_dir = "/home/blah-buttery/nanoparticles/images/normal"  # gpu workstation image location
#dataset_dir = "/Users/blah_m4/Desktop/nanoparticle/images" # macbook image location

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Using device: {device}")

# Get transforms
transform = get_transforms(image_size=(1024, 1024), is_train=True)

# Create dataloaders
train_loader, test_loader = create_dataloaders(dataset_dir, transform, batch_size)

# Validate batch size
if batch_size > len(train_loader.dataset):
    print(f"[WARNING] Batch size ({batch_size}) exceeds dataset size ({len(train_loader.dataset)}). Adjusting batch size.")
    batch_size = len(train_loader.dataset)
'''
# Create VAE model
model = create_vae()
#model.to(device)
model = nn.DataParallel(model).to(device)
#model.to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
'''

[INFO] Using device: cuda


'\n# Create VAE model\nmodel = create_vae()\n#model.to(device)\nmodel = nn.DataParallel(model).to(device)\n#model.to(device)\n\n# Define loss function and optimizer\ncriterion = nn.MSELoss()\noptimizer = torch.optim.Adam(model.parameters(), lr=0.0001)\n'

In [28]:
#torch.cuda.empty_cache()
num_epochs = 50

# Training loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    recon_loss_total = 0.0
    kl_div_total = 0.0

    for batch in train_loader:
        batch = batch.to(device)

        # Forward pass
        reconstructed, mu, logvar = model(batch)
        
        # Compute losses
        recon_loss = nn.functional.mse_loss(reconstructed, batch, reduction='sum')  # Reconstruction loss
        kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())  # KL Divergence
        loss = recon_loss + kl_div

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        recon_loss_total += recon_loss.item()
        kl_div_total += kl_div.item()

    print(f"[INFO] Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss / len(train_loader):.4f}, "
          f"Reconstruction Loss: {recon_loss_total / len(train_loader):.4f}, "
          f"KL Divergence: {kl_div_total / len(train_loader):.4f}")

print("[INFO] Testing complete!")


Input shape: torch.Size([1, 3, 1024, 1024])
Encoded shape: torch.Size([1, 256, 128, 128])
Decoded (before upsampling) shape: torch.Size([1, 256, 128, 128])
Reconstructed shape: torch.Size([1, 3, 1024, 1024])
Final reconstructed shape (after interpolation): torch.Size([1, 3, 1024, 1024])


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 GiB. GPU 0 has a total capacity of 23.69 GiB of which 1.10 GiB is free. Including non-PyTorch memory, this process has 22.57 GiB memory in use. Of the allocated memory 20.25 GiB is allocated by PyTorch, and 1.86 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Training loop without extra information
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for batch in train_loader:
        batch = batch.to(device)

        # Forward pass
        reconstructed, mu, logvar = model(batch)
        loss = vae_loss(reconstructed, batch, mu, logvar)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    print(f"[INFO] Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss / len(train_loader):.4f}")

print("[INFO] Testing complete!")

In [12]:
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 5            |        cudaMalloc retries: 5         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  20878 MiB |  20878 MiB |  36886 MiB |  16008 MiB |
|       from large pool |  20875 MiB |  20875 MiB |  36879 MiB |  16004 MiB |
|       from small pool |      3 MiB |      3 MiB |      7 MiB |      4 MiB |
|---------------------------------------------------------------------------|
| Active memory         |  20878 MiB |  20878 MiB |  36886 MiB |  16008 MiB |
|       from large pool |  20875 MiB |  20875 MiB |  36879 MiB |  16004 MiB |
|       from small pool |      3 MiB |      3 MiB |      7 MiB |      4 MiB |
|---------------------------------------------------------------

In [6]:
import torch
from torch.cuda.amp import GradScaler, autocast

def train_vae(model, train_loader, optimizer, num_epochs, accumulation_steps=4, device="cuda"):
    # Initialize mixed precision GradScaler
    scaler = GradScaler()
    model.to(device)
    model.train()

    for epoch in range(num_epochs):
        train_loss = 0.0

        for i, batch in enumerate(train_loader):
            batch = batch.to(device)

            # Forward pass with mixed precision
            optimizer.zero_grad(set_to_none=True)  # Use set_to_none=True for better memory optimization
            with autocast():
                reconstructed, mu, logvar = model(batch)
                loss = vae_loss(reconstructed, batch, mu, logvar)
                loss = loss / accumulation_steps  # Scale loss by accumulation steps

            # Backward pass with mixed precision scaling
            scaler.scale(loss).backward()

            # Perform optimizer step after accumulating gradients
            if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad(set_to_none=True)  # Reset gradients

            # Accumulate training loss
            train_loss += loss.item() * accumulation_steps  # Rescale to original loss

        avg_loss = train_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

    print("[INFO] Training complete!")

In [4]:
import torch
from torch.cuda.amp import GradScaler, autocast

def train_vae(model, train_loader, optimizer, num_epochs, accumulation_steps=2, device="cuda"):
    scaler = GradScaler()
    model.to(device)
    model.train()

    for epoch in range(num_epochs):
        train_loss = 0.0

        for i, batch in enumerate(train_loader):
            batch = batch.to(device)

            # Clear GPU cache to avoid fragmentation
            torch.cuda.empty_cache()

            # Mixed precision training
            optimizer.zero_grad(set_to_none=True)
            with autocast(device_type="cuda"):
                reconstructed, mu, logvar = model(batch)
                loss = vae_loss(reconstructed, batch, mu, logvar)
                loss = loss / accumulation_steps  # Scale loss for gradient accumulation

            # Backward pass
            scaler.scale(loss).backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            # Perform optimizer step after gradient accumulation
            if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad(set_to_none=True)

            # Accumulate loss
            train_loss += loss.item() * accumulation_steps

        avg_loss = train_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

    print("[INFO] Training complete!")


In [7]:
# Define model, optimizer, and dataloader
vae = VAE(in_channels=3, latent_dim=128)
vae = nn.DataParallel(vae).to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)
#train_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)

# Train the model
train_vae(model=vae, train_loader=train_loader, optimizer=optimizer, num_epochs=10, accumulation_steps=4, device="cuda")


  scaler = GradScaler()
  with autocast(device_type="cuda"):


TypeError: autocast.__init__() got an unexpected keyword argument 'device_type'