In [11]:

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np
import os
from denoising_diffusion_pytorch import Unet, GaussianDiffusion, Trainer


  from .autonotebook import tqdm as notebook_tqdm


In [None]:


# Custom dataset class for loading and converting grayscale images to 3-channel RGB
class NPYImageDataset(Dataset):
    def __init__(self, image_folder, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        self.image_files = [os.path.join(image_folder, img) for img in os.listdir(image_folder) if img.endswith('.npy')]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        image_path = self.image_files[index]
        image = np.load(image_path)  # Load the .npy file

        # Assuming the image is stored as a 2D array (for grayscale)
        # Convert it to a 3D array with 1 channel repeated 3 times for compatibility
        if len(image.shape) == 2:  # Grayscale image, needs conversion
            image = np.repeat(image[:, :, np.newaxis], 3, axis=2)  # Convert to 3-channel

        # Convert image to PyTorch tensor and apply transforms if any
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.0  # Normalize to [0, 1]
        if self.transform:
            image = self.transform(image)
        return image

# Transformation setup for resizing and normalizing 3-channel images
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize images to 128x128
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize for 3 channels
])

# Initialize dataset and dataloader for training
image_folder = './Samples'  # Update this path
dataset = NPYImageDataset(image_folder=image_folder, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize the Unet model
model = Unet(
    dim = 64,
    dim_mults = (1, 2, 4, 8),
    flash_attn = True
)

# Initialize the GaussianDiffusion process
diffusion = GaussianDiffusion(
    model,
    image_size = 128,
    timesteps = 1000,           
    sampling_timesteps = 250    
)

# Initialize the trainer
trainer = Trainer(
    diffusion,
    folder = image_folder,  # The folder path is required but not directly used in this script
    train_batch_size = 32,
    train_lr = 8e-5,
    train_num_steps = 700000,       
    gradient_accumulate_every = 2,   
    ema_decay = 0.995,                
    amp = True,  # Automated Mixed Precision for faster training
    calculate_fid = True  # If you have set up FID calculation
)

# Train the model
trainer.train()

# Note: This script assumes the `Trainer` class from `denoising_diffusion_pytorch` is used as-is
# and is capable of directly handling the provided dataloader. Adjustments might be necessary
# if the `Trainer` class does not directly support this usage.


In [None]:
class NPYImageDataset(Dataset):
    def __init__(self, image_folder, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        self.image_files = [os.path.join(image_folder, img) for img in os.listdir(image_folder) if img.endswith('.npy')]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        image_path = self.image_files[index]
        image = np.load(image_path)  # Load the .npy file

        # Assuming the image is stored as a 2D array (for grayscale)
        # Convert it to a 3D array with 1 channel repeated 3 times for compatibility
        if len(image.shape) == 2:  # Grayscale image, needs conversion
            image = np.repeat(image[:, :, np.newaxis], 3, axis=2)  # Convert to 3-channel

        # Convert image to PyTorch tensor and apply transforms if any
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.0  # Normalize to [0, 1]
        if self.transform:
            image = self.transform(image)
        return image

In [None]:
image_folder = './Samples'  # Update this path
dataset = NPYImageDataset(image_folder=image_folder, transform=transform)
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [3]:
import os
import numpy as np

directory = './Samples'
numpy_list = []

for file_name in os.listdir(directory):
    if file_name.endswith('.npy'):
        file_path = os.path.join(directory, file_name)
        image_np = np.load(file_path)

        # Assuming the loaded image shape is (1, 150, 150), i.e., grayscale with a single channel
        if image_np.shape == (1, 150, 150):
            # Remove the channel dimension
            image_np = np.squeeze(image_np, axis=0)
            # Convert grayscale to RGB by repeating the single channel 3 times
            image_np = np.repeat(image_np[np.newaxis, :, :], 3, axis=0)

        # Append the RGB numpy array directly to the list
        numpy_list.append(image_np)

# Now, `numpy_list` contains all your images as numpy arrays of shape (3, 150, 150)


In [4]:
numpy_list = np.array(numpy_list)
print(numpy_list.shape)

(10000, 3, 150, 150)


In [5]:
import torch

# Convert the numpy arrays to PyTorch tensors
tensor_list = [torch.tensor(image_np, dtype=torch.float32) for image_np in numpy_list]

# Normalize the tensors to have values between 0 and 1 if they aren't already
tensor_list = [tensor / 255. for tensor in tensor_list]


In [18]:
type(tensor_list[0])

torch.Tensor

In [6]:
from torch.utils.data import Dataset

class CustomTensorDataset(Dataset):
    def __init__(self, tensor_list):
        self.tensor_list = tensor_list

    def __len__(self):
        return len(self.tensor_list)

    def __getitem__(self, idx):
        return self.tensor_list[idx]


In [7]:
# Initialize the dataset with your list of tensors
dataset = CustomTensorDataset(tensor_list)


In [16]:
type(dataset[0])

torch.Tensor

In [9]:
from torch.utils.data import DataLoader

# Create a DataLoader
batch_size = 32  # Set the batch size
shuffle = True   # Shuffle the dataset at every epoch
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


In [29]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torchvision.transforms import Compose, Resize, Lambda
from denoising_diffusion_pytorch import Unet, GaussianDiffusion
import torchvision.transforms.functional as TF

# Assuming images_np is your numpy array with shape (10000, 3, 150, 150)
# For demonstration, creating a dummy numpy array with random values

# Step 1: Convert numpy array to PyTorch tensor
images_tensor = tensor_list

# Step 2: Define a transform to resize images to 128x128
transform = Compose([
    Lambda(lambda x: TF.to_pil_image(x)),
    Resize((128, 128)),  # Convert to PIL Image to use torchvision's Resize                  # Resize image to 128x128
    Lambda(lambda x: TF.to_tensor(x)),     # Convert back to tensor
])

# Apply the transform to each image in the tensor
images_tensor_resized = torch.stack([transform(img) for img in images_tensor])

# Step 3: Create a Dataset and DataLoader
dataset = TensorDataset(images_tensor_resized)
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize the Unet model
model = Unet(
    dim=64,
    dim_mults=(1, 2, 4, 8),
    flash_attn=True
)

# Initialize the GaussianDiffusion process
diffusion = GaussianDiffusion(
    model,
    image_size=128,
    timesteps=1000  # Number of steps
)

# Define an optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training loop (simplified example)
num_epochs = 3  # Set the number of epochs as needed
for epoch in range(num_epochs):
    for batch in data_loader:
        optimizer.zero_grad()
        images = batch[0]  # Extract images tensor from the batch
        loss = diffusion(images)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update model parameters
    print(f"Epoch {epoch + 1}: Loss = {loss.item()}")

# After training, sample images from the model
sampled_images = diffusion.sample(batch_size=8)
print(f"Sampled images shape: {sampled_images.shape}")  # Should be (8, 3, 128, 128)


Epoch 1: Loss = 0.011305714026093483
