In [1]:

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np
import os
from denoising_diffusion_pytorch import Unet, GaussianDiffusion, Trainer


  from .autonotebook import tqdm as notebook_tqdm


In [1]:
import torch
print(torch.__version__)

2.2.2+cu121


In [2]:
import os
import numpy as np

directory = './Samples'
numpy_list = []

for file_name in os.listdir(directory):
    if file_name.endswith('.npy'):
        file_path = os.path.join(directory, file_name)
        image_np = np.load(file_path)

        # Assuming the loaded image shape is (1, 150, 150), i.e., grayscale with a single channel
        if image_np.shape == (1, 150, 150):
            # Remove the channel dimension
            image_np = np.squeeze(image_np, axis=0)
            # Convert grayscale to RGB by repeating the single channel 3 times
            image_np = np.repeat(image_np[np.newaxis, :, :], 3, axis=0)

        # Append the RGB numpy array directly to the list
        numpy_list.append(image_np)

# Now, `numpy_list` contains all your images as numpy arrays of shape (3, 150, 150)


In [3]:
numpy_list = np.array(numpy_list)
print(numpy_list.shape)

(10000, 3, 150, 150)


In [4]:
import torch

# Convert the numpy arrays to PyTorch tensors
tensor_list = [torch.tensor(image_np, dtype=torch.float32) for image_np in numpy_list]

# Normalize the tensors to have values between 0 and 1 if they aren't already
tensor_list = [tensor / 255. for tensor in tensor_list]


In [5]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torchvision.transforms import Compose, Resize, Lambda
from denoising_diffusion_pytorch import Unet, GaussianDiffusion
import torchvision.transforms.functional as TF

# Assuming images_np is your numpy array with shape (10000, 3, 150, 150)
# For demonstration, creating a dummy numpy array with random values

# Step 1: Convert numpy array to PyTorch tensor
images_tensor = tensor_list

# Step 2: Define a transform to resize images to 128x128
transform = Compose([
    Lambda(lambda x: TF.to_pil_image(x)),
    Resize((128, 128)),  # Convert to PIL Image to use torchvision's Resize                  # Resize image to 128x128
    Lambda(lambda x: TF.to_tensor(x)),     # Convert back to tensor
])

# Apply the transform to each image in the tensor
images_tensor_resized = torch.stack([transform(img) for img in images_tensor])

# Step 3: Create a Dataset and DataLoader
dataset = TensorDataset(images_tensor_resized)
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)



In [6]:
model = Unet(
    dim=64,
    dim_mults=(1, 2, 4, 8),
    flash_attn=True
)
# Initialize the GaussianDiffusion process
diffusion = GaussianDiffusion(
    model,
    image_size=128,
    timesteps=1000  # Number of steps\
)

# Define an optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

Non-A100 GPU detected, using math or mem efficient attention if input tensor is on cuda


In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [12]:
model = model.to(device)  # Move model to the GPU
diffusion = diffusion.to(device)  # Ensure diffusion model is also on the GPU


In [13]:
# Training loop (simplified example)
# num_epochs = 3  # Set the number of epochs as needed
# for epoch in range(num_epochs):
#     for batch in data_loader:
#         optimizer.zero_grad()
#         images = batch[0]  # Extract images tensor from the batch
#         loss = diffusion(images)  # Compute loss
#         loss.backward()  # Backpropagation
#         optimizer.step()  # Update model parameters
#     print(f"Epoch {epoch + 1}: Loss = {loss.item()}")

import os
from torchvision.utils import save_image
from tqdm import tqdm

#run on gpu
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)

num_epochs = 3  # Set the number of epochs as needed
save_image_interval = 1  # How often to save generated images (every N epochs)

# Directory for saving generated images
generated_images_dir = './results'
os.makedirs(generated_images_dir, exist_ok=True)

for epoch in range(num_epochs):
    epoch_loss = 0.0  # To accumulate loss over the epoch
    for batch in tqdm(data_loader,desc="Training",total=len(data_loader)):
        optimizer.zero_grad()
        images = batch[0].to(device)  # Extract images tensor from the batch
        loss = diffusion(images).to(device)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update model parameters

        epoch_loss += loss.item()

    # Average loss for the epoch
    avg_epoch_loss = epoch_loss / len(data_loader)
    print(f"Epoch {epoch + 1}: Avg Loss = {avg_epoch_loss}")

    # Periodically generate and save images to monitor progress
    if (epoch + 1) % save_image_interval == 0:
        with torch.no_grad():  # No need to track gradients for image generation
            sampled_images = diffusion.sample(batch_size=16)
            for i, img in enumerate(sampled_images):
                img_path = os.path.join(generated_images_dir, f'epoch_{epoch + 1}_image_{i}.png')
                save_image(img, img_path)

# After training, it's also helpful to evaluate the model using more comprehensive metrics like FID score.
# Ensure you have a separate script or process to calculate FID against a set of real images.


Training: 100%|██████████| 313/313 [13:35<00:00,  2.61s/it]


Epoch 1: Avg Loss = 0.08111970683637137


sampling loop time step: 100%|██████████| 1000/1000 [01:59<00:00,  8.36it/s]
Training: 100%|██████████| 313/313 [13:39<00:00,  2.62s/it]


Epoch 2: Avg Loss = 0.011547146545062526


sampling loop time step: 100%|██████████| 1000/1000 [02:01<00:00,  8.26it/s]
Training: 100%|██████████| 313/313 [13:32<00:00,  2.60s/it]


Epoch 3: Avg Loss = 0.0066943497035485775


sampling loop time step: 100%|██████████| 1000/1000 [02:02<00:00,  8.15it/s]


In [None]:
# After training, sample images from the model
# sampled_images = diffusion.sample(batch_size=8)
# print(f"Sampled images shape: {sampled_images.shape}")  # Should be (8, 3, 128, 128)

In [None]:
# from torchvision.utils import save_image

# # Assume `sampled_images` is your tensor of generated images
# for i, img in enumerate(sampled_images):
#     # Define a path for each image
#     img_path = os.path.join(results_dir, f'generated_image_{i}.png')
#     # Save the image
#     save_image(img, img_path)

In [15]:
from pytorch_fid.fid_score import calculate_fid_given_paths

# Paths to the directories containing real and generated images
paths = ['./Samples', './results']

# # The device to run the calculation on, 'cuda' or 'cpu'
# device = 'cuda'

# Calculate the FID score
fid_value = calculate_fid_given_paths(paths, batch_size=50, dims=2048,device=device)
print('FID score:', fid_value)


Downloading: "https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth" to /home/harsh/.cache/torch/hub/checkpoints/pt_inception-2015-12-05-6726825d.pth
100%|██████████| 91.2M/91.2M [00:06<00:00, 15.2MB/s]




ValueError: batch_size should be a positive integer value, but got batch_size=0