## Installations

In [1]:
# !pip install diffusers transformers
# !pip install accelerate

In [2]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
from diffusers import StableDiffusionPipeline, DDPMScheduler

## Data

In [3]:
class BlurDataset(Dataset):
    """Paired dataset of blurred images and their sharp counterparts.

    Files are paired by position after sorting the file names of each folder,
    so the two folders are assumed to sort into the same order (for example
    by sharing file names) -- verify this against the data on disk.

    Args:
        blur_dir: Directory containing the blurred input images.
        non_blur_dir: Directory containing the sharp target images.
        transform: Optional callable applied to both images of a pair.

    Raises:
        ValueError: If the two directories do not contain the same number of
            files, since index-based pairing would then be misaligned.
    """

    def __init__(self, blur_dir, non_blur_dir, transform=None):
        self.blur_dir = blur_dir
        self.non_blur_dir = non_blur_dir
        self.blur_images = self._list_files(blur_dir)
        self.non_blur_images = self._list_files(non_blur_dir)
        self.transform = transform

        if len(self.blur_images) != len(self.non_blur_images):
            raise ValueError(
                f"Cannot pair images by index: {len(self.blur_images)} files in "
                f"{blur_dir!r} but {len(self.non_blur_images)} files in {non_blur_dir!r}"
            )

    @staticmethod
    def _list_files(directory):
        """Return the sorted names of the regular, non-hidden files in ``directory``."""
        # Skip sub-directories and dot-entries (e.g. `.ipynb_checkpoints`),
        # which would otherwise be handed to `Image.open` and fail.
        return sorted(
            name
            for name in os.listdir(directory)
            if not name.startswith(".") and os.path.isfile(os.path.join(directory, name))
        )

    @staticmethod
    def _load_rgb(path):
        """Open ``path`` and return a fully loaded 3-channel RGB copy."""
        # `convert` returns a new, loaded image, so the file handle can be
        # closed right away instead of leaking one handle per sample.
        with Image.open(path) as image:
            return image.convert("RGB")

    def __len__(self):
        """Number of (blurred, sharp) pairs."""
        return len(self.blur_images)

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(blur_image, non_blur_image)`` pair."""
        blur_image = self._load_rgb(os.path.join(self.blur_dir, self.blur_images[idx]))
        non_blur_image = self._load_rgb(os.path.join(self.non_blur_dir, self.non_blur_images[idx]))

        if self.transform:
            blur_image = self.transform(blur_image)
            non_blur_image = self.transform(non_blur_image)

        return blur_image, non_blur_image

# Preprocessing shared by the blurred input and the sharp target of each pair.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    # ToTensor yields values in [0, 1]; the Stable Diffusion VAE operates on
    # images scaled to [-1, 1], so rescale both inputs and targets to match.
    transforms.Normalize([0.5], [0.5]),
])

# Paired blurred / sharp image folders on the mounted Google Drive.
dataset = BlurDataset('/content/drive/MyDrive/QR dataset/Images/Blur', '/content/drive/MyDrive/QR dataset/Images/Digital', transform=transform)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

In [4]:
# Sanity check: number of image pairs and number of batches per epoch.
# NOTE(review): the recorded output (801, 201) does not match batch_size=8
# (801 samples would give 101 batches) -- it looks stale; re-run after a
# kernel restart to confirm.
len(dataset), len(dataloader)

(801, 201)

## Model setup and training loop

In [5]:
import torch
from diffusers import DDPMScheduler, UNet2DConditionModel, AutoencoderKL
from transformers import CLIPTextModel, CLIPTokenizer

# Load the pretrained Stable Diffusion components.
model_id = "CompVis/stable-diffusion-v1-4"

# Text encoder and tokenizer (used only to produce conditioning embeddings)
tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder")

# VAE (maps images to and from latent space)
vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae")

# UNet (the only component that is optimized)
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet")

# Noise scheduler
scheduler = DDPMScheduler.from_pretrained(model_id, subfolder="scheduler")

# Only the UNet is handed to the optimizer, so freeze the other networks.
# Otherwise backward() still allocates gradients for their weights, wasting
# GPU memory on parameters that are never updated.
vae.requires_grad_(False)
text_encoder.requires_grad_(False)
vae.eval()
text_encoder.eval()

# Trade compute for memory: recompute UNet activations during backward
# instead of storing them. train() is set explicitly because some diffusers
# versions only apply checkpointing while the module is in training mode.
unet.enable_gradient_checkpointing()
unet.train()

# Move models to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
unet.to(device)
vae.to(device)
text_encoder.to(device)

# Optimizer (created after the device move so its state lives with the weights)
# NOTE(review): fp32 Adam keeps two extra tensors per trainable parameter.
# For the full SD v1 UNet (roughly 860M parameters) weights + gradients +
# optimizer state alone may exceed a ~15 GB GPU; if the OOM persists, train a
# subset of the UNet parameters or use a lighter optimizer.
optimizer = torch.optim.Adam(unet.parameters(), lr=1e-5)

# Training loop: the UNet predicts a residual that is subtracted from the
# blurred latents; the result is decoded and compared with the sharp image.
# NOTE(review): no noise is added to the latents, so the random timestep only
# acts as an extra conditioning input -- this is not the standard diffusion
# training objective; confirm that this is intended.
num_epochs = 5  # Adjust the number of epochs as needed
latent_scale = 0.18215  # Stable Diffusion v1 VAE latent scaling factor

# The prompt never changes, so tokenize and encode it once, without tracking
# gradients, instead of re-running the text encoder on every step.
prompt_inputs = tokenizer(
    ["a photo of a sharp image"],
    padding="max_length",
    max_length=tokenizer.model_max_length,
    return_tensors="pt",
)
with torch.no_grad():
    prompt_embedding = text_encoder(prompt_inputs.input_ids.to(device))[0]

for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0

    for blur_images, non_blur_images in dataloader:
        optimizer.zero_grad(set_to_none=True)

        # Move images to GPU
        blur_images = blur_images.to(device)
        non_blur_images = non_blur_images.to(device)
        batch_size = blur_images.shape[0]  # the last batch may be smaller

        # Encode the blurry images into latents (the VAE is not trained here)
        with torch.no_grad():
            blur_latents = vae.encode(blur_images).latent_dist.sample()
            blur_latents = blur_latents * latent_scale

        # One random timestep per sample. Read the value from the scheduler
        # config: direct attribute access on the scheduler is deprecated.
        timesteps = torch.randint(
            0, scheduler.config.num_train_timesteps, (batch_size,), device=device
        ).long()

        # Same text conditioning for every sample in the batch
        text_embeddings = prompt_embedding.repeat(batch_size, 1, 1)

        # Forward pass through the UNet
        noise_pred = unet(blur_latents, timesteps, encoder_hidden_states=text_embeddings).sample

        # Subtract the predicted residual and decode back to pixel space.
        # Gradients must flow through the decoder to reach the UNet, so this
        # call is deliberately not wrapped in no_grad.
        reconstructed_latents = blur_latents - noise_pred
        reconstructed_images = vae.decode(reconstructed_latents / latent_scale).sample

        # Calculate loss (mean squared error against the sharp target)
        loss = torch.nn.functional.mse_loss(reconstructed_images, non_blur_images)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch rather than only the last batch
    # (also avoids a NameError when the dataloader yields no batches).
    mean_loss = epoch_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch + 1}, Loss: {mean_loss}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 9.06 MiB is free. Process 113366 has 14.74 GiB memory in use. Of the allocated memory 14.41 GiB is allocated by PyTorch, and 179.09 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)