In [1]:
# Mount Google Drive into the Colab runtime at /content/drive (prints a
# confirmation once the mount succeeds).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch
import torch.cuda as cuda
import torch.nn.functional as F
from transformers import CLIPModel, CLIPProcessor
import cv2
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
from peft import LoraConfig, get_peft_model
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os

In [3]:
!nvidia-smi

Sat Jan 25 04:53:12 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P8              11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if cuda.is_available() else torch.device("cpu")

In [6]:
import torch.nn.functional as F

In [7]:
class CustomImageVariationLoss(torch.nn.Module):
    """Score a generated image against its text prompt and its source image.

    The score is a weighted sum of two terms:

    * a CLIP text-image alignment term (the negated ``logits_per_image`` mean), and
    * a structural term: the MSE between Canny edge maps of the source and
      the generated image.

    The structural term is computed with OpenCV on NumPy arrays, so it is
    detached from autograd and contributes a value but no gradient.

    Args:
        clip_model_name: Hugging Face id of the CLIP checkpoint to load.
        text_weight: Multiplier for the text-alignment term (default 0.6).
        structure_weight: Multiplier for the structural term (default 0.4).
    """

    def __init__(self, clip_model_name='openai/clip-vit-base-patch32',
                 text_weight=0.6, structure_weight=0.4):
        super().__init__()
        self.clip_model = CLIPModel.from_pretrained(clip_model_name)
        self.clip_processor = CLIPProcessor.from_pretrained(clip_model_name)
        self.text_weight = text_weight
        self.structure_weight = structure_weight

    def extract_edges(self, image):
        """Return the Canny edge map of ``image`` as a float tensor in [0, 1].

        Args:
            image: A ``torch.Tensor`` (3-D tensors are treated as
                channels-first and permuted to HWC), a PIL image, or anything
                ``numpy.asarray`` accepts as an HxW or HxWxC array.

        Returns:
            A 2-D CPU float tensor holding 0.0 / 1.0 edge values.
        """
        import numpy as np  # local import: the notebook's import cell does not provide numpy

        # ``torch.Tensor`` is the class; ``torch.tensor`` is a factory
        # function and makes ``isinstance`` raise TypeError.
        if isinstance(image, torch.Tensor):
            pixels = image.detach().cpu()  # .numpy() rejects CUDA / grad-tracking tensors
            if pixels.is_floating_point():
                pixels = pixels.float()  # half / bfloat16 are not reliably convertible
            if pixels.dim() == 3:
                pixels = pixels.permute(1, 2, 0)
            pixels = pixels.numpy()
        else:
            pixels = np.asarray(image)

        # Canny needs 8-bit input. Non-uint8 data whose maximum is <= 1.0 is
        # treated as [0, 1] floats and scaled up; anything else is clipped.
        if pixels.dtype != np.uint8:
            pixels = pixels.astype(np.float32)
            if pixels.size and pixels.max() <= 1.0:
                pixels = pixels * 255.0
            pixels = np.clip(pixels, 0.0, 255.0).astype(np.uint8)

        if pixels.ndim == 3 and pixels.shape[2] == 1:
            gray_image = pixels[:, :, 0]
        elif pixels.ndim == 3:
            gray_image = cv2.cvtColor(np.ascontiguousarray(pixels), cv2.COLOR_RGB2GRAY)
        else:
            gray_image = pixels
        gray_image = np.ascontiguousarray(gray_image)

        edges = cv2.Canny(gray_image, threshold1=100, threshold2=200)
        return torch.from_numpy(edges).float() / 255.0

    def text_image_alignment_loss(self, generated_image, text_prompt):
        """Return the negated mean CLIP image-text logit (lower is better aligned).

        NOTE(review): when ``generated_image`` is a float tensor already in
        [0, 1], confirm the CLIP image processor's rescaling expectation.
        """
        inputs = self.clip_processor(
            text=text_prompt,
            images=generated_image,
            return_tensors="pt",
            padding=True,
            truncation=True,  # prompts longer than CLIP's context would otherwise error
        )
        # The processor returns CPU tensors; move them to wherever the model lives.
        model_device = next(self.clip_model.parameters()).device
        inputs = {key: value.to(model_device) for key, value in inputs.items()}
        outputs = self.clip_model(**inputs)
        return -outputs.logits_per_image.mean()

    def structural_preservation_loss(self, original_image, generated_image):
        """Return the MSE between the Canny edge maps of the two images.

        When the edge maps differ in size, the generated one is bilinearly
        resampled to the original's height and width before comparison.
        """
        original_edges = self.extract_edges(original_image)
        generated_edges = self.extract_edges(generated_image)

        if original_edges.shape != generated_edges.shape:
            generated_edges = F.interpolate(
                generated_edges[None, None],
                size=tuple(original_edges.shape[-2:]),
                mode="bilinear",
                align_corners=False,
            )[0, 0]

        return F.mse_loss(original_edges, generated_edges)

    def forward(self, original_image, generated_image, text_prompt):
        """Return ``text_weight * alignment + structure_weight * structure``."""
        text_alignment = self.text_image_alignment_loss(generated_image, text_prompt)
        structural_preserve = self.structural_preservation_loss(original_image, generated_image)
        # Edge maps are built on the CPU; align devices before mixing the terms.
        structural_preserve = structural_preserve.to(text_alignment.device)

        return self.text_weight * text_alignment + self.structure_weight * structural_preserve

In [13]:
def setup_lora_model(base_model_path):
    """Load a Stable Diffusion pipeline and wrap its UNet with LoRA adapters.

    Args:
        base_model_path: Model id or path handed to
            ``StableDiffusionPipeline.from_pretrained``.

    Returns:
        The loaded pipeline, with ``unet`` replaced by the PEFT-wrapped UNet.
    """
    # Rank-16 adapters (alpha 32, dropout 0.1, no bias terms) on the
    # modules named "to_q" and "to_v".
    adapter_settings = {
        "r": 16,
        "lora_alpha": 32,
        "target_modules": ["to_q", "to_v"],
        "lora_dropout": 0.1,
        "bias": "none",
    }
    adapter_config = LoraConfig(**adapter_settings)

    pipeline = StableDiffusionPipeline.from_pretrained(base_model_path)
    pipeline.unet = get_peft_model(pipeline.unet, adapter_config)
    return pipeline

In [9]:
class CarpetWallpaperDataset(Dataset):
    """Image/prompt pairs listed in a header-less two-column CSV.

    The first CSV column is an image path relative to ``base_image_dir``;
    the second is the text prompt for that image.
    """

    def __init__(self, csv_path, base_image_dir):
        self.dataframe = pd.read_csv(
            csv_path,
            header=None,
            names=['image_path', 'text_prompt'],
        )
        self.base_image_dir = base_image_dir

    def __len__(self):
        """Number of rows read from the CSV."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``{'image': RGB PIL image, 'text_prompt': prompt}`` for row ``idx``."""
        record = self.dataframe.iloc[idx]

        # Resolve the stored relative path against the dataset root, then load as RGB.
        image_file = os.path.join(self.base_image_dir, record['image_path'])
        rgb_image = Image.open(image_file).convert('RGB')

        return {'image': rgb_image, 'text_prompt': record['text_prompt']}

In [10]:
def prepare_dataloader(csv_path, base_image_dir, batch_size=4):
    dataset = CarpetWallpaperDataset(csv_path, base_image_dir)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True
    )

    return dataloader

In [19]:
# Modified training function to work with the new dataset
def train_image_variation_model(
    csv_path,
    base_image_dir,
    custom_loss_fn,
    num_epochs=5,
    learning_rate=1e-4,
    base_model_path="stabilityai/stable-diffusion-xl-base-1.0",
):
    """Run the LoRA fine-tuning loop over the image/prompt dataset.

    Args:
        csv_path: CSV file listing relative image paths and prompts.
        base_image_dir: Directory the CSV's image paths are relative to.
        custom_loss_fn: Callable taking ``original_image``,
            ``generated_image`` and ``text_prompt`` keyword arguments and
            returning a scalar loss tensor.
        num_epochs: Number of passes over the dataset.
        learning_rate: AdamW learning rate.
        base_model_path: Model id or path handed to ``setup_lora_model``.

    Returns:
        The pipeline whose UNet carries the LoRA adapters.

    Raises:
        RuntimeError: If, after ``backward()``, no trainable UNet parameter
            holds a gradient, which means the optimizer step would be a no-op.

    NOTE(review): ``pipeline(...)`` is the diffusers *inference* entry point.
    It is expected to run under ``torch.no_grad()`` and to return PIL images,
    so the loss below cannot reach the UNet. Confirm against the installed
    diffusers version. Real fine-tuning needs a noise-prediction objective on
    ``pipeline.unet`` directly; the guard after ``backward()`` makes the
    current limitation explicit instead of silently training nothing.

    NOTE(review): ``image=`` and ``strength=`` are img2img arguments, and the
    default ``base_model_path`` is an SDXL checkpoint, while
    ``setup_lora_model`` builds a ``StableDiffusionPipeline``. Confirm the
    pipeline class matches both.
    """
    # Setup model and optimizer
    pipeline = setup_lora_model(base_model_path)
    pipeline.to(device)

    # Mixed precision is only meaningful on CUDA; disable it cleanly elsewhere.
    use_amp = device.type == "cuda"
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    # Hand the optimizer only the parameters that are actually trainable.
    trainable_params = [param for param in pipeline.unet.parameters() if param.requires_grad]
    optimizer = torch.optim.AdamW(trainable_params, lr=learning_rate)

    # Prepare DataLoader
    dataloader = prepare_dataloader(csv_path, base_image_dir)

    # Training loop
    for epoch in range(num_epochs):
        for batch in dataloader:
            images = batch['image']
            if isinstance(images, torch.Tensor):
                images = images.to(device)
            prompts = batch['text_prompt']

            # Clear gradients left over from the previous step so they do not accumulate.
            optimizer.zero_grad(set_to_none=True)

            # Image-to-image generation using mixed precision
            with torch.cuda.amp.autocast(enabled=use_amp):
                generated_images = pipeline(
                    prompt=prompts,
                    image=images,
                    strength=0.75,  # Controls image variation intensity
                    guidance_scale=7.5
                ).images

            # Sum the custom loss over every image in the batch
            total_loss = 0
            for orig_img, gen_img, prompt in zip(images, generated_images, prompts):
                total_loss += custom_loss_fn(
                    original_image=orig_img,
                    generated_image=gen_img,
                    text_prompt=prompt
                )

            # Backpropagate and update model
            scaler.scale(total_loss).backward()
            if all(param.grad is None for param in trainable_params):
                raise RuntimeError(
                    "No gradient reached the trainable UNet parameters: the loss is "
                    "computed on images produced by the inference pipeline, which does "
                    "not build an autograd graph. Train on a differentiable UNet "
                    "objective instead."
                )
            scaler.step(optimizer)
            scaler.update()

        print(f"Epoch {epoch+1}/{num_epochs} completed")

    return pipeline

In [20]:
# Usage example
if __name__ == "__main__":
    # Dataset locations on the mounted Google Drive.
    BASE_IMAGE_DIR = '/content/drive/MyDrive/Carpets'
    CSV_PATH = '/content/drive/MyDrive/Carpets/desc.csv'

    # Build the loss with its default CLIP checkpoint, then run training.
    custom_loss_fn = CustomImageVariationLoss()
    trained_model = train_image_variation_model(CSV_PATH, BASE_IMAGE_DIR, custom_loss_fn)

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


OutOfMemoryError: CUDA out of memory. Tried to allocate 10.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 5.06 MiB is free. Process 3062 has 14.74 GiB memory in use. Of the allocated memory 14.31 GiB is allocated by PyTorch, and 342.37 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)