In [2]:
from diffusers import StableDiffusionPipeline
from peft import get_peft_model, LoraConfig
import torch


## Load Pretrained Stable Diffusion Model

In [3]:
model_id = "CompVis/stable-diffusion-v1-4"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)


Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Fetching 16 files:   6%|▋         | 1/16 [00:00<00:01,  8.54it/s]Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install

## Configure LoRA for Fine-Tuning

In [5]:
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["attn1", "attn2"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)


In [9]:
from datasets import load_dataset
from PIL import Image
import requests
from io import BytesIO
from torchvision import transforms
from datasets import Dataset
import warnings
warnings.filterwarnings("ignore")

# Load 1% subset of LAION
dataset = load_dataset("laion/laion2B-en", split="train[:1%]")

# Keep only entries with valid TEXT and URL
dataset = dataset.filter(lambda x: x.get("TEXT") and x.get("URL"))

# Define preprocessing function
def process_sample(example):
    try:
        # Download image
        response = requests.get(example["URL"], timeout=3)
        image = Image.open(BytesIO(response.content)).convert("RGB")
        
        # Resize to 512x512
        image = image.resize((512, 512))
        
        return {
            "image": image,
            "text": example["TEXT"]
        }
    except Exception as e:
        return {"image": None, "text": None}  # Will be filtered out

# Apply processing
dataset = dataset.map(process_sample, remove_columns=dataset.column_names)

# Filter out failures
dataset = dataset.filter(lambda x: x["image"] is not None and x["text"] is not None)

# (Optional) Convert PIL image to tensor
to_tensor = transforms.ToTensor()

def convert_to_tensor(example):
    example["image"] = to_tensor(example["image"])
    return example

dataset = dataset.map(convert_to_tensor)

# Now: dataset[i] = { "image": tensor([3, 512, 512]), "text": str }


DatasetNotFoundError: Dataset 'laion/laion2B-en' is a gated dataset on the Hub. You must be authenticated to access it.

In [6]:
pipe.unet = get_peft_model(pipe.unet, lora_config)


ValueError: Target module Attention(
  (to_q): Linear(in_features=320, out_features=320, bias=False)
  (to_k): Linear(in_features=320, out_features=320, bias=False)
  (to_v): Linear(in_features=320, out_features=320, bias=False)
  (to_out): ModuleList(
    (0): Linear(in_features=320, out_features=320, bias=True)
    (1): Dropout(p=0.0, inplace=False)
  )
) is not supported. Currently, only the following modules are supported: `torch.nn.Linear`, `torch.nn.Embedding`, `torch.nn.Conv1d`, `torch.nn.Conv2d`, `torch.nn.Conv3d`, `transformers.pytorch_utils.Conv1D`, `torch.nn.MultiheadAttention.`.

## Prepare Dataset for Fine-Tuning

In [None]:
from datasets import load_dataset
dataset = load_dataset("laion/laion2B-en", split="train[:1%]")  # 1% subset


## Fine-Tune the Model

In [None]:
training_args = {
    "learning_rate": 1e-4,
    "batch_size": 4,
    "num_epochs": 3
}


In [None]:
train_model(pipe, processed_dataset, training_args)


## Save and Load Fine-Tuned Model

In [None]:
pipe.unet.save_pretrained("lora_weights")

In [None]:
## Inference 

In [None]:
prompt = "A fantasy landscape with mountains and rivers"
image = pipe(prompt).images[0]
image.save("generated_image.png")
