# Week 2 â€” LoRA Fine-Tuning of SLM
Align Small Language Model embeddings with Diffusion CLIP Encoder.

In [1]:
import diffusers, huggingface_hub, torch
print("diffusers:", diffusers.__version__)
print("huggingface_hub:", huggingface_hub.__version__)
print("Torch CUDA available:", torch.cuda.is_available())


  from .autonotebook import tqdm as notebook_tqdm


diffusers: 0.27.2
huggingface_hub: 0.24.6
Torch CUDA available: True


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from diffusers import StableDiffusionPipeline
from peft import LoraConfig, get_peft_model, TaskType
import torch, pandas as pd
from tqdm import tqdm

ModuleNotFoundError: No module named 'peft'

In [None]:
slm_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(slm_name)
slm = AutoModelForCausalLM.from_pretrained(slm_name, torch_dtype=torch.float16).to("cuda")
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
clip_encoder = pipe.text_encoder
print("SLM & Diffusion Model Loaded Successfully")

  torch.utils._pytree._register_pytree_node(
Loading pipeline components...:  43%|â–ˆâ–ˆâ–ˆâ–ˆâ–Ž     | 3/7 [00:00<00:00,  4.31it/s]`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
Loading pipeline components...: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 7/7 [00:01<00:00,  6.15it/s]


SLM & Diffusion Model Loaded Successfully


In [None]:
data = {"caption": [
    "a sunset over the ocean",
    "a cat sitting on a laptop",
    "a futuristic city skyline",
    "a person walking in rain with umbrella",
] * 25}
df = pd.DataFrame(data)
df.to_csv("../data/mini_text_image_dataset.csv", index=False)
print("Dataset saved")

Dataset saved


In [None]:
config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"], lora_dropout=0.05)
slm_lora = get_peft_model(slm, config)
slm_lora.print_trainable_parameters()

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.10229075496156657


In [None]:
def dummy_train(slm, tokenizer, df):
    optimizer = torch.optim.AdamW(slm.parameters(), lr=1e-4)
    slm.train()
    for caption in tqdm(df['caption'][:50], desc="Training"):
        inputs = tokenizer(caption, return_tensors="pt", truncation=True, max_length=128).to("cuda")
        outputs = slm(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print("LoRA Fine-tuning Complete")
dummy_train(slm_lora, tokenizer, df)

Training: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 50/50 [00:02<00:00, 24.09it/s]

LoRA Fine-tuning Complete





In [None]:
output_dir = "../adapters/slm_lora_adapter/"
slm_lora.save_pretrained(output_dir)
print(f"LoRA adapter saved to {output_dir}")

LoRA adapter saved to ../adapters/slm_lora_adapter/


Evaluation MAtrics


In [3]:
import sys
print(sys.executable)


/home/cse-sdpl/anaconda3/envs/gpu_env/bin/python


In [4]:
import diffusers
print(diffusers.__version__)


0.27.2


In [None]:
import os
os.makedirs("outputs", exist_ok=True)


In [None]:
import os

# Create folder for LoRA adapter
lora_dir = "./adapters/tinyllama_magicbrush_lora"
os.makedirs(lora_dir, exist_ok=True)
print("Created:", lora_dir)


Created: ./adapters/tinyllama_magicbrush_lora


In [None]:
from diffusers import StableDiffusionPipeline
from diffusers import UNet2DConditionModel
from diffusers import DDPMScheduler
from diffusers import AutoencoderKL
from diffusers import LoraConfig
from diffusers import DPMSolverMultistepScheduler
import torch
from PIL import Image
import os

device = "cuda"

print("Loading base Stable Diffusion model...")
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16
).to(device)

# Small dummy dataset (one image)
os.makedirs("dummy_train", exist_ok=True)
Image.new("RGB", (512,512), color="white").save("dummy_train/dummy.png")

# LoRA config
lora_config = LoRAConfig(
    r=4,
    lora_alpha=4,
    lora_dropout=0.1,
    bias="none",
    target_modules=["to_q", "to_v"]
)

# Apply LoRA to UNet
pipe.unet.add_adapter(lora_config)

# VERY small training loop
optimizer = torch.optim.Adam(pipe.unet.parameters(), lr=1e-4)

print("Training a tiny LoRA adapter (10 steps only)...")

for step in range(10):
    img = Image.open("dummy_train/dummy.png")

    latents = pipe.vae.encode(pipe.image_processor(img).unsqueeze(0).to(device)).latent_dist.sample()
    noise = torch.randn_like(latents)
    timesteps = torch.randint(0, 1000, (1,), device=device).long()

    noisy_latents = pipe.scheduler.add_noise(latents, noise, timesteps)

    model_pred = pipe.unet(noisy_latents, timesteps, encoder_hidden_states=torch.randn(1,77,768).to(device)).sample
    loss = torch.nn.functional.mse_loss(model_pred, noise)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Step {step+1}/10 | Loss: {loss.item():.4f}")

# Save LoRA weights
pipe.unet.save_attn_procs("./adapters/tinyllama_magicbrush_lora")
print("âœ” Dummy LoRA saved at adapters/tinyllama_magicbrush_lora")


  from .autonotebook import tqdm as notebook_tqdm


ImportError: cannot import name 'LoraConfig' from 'diffusers' (/home/cse-sdpl/anaconda3/envs/gpu_env/lib/python3.10/site-packages/diffusers/__init__.py)

In [None]:
import torch
from diffusers import StableDiffusionPipeline
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"

# ------------------------------
# 1. Load CLIP for Evaluation
# ------------------------------
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def compute_clip_score(text, image_path):
    img = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=[text], images=img, return_tensors="pt", padding=True).to(device)

    with torch.no_grad():
        out = clip_model(**inputs)
        logits = out.logits_per_image.item()
        img_emb = out.image_embeds[0]
        txt_emb = out.text_embeds[0]
        l2_dist = torch.nn.functional.pairwise_distance(img_emb.unsqueeze(0), txt_emb.unsqueeze(0)).item()

    return logits, l2_dist

# ------------------------------
# 2. Generate Base Model Image
# ------------------------------
print("â–¶ Generating BEFORE LoRA image...")

pipe_base = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16
).to(device)

prompt = "A serene sunset over a calm lake with orange sky and reflections."

before_img = pipe_base(prompt, num_inference_steps=25).images[0]
before_path = "outputs/before_lora.png"
before_img.save(before_path)

print("Saved:", before_path)

# ------------------------------
# 3. Generate LoRA Fine-Tuned Image
# ------------------------------
print("â–¶ Generating AFTER LoRA image...")

pipe_lora = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16
).to(device)

# Load your LoRA adapter

print("â–¶ Loading LoRA adapter from local directory...")

lora_dir = "./adapters/tinyllama_magicbrush_lora"  # <-- ensure this exists!

pipe_lora.load_lora_weights(
    lora_dir,
    use_safetensors=True,
    local_files_only=True
)

print("LoRA loaded successfully.")

after_img = pipe_lora(prompt, num_inference_steps=25).images[0]
after_path = "outputs/after_lora.png"
after_img.save(after_path)

print("Saved:", after_path)

# ------------------------------
# 4. Compute CLIP Similarity
# ------------------------------
print("\nâ–¶ Computing CLIP scores ...")

before_score, before_l2 = compute_clip_score(prompt, before_path)
after_score, after_l2   = compute_clip_score(prompt, after_path)

print("\nðŸ“Š Final Comparison")
print("----------------------------")
print(f"Prompt: {prompt}")
print("\nBefore LoRA:")
print(f" - CLIP Similarity: {before_score:.4f}")
print(f" - L2 Distance:     {before_l2:.4f}")

print("\nAfter LoRA:")
print(f" - CLIP Similarity: {after_score:.4f}")
print(f" - L2 Distance:     {after_l2:.4f}")

improvement = after_score - before_score
print("\nâœ¨ Improvement (Î” CLIP Score):", round(improvement, 4))


â–¶ Generating BEFORE LoRA image...


Loading pipeline components...:  29%|â–ˆâ–ˆâ–Š       | 2/7 [00:00<00:01,  3.44it/s]`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
Loading pipeline components...: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 7/7 [00:01<00:00,  6.94it/s]
100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 25/25 [00:02<00:00,  9.81it/s]


Saved: outputs/before_lora.png
â–¶ Generating AFTER LoRA image...


Loading pipeline components...:  29%|â–ˆâ–ˆâ–Š       | 2/7 [00:00<00:01,  3.18it/s]`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
Loading pipeline components...: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 7/7 [00:01<00:00,  6.36it/s]


â–¶ Loading LoRA adapter from local directory...


HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': './adapters/tinyllama_magicbrush_lora'. Use `repo_type` argument if needed.