# Task 6 Metrics & Validation
Evaluate LoRA fine-tuned diffusion model with FID (optional small sample) and CLIP similarity.

In [1]:
%pip install -q torch-fidelity open-clip-torch

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import torch, json, numpy as np
from pathlib import Path
from torchvision.utils import save_image
from torch_fidelity import calculate_metrics
DEVICE='cuda' if torch.cuda.is_available() else 'cpu'
LORA_DIR=Path('task6_sd_lora/unet_lora')
assert LORA_DIR.exists(), 'Run task6_diffusion_finetune.ipynb first.'

## 1. Load Base + Apply LoRA

In [3]:
from diffusers import StableDiffusionPipeline
from peft import PeftModel
pipe=StableDiffusionPipeline.from_pretrained('runwayml/stable-diffusion-v1-5', torch_dtype=torch.float16 if DEVICE=='cuda' else torch.float32).to(DEVICE)
# Load LoRA weights from the correct directory containing adapter_config.json
pipe.unet = PeftModel.from_pretrained(pipe.unet, str(LORA_DIR))
pipe.unet.eval()

  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

PeftModel(
  (base_model): LoraModel(
    (model): UNet2DConditionModel(
      (conv_in): Conv2d(4, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (time_proj): Timesteps()
      (time_embedding): TimestepEmbedding(
        (linear_1): Linear(in_features=320, out_features=1280, bias=True)
        (act): SiLU()
        (linear_2): Linear(in_features=1280, out_features=1280, bias=True)
      )
      (down_blocks): ModuleList(
        (0): CrossAttnDownBlock2D(
          (attentions): ModuleList(
            (0-1): 2 x Transformer2DModel(
              (norm): GroupNorm(32, 320, eps=1e-06, affine=True)
              (proj_in): Conv2d(320, 320, kernel_size=(1, 1), stride=(1, 1))
              (transformer_blocks): ModuleList(
                (0): BasicTransformerBlock(
                  (norm1): LayerNorm((320,), eps=1e-05, elementwise_affine=True)
                  (attn1): Attention(
                    (to_q): lora.Linear(
                      (base_layer): Linear(in_feat

## 2. Generate Sample & Reference Sets

In [5]:
prompts=['a cute pastel pokemon','a fiery red pokemon','a blue aquatic creature']
gen_dir=Path('task6_gen'); ref_dir=Path('task6_ref'); gen_dir.mkdir(exist_ok=True); ref_dir.mkdir(exist_ok=True)
# Create reference images only once
if not any(ref_dir.iterdir()):
    base=StableDiffusionPipeline.from_pretrained('runwayml/stable-diffusion-v1-5').to(DEVICE)
    for i,p in enumerate(prompts):
        img=base(p, num_inference_steps=25).images[0]
        (ref_dir/f'{i}.png').parent.mkdir(exist_ok=True, parents=True)
        img.save(ref_dir/f'{i}.png')
# Generate (or reuse) LoRA images; skip ones already present to avoid FileNotFound later
for i,p in enumerate(prompts):
    out_path = gen_dir/f'{i}.png'
    if not out_path.exists():
        img=pipe(p, num_inference_steps=25).images[0]
        out_path.parent.mkdir(exist_ok=True, parents=True)
        img.save(out_path)
len(list(gen_dir.iterdir()))

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

3

## 3. CLIP Similarity

In [6]:
import open_clip, PIL.Image as Image, torchvision.transforms as T
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
tokenizer = open_clip.get_tokenizer('ViT-B-32')
model.to(DEVICE).eval()
clip_scores=[]
for p in prompts:
    img=Image.open(gen_dir/f'{prompts.index(p)}.png').convert('RGB')
    image_input=preprocess(img).unsqueeze(0).to(DEVICE)
    text_input=tokenizer([p]).to(DEVICE)
    with torch.no_grad():
        im_f, tx_f = model.encode_image(image_input), model.encode_text(text_input)
        im_f/=im_f.norm(dim=-1, keepdim=True); tx_f/=tx_f.norm(dim=-1, keepdim=True)
        sim=(im_f*tx_f).sum().item(); clip_scores.append(sim)
clip_scores

open_clip_model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs.hf.co/repos/f4/91/f49112076c029a4dafd8d687e7bfe82825a896cd9f38c366cf9b6b8799a86f32/e6d1bd7789aa45192b3bf90570a789b478bae1b74ebcce7eddd908e83a2b7c31?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27open_clip_model.safetensors%3B+filename%3D%22open_clip_model.safetensors%22%3B&Expires=1755425521&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1NTQyNTUyMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy9mNC85MS9mNDkxMTIwNzZjMDI5YTRkYWZkOGQ2ODdlN2JmZTgyODI1YTg5NmNkOWYzOGMzNjZjZjliNmI4Nzk5YTg2ZjMyL2U2ZDFiZDc3ODlhYTQ1MTkyYjNiZjkwNTcwYTc4OWI0NzhiYWUxYjc0ZWJjY2U3ZWRkZDkwOGU4M2EyYjdjMzE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=QUKz2Mh8AsKXEGp2fP51CoIyIHSD25c4TKs4USaPs6WoAWtMamhtA3L9VQo5IomaEK57WQuWAZQbt1GFNpwH4G83k2FsoBtSlzroQudvKut-p1xUWHVtWHMG10EEp0YOxLGe-2uSakBpc7Dkgdp6qtXQCisQ6klVQ3RQJrldRUh9sMSFTt%7EslHeMG9YolGPQ%7EC4Qb1wRmk00ZXdbL3AmJsduHq6IvWUiiYqwZYHk1hQBLE5xyt6

open_clip_model.safetensors:  94%|#########3| 566M/605M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


[0.27844351530075073, 0.3201940059661865, 0.32271507382392883]

## 4. (Optional) FID (tiny set - not statistically robust)

In [8]:
# Explicitly control CUDA usage: torch-fidelity defaults to cuda=True which breaks on CPU-only builds
metrics = calculate_metrics(
	input1=str(gen_dir),
	input2=str(ref_dir),
	fid=True,
	verbose=False,
	cuda=(DEVICE == 'cuda')
)
metrics

  img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())).view(height, width, 3)


{'frechet_inception_distance': 445.96179130944034}

## 5. Save Metrics

In [9]:
res={'clip_mean': float(np.mean(clip_scores)), 'clip_scores': clip_scores, 'fid': metrics.get('frechet_inception_distance')} 
json.dump(res, open('task6_metrics.json','w'), indent=2); res

{'clip_mean': 0.3071175316969554,
 'clip_scores': [0.27844351530075073, 0.3201940059661865, 0.32271507382392883],
 'fid': 445.96179130944034}

## 6. Summary
Metrics saved to task6_metrics.json (note: tiny sample; enlarge for reliability).