# Kvantitativna analiza

In [4]:
import os
from pathlib import Path
import torch
import lpips
from pytorch_fid import fid_score
from PIL import Image
import numpy as np
import cv2
from tqdm import tqdm

# metrics
lpips_alex = lpips.LPIPS(net='alex')  # perceptual similarity

device = "cuda" if torch.cuda.is_available() else "cpu"
lpips_alex = lpips_alex.to(device)


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth


100%|██████████| 233M/233M [00:01<00:00, 159MB/s]


Loading model from: /usr/local/lib/python3.12/dist-packages/lpips/weights/v0.1/alex.pth


### FID (Fréchet Inception Distance) in LPIPS (Learned Perceptual Image Patch Similarity)

**FID** - Nižje vrednosti pomenijo, da slike izgledajo bolj realistične v primerjavi z referenčnimi slikami. FID rezultate smo uporabili, da preverimo, ali se naše obnovljene glave dobro vključujejo v celotno sliko, brez artefaktov.

**LPIPS** - Nižje vrednosti pomenijo, da so slike vizualno bolj podobne referenčni sliki. LPIPS rezultate smo uporabili za merjenje, kako dobro je vsaka metoda ohranila dejanski videz originalne osebe, ali smo uspešno povrnili njihov pravi obraz ali smo opazljivo spremenili njihov videz.

Uporabljamo FID in LPIPS skupaj, da lahko ugotovimo, katera metoda zagotavlja najboljši balans med ohranjanjem identitete originalne osebe in ustvarjanjem rezultatov, ki izgledajo najbolj realistično.

In [None]:
def get_base_id(fname: str) -> str:
    """Extract base_id like '00002_00' from a filename (works for .jpg and .png)."""
    stem = Path(fname).stem  # remove extension
    parts = stem.split('_')
    return parts[0] + '_' + parts[1]


import shutil, tempfile
def prepare_fid_dirs(gt_dir, gen_dir):
    """
    Create temp dirs with matching counts for FID:
    replicate GT images so they align with generated variations.
    """
    tmp_gt = Path(tempfile.mkdtemp())
    tmp_gen = Path(tempfile.mkdtemp())

    gt_map = {get_base_id(f): f for f in os.listdir(gt_dir)}
    count = 0

    for f in os.listdir(gen_dir):
        base_id = get_base_id(f)
        if base_id not in gt_map:
            continue
        # copy GT (replicated for each generated variation)
        shutil.copy(Path(gt_dir)/gt_map[base_id], tmp_gt/f)
        shutil.copy(Path(gen_dir)/f, tmp_gen/f)
        count += 1

    return tmp_gt, tmp_gen, count


def calculate_fid(gt_dir, gen_dir):
    tmp_gt, tmp_gen, count = prepare_fid_dirs(gt_dir, gen_dir)
    if count == 0:
        print(f"[WARN] No matching images between {gt_dir} and {gen_dir}")
        return None
    fid = fid_score.calculate_fid_given_paths(
        [str(tmp_gt), str(tmp_gen)],
        batch_size=min(16, count),  # prevent batch_size > dataset
        device=device,
        dims=2048
    )
    return fid

def calculate_lpips(img1_path, img2_path):
    """LPIPS distance between two images."""
    img1 = lpips.im2tensor(lpips.load_image(str(img1_path))).to(device)
    img2 = lpips.im2tensor(lpips.load_image(str(img2_path))).to(device)
    return lpips_alex(img1, img2).item()

def calculate_folder_lpips(gt_dir, gen_dir):
    """Average LPIPS, matching generated files to GT by base_id."""
    values = []
    gt_map = {get_base_id(f): f for f in os.listdir(gt_dir)}

    for f in tqdm(sorted(os.listdir(gen_dir))):
        base_id = get_base_id(f)
        if base_id not in gt_map:
            continue

        p1 = Path(gt_dir)/gt_map[base_id]   # ground truth
        p2 = Path(gen_dir)/f               # generated/restored

        if p1.exists() and p2.exists():
            values.append(calculate_lpips(p1, p2))

    return np.mean(values) if values else None



### Primerjave
Ground truth vs Obnovitev:
- Meri, kako blizu so naše obnovljene slike originalnim, resničnim slikam. 
- Pove, katera metoda najbolje poda resnično območje glave/vratu po inpainting-u.



In [10]:
def evaluate_against_gt(gt_dir, method_dirs):
    results = {}
    for method, path in method_dirs.items():
        if method == "gt":  # skip gt itself
            continue
        print(f"--- Evaluating {method} vs GT ---")
        fid = calculate_fid(gt_dir, path)
        lp = calculate_folder_lpips(gt_dir, path)
        results[method] = {"FID": fid, "LPIPS": lp}
    return results


Generirano brez obnovitve vs Obnovitev:
- Meri, katera metoda obnovitve najbolj izboljša generirane slike v primerjavi z generirano sliko (brez obnovitve).
- Pokaže, katera metoda naredi generirane slike boljše in bolj naravne.

In [7]:
def evaluate_against_generated(generated_dir, method_dirs):
    results = {}
    for method, path in method_dirs.items():
        if method == "generated":
            continue  # skip baseline
        print(f"--- Evaluating {method} vs Generated ---")
        fid = calculate_fid(generated_dir, path)
        lp = calculate_folder_lpips(generated_dir, path)
        results[method] = {"FID": fid, "LPIPS": lp}
    return results


In [8]:
import pandas as pd

def summarize_results(results_dict):
    df = pd.DataFrame(results_dict).T
    # rank each column (lower is better)
    rank_df = df.rank(method="min", ascending=True)
    df["AvgRank"] = rank_df.mean(axis=1)
    return df.sort_values("AvgRank")


In [None]:
full_dirs = {
    "gt": Path("output/full/originals"),
    "generated": Path("output/full/generated"),
    "paste": Path("output/full/mask_paste"),
    "alpha": Path("output/full/alpha_blend"),
    "poisson": Path("output/full/poisson_blend"),
    "pyramid": Path("output/full/pyramid_blend"),
}

cropped_dirs = {
    "gt": Path("output/cropped/originals"),
    "generated": Path("output/cropped/generated"),
    "paste": Path("output/cropped/mask_paste"),
    "alpha": Path("output/cropped/alpha_blend"),
    "poisson": Path("output/cropped/poisson_blend"),
    "pyramid": Path("output/cropped/pyramid_blend"),
}


In [17]:
res_full_gt = evaluate_against_gt(full_dirs["gt"], full_dirs)

--- Evaluating generated vs GT ---


100%|██████████| 17/17 [00:03<00:00,  4.55it/s]
100%|██████████| 17/17 [00:04<00:00,  3.75it/s]
100%|██████████| 266/266 [00:13<00:00, 20.26it/s]


--- Evaluating paste vs GT ---


100%|██████████| 17/17 [00:03<00:00,  4.80it/s]
100%|██████████| 17/17 [00:03<00:00,  5.03it/s]
100%|██████████| 266/266 [00:12<00:00, 21.80it/s]


--- Evaluating alpha vs GT ---


100%|██████████| 17/17 [00:03<00:00,  4.50it/s]
100%|██████████| 17/17 [00:03<00:00,  5.02it/s]
100%|██████████| 266/266 [00:12<00:00, 21.75it/s]


--- Evaluating poisson vs GT ---


100%|██████████| 17/17 [00:04<00:00,  3.46it/s]
100%|██████████| 17/17 [00:03<00:00,  5.06it/s]
100%|██████████| 266/266 [00:12<00:00, 20.60it/s]


--- Evaluating pyramid vs GT ---


100%|██████████| 17/17 [00:03<00:00,  4.42it/s]
100%|██████████| 17/17 [00:03<00:00,  5.01it/s]
100%|██████████| 266/266 [00:12<00:00, 21.77it/s]


In [18]:
res_full_gen = evaluate_against_generated(full_dirs["generated"], full_dirs)

--- Evaluating gt vs Generated ---


100%|██████████| 1/1 [00:00<00:00,  3.36it/s]
100%|██████████| 1/1 [00:00<00:00,  3.41it/s]
100%|██████████| 10/10 [00:00<00:00, 21.90it/s]


--- Evaluating paste vs Generated ---


100%|██████████| 17/17 [00:03<00:00,  5.24it/s]
100%|██████████| 17/17 [00:04<00:00,  3.87it/s]
100%|██████████| 266/266 [00:12<00:00, 21.40it/s]


--- Evaluating alpha vs Generated ---


100%|██████████| 17/17 [00:03<00:00,  5.30it/s]
100%|██████████| 17/17 [00:03<00:00,  4.32it/s]
100%|██████████| 266/266 [00:12<00:00, 21.40it/s]


--- Evaluating poisson vs Generated ---


100%|██████████| 17/17 [00:03<00:00,  5.31it/s]
100%|██████████| 17/17 [00:03<00:00,  4.44it/s]
100%|██████████| 266/266 [00:12<00:00, 20.75it/s]


--- Evaluating pyramid vs Generated ---


100%|██████████| 17/17 [00:03<00:00,  5.31it/s]
100%|██████████| 17/17 [00:03<00:00,  4.47it/s]
100%|██████████| 266/266 [00:12<00:00, 21.30it/s]


In [21]:
res_crop_gt = evaluate_against_gt(cropped_dirs["gt"], cropped_dirs)

--- Evaluating generated vs GT ---


100%|██████████| 17/17 [00:01<00:00, 12.38it/s]
100%|██████████| 17/17 [00:01<00:00, 12.14it/s]
100%|██████████| 266/266 [00:06<00:00, 40.37it/s]


--- Evaluating paste vs GT ---


100%|██████████| 17/17 [00:01<00:00, 12.50it/s]
100%|██████████| 17/17 [00:01<00:00, 12.18it/s]
100%|██████████| 266/266 [00:06<00:00, 39.68it/s]


--- Evaluating alpha vs GT ---


100%|██████████| 17/17 [00:01<00:00, 12.46it/s]
100%|██████████| 17/17 [00:01<00:00, 11.79it/s]
100%|██████████| 266/266 [00:06<00:00, 41.67it/s]


--- Evaluating poisson vs GT ---


100%|██████████| 17/17 [00:01<00:00,  8.79it/s]
100%|██████████| 17/17 [00:01<00:00,  9.16it/s]
100%|██████████| 266/266 [00:06<00:00, 43.04it/s]


--- Evaluating pyramid vs GT ---


100%|██████████| 17/17 [00:01<00:00,  9.70it/s]
100%|██████████| 17/17 [00:01<00:00, 12.04it/s]
100%|██████████| 266/266 [00:06<00:00, 40.41it/s]


In [22]:
res_crop_gen = evaluate_against_generated(cropped_dirs["generated"], cropped_dirs)

--- Evaluating gt vs Generated ---


100%|██████████| 1/1 [00:00<00:00,  3.60it/s]
100%|██████████| 1/1 [00:00<00:00,  3.43it/s]
100%|██████████| 10/10 [00:00<00:00, 38.43it/s]


--- Evaluating paste vs Generated ---


100%|██████████| 17/17 [00:01<00:00, 11.85it/s]
100%|██████████| 17/17 [00:01<00:00, 11.77it/s]
100%|██████████| 266/266 [00:06<00:00, 40.39it/s]


--- Evaluating alpha vs Generated ---


100%|██████████| 17/17 [00:01<00:00, 11.30it/s]
100%|██████████| 17/17 [00:01<00:00, 11.82it/s]
100%|██████████| 266/266 [00:06<00:00, 42.44it/s]


--- Evaluating poisson vs Generated ---


100%|██████████| 17/17 [00:02<00:00,  8.03it/s]
100%|██████████| 17/17 [00:01<00:00, 11.40it/s]
100%|██████████| 266/266 [00:06<00:00, 41.26it/s]


--- Evaluating pyramid vs Generated ---


100%|██████████| 17/17 [00:01<00:00, 11.71it/s]
100%|██████████| 17/17 [00:01<00:00, 11.89it/s]
100%|██████████| 266/266 [00:06<00:00, 38.05it/s]


In [23]:
# Summaries
print("=== Full vs GT ===")
display(summarize_results(res_full_gt))

print("=== Full vs Generated ===")
display(summarize_results(res_full_gen))

print("=== Cropped vs GT ===")
display(summarize_results(res_crop_gt))

print("=== Cropped vs Generated ===")
display(summarize_results(res_crop_gen))


=== Full vs GT ===


Unnamed: 0,FID,LPIPS,AvgRank
poisson,152.474214,0.254614,1.5
generated,151.977768,0.256002,3.0
paste,152.804234,0.254794,3.0
pyramid,152.61318,0.254826,3.0
alpha,152.991133,0.254969,4.5


=== Full vs Generated ===


Unnamed: 0,FID,LPIPS,AvgRank
alpha,147.67371,0.242443,1.5
gt,158.830482,0.237106,3.0
pyramid,147.830526,0.242653,3.0
poisson,147.794723,0.24272,3.5
paste,147.853264,0.242667,4.0


=== Cropped vs GT ===


Unnamed: 0,FID,LPIPS,AvgRank
poisson,106.438041,0.126213,1.5
pyramid,106.001031,0.127517,1.5
paste,106.884987,0.12898,3.0
generated,109.672767,0.147739,4.5
alpha,109.889544,0.131818,4.5


=== Cropped vs Generated ===


Unnamed: 0,FID,LPIPS,AvgRank
alpha,85.327761,0.133496,1.0
pyramid,86.125258,0.135767,2.0
poisson,86.523684,0.137034,3.5
paste,86.82577,0.135935,3.5
gt,119.541909,0.140785,5.0


### Rezultati

#### Full & Ground Truth:

Poissonovo zlivanje je imelo najboljšo povprečno uvrstitev (1,5), z FID = 152,47 in LPIPS = 0,2546.
Druge metode (lepljenje, piramidno, alfa) so imele podobne rezultate, vendar nekoliko višje uvrstitve.
- Poissonovo zlivanje se globalno najbolj ujema z resničnimi slikami, vendar vse metode mešanja delujejo podobno.

#### Full & Generated:

Alfa zlivanje je imelo najboljšo povprečno uvrstitev (1,5), z FID = 147,67 in LPIPS = 0,2424.
Piramidno, poissonovo in lepljenje so bili tesno za njim.
- Alfa zlivanje najbolj izboljša splošni videz generiranih slik.

#### Cropped & Ground Truth:

Poissonovo in piramidno zlivanje sta imela najboljšo povprečno uvrstitev (1,5), z FID ≈ 106 in LPIPS ≈ 0,126–0,127.
- Ti metodi najbolje obnovita območje glave/vratu, da se ujema z originalno sliko.

#### Cropped & Generated:
Alfa zlivanje je imelo najboljšo povprečno uvrstitev (1,0), z FID = 85,33 in LPIPS = 0,1335.
Piramidno in poissonovo zlivanje sta tesno sledila.
- Alfa zlivanje najbolj izboljša generirano območje glave/vratu, zaradi česar izgleda bolj naravno.


#### Povzetek

Naša kvantitativna analiza uspešno dokazuje, da metode obnovitve glave izboljšajo izgled in naravnost slike pri generiranju oblačil z stable diffusion modelom. Čeprav trendi nakazujejo, da Poissonovo mešanje lahko ponuja rahle zaznavne prednosti, vsi trije pristopi mešanja zagotavljajo primerljivo in učinkovito kakovost obnovitve.