In [14]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
from glob import glob
from pytorch_fid.inception import InceptionV3
from pytorch_fid.fid_score import calculate_frechet_distance
from scipy import linalg
import os

# ----- CONFIG -----
real_dir = "./data/dataset_real"
gen_dir = "./data/dataset_lora_ti_controlnet_refined"
dims = 2048
image_size = 299
batch_size = 32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ----- Load image paths (recursively) -----
real_paths = sorted(glob(f"{real_dir}/**/*.png", recursive=True))
gen_paths = sorted(glob(f"{gen_dir}/**/*.png", recursive=True))

# ----- Dataset -----
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor()
])

class ImageDataset(Dataset):
    def __init__(self, paths, transform):
        self.paths = paths
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        img = Image.open(self.paths[idx]).convert("RGB")
        return self.transform(img)

# ----- Dataloaders -----
real_loader = DataLoader(ImageDataset(real_paths, transform), batch_size=batch_size)
gen_loader = DataLoader(ImageDataset(gen_paths, transform), batch_size=batch_size)

# ----- InceptionV3 -----
model = InceptionV3([InceptionV3.BLOCK_INDEX_BY_DIM[dims]]).to(device).eval()

def get_activations(loader):
    activations = np.empty((len(loader.dataset), dims))
    start = 0
    for batch in loader:
        batch = batch.to(device)
        with torch.no_grad():
            pred = model(batch)[0].squeeze(-1).squeeze(-1).cpu().numpy()
        end = start + pred.shape[0]
        activations[start:end] = pred
        start = end
    return activations

# ----- Calculate activations -----
print("🔍 Extracting activations...")
act1 = get_activations(real_loader)
act2 = get_activations(gen_loader)

# ----- Calculate stats -----
mu1, sigma1 = np.mean(act1, axis=0), np.cov(act1, rowvar=False)
mu2, sigma2 = np.mean(act2, axis=0), np.cov(act2, rowvar=False)

# ----- FID -----
fid_score = calculate_frechet_distance(mu1, sigma1, mu2, sigma2)
print(f"✅ FID Score: {fid_score:.4f}")


🔍 Extracting activations...
✅ FID Score: 40.5769


In [13]:
# Check shapes
print("act1 shape:", act1.shape)
print("act2 shape:", act2.shape)

# Check for invalid numbers
print("Real NaNs:", np.isnan(act1).any(), "| Infs:", np.isinf(act1).any())
print("Gen  NaNs:", np.isnan(act2).any(), "| Infs:", np.isinf(act2).any())

act1 shape: (780, 2048)
act2 shape: (1207, 2048)
Real NaNs: False | Infs: False
Gen  NaNs: False | Infs: False


LoRA: 42.1789
LoRA_TI: 42.2448
LoRA_TI_ControlNet: 46.1624
LoRA_TI_ControlNet_refined: 40.5769

In [26]:
import os
# from torch_fidelity import calculate_metrics
from pytorch_fid import fid_score
from pytorch_fid.inception import InceptionV3
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.models.inception import inception_v3
import torch.nn.functional as F
import numpy as np
from PIL import Image
from tqdm import tqdm
from glob import glob

In [27]:
# Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
real_dataset_path = "./data/dataset_real"
dataset_name = "lora"
aug_dataset_path = f"./data/dataset_{dataset_name}"

In [28]:
real_paths = sorted(glob(f"{real_dataset_path}/*/*/*.png"))
aug_paths = sorted(glob(f"{aug_dataset_path}/*/*.png"))

In [29]:
# Load Inception model block for 2048-dim FID
dims = 2048
image_size = 299
batch_size = 16
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
model = InceptionV3([block_idx]).to(device)

# Define transform
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor()
])

In [30]:
# Custom Dataset
class ImagePathDataset(Dataset):
    def __init__(self, files, transform):
        self.files = files
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        img = Image.open(self.files[i]).convert("RGB")
        return self.transform(img)

In [None]:
# DataLoaders
real_dl = DataLoader(ImagePathDataset(real_paths, transform), batch_size=batch_size, drop_last=False)
gen_dl = DataLoader(ImagePathDataset(aug_paths, transform), batch_size=batch_size, drop_last=False)

# Get activations
def get_activations_from_loader(loader):
    model.eval()
    pred_arr = np.empty((len(loader.dataset), dims))
    start_idx = 0

    for batch in loader:
        batch = batch.to(device)
        with torch.no_grad():
            pred = model(batch)[0].squeeze(3).squeeze(2).cpu().numpy()
        pred_arr[start_idx:start_idx + pred.shape[0]] = pred
        start_idx += pred.shape[0]

    return pred_arr

act1 = get_activations_from_loader(real_dl)
act2 = get_activations_from_loader(gen_dl)

assert np.all(np.isfinite(act1)), "Non-finite values in real activations"
assert np.all(np.isfinite(act2)), "Non-finite values in generated activations"

# Calculate stats and FID
mu1, sigma1 = np.mean(act1, axis=0), np.cov(act1, rowvar=False)
mu2, sigma2 = np.mean(act2, axis=0), np.cov(act2, rowvar=False)
fid_value = fid_score.calculate_frechet_distance(mu1, sigma1, mu2, sigma2)

print(f"✅ Final FID: {fid_value:.4f}")

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(
  avg = a.mean(axis, **keepdims_kw)
  mu2, sigma2 = np.mean(act2, axis=0), np.cov(act2, rowvar=False)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


ValueError: array must not contain infs or NaNs

In [20]:
# Compute statistics
m1, s1 = fid_score.calculate_activation_statistics(real_paths, model, batch_size=16, dims=dims, device=device)
m2, s2 = fid_score.calculate_activation_statistics(aug_paths, model, batch_size=16, dims=dims, device=device)

# Calculate FID
fid_value = calculate_frechet_distance(m1, s1, m2, s2)
print(f"✅ FID Score: {fid_value:.4f}")

  0%|          | 0/49 [00:00<?, ?it/s]


RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/fa926284/Documents/Repos/BUSI_diffusion/venv/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/fa926284/Documents/Repos/BUSI_diffusion/venv/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
    return self.collate_fn(data)
  File "/home/fa926284/Documents/Repos/BUSI_diffusion/venv/lib/python3.10/site-packages/torch/utils/data/_utils/collate.py", line 398, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
  File "/home/fa926284/Documents/Repos/BUSI_diffusion/venv/lib/python3.10/site-packages/torch/utils/data/_utils/collate.py", line 155, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
  File "/home/fa926284/Documents/Repos/BUSI_diffusion/venv/lib/python3.10/site-packages/torch/utils/data/_utils/collate.py", line 272, in collate_tensor_fn
    return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [3, 585, 683] at entry 0 and [3, 473, 563] at entry 1


In [13]:
# --- FID --- #
print("🔍 Calculating FID...")
fid_value = fid_score.calculate_fid_given_paths(
    [real_paths, aug_paths],
    batch_size=16,
    device=device,
    dims=2048
)
print(f"FID {dataset_name}: {fid_value:.4f}")

# --- IS --- #
def inception_score(img_folder, cuda=True, batch_size=32, splits=10):
    transform = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])
    dataset = ImageFolder(root=os.path.dirname(img_folder), transform=transform)

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    inception_model = inception_v3(pretrained=True, transform_input=False).eval()
    if cuda:
        inception_model = inception_model.cuda()

    preds = []

    with torch.no_grad():
        for batch, _ in tqdm(dataloader, desc="Inception Score"):
            batch = batch.cuda() if cuda else batch
            pred = inception_model(batch)
            preds.append(F.softmax(pred, dim=1).cpu().numpy())

    preds = np.concatenate(preds, axis=0)
    split_scores = []

    for k in range(splits):
        part = preds[k * (len(preds) // splits): (k+1) * (len(preds) // splits), :]
        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
        kl = np.mean(np.sum(kl, axis=1))
        split_scores.append(np.exp(kl))

    return np.mean(split_scores), np.std(split_scores)

print("🔍 Calculating Inception Scores...")
is_real, std_real = inception_score(real_dataset_path)
is_aug, std_aug = inception_score(aug_dataset_path)

print(f"Inception Score ({dataset_name} only): {is_aug:.3f} ± {std_aug:.3f}")

🔍 Calculating FID...


TypeError: stat: path should be string, bytes, os.PathLike or integer, not list