In [1]:
import os
import fid_custom
import random
import shutil

In [2]:
# --- Configuration of the biased dataset -------------------------------------
# Total number of images in folderC
N = 15000

# Percentage of N images to come from folderA (the rest comes from folderB)
x_percent = 99
if not 0 <= x_percent <= 100:
    raise ValueError(f"x_percent must be between 0 and 100, got {x_percent}")

folderA = 'celebA_class_dataset/Smiling_Young_Male'
folderB = 'celebA_class_dataset/Not_Male'

# The output root is named after both source classes, their mix and N.
# The class names are extracted first: reusing the f-string's own quote
# character inside a replacement field is a SyntaxError before Python 3.12.
class_A = folderA.split('/')[-1]
class_B = folderB.split('/')[-1]
fC = f'biased_datasets/{class_A}_{x_percent}_{class_B}_{100-x_percent}_N{N}'
folderC = f'{fC}/images/class'

# Source of the per-image feature files and destination for the selected subset
folder_features = 'celebA_data/celebA_features'
new_features = f'{fC}/features'

os.makedirs(folderC, exist_ok=True)
os.makedirs(new_features, exist_ok=True)

# Multiply before dividing and use floor division so an integer percentage is
# split exactly; (x_percent / 100) * N can land just below the true value
# (e.g. 0.29 * 100 == 28.999999999999996) and then be truncated one too low.
num_from_A = int(x_percent * N // 100)
num_from_B = N - num_from_A

In [3]:
# Build the biased image set: num_from_A images drawn from folderA plus
# num_from_B images drawn from folderB, all copied into folderC.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

# A dedicated, seeded generator makes the selection reproducible, so re-running
# this cell picks the same files instead of adding a different sample to folderC.
SEED = 0
rng = random.Random(SEED)

# os.listdir returns names in arbitrary order; sort so the seeded sample does
# not depend on the file system.
images_A = sorted(img for img in os.listdir(folderA) if img.lower().endswith(IMAGE_EXTENSIONS))
images_B = sorted(img for img in os.listdir(folderB) if img.lower().endswith(IMAGE_EXTENSIONS))

if len(images_A) < num_from_A or len(images_B) < num_from_B:
    raise ValueError("Not enough images in folderA or folderB to satisfy the required number.")

# Randomly sample images from folderA
selected_images_A = rng.sample(images_A, num_from_A)

# A file name present in both folders would be written twice to the same
# destination, leaving fewer than N images in folderC. Exclude names already
# taken from folderA before sampling folderB.
taken_names = set(selected_images_A)
candidates_B = [img for img in images_B if img not in taken_names]
if len(candidates_B) < num_from_B:
    raise ValueError("Not enough distinctly named images in folderB to satisfy the required number.")
selected_images_B = rng.sample(candidates_B, num_from_B)

# Copy the selected images to folderC, each from the folder it was sampled from
# (no per-image membership test against a list is needed).
copied = 0
for source_dir, selection in ((folderA, selected_images_A), (folderB, selected_images_B)):
    for img in selection:
        shutil.copy(os.path.join(source_dir, img), os.path.join(folderC, img))
        copied += 1

print(f"Successfully copied {copied} images to {folderC}")

# folderC is created with exist_ok=True, so files from an earlier run may still
# be present; report it rather than silently working on a larger dataset.
files_in_folderC = len(os.listdir(folderC))
if files_in_folderC != N:
    print(f"Warning: {folderC} contains {files_in_folderC} files, expected {N}")

Successfully copied 15000 images to biased_datasets/Smiling_Young_Male_99_Not_Male_1_N15000/images/class


In [4]:
# For every image in folderC, copy the feature file with the same base name
# (<base name>.npy) from folder_features into new_features.
# The filter uses the same case-insensitive extension list as the image
# selection step; checking only lowercase ".jpg"/".png" would silently skip
# images that were copied under any other accepted extension.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

n_features_copied = 0
n_features_missing = 0
for filename in os.listdir(folderC):
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
        continue

    base_name = os.path.splitext(filename)[0]
    npy_file = base_name + ".npy"

    # Check if the .npy file exists in folder_features
    npy_file_path = os.path.join(folder_features, npy_file)
    if os.path.exists(npy_file_path):
        new_npy_path = os.path.join(new_features, npy_file)
        shutil.copy(npy_file_path, new_npy_path)
        n_features_copied += 1
    else:
        print(f"{npy_file} not found in {folder_features}")
        n_features_missing += 1

# Surface incomplete feature sets: later cells read the features in new_features.
if n_features_missing:
    print(f"Copied {n_features_copied} feature files, {n_features_missing} missing")

In [5]:
# Derive statistics from the feature files in new_features and store them
# next to the dataset as fid_stats.npz.
biased_stats_file = f'{fC}/fid_stats.npz'
biased_stats = fid_custom.features_to_stat(new_features)
fid_custom.save_stats(biased_stats, biased_stats_file)

In [6]:
# Load the reference statistics and read the biased-dataset statistics back
# from the file written for this dataset.
real_stats_file = 'celebA_data/celebA_fid_stats.npz'
biased_stats_file = f'{fC}/fid_stats.npz'

real_stats = fid_custom.load_stats(real_stats_file)
biased_stats = fid_custom.load_stats(biased_stats_file)

In [7]:
# Compare the reference statistics with the biased-dataset statistics.
# Mean and covariance are selected by key rather than by slicing the dict's
# values positionally, so the call does not depend on the order in which the
# statistics dict happens to be built (it also carries 'num_samples').
fid_value = fid_custom.calculate_fid(
    real_stats['mean'], real_stats['covariance'],
    biased_stats['mean'], biased_stats['covariance'],
)
fid_value

np.float64(34.47328366822464)

In [8]:
# Inspect the reference statistics; as the cell's last expression the object
# is rendered as the cell output.
real_stats

{'mean': array([0.31840017, 0.28387764, 0.17477879, ..., 0.31609127, 0.2679897 ,
        0.36640477], shape=(2048,), dtype=float32),
 'covariance': array([[ 0.03124174,  0.00045412,  0.00449469, ..., -0.00314412,
         -0.00232486, -0.00071472],
        [ 0.00045412,  0.02289081,  0.00398362, ...,  0.00317206,
          0.00469967,  0.00027849],
        [ 0.00449469,  0.00398362,  0.02128661, ...,  0.00539002,
          0.01254269,  0.0015931 ],
        ...,
        [-0.00314412,  0.00317206,  0.00539002, ...,  0.05696185,
          0.01446354,  0.00683247],
        [-0.00232486,  0.00469967,  0.01254269, ...,  0.01446354,
          0.06541866,  0.00803033],
        [-0.00071472,  0.00027849,  0.0015931 , ...,  0.00683247,
          0.00803033,  0.0503044 ]], shape=(2048, 2048)),
 'num_samples': 202599}

In [9]:
# Inspect the biased-dataset statistics; as the cell's last expression the
# object is rendered as the cell output.
biased_stats

{'mean': array([0.32623982, 0.25882196, 0.1315849 , ..., 0.22621965, 0.17120321,
        0.36180928], shape=(2048,), dtype=float32),
 'covariance': array([[ 0.0291286 ,  0.00055065,  0.0033761 , ..., -0.00128957,
         -0.00085642,  0.00049317],
        [ 0.00055065,  0.01987235,  0.00166456, ...,  0.00200797,
          0.00289645, -0.00176687],
        [ 0.0033761 ,  0.00166456,  0.01146806, ...,  0.0040111 ,
          0.00407135,  0.00189668],
        ...,
        [-0.00128957,  0.00200797,  0.0040111 , ...,  0.0412734 ,
          0.0069742 ,  0.00785711],
        [-0.00085642,  0.00289645,  0.00407135, ...,  0.0069742 ,
          0.03001137,  0.00332698],
        [ 0.00049317, -0.00176687,  0.00189668, ...,  0.00785711,
          0.00332698,  0.05062519]], shape=(2048, 2048)),
 'num_samples': 15000}