In [10]:
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import random

In [3]:
casia = np.genfromtxt("casia-webface.txt",delimiter='\n',dtype=str)
casia = np.array([np.array(el) for el in np.char.split(casia)])
casia

array([['0', 'casia-webface/000000/00000001.jpg', '26.0', '1'],
       ['0', 'casia-webface/000000/00000002.jpg', '30.0', '1'],
       ['0', 'casia-webface/000000/00000003.jpg', '31.0', '1'],
       ...,
       ['10571', 'casia-webface/010571/00490621.jpg', '35.5', '1'],
       ['10571', 'casia-webface/010571/00490622.jpg', '28.0', '0'],
       ['10571', 'casia-webface/010571/00490623.jpg', '16.5', '1']],
      dtype='<U33')

In [6]:
def load_image_paths(base_path):
    image_paths = {}
    
    # Loop through each directory (person)
    for person_dir in os.listdir(base_path):
        person_path = os.path.join(base_path, person_dir)
        
        if os.path.isdir(person_path):
            # Get all image files in the directory
            images = [os.path.join(person_path, img) for img in os.listdir(person_path) if img.lower().endswith(('.png', '.jpg', '.jpeg'))]
            
            if images:  # If the person has images
                image_paths[person_dir] = images
    
    return image_paths
    

In [9]:
load_image_paths("casia-webface")

{'000000': ['casia-webface\\000000\\00000001.jpg',
  'casia-webface\\000000\\00000002.jpg',
  'casia-webface\\000000\\00000003.jpg',
  'casia-webface\\000000\\00000004.jpg',
  'casia-webface\\000000\\00000005.jpg',
  'casia-webface\\000000\\00000006.jpg',
  'casia-webface\\000000\\00000007.jpg',
  'casia-webface\\000000\\00000008.jpg',
  'casia-webface\\000000\\00000009.jpg',
  'casia-webface\\000000\\00000010.jpg',
  'casia-webface\\000000\\00000011.jpg',
  'casia-webface\\000000\\00000012.jpg',
  'casia-webface\\000000\\00000013.jpg',
  'casia-webface\\000000\\00000014.jpg',
  'casia-webface\\000000\\00000015.jpg'],
 '000001': ['casia-webface\\000001\\00000016.jpg',
  'casia-webface\\000001\\00000017.jpg',
  'casia-webface\\000001\\00000018.jpg',
  'casia-webface\\000001\\00000019.jpg',
  'casia-webface\\000001\\00000020.jpg',
  'casia-webface\\000001\\00000021.jpg',
  'casia-webface\\000001\\00000022.jpg',
  'casia-webface\\000001\\00000023.jpg',
  'casia-webface\\000001\\00000024.j

In [11]:
def generate_positive_pairs(image_paths, batch_size):
    positive_pairs = []
    directories = list(image_paths.keys())

    while len(positive_pairs) < batch_size:
        person = random.choice(directories)
        images = image_paths[person]

        # Ensure distinct pairs by using different image indices
        for i in range(len(images)):
            for j in range(i + 1, len(images)):  # i != j to create distinct pairs
                positive_pairs.append((images[i], images[j]))
                if len(positive_pairs) >= batch_size:
                    break
            if len(positive_pairs) >= batch_size:
                break

    return positive_pairs

In [19]:
def generate_negative_pairs(image_paths, batch_size):

    negative_pairs = set()  # Use a set to track unique pairs
    directories = list(image_paths.keys())

    while len(negative_pairs) < batch_size:
        # Select two directories (people)
        person1, person2 = random.sample(directories, 2)

        # If both people are the same, skip to the next iteration
        if person1 == person2:
            continue

        # Select random images from each person
        image1 = random.choice(image_paths[person1])
        image2 = random.choice(image_paths[person2])

        # Create a unique identifier for the negative pair
        pair = tuple(sorted([image1, image2]))  # Sorting ensures (image1, image2) and (image2, image1) are the same

        # Only add the pair if it's not already in the set
        if pair not in negative_pairs:
            negative_pairs.add(pair)

    # Convert the set of pairs back to a list for returning
    return list(negative_pairs)

In [18]:
generate_positive_pairs(load_image_paths("casia-webface"), 2)

[('casia-webface\\002047\\00141902.jpg',
  'casia-webface\\002047\\00141903.jpg'),
 ('casia-webface\\002047\\00141902.jpg',
  'casia-webface\\002047\\00141904.jpg')]

In [20]:
generate_negative_pairs(load_image_paths("casia-webface"), 2)

[('casia-webface\\000200\\00025000.jpg',
  'casia-webface\\000437\\00044019.jpg'),
 ('casia-webface\\000863\\00072035.jpg',
  'casia-webface\\002109\\00144945.jpg')]