### 1. First Generate the FairFace Bias-Controlled Datasets



In [12]:
# print number of samples in training and number of female and male samples in training
import os
import numpy as np
import pandas as pd 
# Define the paths
data_dir = "../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset"
training_path = os.path.join(data_dir, "Training") 
training_labels_path = os.path.join("../Pseudo-Labeling/FairFace/train_labels.csv")  # Path to training metadata

# Load training metadata
file = pd.read_csv(training_labels_path)
male_samples = file[file['gender']=='Male']
female_samples = file[file['gender']=='Female']

print("Number of samples in training: ", len(file))
print(len(male_samples))
print(len(female_samples))


Number of samples in training:  86744
45986
40758


In [6]:
import os
import shutil
import pandas as pd
from tqdm.auto import tqdm

# Define the paths
data_dir = "../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset"
training_path = os.path.join(data_dir, "Training") 
training_labels_path = os.path.join("../Pseudo-Labeling/FairFace/train_labels.csv")  # Path to training metadata

# Define the output folders
data_dir = "bias_training_data"
output_folders = {
    "Unlabeled": os.path.join(data_dir, "Unlabeled"),
    "Gender_Balanced_Unlabeled": os.path.join(data_dir, "Gender_Balanced_Unlabeled"),
    "Severe_Male_Gender": os.path.join(data_dir, "Severe_Male_Gender"),
    "Severe_Female_Gender": os.path.join(data_dir, "Severe_Female_Gender"),
    "Severe_Black_Bias": os.path.join(data_dir, "Severe_Black_Bias"),
    "Severe_East_Asian_Bias": os.path.join(data_dir, "Severe_East_Asian_Bias"),
    "Severe_Black_Male_Bias": os.path.join(data_dir, "Severe_Black_Male_Bias"),
    "Severe_Black_Female_Bias": os.path.join(data_dir, "Severe_Black_Female_Bias"),
    "Severe_East_Asian_Female_Bias": os.path.join(data_dir, "Severe_East_Asian_Female_Bias")
}

# Ensure output directories exist
for folder in output_folders.values():
    os.makedirs(folder, exist_ok=True)

# Load training metadata
training_data = pd.read_csv(training_labels_path)

# Function to copy files based on criteria
def copy_files(file_list, gender_list, source_dir, dest_dir):
    for file, gender in tqdm(zip(file_list, gender_list), desc=f"Copying files to {dest_dir}"):
        gender_folder = "male" if gender == "Male" else "female"
        file_path = os.path.join(source_dir, gender_folder, file)
        file_path = file_path.replace("\\train", "")
        if os.path.exists(file_path):
            shutil.copy(file_path, dest_dir)
        else:
            print(f"Warning: {file_path} does not exist.")
# 1. Unlabeled - contains all the training samples
copy_files(training_data["file"], training_data["gender"], training_path, output_folders["Unlabeled"])

# 2. Gender_Balanced_Unlabeled - 50% male and 50% female
male_samples = training_data[training_data["gender"] == "Male"].sample(frac=0.5, random_state=42)
female_samples = training_data[training_data["gender"] == "Female"].sample(frac=0.5, random_state=42)
gender_balanced_samples = pd.concat([male_samples, female_samples])
copy_files(gender_balanced_samples["file"], gender_balanced_samples["gender"], training_path, output_folders["Gender_Balanced_Unlabeled"])

# 3. Severe_Male_Gender - 80% male and 20% female
male_samples = training_data[training_data["gender"] == "Male"].sample(frac=0.8, random_state=42)
female_samples = training_data[training_data["gender"] == "Female"].sample(frac=0.2, random_state=42)
severe_male_gender_samples = pd.concat([male_samples, female_samples])
copy_files(severe_male_gender_samples["file"], severe_male_gender_samples["gender"], training_path, output_folders["Severe_Male_Gender"])

# 4. Severe_Female_Gender - 80% female and 20% male
female_samples = training_data[training_data["gender"] == "Female"].sample(frac=0.8, random_state=42)
male_samples = training_data[training_data["gender"] == "Male"].sample(frac=0.2, random_state=42)
severe_female_gender_samples = pd.concat([female_samples, male_samples])
copy_files(severe_female_gender_samples["file"], severe_female_gender_samples["gender"], training_path, output_folders["Severe_Female_Gender"])

# 5. Severe_Black_Bias - All samples of Black race
black_samples = training_data[training_data["race"] == "Black"]
copy_files(black_samples["file"], black_samples["gender"], training_path, output_folders["Severe_Black_Bias"])

# 6. Severe_East_Asian_Bias - All samples of East Asian race
east_asian_samples = training_data[training_data["race"] == "East Asian"]
copy_files(east_asian_samples["file"], east_asian_samples["gender"], training_path, output_folders["Severe_East_Asian_Bias"])

# 7. Severe_Black_Male_Bias - All samples of Black and Male
black_male_samples = training_data[(training_data["race"] == "Black") & (training_data["gender"] == "Male")]
copy_files(black_male_samples["file"], black_male_samples["gender"], training_path, output_folders["Severe_Black_Male_Bias"])

# 8. Severe_Black_Female_Bias - All samples of Black and Female
black_female_samples = training_data[(training_data["race"] == "Black") & (training_data["gender"] == "Female")]
copy_files(black_female_samples["file"], black_female_samples["gender"], training_path, output_folders["Severe_Black_Female_Bias"])

# 9. Severe_East_Asian_Female_Bias - All samples of East Asian and Female
east_asian_female_samples = training_data[(training_data["race"] == "East Asian") & (training_data["gender"] == "Female")]
copy_files(east_asian_female_samples["file"], east_asian_female_samples["gender"], training_path, output_folders["Severe_East_Asian_Female_Bias"])

print("Dataset organization complete.")

Copying files to bias_training_data\Unlabeled: 0it [00:00, ?it/s]

Copying files to bias_training_data\Gender_Balanced_Unlabeled: 0it [00:00, ?it/s]

Copying files to bias_training_data\Severe_Male_Gender: 0it [00:00, ?it/s]

Copying files to bias_training_data\Severe_Female_Gender: 0it [00:00, ?it/s]

Copying files to bias_training_data\Severe_Black_Bias: 0it [00:00, ?it/s]

Copying files to bias_training_data\Severe_East_Asian_Bias: 0it [00:00, ?it/s]

Copying files to bias_training_data\Severe_Black_Male_Bias: 0it [00:00, ?it/s]

Copying files to bias_training_data\Severe_Black_Female_Bias: 0it [00:00, ?it/s]

Copying files to bias_training_data\Severe_East_Asian_Female_Bias: 0it [00:00, ?it/s]

Dataset organization complete.


In [1]:
validation_path = "../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/East_Asian_Validation"
test_path = "../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/Balanced_Validation"

### 2. Then Evaluate with Pseudo-Labeling, Pseudo-Balancing and DANN

In [None]:
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

import requests

# Define paths
bias_folder = "./bias_training_data"
validation_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/East_Asian_Validation")
test_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/Balanced_Validation")

# Define the training folders and pseudo-labeling thresholds
training_folders = [
    "Unlabeled",
    "Gender_Balanced_Unlabeled",
    "Severe_Male_Gender",
    "Severe_Female_Gender",
    "Severe_Black_Bias"
]
pseudo_label_thresholds = [0.9, 0.6]

# Define the transformations
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load validation dataset
val_dataset = ImageFolder(validation_folder, transform=transforms_val)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

# Define model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Self-training parameters
num_iterations = 5
num_epochs = 10
patience = 2

# Function to pseudo-balance the dataset
def pseudo_balancing(male_path, female_path):
    male_samples = os.listdir(male_path) if os.path.exists(male_path) else []
    female_samples = os.listdir(female_path) if os.path.exists(female_path) else []
    num_male = len(male_samples)
    num_female = len(female_samples)
    # Determine the target size for balancing
    target_size = min(num_male, num_female)
    # Randomly downsample the larger class
    if num_male > target_size:
        excess_male_samples = np.random.choice(male_samples, num_male - target_size, replace=False)
        for sample in excess_male_samples:
            os.remove(os.path.join(male_path, sample))
    elif num_female > target_size:
        excess_female_samples = np.random.choice(female_samples, num_female - target_size, replace=False)
        for sample in excess_female_samples:
            os.remove(os.path.join(female_path, sample))
    # Recount samples after balancing
    num_male = len(os.listdir(male_path))
    num_female = len(os.listdir(female_path))
    print(f"Balanced pseudo-labeled samples: Male - {num_male}, Female - {num_female}")
    return

# Loop over all experiments
for training_folder in training_folders:
 # Define the URL for the file
    url = "https://postechackr-my.sharepoint.com/:u:/g/personal/dongbinna_postech_ac_kr/EVd9bFWzqztMrXRDdNnCHQkBsHaM4n5_1q1fue77vtQVtw?download=1"

    # Define the path where the file will be saved
    output_path = "classification_model.pth"
    # Send a GET request to download the file
    response = requests.get(url)
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        with open(output_path, "wb") as file:
            file.write(response.content)  # Write the content of the response to the file
        print(f"File downloaded successfully and saved as {output_path}")
    else:
        print(f"Failed to download file. Status code: {response.status_code}")
    # Load pre-trained ResNet model
    model = models.resnet18(pretrained=True)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification
    pretrained_weights_path = 'classification_model.pth'
    model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    for pseudo_label_threshold in pseudo_label_thresholds:
        print(f"--- Starting Experiment: {training_folder}, Threshold: {pseudo_label_threshold} ---")
        # Define paths for this experiment
        unlabeled_folder = os.path.join(bias_folder, training_folder)
        train_folder = os.path.join(bias_folder, f"Training_pseudo_Balanced_{training_folder}_{pseudo_label_threshold}")
        aaf_training_female_path = os.path.join(train_folder, "female")
        aaf_training_male_path = os.path.join(train_folder, "male")
        model_save_folder = os.path.join(bias_folder, f"bias_{training_folder}_{pseudo_label_threshold}")
        os.makedirs(model_save_folder, exist_ok=True)

        # Create directories if they don't exist
        os.makedirs(train_folder, exist_ok=True)
        os.makedirs(aaf_training_female_path, exist_ok=True)
        os.makedirs(aaf_training_male_path, exist_ok=True)

        for iteration in range(num_iterations):
            print(f"--- Starting Iteration {iteration + 1}/{num_iterations} ---")
            print("Unlabeled dataset size:", len(os.listdir(unlabeled_folder)))

            # 1. Clean the training folder
            shutil.rmtree(train_folder)
            os.makedirs(train_folder, exist_ok=True)
            os.makedirs(aaf_training_female_path, exist_ok=True)
            os.makedirs(aaf_training_male_path, exist_ok=True)

            # 2. Pseudo-labeling
            model.eval()
            new_samples = 0
            print("Pseudo-labeling unlabeled samples...")
            for filename in os.listdir(unlabeled_folder):
                img_path = os.path.join(unlabeled_folder, filename)
                img = Image.open(img_path).convert('RGB')  # Make sure it's RGB
                img = transforms_val(img)  # Apply transformations
                # Forward pass through the model
                with torch.no_grad():
                    inputs = img.unsqueeze(0).to(device)  # Unsqueeze to add batch dimension
                    outputs = model(inputs)
                    probs = torch.softmax(outputs, dim=1)
                    max_probs, preds = torch.max(probs, dim=1)

                # Select samples with high confidence
                confident_mask = max_probs > pseudo_label_threshold
                if confident_mask.any():
                    for pred_label in preds[confident_mask]:
                        if pred_label == 1:
                            destination_folder = aaf_training_male_path
                        elif pred_label == 0:
                            destination_folder = aaf_training_female_path
                        else:
                            continue  # Skip samples with unexpected labels
                        # Move the image to the destination folder
                        shutil.copy(img_path, destination_folder)
                new_samples += 1
                if new_samples % 10000 == 0:
                    print(f"Processed {new_samples} images")
                del img, inputs, outputs, probs, max_probs, preds
                torch.cuda.empty_cache()

            print(f"Iteration {iteration + 1}: Added {new_samples} pseudo-labeled samples.")
            print(f"Number of pseudo-labeled male {len(os.listdir(aaf_training_male_path))} and female {len(os.listdir(aaf_training_female_path))} samples")

            # 3. Balance the training dataset
            pseudo_balancing(aaf_training_male_path, aaf_training_female_path)

            # Reload training dataset with new pseudo-labeled samples
            train_dataset = ImageFolder(train_folder, transform=transforms_train)
            train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)

            # 4. Training with early stopping
            best_val_loss = float('inf')
            wait = 0

            for epoch in range(num_epochs):
                model.train()
                running_loss = 0.0

                for inputs, labels in train_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item() * inputs.size(0)

                epoch_loss = running_loss / len(train_dataset)
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Train Loss: {epoch_loss:.4f}")

                # Validation
                model.eval()
                val_loss = 0.0
                correct = 0
                total = 0

                with torch.no_grad():
                    for inputs, labels in val_dataloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        val_loss += criterion(outputs, labels).item() * inputs.size(0)
                        _, preds = torch.max(outputs, 1)
                        correct += (preds == labels).sum().item()
                        total += labels.size(0)

                val_loss /= len(val_dataset)
                val_accuracy = correct / total
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    wait = 0
                    torch.save(model.state_dict(), os.path.join(model_save_folder, f"Controlled_Bias_{pseudo_label_threshold}_{iteration + 1}_balanced_best.pth"))
                else:
                    wait += 1
                    if wait >= patience:
                        print("Early stopping triggered.")
                        break

            # Save the final model
            torch.save(model.state_dict(), os.path.join(model_save_folder, f'Controlled_Bias_{pseudo_label_threshold}_{iteration + 1}_balanced_best.pth'))
            print(f"Self-training completed for {training_folder}, Threshold: {pseudo_label_threshold}.")

print("All experiments completed.")

File downloaded successfully and saved as classification_model.pth


  model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))


--- Starting Experiment: Unlabeled, Threshold: 0.9 ---
--- Starting Iteration 1/10 ---
Unlabeled dataset size: 86744
Pseudo-labeling unlabeled samples...
Processed 10000 images
Processed 20000 images
Processed 30000 images
Processed 40000 images
Processed 50000 images
Processed 60000 images
Processed 70000 images
Processed 80000 images
Iteration 1: Added 86744 pseudo-labeled samples.
Number of pseudo-labeled male 28244 and female 12927 samples
Balanced pseudo-labeled samples: Male - 12927, Female - 12927
Iteration 1, Epoch 1, Train Loss: 0.0379
Iteration 1, Epoch 1, Validation Loss: 0.7299, Accuracy: 0.7974
Iteration 1, Epoch 2, Train Loss: 0.0235
Iteration 1, Epoch 2, Validation Loss: 0.7081, Accuracy: 0.7987
Iteration 1, Epoch 3, Train Loss: 0.0150
Iteration 1, Epoch 3, Validation Loss: 0.8830, Accuracy: 0.7935
Iteration 1, Epoch 4, Train Loss: 0.0081
Iteration 1, Epoch 4, Validation Loss: 0.8684, Accuracy: 0.8032
Early stopping triggered.
Self-training completed for Unlabeled, Thres

In [None]:
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

import requests

# Define paths
bias_folder = "./bias_training_data"
validation_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/East_Asian_Validation")
test_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/Balanced_Validation")

# Define the training folders and pseudo-labeling thresholds
training_folders = [
    "Severe_East_Asian_Female_Bias",
    "Severe_East_Asian_Bias"
]
pseudo_label_thresholds = [0.9, 0.6]

# Define the transformations
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load validation dataset
val_dataset = ImageFolder(validation_folder, transform=transforms_val)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

# Define model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Self-training parameters
num_iterations = 5
num_epochs = 10
patience = 2

# Function to pseudo-balance the dataset
def pseudo_balancing(male_path, female_path):
    male_samples = os.listdir(male_path) if os.path.exists(male_path) else []
    female_samples = os.listdir(female_path) if os.path.exists(female_path) else []
    num_male = len(male_samples)
    num_female = len(female_samples)
    # Determine the target size for balancing
    target_size = min(num_male, num_female)
    # Randomly downsample the larger class
    if num_male > target_size:
        excess_male_samples = np.random.choice(male_samples, num_male - target_size, replace=False)
        for sample in excess_male_samples:
            os.remove(os.path.join(male_path, sample))
    elif num_female > target_size:
        excess_female_samples = np.random.choice(female_samples, num_female - target_size, replace=False)
        for sample in excess_female_samples:
            os.remove(os.path.join(female_path, sample))
    # Recount samples after balancing
    num_male = len(os.listdir(male_path))
    num_female = len(os.listdir(female_path))
    print(f"Balanced pseudo-labeled samples: Male - {num_male}, Female - {num_female}")
    return

# Loop over all experiments
for training_folder in training_folders:
 # Define the URL for the file
    url = "https://postechackr-my.sharepoint.com/:u:/g/personal/dongbinna_postech_ac_kr/EVd9bFWzqztMrXRDdNnCHQkBsHaM4n5_1q1fue77vtQVtw?download=1"

    # Define the path where the file will be saved
    output_path = "classification_model.pth"
    # Send a GET request to download the file
    response = requests.get(url)
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        with open(output_path, "wb") as file:
            file.write(response.content)  # Write the content of the response to the file
        print(f"File downloaded successfully and saved as {output_path}")
    else:
        print(f"Failed to download file. Status code: {response.status_code}")
    # Load pre-trained ResNet model
    model = models.resnet18(pretrained=True)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification
    pretrained_weights_path = 'classification_model.pth'
    model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    for pseudo_label_threshold in pseudo_label_thresholds:
        print(f"--- Starting Experiment: {training_folder}, Threshold: {pseudo_label_threshold} ---")
        # Define paths for this experiment
        unlabeled_folder = os.path.join(bias_folder, training_folder)
        train_folder = os.path.join(bias_folder, f"Training_pseudo_Balanced_{training_folder}_{pseudo_label_threshold}")
        aaf_training_female_path = os.path.join(train_folder, "female")
        aaf_training_male_path = os.path.join(train_folder, "male")
        model_save_folder = os.path.join(bias_folder, f"bias_{training_folder}_{pseudo_label_threshold}")
        os.makedirs(model_save_folder, exist_ok=True)

        # Create directories if they don't exist
        os.makedirs(train_folder, exist_ok=True)
        os.makedirs(aaf_training_female_path, exist_ok=True)
        os.makedirs(aaf_training_male_path, exist_ok=True)

        for iteration in range(num_iterations):
            print(f"--- Starting Iteration {iteration + 1}/{num_iterations} ---")
            print("Unlabeled dataset size:", len(os.listdir(unlabeled_folder)))

            # 1. Clean the training folder
            shutil.rmtree(train_folder)
            os.makedirs(train_folder, exist_ok=True)
            os.makedirs(aaf_training_female_path, exist_ok=True)
            os.makedirs(aaf_training_male_path, exist_ok=True)

            # 2. Pseudo-labeling
            model.eval()
            new_samples = 0
            print("Pseudo-labeling unlabeled samples...")
            for filename in os.listdir(unlabeled_folder):
                img_path = os.path.join(unlabeled_folder, filename)
                img = Image.open(img_path).convert('RGB')  # Make sure it's RGB
                img = transforms_val(img)  # Apply transformations
                # Forward pass through the model
                with torch.no_grad():
                    inputs = img.unsqueeze(0).to(device)  # Unsqueeze to add batch dimension
                    outputs = model(inputs)
                    probs = torch.softmax(outputs, dim=1)
                    max_probs, preds = torch.max(probs, dim=1)

                # Select samples with high confidence
                confident_mask = max_probs > pseudo_label_threshold
                if confident_mask.any():
                    for pred_label in preds[confident_mask]:
                        if pred_label == 1:
                            destination_folder = aaf_training_male_path
                        elif pred_label == 0:
                            destination_folder = aaf_training_female_path
                        else:
                            continue  # Skip samples with unexpected labels
                        # Move the image to the destination folder
                        shutil.copy(img_path, destination_folder)
                new_samples += 1
                if new_samples % 10000 == 0:
                    print(f"Processed {new_samples} images")
                del img, inputs, outputs, probs, max_probs, preds
                torch.cuda.empty_cache()

            print(f"Iteration {iteration + 1}: Added {new_samples} pseudo-labeled samples.")
            print(f"Number of pseudo-labeled male {len(os.listdir(aaf_training_male_path))} and female {len(os.listdir(aaf_training_female_path))} samples")

            # 3. Balance the training dataset
            pseudo_balancing(aaf_training_male_path, aaf_training_female_path)

            # Reload training dataset with new pseudo-labeled samples
            train_dataset = ImageFolder(train_folder, transform=transforms_train)
            train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)

            # 4. Training with early stopping
            best_val_loss = float('inf')
            wait = 0

            for epoch in range(num_epochs):
                model.train()
                running_loss = 0.0

                for inputs, labels in train_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item() * inputs.size(0)

                epoch_loss = running_loss / len(train_dataset)
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Train Loss: {epoch_loss:.4f}")

                # Validation
                model.eval()
                val_loss = 0.0
                correct = 0
                total = 0

                with torch.no_grad():
                    for inputs, labels in val_dataloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        val_loss += criterion(outputs, labels).item() * inputs.size(0)
                        _, preds = torch.max(outputs, 1)
                        correct += (preds == labels).sum().item()
                        total += labels.size(0)

                val_loss /= len(val_dataset)
                val_accuracy = correct / total
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    wait = 0
                    torch.save(model.state_dict(), os.path.join(model_save_folder, f"Controlled_Bias_{pseudo_label_threshold}_{iteration + 1}_balanced_best.pth"))
                else:
                    wait += 1
                    if wait >= patience:
                        print("Early stopping triggered.")
                        break

            # Save the final model
            torch.save(model.state_dict(), os.path.join(model_save_folder, f'Controlled_Bias_{pseudo_label_threshold}_{iteration + 1}_balanced_best.pth'))
            print(f"Self-training completed for {training_folder}, Threshold: {pseudo_label_threshold}.")

print("All experiments completed.")

File downloaded successfully and saved as classification_model.pth


  model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))


--- Starting Experiment: Severe_East_Asian_Female_Bias, Threshold: 0.9 ---
--- Starting Iteration 1/5 ---
Unlabeled dataset size: 6141
Pseudo-labeling unlabeled samples...
Iteration 1: Added 6141 pseudo-labeled samples.
Number of pseudo-labeled male 381 and female 1835 samples
Balanced pseudo-labeled samples: Male - 381, Female - 381
Iteration 1, Epoch 1, Train Loss: 0.0680
Iteration 1, Epoch 1, Validation Loss: 0.7060, Accuracy: 0.7761
Iteration 1, Epoch 2, Train Loss: 0.0349
Iteration 1, Epoch 2, Validation Loss: 0.6947, Accuracy: 0.7800
Iteration 1, Epoch 3, Train Loss: 0.0178
Iteration 1, Epoch 3, Validation Loss: 0.7992, Accuracy: 0.7826
Iteration 1, Epoch 4, Train Loss: 0.0248
Iteration 1, Epoch 4, Validation Loss: 0.7717, Accuracy: 0.7858
Early stopping triggered.
Self-training completed for Severe_East_Asian_Female_Bias, Threshold: 0.9.
--- Starting Iteration 2/5 ---
Unlabeled dataset size: 6141
Pseudo-labeling unlabeled samples...
Iteration 2: Added 6141 pseudo-labeled samples

In [6]:
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

# Define paths
bias_folder = "./bias_training_data"
validation_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/East_Asian_Validation")
test_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/Balanced_Validation")

# Define the training folders and pseudo-labeling thresholds
training_folders = [
    "Unlabeled",
    "Gender_Balanced_Unlabeled",
    "Severe_Male_Gender",
    "Severe_Black_Bias",
    "Severe_East_Asian_Female_Bias"
]

pseudo_label_thresholds = [0.9, 0.6]
# Define the transformations
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load validation dataset
val_dataset = ImageFolder(validation_folder, transform=transforms_val)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

# Define model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Self-training parameters
num_iterations = 3
num_epochs = 10
patience = 2

# Loop over all experiments
for training_folder in training_folders:
     # Define the URL for the file
    url = "https://postechackr-my.sharepoint.com/:u:/g/personal/dongbinna_postech_ac_kr/EVd9bFWzqztMrXRDdNnCHQkBsHaM4n5_1q1fue77vtQVtw?download=1"

    # Define the path where the file will be saved
    output_path = "classification_model.pth"
    # Send a GET request to download the file
    response = requests.get(url)
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        with open(output_path, "wb") as file:
            file.write(response.content)  # Write the content of the response to the file
        print(f"File downloaded successfully and saved as {output_path}")
    else:
        print(f"Failed to download file. Status code: {response.status_code}")
    # Load pre-trained ResNet model
    model = models.resnet18(pretrained=True)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification
    pretrained_weights_path = 'classification_model.pth'
    model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    for pseudo_label_threshold in pseudo_label_thresholds:
        print(f"--- Starting Experiment: {training_folder}, Threshold: {pseudo_label_threshold} ---")
        
        # Define paths for this experiment
        unlabeled_folder = os.path.join(bias_folder, training_folder)
        train_folder = os.path.join(bias_folder, f"Training_pseudo_Unbalanced_{training_folder}_{pseudo_label_threshold}")
        aaf_training_female_path = os.path.join(train_folder, "female")
        aaf_training_male_path = os.path.join(train_folder, "male")
        model_save_folder = os.path.join(bias_folder, f"bias_{training_folder}_{pseudo_label_threshold}")
        os.makedirs(model_save_folder, exist_ok=True)

        # Create directories if they don't exist
        os.makedirs(train_folder, exist_ok=True)
        os.makedirs(aaf_training_female_path, exist_ok=True)
        os.makedirs(aaf_training_male_path, exist_ok=True)

        for iteration in range(num_iterations):
            print(f"--- Starting Iteration {iteration + 1}/{num_iterations} ---")
            print("Unlabeled dataset size:", len(os.listdir(unlabeled_folder)))

            # 1. Clean the training folder
            shutil.rmtree(train_folder)
            os.makedirs(train_folder, exist_ok=True)
            os.makedirs(aaf_training_female_path, exist_ok=True)
            os.makedirs(aaf_training_male_path, exist_ok=True)

            # 2. Pseudo-labeling
            model.eval()
            new_samples = 0
            print("Pseudo-labeling unlabeled samples...")
            for filename in os.listdir(unlabeled_folder):
                img_path = os.path.join(unlabeled_folder, filename)
                img = Image.open(img_path).convert('RGB')  # Make sure it's RGB
                img = transforms_val(img)  # Apply transformations
                # Forward pass through the model
                with torch.no_grad():
                    inputs = img.unsqueeze(0).to(device)  # Unsqueeze to add batch dimension
                    outputs = model(inputs)
                    probs = torch.softmax(outputs, dim=1)
                    max_probs, preds = torch.max(probs, dim=1)

                # Select samples with high confidence
                confident_mask = max_probs > pseudo_label_threshold
                if confident_mask.any():
                    for pred_label in preds[confident_mask]:
                        if pred_label == 1:
                            destination_folder = aaf_training_male_path
                        elif pred_label == 0:
                            destination_folder = aaf_training_female_path
                        else:
                            continue  # Skip samples with unexpected labels
                        # Move the image to the destination folder
                        shutil.copy(img_path, destination_folder)
                new_samples += 1
                if new_samples % 10000 == 0:
                    print(f"Processed {new_samples} images")
                del img, inputs, outputs, probs, max_probs, preds
                torch.cuda.empty_cache()

            print(f"Iteration {iteration + 1}: Added {new_samples} pseudo-labeled samples.")
            print(f"Number of pseudo-labeled male {len(os.listdir(aaf_training_male_path))} and female {len(os.listdir(aaf_training_female_path))} samples")

            # Reload training dataset with new pseudo-labeled samples
            train_dataset = ImageFolder(train_folder, transform=transforms_train)
            train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)

            # 4. Training with early stopping
            best_val_loss = float('inf')
            wait = 0

            for epoch in range(num_epochs):
                model.train()
                running_loss = 0.0

                for inputs, labels in train_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item() * inputs.size(0)

                epoch_loss = running_loss / len(train_dataset)
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Train Loss: {epoch_loss:.4f}")

                # Validation
                model.eval()
                val_loss = 0.0
                correct = 0
                total = 0

                with torch.no_grad():
                    for inputs, labels in val_dataloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        val_loss += criterion(outputs, labels).item() * inputs.size(0)
                        _, preds = torch.max(outputs, 1)
                        correct += (preds == labels).sum().item()
                        total += labels.size(0)

                val_loss /= len(val_dataset)
                val_accuracy = correct / total
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    wait = 0
                    torch.save(model.state_dict(), os.path.join(model_save_folder, f"Controlled_Bias_{pseudo_label_threshold}_{iteration + 1}_unbalanced_best.pth"))
                else:
                    wait += 1
                    if wait >= patience:
                        print("Early stopping triggered.")
                        break

            # Save the final model
            torch.save(model.state_dict(), os.path.join(model_save_folder, f'Controlled_Bias_{pseudo_label_threshold}_{iteration + 1}_unbalanced_best.pth'))
            print(f"Self-training completed for {training_folder}, Threshold: {pseudo_label_threshold}.")

print("All experiments completed.")

File downloaded successfully and saved as classification_model.pth


  model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))


--- Starting Experiment: Unlabeled, Threshold: 0.9 ---
--- Starting Iteration 1/3 ---
Unlabeled dataset size: 86744
Pseudo-labeling unlabeled samples...
Processed 10000 images
Processed 20000 images
Processed 30000 images
Processed 40000 images
Processed 50000 images
Processed 60000 images
Processed 70000 images
Processed 80000 images
Iteration 1: Added 86744 pseudo-labeled samples.
Number of pseudo-labeled male 28244 and female 12927 samples
Iteration 1, Epoch 1, Train Loss: 0.0304
Iteration 1, Epoch 1, Validation Loss: 0.7713, Accuracy: 0.7806
Iteration 1, Epoch 2, Train Loss: 0.0153
Iteration 1, Epoch 2, Validation Loss: 1.0568, Accuracy: 0.7755
Iteration 1, Epoch 3, Train Loss: 0.0107
Iteration 1, Epoch 3, Validation Loss: 0.9578, Accuracy: 0.7903
Early stopping triggered.
Self-training completed for Unlabeled, Threshold: 0.9.
--- Starting Iteration 2/3 ---
Unlabeled dataset size: 86744
Pseudo-labeling unlabeled samples...
Processed 10000 images
Processed 20000 images
Processed 300

In [None]:
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

import requests

# Define paths
bias_folder = "./bias_training_data"
validation_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/East_Asian_Validation")
test_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/Balanced_Validation")

# Define the training folders and pseudo-labeling thresholds
training_folders = [
    "Severe_East_Asian_Female_Bias",
    "Severe_East_Asian_Bias"
]
pseudo_label_thresholds = [0.9, 0.6]

# Define the transformations
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load validation dataset
val_dataset = ImageFolder(validation_folder, transform=transforms_val)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

# Define model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Self-training parameters
num_iterations = 5
num_epochs = 10
patience = 2

# Function to pseudo-balance the dataset
def pseudo_balancing(male_path, female_path):
    male_samples = os.listdir(male_path) if os.path.exists(male_path) else []
    female_samples = os.listdir(female_path) if os.path.exists(female_path) else []
    num_male = len(male_samples)
    num_female = len(female_samples)
    # Determine the target size for balancing
    target_size = min(num_male, num_female)
    # Randomly downsample the larger class
    if num_male > target_size:
        excess_male_samples = np.random.choice(male_samples, num_male - target_size, replace=False)
        for sample in excess_male_samples:
            os.remove(os.path.join(male_path, sample))
    elif num_female > target_size:
        excess_female_samples = np.random.choice(female_samples, num_female - target_size, replace=False)
        for sample in excess_female_samples:
            os.remove(os.path.join(female_path, sample))
    # Recount samples after balancing
    num_male = len(os.listdir(male_path))
    num_female = len(os.listdir(female_path))
    print(f"Balanced pseudo-labeled samples: Male - {num_male}, Female - {num_female}")
    return

# Loop over all experiments
for training_folder in training_folders:
 # Define the URL for the file
    url = "https://postechackr-my.sharepoint.com/:u:/g/personal/dongbinna_postech_ac_kr/EVd9bFWzqztMrXRDdNnCHQkBsHaM4n5_1q1fue77vtQVtw?download=1"

    # Define the path where the file will be saved
    output_path = "classification_model.pth"
    # Send a GET request to download the file
    response = requests.get(url)
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        with open(output_path, "wb") as file:
            file.write(response.content)  # Write the content of the response to the file
        print(f"File downloaded successfully and saved as {output_path}")
    else:
        print(f"Failed to download file. Status code: {response.status_code}")
    # Load pre-trained ResNet model
    model = models.resnet18(pretrained=True)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification
    pretrained_weights_path = 'classification_model.pth'
    model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    for pseudo_label_threshold in pseudo_label_thresholds:
        print(f"--- Starting Experiment: {training_folder}, Threshold: {pseudo_label_threshold} ---")
        # Define paths for this experiment
        unlabeled_folder = os.path.join(bias_folder, training_folder)
        train_folder = os.path.join(bias_folder, f"Training_pseudo_Unbalanced_{training_folder}_{pseudo_label_threshold}")
        aaf_training_female_path = os.path.join(train_folder, "female")
        aaf_training_male_path = os.path.join(train_folder, "male")
        model_save_folder = os.path.join(bias_folder, f"bias_{training_folder}_{pseudo_label_threshold}")
        os.makedirs(model_save_folder, exist_ok=True)

        # Create directories if they don't exist
        os.makedirs(train_folder, exist_ok=True)
        os.makedirs(aaf_training_female_path, exist_ok=True)
        os.makedirs(aaf_training_male_path, exist_ok=True)

        for iteration in range(num_iterations):
            print(f"--- Starting Iteration {iteration + 1}/{num_iterations} ---")
            print("Unlabeled dataset size:", len(os.listdir(unlabeled_folder)))

            # 1. Clean the training folder
            shutil.rmtree(train_folder)
            os.makedirs(train_folder, exist_ok=True)
            os.makedirs(aaf_training_female_path, exist_ok=True)
            os.makedirs(aaf_training_male_path, exist_ok=True)

            # 2. Pseudo-labeling
            model.eval()
            new_samples = 0
            print("Pseudo-labeling unlabeled samples...")
            for filename in os.listdir(unlabeled_folder):
                img_path = os.path.join(unlabeled_folder, filename)
                img = Image.open(img_path).convert('RGB')  # Make sure it's RGB
                img = transforms_val(img)  # Apply transformations
                # Forward pass through the model
                with torch.no_grad():
                    inputs = img.unsqueeze(0).to(device)  # Unsqueeze to add batch dimension
                    outputs = model(inputs)
                    probs = torch.softmax(outputs, dim=1)
                    max_probs, preds = torch.max(probs, dim=1)

                # Select samples with high confidence
                confident_mask = max_probs > pseudo_label_threshold
                if confident_mask.any():
                    for pred_label in preds[confident_mask]:
                        if pred_label == 1:
                            destination_folder = aaf_training_male_path
                        elif pred_label == 0:
                            destination_folder = aaf_training_female_path
                        else:
                            continue  # Skip samples with unexpected labels
                        # Move the image to the destination folder
                        shutil.copy(img_path, destination_folder)
                new_samples += 1
                if new_samples % 10000 == 0:
                    print(f"Processed {new_samples} images")
                del img, inputs, outputs, probs, max_probs, preds
                torch.cuda.empty_cache()

            print(f"Iteration {iteration + 1}: Added {new_samples} pseudo-labeled samples.")
            print(f"Number of pseudo-labeled male {len(os.listdir(aaf_training_male_path))} and female {len(os.listdir(aaf_training_female_path))} samples")

            # 3. Balance the training dataset
            # pseudo_balancing(aaf_training_male_path, aaf_training_female_path)

            # Reload training dataset with new pseudo-labeled samples
            train_dataset = ImageFolder(train_folder, transform=transforms_train)
            train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)

            # 4. Training with early stopping
            best_val_loss = float('inf')
            wait = 0

            for epoch in range(num_epochs):
                model.train()
                running_loss = 0.0

                for inputs, labels in train_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item() * inputs.size(0)

                epoch_loss = running_loss / len(train_dataset)
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Train Loss: {epoch_loss:.4f}")

                # Validation
                model.eval()
                val_loss = 0.0
                correct = 0
                total = 0

                with torch.no_grad():
                    for inputs, labels in val_dataloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        val_loss += criterion(outputs, labels).item() * inputs.size(0)
                        _, preds = torch.max(outputs, 1)
                        correct += (preds == labels).sum().item()
                        total += labels.size(0)

                val_loss /= len(val_dataset)
                val_accuracy = correct / total
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    wait = 0
                    torch.save(model.state_dict(), os.path.join(model_save_folder, f"Controlled_Bias_{pseudo_label_threshold}_{iteration + 1}_unbalanced_best.pth"))
                else:
                    wait += 1
                    if wait >= patience:
                        print("Early stopping triggered.")
                        break

            # Save the final model
            torch.save(model.state_dict(), os.path.join(model_save_folder, f'Controlled_Bias_{pseudo_label_threshold}_{iteration + 1}_unbalanced_best.pth'))
            print(f"Self-training completed for {training_folder}, Threshold: {pseudo_label_threshold}.")

print("All experiments completed.")

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import shutil
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from PIL import Image
import requests

# Define the non-linear mapping function (e.g., convex function)
def non_linear_mapping(x):
    return x / (2 - x)  # Convex function for threshold adjustment

# Initialize class-specific thresholds and counts
num_classes = 2  # Male and Female
base_threshold = 0.95  # Base confidence threshold for pseudo-labeling
iterations = 5  # Number of self-training iterations
num_epochs = 10  # Epochs per iteration
batch_size = 16
patience = 2  # Early stopping patience

# Define device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define paths
bias_folder = "./bias_training_data"
validation_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/East_Asian_Validation")

# Define the training folders and pseudo-labeling thresholds
training_folders = [
    "Unlabeled",
    "Gender_Balanced_Unlabeled",
    "Severe_Male_Gender",
    "Severe_Female_Gender",
    "Severe_Black_Bias",
    "Severe_East_Asian_Bias",
    "Severe_East_Asian_Female_Bias"
]  
# Define transformations
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load validation dataset
val_dataset = datasets.ImageFolder(validation_folder, transform=transforms_val)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Define model
def load_pretrained_model():
    model = models.resnet18(pretrained=True)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification
    pretrained_weights_path = 'classification_model.pth'
    model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
    model = model.to(device)
    return model

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Function to pseudo-balance the dataset
def pseudo_balancing(male_path, female_path):
    male_samples = os.listdir(male_path) if os.path.exists(male_path) else []
    female_samples = os.listdir(female_path) if os.path.exists(female_path) else []
    num_male = len(male_samples)
    num_female = len(female_samples)
    target_size = min(num_male, num_female)
    if num_male > target_size:
        excess_male_samples = np.random.choice(male_samples, num_male - target_size, replace=False)
        for sample in excess_male_samples:
            os.remove(os.path.join(male_path, sample))
    elif num_female > target_size:
        excess_female_samples = np.random.choice(female_samples, num_female - target_size, replace=False)
        for sample in excess_female_samples:
            os.remove(os.path.join(female_path, sample))
    num_male = len(os.listdir(male_path))
    num_female = len(os.listdir(female_path))
    print(f"Balanced pseudo-labeled samples: Male - {num_male}, Female - {num_female}")
    return

# Loop over all experiments
for training_folder in training_folders:
        print(f"--- Starting Experiment: {training_folder} ---")
        # Define paths for this experiment
        unlabeled_folder = os.path.join(bias_folder, training_folder)
        train_folder = os.path.join(bias_folder, f"Training_pseudo_Balanced_{training_folder}_FlexMatch")
        aaf_training_female_path = os.path.join(train_folder, "female")
        aaf_training_male_path = os.path.join(train_folder, "male")
        model_save_folder = os.path.join(bias_folder, f"bias_{training_folder}_FlexMatch")
        os.makedirs(model_save_folder, exist_ok=True)

        # Create directories if they don't exist
        os.makedirs(train_folder, exist_ok=True)
        os.makedirs(aaf_training_female_path, exist_ok=True)
        os.makedirs(aaf_training_male_path, exist_ok=True)

        # Load pre-trained model
        model = load_pretrained_model()
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

        # Initialize class thresholds and counts
        class_thresholds = torch.ones(num_classes) * base_threshold

        for iteration in range(iterations):
            print(f"--- Starting Iteration {iteration + 1}/{iterations} ---")
            print("Unlabeled dataset size:", len(os.listdir(unlabeled_folder)))

            # 1. Clean the training folder
            shutil.rmtree(train_folder, ignore_errors=True)
            os.makedirs(train_folder, exist_ok=True)
            os.makedirs(aaf_training_female_path, exist_ok=True)
            os.makedirs(aaf_training_male_path, exist_ok=True)

            # 2. Pseudo-labeling using FlexMatch
            model.eval()
            class_counts = torch.zeros(num_classes)
            new_samples = 0

            for unlabeled_data in os.listdir(unlabeled_folder):
                img_path = os.path.join(unlabeled_folder, unlabeled_data)
                img = Image.open(img_path).convert('RGB')
                input = transforms_val(img).unsqueeze(0).to(device)

                with torch.no_grad():
                    output = model(input)
                    probs = F.softmax(output, dim=1)
                    max_probs, preds = torch.max(probs, dim=1)

                    # Apply FlexMatch thresholds
                    for c in range(num_classes):
                        mask = (preds == c) & (max_probs > class_thresholds[c])
                        if mask.any():
                            class_counts[c] += mask.sum().item()
                            if c == 0:
                                shutil.copy(img_path, os.path.join(aaf_training_female_path, unlabeled_data))
                            elif c == 1:
                                shutil.copy(img_path, os.path.join(aaf_training_male_path, unlabeled_data))
                            new_samples += 1

                if new_samples % 10000 == 0:
                    print(f"Processed {new_samples} images")

            # 3. Update class thresholds based on learning status
            normalized_counts = class_counts / class_counts.sum()
            for c in range(num_classes):
                class_thresholds[c] = non_linear_mapping(normalized_counts[c]) * base_threshold

            print("FlexMatch thresholds of the two classes:", class_thresholds)

            # 4. Balance the training dataset
            pseudo_balancing(aaf_training_male_path, aaf_training_female_path)

            # Reload training dataset with new pseudo-labeled samples
            train_dataset = datasets.ImageFolder(train_folder, transform=transforms_train)
            train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

            # 5. Training with early stopping
            best_val_loss = float('inf')
            wait = 0

            for epoch in range(num_epochs):
                model.train()
                running_loss = 0.0

                for inputs, labels in train_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    probs = F.softmax(outputs, dim=1)
                    max_probs, preds = torch.max(probs, dim=1)

                    # Supervised loss
                    supervised_loss = criterion(outputs, labels)

                    # Unsupervised loss (FlexMatch)
                    unsupervised_loss = 0.0
                    for c in range(num_classes):
                        mask = (preds == c) & (max_probs > class_thresholds[c])
                        if mask.any():
                            unsupervised_loss += F.cross_entropy(outputs[mask], preds[mask])

                    # Total loss
                    loss = supervised_loss + unsupervised_loss
                    loss.backward()
                    optimizer.step()

                    running_loss += loss.item() * inputs.size(0)

                epoch_loss = running_loss / len(train_dataset)
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Train Loss: {epoch_loss:.4f}")

                # Validation
                model.eval()
                val_loss = 0.0
                correct = 0
                total = 0

                with torch.no_grad():
                    for inputs, labels in val_dataloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        val_loss += criterion(outputs, labels).item() * inputs.size(0)
                        _, preds = torch.max(outputs, 1)
                        correct += (preds == labels).sum().item()
                        total += labels.size(0)

                val_loss /= len(val_dataset)
                val_accuracy = correct / total
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    wait = 0
                    torch.save(model.state_dict(), os.path.join(model_save_folder, f"FlexMatch_{iteration + 1}_best.pth"))
                else:
                    wait += 1
                    if wait >= patience:
                        print("Early stopping triggered.")
                        break

            # Save the final model
            torch.save(model.state_dict(), os.path.join(model_save_folder, f'FlexMatch_{iteration + 1}_final.pth'))
            print(f"Self-training completed for {training_folder}.")

print("All experiments completed.")

--- Starting Experiment: Unlabeled ---


  model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))


--- Starting Iteration 1/5 ---
Unlabeled dataset size: 86744
Processed 0 images
Processed 10000 images
Processed 10000 images
Processed 10000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 30000 images
Processed 30000 images
Processed 30000 images
FlexMatch thresholds of the two classes: tensor([0.1686, 0.5100])
Balanced pseudo-labeled samples: Male - 9499, Female - 9499
Iteration 1, Epoch 1, Train Loss: 0.0302
Iteration 1, Epoch 1, Validation Loss: 0.9623, Accuracy: 0.7826
Iteration 1, Epoch 2, Train Loss: 0.0260
Iteration 1, Epoch 2, Validation Loss: 0.9308, Accuracy: 0.7806
Iteration 1, Epoch 3, Train Loss: 0.0132
Iteration 1, Epoch 3, Validation Loss: 0.8846, Accuracy: 0.7923
Iteration 1, Epoch 4, Train Loss: 0.0198
Iteration 1, Epoch 4, Validation Loss: 0.9437, Accuracy: 0.8135
Iteration 1, Epoch 5, Train Loss: 0.0107
Iteration 1, Epoch 5, Validation L

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import shutil
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from PIL import Image
import requests

# Define the non-linear mapping function (e.g., convex function)
def non_linear_mapping(x):
    return x / (2 - x)  # Convex function for threshold adjustment

# Initialize class-specific thresholds and counts
num_classes = 2  # Male and Female
base_threshold = 0.95  # Base confidence threshold for pseudo-labeling
iterations = 3  # Number of self-training iterations
num_epochs = 10  # Epochs per iteration
batch_size = 16
patience = 2  # Early stopping patience

# Define device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define paths
bias_folder = "./bias_training_data"
validation_folder = os.path.join("../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset/East_Asian_Validation")

# Define the training folders and pseudo-labeling thresholds
# Define the training folders and pseudo-labeling thresholds
training_folders = [
    "Unlabeled",
    "Gender_Balanced_Unlabeled",
    "Severe_Male_Gender",
    "Severe_Black_Bias",
    "Severe_East_Asian_Female_Bias"
]

# Define transformations
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load validation dataset
val_dataset = datasets.ImageFolder(validation_folder, transform=transforms_val)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Define model
def load_pretrained_model():
    model = models.resnet18(pretrained=True)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification
    pretrained_weights_path = 'classification_model.pth'
    model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
    model = model.to(device)
    return model

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# Function to pseudo-balance the dataset
def pseudo_balancing(male_path, female_path):
    male_samples = os.listdir(male_path) if os.path.exists(male_path) else []
    female_samples = os.listdir(female_path) if os.path.exists(female_path) else []
    num_male = len(male_samples)
    num_female = len(female_samples)
    target_size = min(num_male, num_female)
    if num_male > target_size:
        excess_male_samples = np.random.choice(male_samples, num_male - target_size, replace=False)
        for sample in excess_male_samples:
            os.remove(os.path.join(male_path, sample))
    elif num_female > target_size:
        excess_female_samples = np.random.choice(female_samples, num_female - target_size, replace=False)
        for sample in excess_female_samples:
            os.remove(os.path.join(female_path, sample))
    num_male = len(os.listdir(male_path))
    num_female = len(os.listdir(female_path))
    print(f"Balanced pseudo-labeled samples: Male - {num_male}, Female - {num_female}")
    return

# Loop over all experiments
for training_folder in training_folders:
        print(f"--- Starting Experiment: {training_folder} ---")
        # Define paths for this experiment
        unlabeled_folder = os.path.join(bias_folder, training_folder)
        train_folder = os.path.join(bias_folder, f"Training_pseudo_Balanced_{training_folder}_FlexMatch")
        aaf_training_female_path = os.path.join(train_folder, "female")
        aaf_training_male_path = os.path.join(train_folder, "male")
        model_save_folder = os.path.join(bias_folder, f"bias_{training_folder}_FlexMatch")
        os.makedirs(model_save_folder, exist_ok=True)

        # Create directories if they don't exist
        os.makedirs(train_folder, exist_ok=True)
        os.makedirs(aaf_training_female_path, exist_ok=True)
        os.makedirs(aaf_training_male_path, exist_ok=True)

        # Load pre-trained model
        model = load_pretrained_model()
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

        # Initialize class thresholds and counts
        class_thresholds = torch.ones(num_classes) * base_threshold

        for iteration in range(iterations):
            print(f"--- Starting Iteration {iteration + 1}/{iterations} ---")
            print("Unlabeled dataset size:", len(os.listdir(unlabeled_folder)))

            # 1. Clean the training folder
            shutil.rmtree(train_folder, ignore_errors=True)
            os.makedirs(train_folder, exist_ok=True)
            os.makedirs(aaf_training_female_path, exist_ok=True)
            os.makedirs(aaf_training_male_path, exist_ok=True)

            # 2. Pseudo-labeling using FlexMatch
            model.eval()
            class_counts = torch.zeros(num_classes)
            new_samples = 0

            for unlabeled_data in os.listdir(unlabeled_folder):
                img_path = os.path.join(unlabeled_folder, unlabeled_data)
                img = Image.open(img_path).convert('RGB')
                input = transforms_val(img).unsqueeze(0).to(device)

                with torch.no_grad():
                    output = model(input)
                    probs = F.softmax(output, dim=1)
                    max_probs, preds = torch.max(probs, dim=1)

                    # Apply FlexMatch thresholds
                    for c in range(num_classes):
                        mask = (preds == c) & (max_probs > class_thresholds[c])
                        if mask.any():
                            class_counts[c] += mask.sum().item()
                            if c == 0:
                                shutil.copy(img_path, os.path.join(aaf_training_female_path, unlabeled_data))
                            elif c == 1:
                                shutil.copy(img_path, os.path.join(aaf_training_male_path, unlabeled_data))
                            new_samples += 1

                if new_samples % 10000 == 0:
                    print(f"Processed {new_samples} images")

            # 3. Update class thresholds based on learning status
            normalized_counts = class_counts / class_counts.sum()
            for c in range(num_classes):
                class_thresholds[c] = non_linear_mapping(normalized_counts[c]) * base_threshold

            print("FlexMatch thresholds of the two classes:", class_thresholds)

            # Reload training dataset with new pseudo-labeled samples
            train_dataset = datasets.ImageFolder(train_folder, transform=transforms_train)
            train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

            # 5. Training with early stopping
            best_val_loss = float('inf')
            wait = 0

            for epoch in range(num_epochs):
                model.train()
                running_loss = 0.0

                for inputs, labels in train_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    probs = F.softmax(outputs, dim=1)
                    max_probs, preds = torch.max(probs, dim=1)

                    # Supervised loss
                    supervised_loss = criterion(outputs, labels)

                    # Unsupervised loss (FlexMatch)
                    unsupervised_loss = 0.0
                    for c in range(num_classes):
                        mask = (preds == c) & (max_probs > class_thresholds[c])
                        if mask.any():
                            unsupervised_loss += F.cross_entropy(outputs[mask], preds[mask])

                    # Total loss
                    loss = supervised_loss + unsupervised_loss
                    loss.backward()
                    optimizer.step()

                    running_loss += loss.item() * inputs.size(0)

                epoch_loss = running_loss / len(train_dataset)
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Train Loss: {epoch_loss:.4f}")

                # Validation
                model.eval()
                val_loss = 0.0
                correct = 0
                total = 0

                with torch.no_grad():
                    for inputs, labels in val_dataloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        val_loss += criterion(outputs, labels).item() * inputs.size(0)
                        _, preds = torch.max(outputs, 1)
                        correct += (preds == labels).sum().item()
                        total += labels.size(0)

                val_loss /= len(val_dataset)
                val_accuracy = correct / total
                print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    wait = 0
                    torch.save(model.state_dict(), os.path.join(model_save_folder, f"FlexMatch_{iteration + 1}_unbalanced_best.pth"))
                else:
                    wait += 1
                    if wait >= patience:
                        print("Early stopping triggered.")
                        break

            # Save the final model
            torch.save(model.state_dict(), os.path.join(model_save_folder, f'FlexMatch_{iteration + 1}_unbalanced_final.pth'))
            print(f"Self-training completed for {training_folder}.")

print("All experiments completed.")

--- Starting Experiment: Unlabeled ---


  model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))


--- Starting Iteration 1/3 ---
Unlabeled dataset size: 86744
Processed 0 images
Processed 10000 images
Processed 10000 images
Processed 10000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 20000 images
Processed 30000 images
Processed 30000 images
Processed 30000 images
FlexMatch thresholds of the two classes: tensor([0.1686, 0.5100])
Iteration 1, Epoch 1, Train Loss: 0.0622
Iteration 1, Epoch 1, Validation Loss: 0.8102, Accuracy: 0.8194
Iteration 1, Epoch 2, Train Loss: 0.0472
Iteration 1, Epoch 2, Validation Loss: 0.8955, Accuracy: 0.8026
Iteration 1, Epoch 3, Train Loss: 0.0435
Iteration 1, Epoch 3, Validation Loss: 0.8809, Accuracy: 0.8071
Early stopping triggered.
Self-training completed for Unlabeled.
--- Starting Iteration 2/3 ---
Unlabeled dataset size: 86744
Processed 10000 images
Processed 20000 images
Processed 30000 images
Processed 40000 images
Processed 50000 images

In [10]:
import os
import random
import shutil
from torchvision.datasets import ImageFolder
from torch.utils.data import Subset
from tqdm import tqdm

# Define the validation folder
validation_folder = "../AAF-Dataset-Pseudo-Labeling-UDA/aaf_dataset/Validation"

# Define the paths to the female and male folders within the validation folder
validation_female_folder = os.path.join(validation_folder, "female")
validation_male_folder = os.path.join(validation_folder, "male")

# Count the number of samples in each folder
num_validation_female_samples = len(os.listdir(validation_female_folder))
num_validation_male_samples = len(os.listdir(validation_male_folder))

# Print the counts
print("Number of samples in Validation - Female:", num_validation_female_samples)
print("Number of samples in Validation - Male:", num_validation_male_samples)

# Balance the Validation set to have the same number of female/male classes
# Determine the minimum class count
min_validation_samples = min(num_validation_female_samples, num_validation_male_samples)
print("Balancing Validation Set...")
print(f"Minimum class count: {min_validation_samples}")

# Load the full validation dataset
validation_dataset = ImageFolder(validation_folder, transform=None)

# Separate indices for female and male classes
female_indices = [i for i, (_, label) in enumerate(validation_dataset) if label == 0]
male_indices = [i for i, (_, label) in enumerate(validation_dataset) if label == 1]

# Randomly select a subset of indices for each class
random.seed(42)  # For reproducibility
balanced_female_indices = random.sample(female_indices, min_validation_samples)
balanced_male_indices = random.sample(male_indices, min_validation_samples)

# Combine balanced indices and create a balanced dataset
balanced_indices = balanced_female_indices + balanced_male_indices
balanced_validation_dataset = Subset(validation_dataset, balanced_indices)

# Print the final balanced validation set counts
print(f"Balanced Validation Set - Female: {len(balanced_female_indices)}")
print(f"Balanced Validation Set - Male: {len(balanced_male_indices)}")
print(f"Total Balanced Validation Samples: {len(balanced_validation_dataset)}")

# Create a new folder for the balanced validation set
balanced_validation_folder = os.path.join(os.path.dirname(validation_folder), "Balanced_Validation")
# Paths for the balanced validation set
balanced_validation_female_folder = os.path.join(balanced_validation_folder, "female")
balanced_validation_male_folder = os.path.join(balanced_validation_folder, "male")

# Create directories for the balanced validation set
os.makedirs(balanced_validation_female_folder, exist_ok=True)
os.makedirs(balanced_validation_male_folder, exist_ok=True)

# Copy balanced female samples
print("Copying balanced female samples...")
for idx in tqdm(balanced_female_indices):
    source_path = validation_dataset.samples[idx][0]  # Get the file path from the dataset
    filename = os.path.basename(source_path)
    destination_path = os.path.join(balanced_validation_female_folder, filename)
    shutil.copy(source_path, destination_path)

# Copy balanced male samples
print("Copying balanced male samples...")
for idx in tqdm(balanced_male_indices):
    source_path = validation_dataset.samples[idx][0]  # Get the file path from the dataset
    filename = os.path.basename(source_path)
    destination_path = os.path.join(balanced_validation_male_folder, filename)
    shutil.copy(source_path, destination_path)

print("Balanced validation set copied successfully!")

Number of samples in Validation - Female: 3167
Number of samples in Validation - Male: 2524
Balancing Validation Set...
Minimum class count: 2524
Balanced Validation Set - Female: 2524
Balanced Validation Set - Male: 2524
Total Balanced Validation Samples: 5048
Copying balanced female samples...


100%|██████████| 2524/2524 [00:03<00:00, 644.45it/s]


Copying balanced male samples...


100%|██████████| 2524/2524 [00:04<00:00, 580.84it/s]

Balanced validation set copied successfully!





###  Obtain performance results from every experiment

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import models
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import pandas as pd

# Function to display images
def imshow(input, title):
    input = input.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    input = std * input + mean
    input = np.clip(input, 0, 1)
    plt.imshow(input)
    plt.title(title)
    plt.show()

# Define transformations
transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Define paths
aaf_dataset_path = "../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset"
test_folders = {
    "East_Asian_Validation": os.path.join(aaf_dataset_path, "East_Asian_Validation"),  # FairFace East Asian validation
    "Balanced_Validation": os.path.join(aaf_dataset_path, "Balanced_Validation"),  # FairFace Multi-Race test
    "AAF_Validation": "../AAF-Dataset-Pseudo-Labeling-UDA/aaf_dataset/Balanced_Validation"  # AAF test
}
# Define FixMatch folders
data_path = "bias_training_data"
fixmatch_folders = [
    "bias_Severe_East_Asian_Female_Bias_0.6",  # "Controlled_Bias_0.6_{1,2,3...5}_balanced_best.pth", "Controlled_Bias_0.6_{1,2,3}_unbalanced_best.pth"
    "bias_Severe_East_Asian_Female_Bias_0.9",  # "Controlled_Bias_0.9_{1,2,3...5}_balanced_best.pth", "Controlled_Bias_0.9_{1,2,3}_unbalanced_best.pth"
    "bias_Severe_Black_Bias_0.6",  # "Controlled_Bias_0.6_{1,2,3...10}_balanced_best.pth", "Controlled_Bias_0.6_{1,2,3}_unbalanced_best.pth"
    "bias_Severe_Black_Bias_0.9",  # "Controlled_Bias_0.9_{1,2,3...10}_balanced_best.pth", "Controlled_Bias_0.9_{1,2,3}_unbalanced_best.pth"
    "bias_Severe_Male_Gender_0.6",  # "Controlled_Bias_0.6_{1,2,3...10}_balanced_best.pth", "Controlled_Bias_0.6_{1,2,3}_unbalanced_best.pth"
    "bias_Severe_Male_Gender_0.9",  # "Controlled_Bias_0.9_{1,2,3...10}_balanced_best.pth", "Controlled_Bias_0.9_{1,2,3}_unbalanced_best.pth"
    "bias_Gender_Balanced_Unlabeled_0.6",  # "Controlled_Bias_0.6_{1,2,3...10}_balanced_best.pth", "Controlled_Bias_0.6_{1,2,3}_unbalanced_best.pth"
    "bias_Gender_Balanced_Unlabeled_0.9",  # "Controlled_Bias_0.9_{1,2,3...10}_balanced_best.pth", "Controlled_Bias_0.9_{1,2,3}_unbalanced_best.pth"
    "bias_Unlabeled_0.6",  # "Controlled_Bias_0.6_{1,2,3...10}_balanced_best.pth", "Controlled_Bias_0.6_{1,2,3}_unbalanced_best.pth"
    "bias_Unlabeled_0.9",  # "Controlled_Bias_0.9_{1,2,3...10}_balanced_best.pth", "Controlled_Bias_0.9_{1,2,3}_unbalanced_best.pth"
    "bias_Severe_East_Asian_Bias_0.6",  # "Controlled_Bias_0.6_{1,2,3...5}_balanced_best.pth"
    "bias_Severe_East_Asian_Bias_0.9",  # "Controlled_Bias_0.9_{1,2,3...5}_balanced_best.pth"
    "bias_Severe_Female_Gender_0.6",  # "Controlled_Bias_0.6_{1,2,3...10}_balanced_best.pth"
    "bias_Severe_Female_Gender_0.9"  # "Controlled_Bias_0.9_{1,2,3...10}_balanced_best.pth"
]
fixmatch_folders = [os.path.join(data_path, folder) for folder in fixmatch_folders]

# Initialize results storage
results = []

# Load model
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2)  # Binary classification
criterion = nn.CrossEntropyLoss()

# Loop over test folders
for test_name, test_folder in test_folders.items():
    print(f"--- Testing on {test_name} ---")

    # Load test dataset
    test_dataset = ImageFolder(test_folder, transform=transforms_val)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=True, num_workers=4)
    class_names = test_dataset.classes
    print('Class names:', class_names)

    # Loop over FixMatch folders
    for fixmatch_folder in fixmatch_folders:
        # Determine the number of models based on the folder name
        if "Severe_East_Asian_Female_Bias_0.6" in fixmatch_folder or "Severe_East_Asian_Female_Bias_0.9" in fixmatch_folder:
            num_balanced = 5  # 1-5 balanced models
            num_unbalanced = 3  # 1-3 unbalanced models
        elif "Severe_East_Asian_Bias_0.6" in fixmatch_folder or "Severe_East_Asian_Bias_0.9" in fixmatch_folder:
            num_balanced = 5  # 1-5 balanced models
            num_unbalanced = 0  # No unbalanced models
        elif "Severe_Female_Gender_0.6" in fixmatch_folder or "Severe_Female_Gender_0.9" in fixmatch_folder:
            num_balanced = 10  # 1-10 balanced models
            num_unbalanced = 0  # No unbalanced models
        else:
            num_balanced = 10  # 1-10 balanced models
            num_unbalanced = 3  # 1-3 unbalanced models
        # Generate model paths
        if "0.6" in fixmatch_folder:
            model_paths_balanced = [os.path.join(fixmatch_folder, f"Controlled_Bias_0.6_{j}_balanced_best.pth") for j in range(1, num_balanced + 1)]
            model_paths_unbalanced = [os.path.join(fixmatch_folder, f"Controlled_Bias_0.6_{j}_unbalanced_best.pth") for j in range(1, num_unbalanced + 1)]
        else:
            model_paths_balanced = [os.path.join(fixmatch_folder, f"Controlled_Bias_0.9_{j}_balanced_best.pth") for j in range(1, num_balanced + 1)]
            model_paths_unbalanced = [os.path.join(fixmatch_folder, f"Controlled_Bias_0.9_{j}_unbalanced_best.pth") for j in range(1, num_unbalanced+ 1)]
        # Combine all model paths
        all_model_paths = model_paths_balanced + model_paths_unbalanced

        # Loop over models in the current folder
        for model_path in all_model_paths:
            print(f"--- Testing model: {model_path} ---")

            # Load model state
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            state_dict = torch.load(model_path, map_location=device)
            model.load_state_dict(state_dict)
            model.to(device)
            model.eval()

            # Initialize metrics
            male_corrects = 0
            male_total = 0
            female_corrects = 0
            female_total = 0
            running_loss = 0.0
            running_corrects = 0

            start_time = time.time()

            # Test loop
            with torch.no_grad():
                for i, (inputs, labels) in enumerate(test_dataloader):
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    male_mask = labels == class_names.index('male')
                    female_mask = labels == class_names.index('female')

                    male_corrects += torch.sum(preds[male_mask] == labels[male_mask])
                    male_total += torch.sum(male_mask)

                    female_corrects += torch.sum(preds[female_mask] == labels[female_mask])
                    female_total += torch.sum(female_mask)

                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)

                    # if i == 0:
                        # print('[Prediction Result Examples]')
                        # pred_labels = [class_names[x] for x in preds]
                        # images = torchvision.utils.make_grid(inputs[:8])
                        # imshow(images.cpu(), title=pred_labels)

            # Calculate metrics
            male_acc = male_corrects.double() / male_total * 100. if male_total > 0 else 0
            female_acc = female_corrects.double() / female_total * 100. if female_total > 0 else 0
            epoch_loss = running_loss / len(test_dataset)
            epoch_acc = running_corrects / len(test_dataset) * 100.

            # Print results
            print(f'[Test] Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}% Time: {time.time() - start_time:.4f}s')
            print(f'[Test] Male Accuracy: {male_acc:.4f}%')
            print(f'[Test] Female Accuracy: {female_acc:.4f}%')

            # Save results
            results.append({
                "Test Dataset": test_name,
                "Model Folder": fixmatch_folder,
                "Model": model_path,
                "Loss": epoch_loss,
                "Accuracy": epoch_acc.item(),
                "Male Accuracy": male_acc.item(),
                "Female Accuracy": female_acc.item()
            })

# Save results to CSV
df = pd.DataFrame(results)
df.to_csv("all_FixMatch_experiments_results.csv", index=False)
print("All experiments completed. Results saved to 'all_FixMatch_experiments_results.csv'.")

[Test] Loss: 0.9503 Acc: 79.2258% Time: 28.6596s
[Test] Male Accuracy: 88.9318%
[Test] Female Accuracy: 69.4696%
--- Testing model: bias_training_data\bias_Severe_Female_Gender_0.9\Controlled_Bias_0.9_2_balanced_best.pth ---
[Test] Loss: 1.5022 Acc: 76.1936% Time: 29.4945s
[Test] Male Accuracy: 95.1094%
[Test] Female Accuracy: 57.1798%
--- Testing model: bias_training_data\bias_Severe_Female_Gender_0.9\Controlled_Bias_0.9_3_balanced_best.pth ---
[Test] Loss: 1.8155 Acc: 74.9677% Time: 28.5983s
[Test] Male Accuracy: 94.8520%
[Test] Female Accuracy: 54.9806%
--- Testing model: bias_training_data\bias_Severe_Female_Gender_0.9\Controlled_Bias_0.9_4_balanced_best.pth ---
[Test] Loss: 1.9785 Acc: 76.3871% Time: 30.1143s
[Test] Male Accuracy: 93.1789%
[Test] Female Accuracy: 59.5084%
--- Testing model: bias_training_data\bias_Severe_Female_Gender_0.9\Controlled_Bias_0.9_5_balanced_best.pth ---
[Test] Loss: 1.9918 Acc: 76.7742% Time: 30.7057s
[Test] Male Accuracy: 93.9511%
[Test] Female Accura

In [13]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import models
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import pandas as pd

# Function to display images
def imshow(input, title):
    input = input.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    input = std * input + mean
    input = np.clip(input, 0, 1)
    plt.imshow(input)
    plt.title(title)
    plt.show()

# Define transformations
transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Define paths
aaf_dataset_path = "../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset"
test_folders = {
    "East_Asian_Validation": os.path.join(aaf_dataset_path, "East_Asian_Validation"),  # FairFace East Asian validation
    "Balanced_Validation": os.path.join(aaf_dataset_path, "Balanced_Validation"),  # FairFace Multi-Race test
    "AAF_Validation": "../AAF-Dataset-Pseudo-Labeling-UDA/aaf_dataset/Balanced_Validation"  # AAF test
}

# Define model paths

data_path = "bias_training_data"

flexmatch_folders = [
    "bias_Gender_Balanced_Unlabeled_FlexMatch", # FlexMatch_{1-5}_best, FlexMatch_{1-3}_unbalanced_best
    "bias_Unlabeled_FlexMatch", # FlexMatch_{1-5}_best, FlexMatch_{1-3}_unbalanced_best
    "bias_Severe_Black_Bias_FlexMatch", # FlexMatch_{1-5}_best, FlexMatch_{1-3}_unbalanced_best
    "bias_Severe_Male_Gender_FlexMatch", # FlexMatch_{1-5}_best, FlexMatch_{1-3}_unbalanced_best
    "bias_Severe_East_Asian_Female_Bias_FlexMatch",# FlexMatch_{1-5}_best, FlexMatch_{1-3}_unbalanced_best
    "bias_Severe_East_Asian_Bias_FlexMatch",# FlexMatch_{1-5}_best
    "bias_Severe_Female_Gender_FlexMatch"# FlexMatch_{1-5}_best

]
flexmatch_folders = [os.path.join(data_path, folder) for folder in flexmatch_folders]

# Initialize results storage
results = []

# Load model
criterion = nn.CrossEntropyLoss()
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features

# Loop over test folders
for test_name, test_folder in test_folders.items():
    print(f"--- Testing on {test_name} ---")

    # Load test dataset
    test_dataset = ImageFolder(test_folder, transform=transforms_val)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=True, num_workers=4)
    class_names = test_dataset.classes
    print('Class names:', class_names)

    for flexmatch_folder in flexmatch_folders:
        # Define model paths for the current folder
        model_paths = [os.path.join(flexmatch_folder, f"FlexMatch_{j}_best.pth") for j in range(1, 6)]
        # Check if either substring is in the folder name
        if "Severe_East_Asian_Bias" in flexmatch_folder or "Severe_Female_Gender" in flexmatch_folder:
            all_model_paths = model_paths
        else:
            model_paths_unbalanced = [os.path.join(flexmatch_folder, f"FlexMatch_{i}_unbalanced_best.pth") for i in range(1, 4)]
            all_model_paths = model_paths + model_paths_unbalanced
        
        print(all_model_paths)

        for model_path in all_model_paths:
            print(f"--- Testing model: {model_path} ---")

            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            state_dict = torch.load(model_path, map_location=device)
            filtered_state_dict = {k: v for k, v in state_dict.items() if not k.startswith("fc.")}
            model.load_state_dict(filtered_state_dict, strict=False)  # strict=False ignores missing layers
            # binary classification
            model.fc = nn.Linear(num_features, 2)

            model.to(device)
            model.eval()

            # Initialize metrics
            male_corrects = 0
            male_total = 0
            female_corrects = 0
            female_total = 0
            running_loss = 0.0
            running_corrects = 0

            start_time = time.time()

            # Test loop
            with torch.no_grad():
                for i, (inputs, labels) in enumerate(test_dataloader):
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    male_mask = labels == class_names.index('male')
                    female_mask = labels == class_names.index('female')

                    male_corrects += torch.sum(preds[male_mask] == labels[male_mask])
                    male_total += torch.sum(male_mask)

                    female_corrects += torch.sum(preds[female_mask] == labels[female_mask])
                    female_total += torch.sum(female_mask)

                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)

                    if i == 0:
                        print('[Prediction Result Examples]')
                        pred_labels = [class_names[x] for x in preds]
                        # images = torchvision.utils.make_grid(inputs[:8])
                        # imshow(images.cpu(), title=pred_labels)

            # Calculate metrics
            male_acc = male_corrects.double() / male_total * 100. if male_total > 0 else 0
            female_acc = female_corrects.double() / female_total * 100. if female_total > 0 else 0
            epoch_loss = running_loss / len(test_dataset)
            epoch_acc = running_corrects / len(test_dataset) * 100.

            # Print results
            print(f'[Test] Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}% Time: {time.time() - start_time:.4f}s')
            print(f'[Test] Male Accuracy: {male_acc:.4f}%')
            print(f'[Test] Female Accuracy: {female_acc:.4f}%')

            # Save results
            results.append({
                "Dataset": test_name,
                "Model Folder": flexmatch_folder,
                "Model": model_path,
                "Loss": epoch_loss,
                "Accuracy": epoch_acc.item(),
                "Male Accuracy": male_acc.item(),
                "Female Accuracy": female_acc.item()
            })

# Save results to CSV
df = pd.DataFrame(results)
df.to_csv("all_FlexMatch_experiments_results.csv", index=False)
print("All experiments completed. Results saved to 'all_FlexMatch_experiments_results.csv'.")

--- Testing on East_Asian_Validation ---
Class names: ['female', 'male']
['bias_training_data\\bias_Gender_Balanced_Unlabeled_FlexMatch\\FlexMatch_1_best.pth', 'bias_training_data\\bias_Gender_Balanced_Unlabeled_FlexMatch\\FlexMatch_2_best.pth', 'bias_training_data\\bias_Gender_Balanced_Unlabeled_FlexMatch\\FlexMatch_3_best.pth', 'bias_training_data\\bias_Gender_Balanced_Unlabeled_FlexMatch\\FlexMatch_4_best.pth', 'bias_training_data\\bias_Gender_Balanced_Unlabeled_FlexMatch\\FlexMatch_5_best.pth', 'bias_training_data\\bias_Gender_Balanced_Unlabeled_FlexMatch\\FlexMatch_1_unbalanced_best.pth', 'bias_training_data\\bias_Gender_Balanced_Unlabeled_FlexMatch\\FlexMatch_2_unbalanced_best.pth', 'bias_training_data\\bias_Gender_Balanced_Unlabeled_FlexMatch\\FlexMatch_3_unbalanced_best.pth']
--- Testing model: bias_training_data\bias_Gender_Balanced_Unlabeled_FlexMatch\FlexMatch_1_best.pth ---


  state_dict = torch.load(model_path, map_location=device)


[Prediction Result Examples]
[Test] Loss: 0.8274 Acc: 42.0645% Time: 32.5842s
[Test] Male Accuracy: 10.1673%
[Test] Female Accuracy: 74.1268%
--- Testing model: bias_training_data\bias_Gender_Balanced_Unlabeled_FlexMatch\FlexMatch_2_best.pth ---
[Prediction Result Examples]
[Test] Loss: 0.5765 Acc: 72.4516% Time: 32.1051s
[Test] Male Accuracy: 87.9022%
[Test] Female Accuracy: 56.9211%
--- Testing model: bias_training_data\bias_Gender_Balanced_Unlabeled_FlexMatch\FlexMatch_3_best.pth ---
[Prediction Result Examples]
[Test] Loss: 0.7237 Acc: 43.6774% Time: 31.6695s
[Test] Male Accuracy: 29.3436%
[Test] Female Accuracy: 58.0854%
--- Testing model: bias_training_data\bias_Gender_Balanced_Unlabeled_FlexMatch\FlexMatch_4_best.pth ---
[Prediction Result Examples]
[Test] Loss: 0.7781 Acc: 51.9355% Time: 31.9934s
[Test] Male Accuracy: 98.7130%
[Test] Female Accuracy: 4.9159%
--- Testing model: bias_training_data\bias_Gender_Balanced_Unlabeled_FlexMatch\FlexMatch_5_best.pth ---
[Prediction Resul

In [12]:
# Test Phase
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import models
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import pandas as pd

# Function to display images
def imshow(input, title):
    input = input.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    input = std * input + mean
    input = np.clip(input, 0, 1)
    plt.imshow(input)
    plt.title(title)
    plt.show()

# Define validation transforms
transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Define dataset paths
aaf_dataset_path = "../AAF-Dataset-Pseudo-Labeling-UDA/fairface_race_dataset"
balanced_east_asian_folder = os.path.join(aaf_dataset_path, "East_Asian_Validation")  # East Asian Validation dataset
balanced_val_folder = os.path.join(aaf_dataset_path, "Balanced_Validation")
balanced_test_folder = "../AAF-Dataset-Pseudo-Labeling-UDA/aaf_dataset/Balanced_Validation"

# Define models to evaluate
models_to_evaluate = [
    '../classification_model.pth',
    '../5epoch_fairface.pth',
    '../5epoch_fairface_5epoch_mixed.pth',
    '../5epoch_mixed.pth'
]

# Define datasets to evaluate
datasets_to_evaluate = {
    "East_Asian_Validation": balanced_east_asian_folder,
    "Balanced_Validation": balanced_val_folder,
    "Balanced_Test": balanced_test_folder,
}

# Initialize results list
results = []

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loop through each model
for model_path in models_to_evaluate:
    # Load the model
    model = models.resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 2)  # Binary classification
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    # Loop through each dataset
    for dataset_name, dataset_path in datasets_to_evaluate.items():
        # Load the dataset
        dataset = ImageFolder(dataset_path, transform=transforms_val)
        dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
        class_names = dataset.classes

        # Initialize metrics
        male_corrects = 0
        male_total = 0
        female_corrects = 0
        female_total = 0
        running_loss = 0.0
        running_corrects = 0

        # Evaluate the model on the dataset
        start_time = time.time()
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                male_mask = labels == class_names.index('male')
                female_mask = labels == class_names.index('female')

                male_corrects += torch.sum(preds[male_mask] == labels[male_mask])
                male_total += torch.sum(male_mask)
                female_corrects += torch.sum(preds[female_mask] == labels[female_mask])
                female_total += torch.sum(female_mask)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                if i == 0:
                    print(f'[Prediction Result Examples for {model_path} on {dataset_name}]')
                    # pred_labels = [class_names[x] for x in preds]
                    # images = torchvision.utils.make_grid(inputs[:8])
                    # imshow(images.cpu(), title=pred_labels)

        # Compute metrics
        male_acc = male_corrects.double() / male_total * 100. if male_total > 0 else 0
        female_acc = female_corrects.double() / female_total * 100. if female_total > 0 else 0
        epoch_loss = running_loss / len(dataset)
        epoch_acc = running_corrects / len(dataset) * 100.

        # Print results
        print(f'[Validation] Model: {model_path}, Dataset: {dataset_name}')
        print(f'[Validation] Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}%, Time: {time.time() - start_time:.4f}s')
        print(f'[Validation] Male Accuracy: {male_acc:.4f}%')
        print(f'[Validation] Female Accuracy: {female_acc:.4f}%')

        # Save results
        results.append([model_path, dataset_name, epoch_loss, epoch_acc, male_acc.item(), female_acc.item()])

# Save results to CSV
df = pd.DataFrame(results, columns=["Model", "Dataset", "Loss", "Accuracy", "Male Accuracy", "Female Accuracy"])
df.to_csv("model_evaluation_results.csv", index=False)
print("Results saved to model_evaluation_results.csv")



  state_dict = torch.load(model_path, map_location=device)


[Prediction Result Examples for ../classification_model.pth on East_Asian_Validation]
[Validation] Model: ../classification_model.pth, Dataset: East_Asian_Validation
[Validation] Loss: 0.4813, Acc: 77.9355%, Time: 36.5609s
[Validation] Male Accuracy: 85.9717%
[Validation] Female Accuracy: 69.8577%
[Prediction Result Examples for ../classification_model.pth on Balanced_Validation]
[Validation] Model: ../classification_model.pth, Dataset: Balanced_Validation
[Validation] Loss: 0.5048, Acc: 76.4032%, Time: 111.6237s
[Validation] Male Accuracy: 86.6387%
[Validation] Female Accuracy: 64.6252%
[Prediction Result Examples for ../classification_model.pth on Balanced_Test]
[Validation] Model: ../classification_model.pth, Dataset: Balanced_Test
[Validation] Loss: 0.6715, Acc: 73.2766%, Time: 53.0309s
[Validation] Male Accuracy: 97.9398%
[Validation] Female Accuracy: 48.6133%
[Prediction Result Examples for ../5epoch_fairface.pth on East_Asian_Validation]
[Validation] Model: ../5epoch_fairface.pt