### FairFace Dataset as pseudo-labeling, evaluated on All-Age-Face Data (AAF - Mostly Asian)

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as T
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from PIL import Image
from dataclasses import dataclass
from pathlib import Path
from timm import create_model
!pip install torchmetrics
from torchmetrics.classification import BinaryAccuracy, Accuracy

tqdm.pandas()

base_img = Path('../Pseudo-Labeling/FairFace')

train_df = pd.read_csv('../Pseudo-Labeling/FairFace/train_labels.csv')
val_df = pd.read_csv('../Pseudo-Labeling/FairFace/val_labels.csv')

train_df.drop(columns=['service_test'], inplace=True)
val_df.drop(columns=['service_test'], inplace=True)

train_df['file'] = train_df['file'].progress_map(lambda x: base_img / x)
val_df['file'] = val_df['file'].progress_map(lambda x: base_img / x)

train_df['age'] = train_df['age'].astype('category')
train_df['age_code'] = train_df['age'].cat.codes

age_map = dict(zip(train_df['age'].cat.categories, range(len(train_df['age'].cat.categories))))
gender_map = {'Male': 0, 'Female': 1}

train_df['gender_code'] = train_df['gender'].progress_map(lambda x: gender_map[x])
val_df['age_code'] = val_df['age'].progress_map(lambda x: age_map[x])
val_df['gender_code'] = val_df['gender'].progress_map(lambda x: gender_map[x])

# Encoding race categories
train_df['race'] = train_df['race'].astype('category')
train_df['race_code'] = train_df['race'].cat.codes

race_map = dict(zip(train_df['race'].cat.categories, range(len(train_df['race'].cat.categories))))
val_df['race_code'] = val_df['race'].progress_map(lambda x: race_map[x])





[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


  0%|          | 0/86744 [00:00<?, ?it/s]

  0%|          | 0/10954 [00:00<?, ?it/s]

  0%|          | 0/86744 [00:00<?, ?it/s]

  0%|          | 0/10954 [00:00<?, ?it/s]

  0%|          | 0/10954 [00:00<?, ?it/s]

  0%|          | 0/10954 [00:00<?, ?it/s]

In [3]:

# Define device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### FairFace Data Retrieval


### Imports

In [4]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as T
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from PIL import Image
from dataclasses import dataclass
from pathlib import Path
from timm import create_model
from torchmetrics.classification import BinaryAccuracy, Accuracy
import os
tqdm.pandas()

In [4]:
import shutil

# Define directories for FairFace dataset
dataset_path_orig = Path("FairFace")
dataset_path_dest = Path("./fairface_race_dataset")
os.makedirs(dataset_path_dest, exist_ok=True)

dataset_path_training = dataset_path_dest / "Training"
dataset_path_validation = dataset_path_dest / "Validation"
os.makedirs(dataset_path_training, exist_ok=True)
os.makedirs(dataset_path_validation, exist_ok=True)

dataset_path_training_female = dataset_path_training / "female"
dataset_path_training_male = dataset_path_training / "male"
dataset_path_validation_female = dataset_path_validation / "female"
dataset_path_validation_male = dataset_path_validation / "male"
os.makedirs(dataset_path_training_female, exist_ok=True)
os.makedirs(dataset_path_training_male, exist_ok=True)
os.makedirs(dataset_path_validation_female, exist_ok=True)
os.makedirs(dataset_path_validation_male, exist_ok=True)


In [5]:
# Filter FairFace dataset by gender and age
for index, row in train_df.iterrows():
    img_path_orig = row['file']
    gender = row['gender_code']

    if gender == 1:  # Female
        img_path_dest = dataset_path_training_female / img_path_orig.name
    elif gender == 0:  # Male
        img_path_dest = dataset_path_training_male / img_path_orig.name
    else:
        print('Weird things happening')

    shutil.copy(str(img_path_orig), str(img_path_dest))

for index, row in val_df.iterrows():
    img_path_orig = row['file']
    gender = row['gender_code']

    if gender == 1:  # Female
        img_path_dest = dataset_path_validation_female / img_path_orig.name
    elif gender == 0:  # Male
        img_path_dest = dataset_path_validation_male / img_path_orig.name
    else:
        print('Weird things happening')

    shutil.copy(str(img_path_orig), str(img_path_dest))

In [6]:
age_map, gender_map, race_map

({'0-2': 0,
  '10-19': 1,
  '20-29': 2,
  '3-9': 3,
  '30-39': 4,
  '40-49': 5,
  '50-59': 6,
  '60-69': 7,
  'more than 70': 8},
 {'Male': 0, 'Female': 1},
 {'Black': 0,
  'East Asian': 1,
  'Indian': 2,
  'Latino_Hispanic': 3,
  'Middle Eastern': 4,
  'Southeast Asian': 5,
  'White': 6})

In [7]:
val_df.head()

Unnamed: 0,file,age,gender,race,age_code,gender_code,race_code
0,..\Pseudo-Labeling\FairFace\val\1.jpg,3-9,Male,East Asian,3,0,1
1,..\Pseudo-Labeling\FairFace\val\2.jpg,50-59,Female,East Asian,6,1,1
2,..\Pseudo-Labeling\FairFace\val\3.jpg,30-39,Male,White,4,0,6
3,..\Pseudo-Labeling\FairFace\val\4.jpg,20-29,Female,Latino_Hispanic,2,1,3
4,..\Pseudo-Labeling\FairFace\val\5.jpg,20-29,Male,Southeast Asian,2,0,5


In [5]:
data_dir =  "./fairface_race_dataset"

# Define the folders in the training and validation directories
train_folders = ['male', 'female']
val_folders = ['male', 'female']

# Function to count the number of pictures in each folder
def count_pictures(folder_path):
    count = 0
    for root, dirs, files in os.walk(folder_path):
        count += len(files)
    return count

# Print the number of pictures in each training folder
print("Number of pictures in each training folder:")
for folder in train_folders:
    folder_path = os.path.join(data_dir, 'Training', folder)
    num_pics = count_pictures(folder_path)
    print(f"{folder}: {num_pics}")

# Print the number of pictures in each validation folder
print("\nNumber of pictures in each validation folder:")
for folder in val_folders:
    folder_path = os.path.join(data_dir, 'Validation', folder)
    num_pics = count_pictures(folder_path)
    print(f"{folder}: {num_pics}")

Number of pictures in each training folder:
male: 45986
female: 40758

Number of pictures in each validation folder:
male: 5792
female: 5162


In [6]:

import os
import shutil
import pandas as pd
from pathlib import Path
import random
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import matplotlib.pyplot as plt

import time

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # device object
print(device)
print(torch.version.cuda)

print("CUDA Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device Name:", torch.cuda.get_device_name(0))
    print("CUDA Version:", torch.version.cuda)
else:
    print("CUDA not detected.")

random.seed(123)

# change to cpu
# device = torch.device("cpu")


cuda:0
12.1
CUDA Available: True
Device Name: NVIDIA GeForce GTX 1650 Ti
CUDA Version: 12.1


In [11]:
import requests

# Define the URL for the file
url = "https://postechackr-my.sharepoint.com/:u:/g/personal/dongbinna_postech_ac_kr/EVd9bFWzqztMrXRDdNnCHQkBsHaM4n5_1q1fue77vtQVtw?download=1"

# Define the path where the file will be saved
output_path = "classification_model.pth"

# Send a GET request to download the file
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    with open(output_path, "wb") as file:
        file.write(response.content)  # Write the content of the response to the file
    print(f"File downloaded successfully and saved as {output_path}")
else:
    print(f"Failed to download file. Status code: {response.status_code}")

File downloaded successfully and saved as classification_model.pth


In [7]:
# Load pre-trained ResNet model CNN-based Face-Gender-Classification PyTorch model 
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification

# Load pre-trained weights from .pth file
pretrained_weights_path = 'classification_model.pth'
model.load_state_dict(torch.load(pretrained_weights_path, map_location=torch.device('cpu')))

# Send model to device
model.to(device)


  model.load_state_dict(torch.load(pretrained_weights_path, map_location=torch.device('cpu')))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
### Dataset to be Evaluated
import os
import shutil
import pandas as pd
from pathlib import Path
import random
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import matplotlib.pyplot as plt

import time

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # device object
print(device)
random.seed(123)

# Dataset 3
dataset_path_sets = Path("../Pseudo-Labeling/gender_classification_dataset_asia_raw/All-Age-Faces Dataset/image sets")
dataset_path_orig = Path("../Pseudo-Labeling/gender_classification_dataset_asia_raw/All-Age-Faces Dataset/original images")
dataset_path_dest = Path("./aaf_dataset")
os.makedirs(dataset_path_dest, exist_ok=True)

dataset_path_training = dataset_path_dest / "Training"
dataset_path_validation = dataset_path_dest / "Validation"
os.makedirs(dataset_path_training, exist_ok=True)
os.makedirs(dataset_path_validation, exist_ok=True)

dataset_path_training_female = dataset_path_training / "female"
dataset_path_training_male = dataset_path_training / "male"
dataset_path_validation_female = dataset_path_validation / "female"
dataset_path_validation_male = dataset_path_validation / "male"
os.makedirs(dataset_path_training_female, exist_ok=True)
os.makedirs(dataset_path_training_male, exist_ok=True)
os.makedirs(dataset_path_validation_female, exist_ok=True)
os.makedirs(dataset_path_validation_male, exist_ok=True)

# Dataset 3
# https://pythonawesome.com/all-age-faces-dataset-mostly-asian/
# From original dataset webpage:
# Individuals from serial number 00000 to 07380 are all female, from 07381 to 13321 are male.
# Typical code: 00013A02.jpg --> image 00013, Age 02 years old
cols = ['img', 'label']
df_train = pd.read_csv(dataset_path_sets / "train.txt", sep=' ', header=None, names=cols)
df_val = pd.read_csv(dataset_path_sets / "val.txt", sep=' ', header=None, names=cols)

for idx, row in df_train.iterrows():
  img_name = row['img']
  label = row['label']
  img_path_orig = dataset_path_orig / img_name
  img_num = img_name[:5]
  img_age = img_name[6:8]
  if int(img_age) >= 18: # Only consider adults
    if label == 0:
      img_path_dest = dataset_path_training_female / img_path_orig.name
    elif label == 1:
      img_path_dest = dataset_path_training_male / img_path_orig.name
    else:
      print('Weird things happening')

    shutil.copy(str(img_path_orig), str(img_path_dest))

for idx, row in df_val.iterrows():
  img_name = row['img']
  label = row['label']
  img_path_orig = dataset_path_orig / img_name
  img_num = img_name[:5]
  img_age = img_name[6:8]
  if int(img_age) >= 18:
    if label == 0:
      img_path_dest = dataset_path_validation_female / img_path_orig.name
    elif label == 1:
      img_path_dest = dataset_path_validation_male / img_path_orig.name
    else:
      print('Weird things happening')

    shutil.copy(str(img_path_orig), str(img_path_dest))
    

cuda:0


In [19]:
#CREATE UNLABELED DATALOADER FROM THE TRAINING SPLIT
#From training dataset, take the data out of "female" and "male" folders
import os
from shutil import copy

data_dir = "./aaf_dataset" 
fairface_data_dir = "./fairface_race_dataset"

male_folder = "aaf_dataset/Training/male"
female_folder = "aaf_dataset/Training/female" 

male_folder_val = "aaf_dataset/Validation/male"
female_folder_val = "aaf_dataset/Validation/female"

unlabeled_folder = os.path.join(data_dir, "Unlabeled")
unlabeled_training_folder = os.path.join(data_dir, "Training/Unlabeled")
os.makedirs(unlabeled_folder, exist_ok=True)
os.makedirs(unlabeled_training_folder, exist_ok=True)
# Copy all files from male and female subfolders to the unlabeled folder
for folder in [male_folder, female_folder]:
    for filename in os.listdir(folder):
        source_path = os.path.join(folder, filename)
        destination_path = os.path.join(unlabeled_training_folder, filename)
        copy(source_path, destination_path)

# Copy all files from male and female subfolders to the unlabeled folder
for folder in [male_folder_val, female_folder_val]:
    for filename in os.listdir(folder):
        source_path = os.path.join(folder, filename)
        destination_path = os.path.join(unlabeled_folder, filename)
        copy(source_path, destination_path)
# Copy all files from male and female subfolders to the unlabeled folder
for folder in [male_folder, female_folder]:
    for filename in os.listdir(folder):
        source_path = os.path.join(folder, filename)
        destination_path = os.path.join(unlabeled_folder, filename)
        copy(source_path, destination_path)

print(f"Training Unlabeled dataset prepared in: {unlabeled_training_folder}")
print(f"Training Number of images in the unlabeled dataset: {len(os.listdir(unlabeled_training_folder))}")

print(f"Unlabeled dataset prepared in: {unlabeled_folder}")
print(f"Number of images in the unlabeled dataset: {len(os.listdir(unlabeled_folder))}")


# The Asia dataset remains intact for evaluation
evaluation_data_dir = fairface_data_dir
print(f"Evaluation dataset available at: {evaluation_data_dir}")

# Define the folders in the training and validation directories
train_folders = ['male', 'female']
val_folders = ['male', 'female']

# Function to count the number of pictures in each folder
def count_pictures(folder_path):
    count = 0
    for root, dirs, files in os.walk(folder_path):
        count += len(files)
    return count

# Print the number of pictures in each training folder
print("Number of pictures in each training folder:")
for folder in train_folders:
    folder_path = os.path.join(evaluation_data_dir, 'Training', folder)
    num_pics = count_pictures(folder_path)
    print(f"{folder}: {num_pics}")

# Print the number of pictures in each validation folder
print("\nNumber of pictures in each validation folder:")
for folder in val_folders:
    folder_path = os.path.join(evaluation_data_dir, 'Validation', folder)
    num_pics = count_pictures(folder_path)
    print(f"{folder}: {num_pics}")

Training Unlabeled dataset prepared in: ./aaf_dataset\Training/Unlabeled
Training Number of images in the unlabeled dataset: 5654
Unlabeled dataset prepared in: ./aaf_dataset\Unlabeled
Number of images in the unlabeled dataset: 11345
Evaluation dataset available at: ./fairface_race_dataset
Number of pictures in each training folder:
male: 45986
female: 40758

Number of pictures in each validation folder:
male: 5792
female: 5162


In [8]:
# Define the transformations
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [9]:
# clean cache
torch.cuda.empty_cache()
# check if the cache is empty
print(torch.cuda.memory_summary(device=None, abbreviated=False))
# check if cuda is available
print(torch.cuda.is_available())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  43715 KiB |  43715 KiB |  43715 KiB |      0 B   |
|       from large pool |  40320 KiB |  40320 KiB |  40320 KiB |      0 B   |
|       from small pool |   3395 KiB |   3395 KiB |   3395 KiB |      0 B   |
|---------------------------------------------------------------------------|
| Active memory         |  43715 KiB |  43715 KiB |  43715 KiB |      0 B   |
|       from large pool |  40320 KiB |  40320 KiB |  40320 KiB |      0 B   |
|       from small pool |   3395 KiB |   3395 KiB |   3395 KiB |      0 B   |
|---------------------------------------------------------------

In [12]:
import os
import shutil
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

# Define the paths for validation and training
data_dir = "./fairface_race_dataset"  
validation_path = os.path.join(data_dir, 'Validation')
training_path = os.path.join(data_dir, 'Training')
east_asian_val_path = os.path.join(data_dir, 'East_Asian_Validation')
balanced_val_path = os.path.join(data_dir, 'Balanced_Validation')

# Ensure directories exist
os.makedirs(east_asian_val_path, exist_ok=True)
os.makedirs(balanced_val_path, exist_ok=True)

# Load metadata
validation_data = pd.read_csv("../Pseudo-Labeling/FairFace/val_labels.csv")
training_data = pd.read_csv("../Pseudo-Labeling/FairFace/train_labels.csv")


# Function to copy files and separate by gender
def copy_files(file_list, gender_list, source_dir, dest_dir):
    for file, gender in tqdm(zip(file_list, gender_list), desc=f"Copying files to {dest_dir}", total=len(file_list)):
        gender_folder = "male" if gender == "Male" else "female"
        file_path = os.path.normpath(os.path.join(source_dir, gender_folder, os.path.basename(file)))
        dst_dir = os.path.join(dest_dir, gender_folder)
        os.makedirs(dst_dir, exist_ok=True)  # Create gender-specific folder if it doesn't exist
        dst = os.path.join(dst_dir, os.path.basename(file))
        if os.path.exists(file_path):
            shutil.copy(file_path, dst)
        else:
            print(f"Warning: {file_path} does not exist.")

# Create East Asian validation set
east_asian_val = validation_data[validation_data['race'] == 'East Asian']
copy_files(east_asian_val['file'], east_asian_val['gender'], validation_path, east_asian_val_path)
print(f"East Asian Validation Set created with {len(east_asian_val)} samples.")

# Create a balanced validation set across all races
min_samples_per_race = validation_data['race'].value_counts().min()
balanced_val = validation_data.groupby('race').apply(lambda x: x.sample(min_samples_per_race, random_state=42)).reset_index(drop=True)
copy_files(balanced_val['file'], balanced_val['gender'], validation_path, balanced_val_path)
print(f"Balanced Validation Set created with {len(balanced_val)} samples.")

print("Dataset splitting complete.")



Copying files to ./fairface_race_dataset\East_Asian_Validation:   0%|          | 0/1550 [00:00<?, ?it/s]

East Asian Validation Set created with 1550 samples.


Copying files to ./fairface_race_dataset\Balanced_Validation:   0%|          | 0/8463 [00:00<?, ?it/s]

Balanced Validation Set created with 8463 samples.
Dataset splitting complete.


In [None]:
#PRINT INFO
import os

evaluation_dataset_path = "./fairface_race_dataset"
training_folder = os.path.join(evaluation_dataset_path, "Training")
validation_folder = os.path.join(evaluation_dataset_path, "Validation")
east_asian_val_folder = os.path.join(evaluation_dataset_path, "East_Asian_Validation")
balanced_val_folder = os.path.join(evaluation_dataset_path, "Balanced_Validation")

# Define the paths to the female and male folders within the validation folder
validation_female_folder = os.path.join(validation_folder, "female")
validation_male_folder = os.path.join(validation_folder, "male")

# Define the paths to the east asian validation folders
east_asian_female_folder = os.path.join(east_asian_val_folder, "female")
east_asian_male_folder = os.path.join(east_asian_val_folder, "male")

# Define the paths to the balanced validation folders
balanced_female_folder = os.path.join(balanced_val_folder, "female")
balanced_male_folder = os.path.join(balanced_val_folder, "male")

# Count the number of samples in each folder
num_validation_female_samples = len(os.listdir(validation_female_folder))
num_validation_male_samples = len(os.listdir(validation_male_folder))
num_east_asian_female_samples = len(os.listdir(east_asian_female_folder))
num_east_asian_male_samples = len(os.listdir(east_asian_male_folder))
balanced_female_samples =  len(os.listdir(balanced_female_folder))
balanced_male_samples = len(os.listdir(balanced_male_folder))



# Calculate the totals
total_validation_samples = num_validation_female_samples + num_validation_male_samples
total_east_asian_samples = num_east_asian_female_samples + num_east_asian_male_samples
total_balanced = balanced_female_samples + balanced_male_samples


print("Number of samples in Validation - Female:", num_validation_female_samples)
print("Number of samples in Validation - Male:", num_validation_male_samples)
print("Total samples in Validation:", total_validation_samples)
print("Number of samples in East - Female:", num_east_asian_female_samples)
print("Number of samples in East - Male:", num_east_asian_male_samples)
print("Total samples in East:", total_east_asian_samples)

print("Number of samples in Balanced - Female:", balanced_female_samples)
print("Number of samples in Balanced - Male:", balanced_male_samples)
print("Total samples in Balanced:", total_balanced)



Number of samples in Validation - Female: 5162
Number of samples in Validation - Male: 5792
Total samples in Validation: 10954
Number of samples in East - Female: 773
Number of samples in East - Male: 777
Total samples in East: 1550
Number of samples in Balanced - Female: 3935
Number of samples in Balanced - Male: 4528
Total samples in Balanced: 8463


In [17]:
# create dataloaders
import os
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Define the paths to the folders
fairface_dataset_path = "fairface_race_dataset" # this is the FairFace dataset
east_asian_val_folder = os.path.join(fairface_dataset_path, "East_Asian_Validation")
balanced_val_folder = os.path.join(fairface_dataset_path, "Balanced_Validation")

aaf_dataset_path = "aaf_dataset" # this is the All-Age-Faces dataset
# # create training_pseudo

train_folder = os.path.join(aaf_dataset_path, "Unlabeled") # this contains the training data from pseudo-labelling of all-asian-faces
os.makedirs(train_folder, exist_ok=True)

validation_folder = os.path.join(east_asian_val_folder) # validate on east asian faces from FairFace
test_folder = os.path.join(balanced_val_folder) # test on a balanced dataset from FairFace


# Create ImageFolder datasets
val_dataset = ImageFolder(validation_folder, transform=transforms_val)
test_dataset = ImageFolder(test_folder, transform=transforms_test)

# Define batch size
batch_size = 16

# Create DataLoader objects
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

In [None]:
import numpy as np

def show_samples(dataloader, dataset_name):
    fig, axes = plt.subplots(2, 3, figsize=(12, 8))
    fig.suptitle(f"Samples from {dataset_name} Dataset")
    for ax in axes.flatten():
        ax.axis('off')

    for i, (inputs, labels) in enumerate(dataloader):
        if i == 6:
            break
        ax = axes[i // 3, i % 3]
        # Show the image using imshow function
        imshow(inputs[0], title=f"Label: {labels[0]}")
        ax.axis('on')

# Show samples from training dataset
show_samples(train_dataloader, "Training")

# Show samples from validation dataset
show_samples(val_dataloader, "Validation")

# Show samples from test dataset
show_samples(test_dataloader, "Test")



In [None]:
# Balanced Training dataset using Method 1 (Substitute pseudo-labeled training set with the newly pseudo-labeled from the current model)

import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np 
import requests
import psutil

# Define the paths to the folders
fairface_dataset_path = "fairface_race_dataset" # this is the FairFace dataset
east_asian_val_folder = os.path.join(fairface_dataset_path, "East_Asian_Validation")
balanced_val_folder = os.path.join(fairface_dataset_path, "Balanced_Validation")
aaf_dataset_path = "aaf_dataset" # this is the All-Age-Faces dataset
# # create training_pseudo
train_folder = os.path.join(aaf_dataset_path, "Training_pseudo") # this contains the training data from pseudo-labelling of all-asian-faces
aaf_training_female_path= os.path.join(train_folder, "female")
aaf_training_male_path = os.path.join(train_folder, "male")

os.makedirs(train_folder, exist_ok=True)
validation_folder = os.path.join(east_asian_val_folder) # validate on east asian faces from FairFace
test_folder = os.path.join(balanced_val_folder) # test on a balanced dataset from FairFace

unlabeled_folder = os.path.join(aaf_dataset_path, "Unlabeled") # pseudo-labeling path of the training dataset of AAF

# Create directories if they don't exist
os.makedirs(aaf_dataset_path, exist_ok=True)
os.makedirs(train_folder, exist_ok=True)
os.makedirs(aaf_training_female_path, exist_ok=True)
os.makedirs(aaf_training_male_path, exist_ok=True)
# Define the transformations
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Load validation dataset
val_dataset = ImageFolder(validation_folder, transform=transforms_val)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

# Define model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load pre-trained ResNet model CNN-based Face-Gender-Classification PyTorch model 
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification
# Load pre-trained weights from .pth file
pretrained_weights_path = 'classification_model.pth'
model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
model = model.to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import shutil
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from PIL import Image

# Define the non-linear mapping function (e.g., convex function)
def non_linear_mapping(x):
    return x / (2 - x)  # Convex function for threshold adjustment

# Initialize class-specific thresholds and counts
num_classes = 2  # Male and Female
base_threshold = 0.95  # Base confidence threshold for pseudo-labeling
iterations = 20  # Number of self-training iterations
num_epochs = 10  # Epochs per iteration
batch_size = 16
patience = 2  # Early stopping patience
# Define model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define the URL for the file
url = "https://postechackr-my.sharepoint.com/:u:/g/personal/dongbinna_postech_ac_kr/EVd9bFWzqztMrXRDdNnCHQkBsHaM4n5_1q1fue77vtQVtw?download=1"
# Define the path where the file will be saved
output_path = "classification_model.pth"
# Send a GET request to download the file
response = requests.get(url)
# Check if the request was successful (status code 200)
if response.status_code == 200:
    with open(output_path, "wb") as file:
        file.write(response.content)  # Write the content of the response to the file
    print(f"File downloaded successfully and saved as {output_path}")
else:
    print(f"Failed to download file. Status code: {response.status_code}")

# Load pre-trained ResNet model CNN-based Face-Gender-Classification PyTorch model 
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification
pretrained_weights_path = 'classification_model.pth'
model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.001)
# Initialize class thresholds and counts
class_thresholds = torch.ones(num_classes) * base_threshold  # Initial thresholds
class_counts = torch.zeros(num_classes)  # Track pseudo-labeled samples per class


for iteration in range(14, iterations):
    print(f"--- Starting Iteration {iteration + 1}/{iterations} ---")
    print("Unlabeled dataset size:", len(os.listdir(unlabeled_folder)))

    # 1. Clean the training folder
    shutil.rmtree(train_folder, ignore_errors=True)
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(aaf_training_female_path, exist_ok=True)
    os.makedirs(aaf_training_male_path, exist_ok=True)

    # 2. Pseudo-labeling using FlexMatch
    model.eval()
    class_counts = torch.zeros(num_classes)  # Reset counts for each iteration
    new_samples = 0

    for unlabeled_data in os.listdir(unlabeled_folder):
        img_path = os.path.join(unlabeled_folder, unlabeled_data)
        img = Image.open(img_path).convert('RGB')
        input = transforms_val(img).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(input)
            probs = F.softmax(output, dim=1)
            max_probs, preds = torch.max(probs, dim=1)

            # Apply FlexMatch thresholds
            for c in range(num_classes):
                mask = (preds == c) & (max_probs > class_thresholds[c])
                if mask.any():
                    class_counts[c] += mask.sum().item()
                    # Save pseudo-labeled data
                    if c == 0:
                        shutil.copy(img_path, os.path.join(aaf_training_female_path, unlabeled_data))
                    elif c == 1:
                        shutil.copy(img_path, os.path.join(aaf_training_male_path, unlabeled_data))
                    new_samples += 1

        if new_samples % 10000 == 0:
            print(f"Processed {new_samples} images")

    # 3. Update class thresholds based on learning status
    normalized_counts = class_counts / class_counts.sum()
    for c in range(num_classes):
        class_thresholds[c] = non_linear_mapping(normalized_counts[c]) * base_threshold

    print("FlexMatch thresholds of the two classes:", class_thresholds)

    # 4. Balance the training dataset
    male_samples = os.listdir(aaf_training_male_path) if os.path.exists(aaf_training_male_path) else []
    female_samples = os.listdir(aaf_training_female_path) if os.path.exists(aaf_training_female_path) else []
    num_male = len(male_samples)
    num_female = len(female_samples)
    target_size = min(num_male, num_female)
    print(f"Number of pseudo-labeled data: Male {num_male}, Female {num_female} samples before balancing.")

    if num_male > target_size:
        excess_male_samples = np.random.choice(male_samples, num_male - target_size, replace=False)
        for sample in excess_male_samples:
            os.remove(os.path.join(aaf_training_male_path, sample))
    elif num_female > target_size:
        excess_female_samples = np.random.choice(female_samples, num_female - target_size, replace=False)
        for sample in excess_female_samples:
            os.remove(os.path.join(aaf_training_female_path, sample))

    # Recount samples after balancing
    num_male = len(os.listdir(aaf_training_male_path))
    num_female = len(os.listdir(aaf_training_female_path))
    print(f"Balanced pseudo-labeled samples: Male - {num_male}, Female - {num_female}")

    # Reload training dataset with new pseudo-labeled samples
    train_dataset = datasets.ImageFolder(train_folder, transform=transforms_train)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    # 5. Training with early stopping
    best_val_loss = float('inf')
    wait = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            probs = F.softmax(outputs, dim=1)
            max_probs, preds = torch.max(probs, dim=1)

            # Supervised loss
            supervised_loss = criterion(outputs, labels)

            # Unsupervised loss (FlexMatch)
            unsupervised_loss = 0.0
            for c in range(num_classes):
                mask = (preds == c) & (max_probs > class_thresholds[c])
                if mask.any():
                    unsupervised_loss += F.cross_entropy(outputs[mask], preds[mask])

            # Total loss
            loss = supervised_loss + unsupervised_loss
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_dataset)
        print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Train Loss: {epoch_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in val_dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        val_loss /= len(val_dataset)
        val_accuracy = correct / total
        print(f"Iteration {iteration + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
            torch.save(model.state_dict(), f"asian_faces_FlexMatch_{iteration + 1}_best.pth")
        else:
            wait += 1
            if wait >= patience: 
                print("Early stopping triggered.")
                break

# Save the final model
torch.save(model.state_dict(), f'asian_facesFlexMatch_{iteration+1}_final.pth')
print("Training completed with FlexMatch and CPL.")

  model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))


File downloaded successfully and saved as classification_model.pth


  model.load_state_dict(torch.load(pretrained_weights_path, map_location=device))


--- Starting Iteration 1/20 ---
Unlabeled dataset size: 11345
Processed 0 images
FlexMatch thresholds of the two classes: tensor([0.0842, 0.6839])
Number of pseudo-labeled data: Male 4339, Female 844 samples before balancing.
Balanced pseudo-labeled samples: Male - 844, Female - 844
Iteration 1, Epoch 1, Train Loss: 0.0464
Iteration 1, Epoch 1, Validation Loss: 0.5597, Accuracy: 0.7723
Iteration 1, Epoch 2, Train Loss: 0.0236
Iteration 1, Epoch 2, Validation Loss: 0.5994, Accuracy: 0.7697
Iteration 1, Epoch 3, Train Loss: 0.0106
Iteration 1, Epoch 3, Validation Loss: 0.5390, Accuracy: 0.7735
Iteration 1, Epoch 4, Train Loss: 0.0085
Iteration 1, Epoch 4, Validation Loss: 0.5549, Accuracy: 0.7852
Iteration 1, Epoch 5, Train Loss: 0.0101
Iteration 1, Epoch 5, Validation Loss: 0.6159, Accuracy: 0.7529
Early stopping triggered.
--- Starting Iteration 2/20 ---
Unlabeled dataset size: 11345
Processed 10000 images
FlexMatch thresholds of the two classes: tensor([0.2198, 0.4311])
Number of pseu

KeyboardInterrupt: 

In [None]:
# Test Phase
# import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import models
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import pandas as pd
def imshow(input, title):
    # Convert torch.Tensor to numpy array
    input = input.numpy().transpose((1, 2, 0))
    # Undo image normalization
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    input = std * input + mean
    input = np.clip(input, 0, 1)
    # Display images
    plt.imshow(input)
    plt.title(title)
    plt.show()

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

aaf_dataset_path = "./fairface_race_dataset"
# Load the model weights
balanced_test_folder = os.path.join(aaf_dataset_path, "East_Asian_Validation")
# Now the model is ready for inference
batch_size = 16
balanced_test_dataset = ImageFolder(balanced_test_folder, transform=transforms_val)
balanced_test_dataloader = DataLoader(balanced_test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

class_names = balanced_test_dataset.classes
print('Class names:', class_names)
criterion = nn.CrossEntropyLoss()
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
results = []
for j in range(1, 15):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    state_dict = torch.load(f'asian_faces_FlexMatch_{j}_best.pth', map_location=device)
    filtered_state_dict = {k: v for k, v in state_dict.items() if not k.startswith("fc.")}
    model.load_state_dict(filtered_state_dict, strict=False)  # strict=False ignores missing layers
    # binary classification
    model.fc = nn.Linear(num_features, 2)
    
    print('Iteration: ', j)
    # Move the model to the device
    model.to(device)
    model.eval()  # Set the model to evaluation mode

    start_time = time.time()

    male_corrects = 0
    male_total = 0
    female_corrects = 0
    female_total = 0

    with torch.no_grad():
        running_loss = 0.
        running_corrects = 0

        for i, (inputs, labels) in enumerate(balanced_test_dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            male_mask = labels == class_names.index('male')
            female_mask = labels == class_names.index('female')

            male_corrects += torch.sum(preds[male_mask] == labels[male_mask])
            male_total += torch.sum(male_mask)

            female_corrects += torch.sum(preds[female_mask] == labels[female_mask])
            female_total += torch.sum(female_mask)

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

            if i == 0:
                print('[Prediction Result Examples]')
                pred_labels = [class_names[x] for x in preds]
                images = torchvision.utils.make_grid(inputs[:8])
                # imshow(images.cpu(), title=pred_labels)

        male_acc = male_corrects.double() / male_total * 100. if male_total > 0 else 0
        female_acc = female_corrects.double() / female_total * 100. if female_total > 0 else 0

        epoch_loss = running_loss / len(balanced_test_dataset)
        epoch_acc = running_corrects / len(balanced_test_dataset) * 100.
        print('[Test] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch_loss, epoch_acc,
                                                                            time.time() - start_time))
        print('[Test] Male Accuracy: {:.4f}%'.format(male_acc))
        print('[Test] Female Accuracy: {:.4f}%'.format(female_acc))
        # save male, female accuracy and iteration to csv
        results.append([j, epoch_loss, epoch_acc, male_acc.item(), female_acc.item()])

# Save results to CSV
df = pd.DataFrame(results, columns=["Iteration", "Loss", "Accuracy", "Male Accuracy", "Female Accuracy"])
df.to_csv("aaf_flexmatch_validation_results.csv", index=False)



Class names: ['female', 'male']


  state_dict = torch.load(f'asian_faces_FlexMatch_{j}_best.pth', map_location=device)


Iteration:  1
[Prediction Result Examples]
[Test] Loss: 0.6834 Acc: 53.2903% Time: 19.1626s
[Test] Male Accuracy: 86.6152%
[Test] Female Accuracy: 19.7930%
Iteration:  2
[Prediction Result Examples]
[Test] Loss: 0.8026 Acc: 37.1613% Time: 21.9370s
[Test] Male Accuracy: 38.3526%
[Test] Female Accuracy: 35.9638%
Iteration:  3
[Prediction Result Examples]
[Test] Loss: 0.7358 Acc: 46.2581% Time: 25.5070s
[Test] Male Accuracy: 19.0476%
[Test] Female Accuracy: 73.6093%
Iteration:  4
[Prediction Result Examples]
[Test] Loss: 0.7091 Acc: 48.9032% Time: 23.5176s
[Test] Male Accuracy: 83.6551%
[Test] Female Accuracy: 13.9715%
Iteration:  5
[Prediction Result Examples]
[Test] Loss: 0.6639 Acc: 56.1936% Time: 26.6687s
[Test] Male Accuracy: 17.2458%
[Test] Female Accuracy: 95.3428%
Iteration:  6
[Prediction Result Examples]
[Test] Loss: 0.7160 Acc: 50.8387% Time: 35.0273s
[Test] Male Accuracy: 9.0090%
[Test] Female Accuracy: 92.8849%
Iteration:  7
[Prediction Result Examples]
[Test] Loss: 0.6845 Ac

In [None]:
# Test Phase
# import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import models
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import pandas as pd
def imshow(input, title):
    # Convert torch.Tensor to numpy array
    input = input.numpy().transpose((1, 2, 0))
    # Undo image normalization
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    input = std * input + mean
    input = np.clip(input, 0, 1)
    # Display images
    plt.imshow(input)
    plt.title(title)
    plt.show()

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

aaf_dataset_path = "./fairface_race_dataset"
# Load the model weights
balanced_test_folder = os.path.join(aaf_dataset_path, "East_Asian_Validation")
# Now the model is ready for inference
batch_size = 16
balanced_test_dataset = ImageFolder(balanced_test_folder, transform=transforms_val)
balanced_test_dataloader = DataLoader(balanced_test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

class_names = balanced_test_dataset.classes
print('Class names:', class_names)
criterion = nn.CrossEntropyLoss()
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
results = []
for j in range(1, 11):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    state_dict = torch.load(f'DST_0.6_{j}_balanced_best.pth', map_location=device)
    filtered_state_dict = {k: v for k, v in state_dict.items() if not k.startswith("fc.")}
    model.load_state_dict(filtered_state_dict, strict=False)  # strict=False ignores missing layers
    # binary classification
    model.fc = nn.Linear(num_features, 2)
    
    print('Iteration: ', j)
    # Move the model to the device
    model.to(device)
    model.eval()  # Set the model to evaluation mode

    start_time = time.time()

    male_corrects = 0
    male_total = 0
    female_corrects = 0
    female_total = 0

    with torch.no_grad():
        running_loss = 0.
        running_corrects = 0

        for i, (inputs, labels) in enumerate(balanced_test_dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            male_mask = labels == class_names.index('male')
            female_mask = labels == class_names.index('female')

            male_corrects += torch.sum(preds[male_mask] == labels[male_mask])
            male_total += torch.sum(male_mask)

            female_corrects += torch.sum(preds[female_mask] == labels[female_mask])
            female_total += torch.sum(female_mask)

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

            if i == 0:
                print('[Prediction Result Examples]')
                pred_labels = [class_names[x] for x in preds]
                images = torchvision.utils.make_grid(inputs[:8])
                # imshow(images.cpu(), title=pred_labels)

        male_acc = male_corrects.double() / male_total * 100. if male_total > 0 else 0
        female_acc = female_corrects.double() / female_total * 100. if female_total > 0 else 0

        epoch_loss = running_loss / len(balanced_test_dataset)
        epoch_acc = running_corrects / len(balanced_test_dataset) * 100.
        print('[Validation] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch_loss, epoch_acc,
                                                                            time.time() - start_time))
        print('[Validation] Male Accuracy: {:.4f}%'.format(male_acc))
        print('[Validation] Female Accuracy: {:.4f}%'.format(female_acc))
        # save male, female accuracy and iteration to csv
        results.append([j, epoch_loss, epoch_acc, male_acc.item(), female_acc.item()])

# Save results to CSV
df = pd.DataFrame(results, columns=["Iteration", "Loss", "Accuracy", "Male Accuracy", "Female Accuracy"])
df.to_csv("aaf_flexmatch_validation_results.csv", index=False)


In [None]:
# Test Phase
# import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import models
import numpy as np
import matplotlib.pyplot as plt
import time
import os

def imshow(input, title):
    # Convert torch.Tensor to numpy array
    input = input.numpy().transpose((1, 2, 0))
    # Undo image normalization
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    input = std * input + mean
    input = np.clip(input, 0, 1)
    # Display images
    plt.imshow(input)
    plt.title(title)
    plt.show()

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

aaf_dataset_path = "./gender_classification_dataset_asia"
# Load the model weights
balanced_test_folder = os.path.join(aaf_dataset_path, "Test_balanced")
# Now the model is ready for inference
batch_size = 16
balanced_test_dataset = ImageFolder(balanced_test_folder, transform=transforms_val)
balanced_test_dataloader = DataLoader(balanced_test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

class_names = balanced_test_dataset.classes
print('Class names:', class_names)
criterion = nn.CrossEntropyLoss()
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # Output layer for binary classification

for j in range(1, 13):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    state_dict = torch.load(f'method_1_0.9_{j}_balanced_best.pth', map_location=device)
    model.load_state_dict(state_dict) 
    print('Iteration: ', j)
    # Move the model to the device
    model.to(device)
    model.eval()  # Set the model to evaluation mode

    start_time = time.time()

    male_corrects = 0
    male_total = 0
    female_corrects = 0
    female_total = 0

    with torch.no_grad():
        running_loss = 0.
        running_corrects = 0

        for i, (inputs, labels) in enumerate(balanced_test_dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            male_mask = labels == class_names.index('male')
            female_mask = labels == class_names.index('female')

            male_corrects += torch.sum(preds[male_mask] == labels[male_mask])
            male_total += torch.sum(male_mask)

            female_corrects += torch.sum(preds[female_mask] == labels[female_mask])
            female_total += torch.sum(female_mask)

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

            if i == 0:
                # print('[Prediction Result Examples]')
                pred_labels = [class_names[x] for x in preds]
                images = torchvision.utils.make_grid(inputs[:8])
                # imshow(images.cpu(), title=pred_labels)

        male_acc = male_corrects.double() / male_total * 100. if male_total > 0 else 0
        female_acc = female_corrects.double() / female_total * 100. if female_total > 0 else 0

        epoch_loss = running_loss / len(balanced_test_dataset)
        epoch_acc = running_corrects / len(balanced_test_dataset) * 100.
        print('[Test] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch_loss, epoch_acc,
                                                                            time.time() - start_time))
        print('[Test] Male Accuracy: {:.4f}%'.format(male_acc))
        print('[Test] Female Accuracy: {:.4f}%'.format(female_acc))

In [21]:

torch.save(model.state_dict(), 'self_training_unbalanced_female_model-0.6_5_best.pth')
print("Saved PyTorch Model State to self_training_final_model-0.6_5_best.pth")

Saved PyTorch Model State to self_training_final_model-0.6_5_best.pth


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms



# Define transforms for training and validation data
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_unlabeled = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transforms_val_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
