First, let's import the necessary libraries and modules:

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from concurrent.futures import ThreadPoolExecutor
import os
import random
from PIL import Image
from sklearn.preprocessing import MinMaxScaler

2023-05-04 13:47:26.068279: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# import Keras layers
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import BatchNormalization
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Lambda, Reshape, Conv2DTranspose

## One-hot encoding


In [3]:
def convert_age(age):
    age_dict = {'0-2': 0, '3-9': 1, '10-19': 2, '20-29': 3, '30-39': 4, '40-49': 5, '50-59': 6, '60-69': 7, 'more than 70': 8}
    return np.eye(9)[[age_dict[a] for a in age]]

def convert_gender(gender):
    gender_dict = {'Male': 0, 'Female': 1}
    return np.eye(2)[[gender_dict[g] for g in gender]]

def convert_race(race):
    race_dict = {'Black': 0, 'Latino_Hispanic': 1, 'East Asian': 2, 'White': 3, 'Southeast Asian': 4, 'Middle Eastern': 5, 'Indian': 6}
    return np.eye(7)[[race_dict[r] for r in race]]

## Loading FairFace dataset

In [4]:
# function to read image
def read_image(image_path):
    with Image.open(image_path) as image:
        return np.asarray(image)

# function to read image
def read_image(image_path):
    with Image.open(image_path) as image:
        return np.asarray(image)

# read data, labels in lists
def get_dataset(DATA_DIR, mode, sample=False):
    if mode == 'train':
        df = pd.read_csv(os.path.join(DATA_DIR, 'fairface_label_train.csv'))
    elif mode == 'val':
        df = pd.read_csv(os.path.join(DATA_DIR, 'fairface_label_val.csv'))
    else:
        raise ValueError

    age = df['age'].values.tolist()
    gender = df['gender'].values.tolist()
    race = df['race'].values.tolist()
    filenames = df['file'].values.tolist()

    image_paths = [os.path.join(DATA_DIR, name) for name in filenames]

    if sample:
        if mode == 'train':
            sample_size = 6000
        elif mode == 'val':
            sample_size = 1000
        sampled_indexes = random.sample(range(len(image_paths)), sample_size)
        image_paths = [image_paths[i] for i in sampled_indexes]
        age = [age[i] for i in sampled_indexes]
        gender = [gender[i] for i in sampled_indexes]
        race = [race[i] for i in sampled_indexes]


    with ThreadPoolExecutor() as executor:
        all_img = list(executor.map(read_image, image_paths))
    
    onehot_age = convert_age(age)
    onehot_gender = convert_gender(gender)
    onehot_race = convert_race(race)

    return all_img, onehot_age, onehot_gender, onehot_race

In [5]:
DATA_DIR = '/Users/harshvardhan/Library/CloudStorage/Dropbox/Academics/UTK Classes/Spring 2023/Deep Learning/Final Project - FairFace Data/cosc-525-final-project/fairface-img-margin025-trainval/'

In [6]:
train_img, train_age, train_gender, train_race = get_dataset(DATA_DIR, 'train', sample=True)
val_img, val_age, val_gender, val_race = get_dataset(DATA_DIR, 'val', sample=True)

In [7]:
img = Image.open("/Users/harshvardhan/Library/CloudStorage/Dropbox/Academics/UTK Classes/Spring 2023/Deep Learning/Final Project - FairFace Data/cosc-525-final-project/fairface-img-margin025-trainval/train/1.jpg")
print(img.size)  # prints the width and height of the image

(224, 224)


In [10]:
# min-max scaling
flattened_train_img = [img.reshape(224*224*3) for img in train_img]
flattened_val_img = [img.reshape(224*224*3) for img in val_img]

scaler = MinMaxScaler()
scaler.fit(flattened_train_img)

scaled_train_img = scaler.transform(flattened_train_img)
scaled_val_img = scaler.transform(flattened_val_img)

scaled_train_img = np.array([img.reshape(224,224,3) for img in scaled_train_img])
scaled_val_img = np.array([img.reshape(224,224,3) for img in scaled_val_img])

In [None]:
len(scaled_val_img)

In [None]:
from tensorflow.keras import backend as K
def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.

    # Arguments
        args (tensor): mean and log of variance of Q(z|X)

    # Returns
        z (tensor): sampled latent vector
    """
    #Extract mean and log of variance
    z_mean, z_log_var = args
    #get batch size and length of vector (size of latent space)
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]

    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = K.random_normal(shape=(batch, dim))
    #Return sampled number (need to raise var to correct power)
    return z_mean + K.exp(z_log_var) * epsilon

## Encoder Model

In [None]:
latent_dim = 10

In [None]:
inputs = Input(shape=(224, 224, 3), name='encoder_input')
x = Conv2D(filters=16, kernel_size=3, strides=(1, 1), padding="valid", activation='relu')(inputs)
x = MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid")(x)
x = Conv2D(filters=32, kernel_size=3, strides=(1, 1), padding='valid', activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid")(x)
x = Conv2D(filters=64, kernel_size=3, strides=(1, 1), padding='valid', activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid")(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

2023-04-26 16:14:10.362918: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# use reparameterization trick to push the sampling out as input
z = Lambda(sampling, name='z')([z_mean, z_log_var])

# instantiate encoder model
encoder = Model(inputs, z, name='encoder_output')
encoder.summary()

Model: "encoder_output"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 222, 222, 16  448         ['encoder_input[0][0]']          
                                )                                                                 
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 111, 111, 16  0           ['conv2d[0][0]']                 
                                )                                                    

# Decoder Model

In [None]:
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')

x = Dense(7*7*512, activation='relu', name="decoder_hidden_layer")(latent_inputs)
x = Reshape((7,7,512))(x)
x = Conv2DTranspose(filters = 256, kernel_size = (4,4), strides = 2, padding = 'same',activation = 'relu')(x)
x = Conv2DTranspose(filters=128,kernel_size=(4,4),strides=2,padding='same', activation='relu')(x)
x = Conv2DTranspose(filters=64,kernel_size=(4,4),strides=2,padding='same', activation='relu')(x)
x = Conv2DTranspose(filters=32,kernel_size=(4,4),strides=2,padding='same', activation='relu')(x)
x = Conv2DTranspose(filters=3,kernel_size=(4,4),strides=2,padding='same', activation='sigmoid')(x)


In [None]:
# instantiate decoder model
decoder = Model(latent_inputs, outputs=x, name='decoder_output')
decoder.summary()

Model: "decoder_output"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 z_sampling (InputLayer)     [(None, 10)]              0         
                                                                 
 decoder_hidden_layer (Dense  (None, 25088)            275968    
 )                                                               
                                                                 
 reshape (Reshape)           (None, 7, 7, 512)         0         
                                                                 
 conv2d_transpose (Conv2DTra  (None, 14, 14, 256)      2097408   
 nspose)                                                         
                                                                 
 conv2d_transpose_1 (Conv2DT  (None, 28, 28, 128)      524416    
 ranspose)                                                       
                                                    

In [None]:
outputs = decoder(encoder(inputs))
vae = Model(inputs = inputs, outputs = outputs)
vae.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_input (InputLayer)  [(None, 224, 224, 3)]     0         
                                                                 
 encoder_output (Functional)  (None, 10)               5564084   
                                                                 
 decoder_output (Functional)  (None, 224, 224, 3)      3063267   
                                                                 
Total params: 8,627,351
Trainable params: 8,627,351
Non-trainable params: 0
_________________________________________________________________


In [None]:
from tensorflow.keras.losses import mse, binary_crossentropy
# setting loss
reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))
reconstruction_loss *= (32*32) #image_width*image_height

kl_loss = K.exp(z_log_var) + K.square(z_mean) - z_log_var - 1
print(kl_loss.shape)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= 0.05

vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

(None, 10)


In [35]:
bs = 32
epochs = 1

In [36]:
train_history = vae.fit(scaled_train_img, scaled_train_img, batch_size=bs, epochs=epochs, verbose=1)



In [37]:
encoded_val_images = encoder.predict(scaled_val_img)



In [39]:
len(encoded_val_images)

547

# Classifier Model

In [40]:
# Base shared layers
base_input = Input(shape=(latent_dim,), name='base_input')
x = Dense(7*7*64, activation='relu')(base_input)
x = Reshape((7, 7, 64))(x)

In [41]:
# Common CNN layers
x = Conv2D(128, kernel_size=3, activation='relu', padding='same')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(64, kernel_size=3, activation='relu', padding='same')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)


In [42]:

# Age output
age_output = Dense(64, activation='relu')(x)
age_output = Dense(9, activation='softmax', name='age_output')(age_output)

# Gender output
gender_output = Dense(64, activation='relu')(x)
gender_output = Dense(2, activation='softmax', name='gender_output')(gender_output)

# Race output
race_output = Dense(64, activation='relu')(x)
race_output = Dense(7, activation='softmax', name='race_output')(race_output)


In [43]:

# Combine into a single model
classifier_model = Model(inputs=base_input, outputs=[age_output, gender_output, race_output])

# Compile the classifier model with appropriate loss functions and metrics
classifier_model.compile(optimizer='adam',
                         loss={'age_output': 'categorical_crossentropy',
                               'gender_output': 'categorical_crossentropy',
                               'race_output': 'categorical_crossentropy'},
                         metrics={'age_output': 'accuracy',
                                  'gender_output': 'accuracy',
                                  'race_output': 'accuracy'})

In [44]:
classifier_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 base_input (InputLayer)        [(None, 10)]         0           []                               
                                                                                                  
 dense_1 (Dense)                (None, 3136)         34496       ['base_input[0][0]']             
                                                                                                  
 reshape_1 (Reshape)            (None, 7, 7, 64)     0           ['dense_1[0][0]']                
                                                                                                  
 conv2d_3 (Conv2D)              (None, 7, 7, 128)    73856       ['reshape_1[0][0]']              
                                                                                            

### Training Classifier

In [46]:
encoded_train_images = encoder.predict(scaled_train_img)



In [47]:
history = classifier_model.fit(encoded_train_images,
                               {'age_output': train_age,
                                'gender_output': train_gender,
                                'race_output': train_race},
                               epochs=1,  # adjust this value based on desired training time and performance
                               batch_size=128,  # adjust this value based on your hardware capabilities
                               validation_split=0.1)  # 10% of the training data will be used for validation



In [49]:
len(scaled_val_img)

547

In [48]:
encoded_val_images = encoder.predict(scaled_val_img)

results = classifier_model.evaluate(encoded_val_images,
                                    {'age_output': val_age,
                                     'gender_output': val_gender,
                                     'race_output': val_race},
                                    batch_size=128)



ValueError: Data cardinality is ambiguous:
  x sizes: 547
  y sizes: 10000, 10000, 10000
Make sure all arrays contain the same number of samples.