In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

! cp "/content/gdrive/My Drive/InfoFaceRecognize/dataset/mydataset.zip" .
! unzip mydataset.zip

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Archive:  mydataset.zip
replace train/43.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


### Import Libs ###

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (GlobalAveragePooling2D, Dense, Flatten, 
                                     Dropout, ZeroPadding2D, Convolution2D,
                                     MaxPooling2D, Activation)
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model, Sequential
import matplotlib.pyplot as plt
from PIL import ImageFile
import pandas as pd
import numpy as np
import random

### Set Params ###

In [None]:
# Let PIL decode image files that are truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Directory handed to `flow_from_dataframe`; the 'file' column is resolved against
# it. An empty string means the paths are relative to the working directory.
IMG_PATH = ""

IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
# Keras `target_size` is ordered (height, width). The two values are equal today,
# but the correct order keeps this right if a non-square size is ever used.
IMAGE_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)

BATCH_SIZE = 128
EPOCHS = 150

### Load and transform data ###

In [None]:
def create_list(value, n_labels):
    """Build a one-hot vector of length ``n_labels`` with a 1 at index ``value``.

    Args:
        value: Non-negative integer class index (e.g. a pandas categorical code).
        n_labels: Total number of classes, i.e. the length of the vector.

    Returns:
        A 1-D float ``numpy.ndarray`` of zeros with ``value``'s position set to 1.

    Raises:
        IndexError: If ``value`` is negative or not smaller than ``n_labels``.
    """
    # pandas encodes a missing category as code -1; NumPy would accept that as
    # "last element" and silently label the sample with the last class.
    if np.any(np.asarray(value) < 0):
        raise IndexError(
            "class index must be non-negative, got {!r}".format(value))
    cat_list = np.zeros(n_labels)
    cat_list[value] = 1  # an index >= n_labels raises IndexError here
    return cat_list

In [None]:
# Read the training annotations (file path plus age / gender / race labels).
train_df = pd.read_csv(filepath_or_buffer="fairface_label_new_train.csv")

# Disabled experiment: merge the two Asian race labels into a single 'Asian' class.
# idx = train_df[(train_df['race'] == 'East Asian') | (train_df['race'] == 'Southeast Asian')].index
# train_df.loc[idx, 'race'] = 'Asian'

# Preview the first five rows.
train_df.head(5)

Unnamed: 0,file,age,gender,race,service_test
0,train/2.jpg,30-39,Female,Indian,False
1,train/3.jpg,3-9,Female,Black,False
2,train/4.jpg,20-29,Female,Indian,True
3,train/5.jpg,20-29,Female,Indian,True
4,train/7.jpg,40-49,Male,Middle Eastern,False


In [None]:
# One-hot encode every label column. For each of age / gender / race:
#   1. convert the column to a pandas Categorical,
#   2. take the integer category codes,
#   3. expand each code into a one-hot vector stored in a new `cat_<label>` column.
for label_col in ('age', 'gender', 'race'):
    train_df[label_col] = pd.Categorical(train_df[label_col])
    class_codes = train_df[label_col].cat.codes
    n_classes = train_df[label_col].nunique()
    train_df['cat_' + label_col] = class_codes.apply(create_list, args=(n_classes,))

print(train_df.shape)
train_df.head()

(54126, 8)


Unnamed: 0,file,age,gender,race,service_test,cat_age,cat_gender,cat_race
0,train/2.jpg,30-39,Female,Indian,False,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0]","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]"
1,train/3.jpg,3-9,Female,Black,False,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
2,train/4.jpg,20-29,Female,Indian,True,"[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0]","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]"
3,train/5.jpg,20-29,Female,Indian,True,"[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0]","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]"
4,train/7.jpg,40-49,Male,Middle Eastern,False,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]","[0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]"


In [None]:
# Hold out 20% of the labelled data for validation, stratified on the
# combination of all three labels.
# NOTE: this rebinds `train_df`, so re-running the cell splits the already-split
# training set again.
stratify_labels = train_df[['race', 'age', 'gender']]
train_df, valid_df = train_test_split(
    train_df,
    test_size=0.2,
    random_state=42,
    stratify=stratify_labels,
)

In [None]:
# Read the annotations of the FairFace validation split, kept aside as the test set.
test_df = pd.read_csv(filepath_or_buffer="fairface_label_new_val.csv")

# Preview the first five rows.
test_df.head(5)

Unnamed: 0,file,age,gender,race,service_test
0,val/1.jpg,3-9,Male,East Asian,False
1,val/2.jpg,50-59,Female,East Asian,True
2,val/4.jpg,20-29,Female,Latino_Hispanic,True
3,val/5.jpg,20-29,Male,Southeast Asian,False
4,val/6.jpg,30-39,Male,Latino_Hispanic,False


### Create and build model ###

In [None]:
# Create the base model from the pre-trained model MobileNet V2
base_model = MobileNetV2(input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT,3), 
                         include_top=False, weights='imagenet')

In [None]:
# Start with the whole backbone unfrozen ...
base_model.trainable = True

# Report the backbone depth to help choose the fine-tuning cut-off.
print("Number of layers in the base model: ", len(base_model.layers))

# ... then freeze everything before this layer index; only layers from
# `fine_tune_at` onwards stay trainable.
fine_tune_at = 100

for frozen_layer in base_model.layers[:fine_tune_at]:
    frozen_layer.trainable = False

Number of layers in the base model:  154


In [None]:
def _attach_head(features, n_classes, head_name):
    """Stack one softmax classification head on top of ``features``.

    The head is: global average pooling -> Dense(1024, relu) -> Dropout(0.2)
    -> Dense(512, relu) -> Dropout(0.2) -> Dense(n_classes, softmax).

    Args:
        features: Output tensor of the shared backbone.
        n_classes: Number of units in the final softmax layer.
        head_name: Name of the final layer; used as the key for this output's
            loss and metric.

    Returns:
        The output tensor of the final softmax layer.
    """
    pooled = GlobalAveragePooling2D()(features)
    hidden = Dense(1024, activation='relu')(pooled)
    hidden = Dropout(.2)(hidden)
    hidden = Dense(512, activation='relu')(hidden)
    hidden = Dropout(.2)(hidden)
    return Dense(n_classes, activation='softmax', name=head_name)(hidden)


output_base_model = base_model.output

# Three independent heads share the same backbone features.
race_output = _attach_head(output_base_model, 7, 'race')
gender_output = _attach_head(output_base_model, 2, 'gender')
age_output = _attach_head(output_base_model, 9, 'age')

# Output order (race, gender, age) must match the label order fed by the generators.
hybrid_model = Model(inputs=base_model.input,
                     outputs=[race_output, gender_output, age_output])

In [None]:
# Every head is a softmax classifier trained on one-hot targets, so each gets
# the same loss and the same metric, keyed by the name of its output layer.
head_names = ('race', 'gender', 'age')

hybrid_model.compile(
    optimizer=Adam(),
    loss={head: 'categorical_crossentropy' for head in head_names},
    metrics={head: 'accuracy' for head in head_names},
)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the multi-output model.
hybrid_model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         input_2[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 112, 112, 32) 0           bn_Conv1[0][0]                   
____________________________________________________________________________________________

### Create generators ###

In [None]:
# Training images: scale pixels to [0, 1] and apply random augmentation
# (flips, small rotations, shifts, shear and zoom).
# NOTE(review): Keras' own MobileNetV2 `preprocess_input` scales to [-1, 1];
# confirm that [0, 1] scaling is intended with the ImageNet weights.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation images: same pixel scaling, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Stream augmented training batches from the dataframe. The targets are yielded
# as a list ordered race, gender, age - the same order as the model outputs.
train_flow_options = dict(
    x_col='file',
    y_col=['cat_race', 'cat_gender', 'cat_age'],
    target_size=IMAGE_SIZE,
    class_mode="multi_output",
    batch_size=BATCH_SIZE,
    seed=42,
)
train_generator = train_datagen.flow_from_dataframe(
    train_df, IMG_PATH, **train_flow_options
)

Found 43300 validated image filenames.


In [None]:
# Stream validation batches from the dataframe. The targets are yielded as a
# list ordered race, gender, age - the same order as the model outputs.
validation_generator = validation_datagen.flow_from_dataframe(
    valid_df, 
    IMG_PATH, 
    x_col='file',
    y_col=['cat_race', 'cat_gender', 'cat_age'],
    target_size=IMAGE_SIZE,
    class_mode="multi_output",
    batch_size=BATCH_SIZE,
    seed=42,
    # `shuffle` defaults to True. Keep validation in a fixed order so every epoch
    # is scored on the same samples and `val_loss` is comparable across epochs
    # (it drives early stopping, LR reduction and checkpointing).
    shuffle=False,
)

Found 10826 validated image filenames.


In [None]:
# Number of samples in each split (row counts of the dataframes).
total_train = len(train_df)
total_validate = len(valid_df)

### Create callbacks ###

In [None]:
# Where the best weights (lowest validation loss) are written on Google Drive.
checkpoint_path = "/content/gdrive/My Drive/InfoFaceRecognize/weights/last_model_mnv2.h5"

# Stop training after 10 epochs without improvement of the validation loss
# ('val_loss' is the callback's default monitor, spelled out here).
earlystop = EarlyStopping(monitor='val_loss', patience=10)

# Multiply the learning rate by 0.2 after 3 stagnant epochs, never going below 1e-8.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-8,
    verbose=1,
)

# Save the model only when the validation loss reaches a new minimum.
mcp_save = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

callbacks = [earlystop, learning_rate_reduction, mcp_save]

### Train model ###

In [None]:
# Train the multi-output model. The step counts come from `len(generator)`, which
# is ceil(n_samples / batch_size); the previous floor division dropped the final
# partial batch, so part of the validation set was never evaluated.
history = hybrid_model.fit(
    train_generator, 
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    steps_per_epoch=len(train_generator),
    callbacks=callbacks
)

Epoch 1/150

Epoch 00001: val_loss improved from inf to 30.75573, saving model to /content/gdrive/My Drive/InfoFaceRecognize/weights/last_model_mnv2.h5




Epoch 2/150

Epoch 00002: val_loss improved from 30.75573 to 24.74598, saving model to /content/gdrive/My Drive/InfoFaceRecognize/weights/last_model_mnv2.h5
Epoch 3/150

Epoch 00003: val_loss improved from 24.74598 to 11.48956, saving model to /content/gdrive/My Drive/InfoFaceRecognize/weights/last_model_mnv2.h5
Epoch 4/150

Epoch 00004: val_loss did not improve from 11.48956
Epoch 5/150

Epoch 00005: val_loss improved from 11.48956 to 7.15994, saving model to /content/gdrive/My Drive/InfoFaceRecognize/weights/last_model_mnv2.h5
Epoch 6/150

Epoch 00006: val_loss improved from 7.15994 to 4.85910, saving model to /content/gdrive/My Drive/InfoFaceRecognize/weights/last_model_mnv2.h5
Epoch 7/150

Epoch 00007: val_loss improved from 4.85910 to 4.28174, saving model to /content/gdrive/My Drive/InfoFaceRecognize/weights/last_model_mnv2.h5
Epoch 8/150

Epoch 00008: val_loss did not improve from 4.28174
Epoch 9/150

Epoch 00009: val_loss improved from 4.28174 to 3.18911, saving model to /conte