In [1]:
import os
import math
import json
import pickle
import random
import numpy as np
import tensorflow as tf
from tensorflow.data import Dataset
from tensorflow import keras

from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from tensorflow.keras.optimizers import RMSprop, SGD, Adam
from tensorflow.keras.applications import MobileNet, ResNet50, InceptionV3
from tensorflow.keras.applications.mobilenet import preprocess_input as mobilenet_preprocess
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess
from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_preprocess
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing import image
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback, LearningRateScheduler
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Lambda, Conv1D, Attention, GlobalAveragePooling1D, BatchNormalization, Layer
from keras_facenet import FaceNet

# Fix the seeds of the NumPy, Python `random` and TensorFlow RNGs.
# NOTE(review): TensorFlow is seeded with 12 while the other two use 123 —
# confirm this difference is intentional.
np.random.seed(123)
random.seed(123)
tf.random.set_seed(12)

2021-09-16 12:56:52.797701: I tensorflow/stream_executor/platform/default/dso_loader.cc:54] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# Load the pre-computed embeddings: a dict of dicts whose innermost values
# are arrays (their `.shape` is reported below).
# NOTE: pickle can execute arbitrary code while loading — only open trusted files.
with open('data/train_img_embeddings.pkl', 'rb') as pkl_file:
    train_embeddings = pickle.load(pkl_file)
first_keys = list(train_embeddings.keys())[:5]
print(f'The keys examples: {first_keys}')

# Peek at the very first embedding to learn the vector shape.
first_entry = list(train_embeddings.values())[0]
first_embedding = list(first_entry.values())[0]
embedding_shape = first_embedding.shape
print(f'Embeddings shape: {embedding_shape}')

The keys examples: ['F0475/MID3', 'F0475/MID7', 'F0475/MID6', 'F0475/MID4', 'F0475/MID2']
Embeddings shape: (512,)


In [3]:
# Total number of entries summed over every top-level key.
cnt = sum(len(per_key) for per_key in train_embeddings.values())

print(f'Total imgs: {cnt}')

Total imgs: 20080


In [4]:
# Image shape fed to the network; the first two entries are also used as the
# resize target when images are loaded.
input_shape = 224, 224, 3
# Root directory that the embedding keys are joined onto to locate image files.
train_path = './data/train'

In [5]:
def mobilenet(input_shape, l2_value, dropout):
    """Build a MobileNet backbone topped with a 512-unit, L2-normalised head.

    Args:
        input_shape: image shape forwarded to ``MobileNet`` as ``input_shape``.
        l2_value: L2 factor used for the head's kernel regulariser and assigned
            to every backbone layer that exposes ``kernel_regularizer``.
        dropout: dropout rate forwarded to ``MobileNet``.

    Returns:
        A ``Model`` from the backbone input to the 512-unit ReLU output,
        L2-normalised along axis 1.
    """
    backbone = MobileNet(
        weights='imagenet',
        alpha=.75,
        include_top=False,
        pooling='avg',
        input_shape=input_shape,
        dropout=dropout,
    )

    # NOTE(review): the regulariser is assigned after the backbone layers have
    # been built; inspect `model.losses` to confirm Keras actually registers
    # these penalties.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = True
        if hasattr(backbone_layer, 'kernel_regularizer'):
            backbone_layer.kernel_regularizer = keras.regularizers.l2(l2_value)

    head = Dense(512, activation='relu', kernel_regularizer=l2(l2_value))
    embedding = head(backbone.output)
    normalise = Lambda(lambda t: K.l2_normalize(t, axis=1))
    return Model(backbone.input, normalise(embedding))

In [6]:
def batching(embeddings, batch_size, input_shape, preprocess, root_dir=None):
    """Yield ``(images, targets)`` batches for a single pass over ``embeddings``.

    Args:
        embeddings: nested mapping ``{key: {image name: target vector}}``; each
            image is read from ``<root>/<key>/<image name>``.
        batch_size: number of samples per yielded batch.
        input_shape: shape whose first two entries give the resize target
            passed to ``image.load_img``.
        preprocess: callable applied to the stacked float32 image array before
            it is yielded.
        root_dir: optional directory to resolve keys against; when ``None`` the
            module-level ``train_path`` is used (the original behaviour).

    Yields:
        Tuples ``(preprocess(images), targets)`` where ``targets`` is the
        stacked target vectors cast with ``astype(float)``.

    Note:
        Only full batches are yielded; samples left over at the end of the
        pass (fewer than ``batch_size``) are silently dropped.
    """
    base_dir = train_path if root_dir is None else root_dir
    # Loop-invariant resize target, hoisted out of the per-image loop.
    target_size = (input_shape[0], input_shape[1])

    imgs, labels = [], []
    for person, embs in embeddings.items():
        person_path = os.path.join(base_dir, person)

        for img_name, emb in embs.items():
            img_path = os.path.join(person_path, img_name)
            img = image.load_img(img_path, target_size=target_size)
            imgs.append(np.array(img).astype('float32'))
            labels.append(emb)
            if len(labels) == batch_size:
                yield (preprocess(np.array(imgs)), np.array(labels).astype(float))
                imgs, labels = [], []

def repeat_generator(embeddings, batch_size, input_shape, preprocess):
    """Endlessly cycle over the batches produced by ``batching``.

    Each time ``batching`` is exhausted a fresh pass is started, so the
    returned generator never ends on its own.

    Args:
        embeddings, batch_size, input_shape, preprocess: forwarded unchanged to
            ``batching``.

    Yields:
        The batches yielded by ``batching``, pass after pass.

    Raises:
        ValueError: if a complete pass yields no batch at all. Without this
            guard the ``while True`` loop would spin forever without ever
            yielding, hanging the consumer.
    """
    while True:
        produced_any = False
        for batch in batching(embeddings, batch_size, input_shape, preprocess):
            produced_any = True
            yield batch
        if not produced_any:
            raise ValueError(
                f'batching() produced no batches for batch_size={batch_size}; '
                'the data holds fewer samples than one batch'
            )

In [7]:
# Training-Validation split, performed over the top-level keys (not over
# individual images). The shuffle uses Python's `random` module.
VAL_FACTOR = 0.12
keys = list(train_embeddings.keys())
random.shuffle(keys)
keys_length = len(keys)
# Despite its name, `val_factor` is the NUMBER of keys held out for validation.
val_factor = int(keys_length * VAL_FACTOR)
val_keys, train_keys = keys[:val_factor], keys[val_factor:]
print(f'Total keys: {keys_length}, train keys: {len(train_keys)}, valid keys: {len(val_keys)}')

val_embs = {key: train_embeddings[key] for key in val_keys}
train_embs = {key: train_embeddings[key] for key in train_keys}

# Image counts per split.
train_len = sum(len(entries) for entries in train_embs.values())
val_len = sum(len(entries) for entries in val_embs.values())

print(f'Total - train imgs: {train_len}, valid imgs: {val_len}')

Total keys: 3965, train keys: 3490, valid keys: 475
Total - train imgs: 17516, valid imgs: 2564


In [8]:
# Training hyper-parameters.
lr = 1e-3
l2_value = 1e-5
dropout = 0.1
optimizer_name = 'Adam'
batch_size = 16
epochs = 1000

# Resolve the optimizer class through an explicit table rather than eval():
# only the optimizers imported at the top of the notebook can be selected, and
# a misspelt name fails with a clear KeyError.
optimizer_classes = {'RMSprop': RMSprop, 'SGD': SGD, 'Adam': Adam}

model = mobilenet(input_shape, l2_value, dropout)
# `optimizer` ends up bound to the optimizer instance, as before.
optimizer = optimizer_classes[optimizer_name](learning_rate=lr)
model.compile(loss='mean_squared_error', optimizer=optimizer)

2021-09-16 12:56:55.338480: W tensorflow/stream_executor/platform/default/dso_loader.cc:65] Could not load dynamic library 'libcuda.so.1'; dlerror: /lib/x86_64-linux-gnu/libcuda.so.1: file too short; LD_LIBRARY_PATH: /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-09-16 12:56:55.338529: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-09-16 12:56:55.338569: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:163] no NVIDIA GPU device is present: /dev/nvidia0 does not exist
2021-09-16 12:56:55.339285: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 16. Tune using inter_op_parallelism_threads for best performance.


In [None]:
train_generator = repeat_generator(train_embs, batch_size, input_shape, mobilenet_preprocess)
# Validation must come from a repeating generator as well: a single
# `batching(...)` pass yields exactly `val_len // batch_size` batches, so a
# one-shot generator is exhausted after the first epoch and later epochs would
# have no validation data. Because `batching` drops the trailing partial batch,
# every epoch still validates on one complete pass.
val_generator = repeat_generator(val_embs, batch_size, input_shape, mobilenet_preprocess)
model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_len // batch_size,
    validation_data=val_generator,
    validation_steps=val_len // batch_size
)

2021-09-16 12:56:57.577622: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2021-09-16 12:56:57.596817: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3600000000 Hz


Epoch 1/1000
 189/1094 [====>.........................] - ETA: 9:20 - loss: 0.0053