In [47]:
import numpy as np
import cv2
from tensorflow import keras
import pandas as pd
import tensorflow as tf
import os
from random import shuffle

In [48]:

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras - adapted from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly'
    def __init__(self, list_IDs, batch_size=32, dim=(95,95), n_channels=3, 
                 datapath='/vagrant/imgs/training_data/training_data/aligned',
                 attribute_path='/vagrant/imgs/list_attr_celeba.csv',
                 label_size=40, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.label_size = label_size
        self.datapath = datapath
        self.on_epoch_end()
    
        self.df = pd.read_csv(attribute_path)
        

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)
            
    def get_numpy_image(self, image_path):
        img =  cv2.imread(os.path.join(self.datapath, image_path))
        return img / 255
    
    def get_label(self, image_path):
        if 'png' in image_path:
            image_path = image_path.replace('png', 'jpg')
        row = self.df.loc[self.df['image_id'] == image_path]
        label = np.array(row.values.tolist()[0][1:])
#         print("label: {}".format(d))
        label[label < 0] = 0
        return label

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        Y = np.empty((self.batch_size, self.label_size), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            X[i,] = self.get_numpy_image(ID)
            Y[i,] = self.get_label(ID)
            
        return X, Y

In [60]:
class PredictionDataGenerator(DataGenerator):
    def __init__(self, list_IDs, dim=(95,95), n_channels=3, 
                 datapath='/vagrant/imgs/training_data/training_data/aligned'):
        'Initialization'
        self.dim = dim
        self.batch_size = len(list_IDs)
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.shuffle = False
        self.datapath = datapath
        self.on_epoch_end()
        
    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X = self.__data_generation(list_IDs_temp)

        return X
    
    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            X[i,] = self.get_numpy_image(ID)
            
        return X

In [61]:

def create_partition(amount='all', datapath='/vagrant/imgs/training_data/training_data/aligned', split=(60, 20, 20)):
    directory = os.listdir(datapath)
    shuffle(directory)
    if amount != 'all':
        directory = directory[:amount]
    l = len(directory)
    train = int(l *split[0]/100)
    val = int(l * split[1]/100) + train
    test = int(l * split[2]/100) + val
    
    return {
        "train": directory[:train],
        "validation": directory[train:val],
        "test": directory[val:]
    }


In [92]:
def create_model(input_shape=(95,95,3), optimizer=tf.train.AdamOptimizer, loss='binary_crossentropy', metrics=['accuracy']):
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=input_shape),
        keras.layers.Dense(128, activation=tf.nn.relu),
        keras.layers.Dense(40, activation=tf.nn.sigmoid)
    ])
    model.compile(optimizer=optimizer(), 
                  loss=loss,
                  metrics=metrics)    
    return model

In [96]:
def evaluate_model(model, data_generators, patience=20, workers=8, epochs=100, verbose=1):
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience)
    history = model.fit_generator(generator=data_generators['training_generator'],
                        validation_data=data_generators['validation_generator'],
                        use_multiprocessing=True,
                        workers=workers,
                        epochs=epochs,
                        verbose=verbose,
                        callbacks=[early_stop])

    result = model.evaluate_generator(generator=data_generators['test_generator'], verbose=verbose)
    predictions = model.predict_generator(generator=data_generators['predition_generator'], verbose=verbose)
    return history, result, predictions

In [97]:
def determine_attributes(prediction):
    label_names = ['5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 
                   'Big_Nose', 'Black_Hair', 'Blond_Hair', 'Blurry', 'Brown_Hair', 'Bushy_Eyebrows', 'Chubby', 'Double_Chin', 
                   'Eyeglasses', 'Goatee', 'Gray_Hair', 'Heavy_Makeup', 'High_Cheekbones', 'Male', 'Mouth_Slightly_Open', 
                   'Mustache', 'Narrow_Eyes', 'No_Beard', 'Oval_Face', 'Pale_Skin', 'Pointy_Nose', 'Receding_Hairline', 
                   'Rosy_Cheeks', 'Sideburns', 'Smiling', 'Straight_Hair', 'Wavy_Hair', 'Wearing_Earrings', 'Wearing_Hat', 
                   'Wearing_Lipstick', 'Wearing_Necklace', 'Wearing_Necktie', 'Young']
    label_dict = {i:name for i,name in enumerate(label_names)}
    reverse_label_dict = {name:i for i, name in label_dict.items()}
    
    related = [
        ['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Gray_Hair'],
        ['Straight_Hair', 'Wavy_Hair']
    ]

    reinforcement = {
        '5_o_Clock_Shadow': .1,
        'Goatee': .1,
        'Mustache': .1,
        'Sideburns': .1,
        'Wearing_Necktie': .1,
        'Heavy_Makeup': -.1,
        'Wearing_Earrings': -.1,
        'Wearing_Lipstick': -.1,
        'Wearing_Necklace': -.1,
    }
    
    threshold = .3
    male = 20
    
    predition = prediction.tolist()
    intermediary = []
    
    # high threshold
    for value in prediction:
        if value < threshold:
            intermediary.append(0)
        elif value > 1 - threshold:
            intermediary.append(1)
        else:
            intermediary.append(value)

    # reinforce gender  
    if intermediary[male] not in (1,0):
        for key in reinforcement:
            if intermediary[reverse_label_dict[key]] == 1:
                intermediary[male] += reinforcement[key]
    if intermediary[male] < threshold:
        intermediary[male] = 0
    elif intermediary[male] > 1 - threshold:
        intermediary[male] = 1
        
    # remove related if one is strong
    for d in related:
        print(d)
        if any([intermediary[reverse_label_dict[name]] == 1 for name in d]):
            for name in d:
                if name != 1:
                    del intermediary[reverse_label_dict[i]]
        
    results = {
        'sure': {'pos': [], 'neg': []},
        'unsure': {'pos': [], 'neg': []}
    }
    
    for i, value in enumerate(intermediary):
        name = label_dict[i]
        if value == 0:
            results['sure']['neg'].append(name)
        elif value == 1:
            results['sure']['pos'].append(name)
        elif value < .5:
            results['unsure']['neg'].append(name)
        else:
            results['unsure']['pos'].append(name)
    
    return results

    
readable_attrs = determine_attributes(preditions[0])

['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Gray_Hair']
['Straight_Hair', 'Wavy_Hair']


In [98]:
# 'adapted from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly'
import numpy as np


# Parameters
params = {'dim': (95,95),
          'batch_size': 5,
          'n_channels': 3,
          'shuffle': True}

# Datasets
partition = create_partition(amount=100)

# Generators
data_generators = {
    'training_generator': DataGenerator(partition['train'], **params),
    'validation_generator': DataGenerator(partition['validation'], **params),
    'test_generator': DataGenerator(partition['test'], **params),
    'predition_generator': PredictionDataGenerator(partition['test'])
}

model = create_model()
history, result, predictions = evaluate_model(model, data_generators)




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
(20, 95, 95, 3)
(20, 95, 95, 3)


In [69]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_8 (Flatten)          (None, 27075)             0         
_________________________________________________________________
dense_16 (Dense)             (None, 128)               3465728   
_________________________________________________________________
dense_17 (Dense)             (None, 40)                5160      
Total params: 3,470,888
Trainable params: 3,470,888
Non-trainable params: 0
_________________________________________________________________


In [70]:
history.history.keys()

dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])

In [71]:
result

[0.4558965563774109, 0.7899999916553497]

In [72]:
preditions[0]

array([0.12588444, 0.32350117, 0.6725941 , 0.19547054, 0.01614203,
       0.02820873, 0.1741563 , 0.26098672, 0.252925  , 0.18323612,
       0.00596957, 0.3818913 , 0.07264109, 0.03637172, 0.08249784,
       0.03903544, 0.0471328 , 0.02642897, 0.23665993, 0.7427596 ,
       0.70710564, 0.3255983 , 0.0445119 , 0.07717391, 0.72756755,
       0.38242835, 0.02687896, 0.11712329, 0.05296161, 0.05350516,
       0.03041244, 0.71803063, 0.14684525, 0.17305525, 0.2152747 ,
       0.09664284, 0.26611808, 0.04464798, 0.14673384, 0.7768357 ],
      dtype=float32)