In [19]:
%load_ext tensorboard
%tensorflow_version 2.x

# Init global infos
import numpy as np
from tensorflow import keras
from keras.utils import np_utils
import matplotlib.pyplot as plt
import pandas as pd
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import tensorflow as tf
import os
import pathlib
import skimage


batch_size = 512
num_epochs = 30

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [20]:
from google.colab import drive
drive.mount('/content/drive')
!unzip "drive/My Drive/Data/celeba-dataset.zip"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Archive:  drive/My Drive/Data/celeba-dataset.zip
replace __MACOSX/._celeba-dataset? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [30]:
class CelebA():
    '''Wraps the celebA dataset, allowing an easy way to:
       - Select the features of interest,
       - Split the dataset into 'training', 'test' or 'validation' partition.
       '''
    def __init__(self, main_folder='celeba-dataset/', selected_features=None, drop_features=[]):
        self.main_folder = main_folder
        self.images_folder   = os.path.join(main_folder, 'img_align_celeba/img_align_celeba/')
        self.attributes_path = os.path.join(main_folder, 'list_attr_celeba.csv')
        self.partition_path  = os.path.join(main_folder, 'list_eval_partition.csv')
        self.selected_features = selected_features
        self.features_name = []
        self.__prepare(drop_features)

    def __prepare(self, drop_features):
        '''do some preprocessing before using the data: e.g. feature selection'''
        # attributes:
        if self.selected_features is None:
            self.attributes = pd.read_csv(self.attributes_path)
            self.num_features = 40
        else:
            self.num_features = len(self.selected_features)
            self.selected_features = self.selected_features.copy()
            self.selected_features.append('image_id')
            self.attributes = pd.read_csv(self.attributes_path)[self.selected_features]

        # remove unwanted features:
        for feature in drop_features:
            if feature in self.attributes:
                self.attributes = self.attributes.drop(feature, axis=1)
                self.num_features -= 1
      
        self.attributes.set_index('image_id', inplace=True)
        self.attributes.replace(to_replace=-1, value=0, inplace=True)
        self.attributes['image_id'] = list(self.attributes.index)
  
        self.features_name = list(self.attributes.columns)[:-1]
  
        # load ideal partitioning:
        self.partition = pd.read_csv(self.partition_path)
        self.partition.set_index('image_id', inplace=True)
  
    def split(self, name='training', drop_zero=False):
      '''Returns the ['training', 'validation', 'test'] split of the dataset'''
      # select partition split:
      if name is 'training':
        to_drop = self.partition.where(lambda x: x != 0).dropna()
      elif name is 'validation':
        to_drop = self.partition.where(lambda x: x != 1).dropna()
      elif name is 'test':  # test
        to_drop = self.partition.where(lambda x: x != 2).dropna()
      else:
        raise ValueError('CelebA.split() => `name` must be one of [training, validation, test]')

      partition = self.partition.drop(index=to_drop.index)
      
      # join attributes with selected partition:
      joint = partition.join(self.attributes, how='inner').drop('partition', axis=1)

      if drop_zero is True:
        # select rows with all zeros values
        return joint.loc[(joint[self.features_name] == 1).any(axis=1)]
      elif 0 <= drop_zero <= 1:
        zero = joint.loc[(joint[self.features_name] == 0).all(axis=1)]
        zero = zero.sample(frac=drop_zero)
        return joint.drop(index=zero.index)
      return joint

In [31]:
##celeba = CelebA(drop_features = ['Male'])
celeba = CelebA()
celeba.attributes['Attractive'] = celeba.attributes['Attractive'].astype('str')
celeba.attributes.sample(5)

Unnamed: 0_level_0,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,Blond_Hair,Blurry,Brown_Hair,Bushy_Eyebrows,Chubby,Double_Chin,Eyeglasses,Goatee,Gray_Hair,Heavy_Makeup,High_Cheekbones,Male,Mouth_Slightly_Open,Mustache,Narrow_Eyes,No_Beard,Oval_Face,Pale_Skin,Pointy_Nose,Receding_Hairline,Rosy_Cheeks,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young,image_id
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
041269.jpg,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,041269.jpg
087427.jpg,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,087427.jpg
093482.jpg,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,093482.jpg
000859.jpg,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,000859.jpg
033118.jpg,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,033118.jpg


In [32]:
base_model = tf.keras.applications.VGG19(
    input_shape = (64,64,3),
    include_top = False, 
    weights = 'imagenet')

base_model.trainable = False

for layer in base_model.layers:
    print(layer, layer.trainable)

<tensorflow.python.keras.engine.input_layer.InputLayer object at 0x7f5e7de32f28> False
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f5e7de32cc0> False
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f5e7de327b8> False
<tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x7f5e7deaf2b0> False
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f5e7b4e3ba8> False
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f5e7de32128> False
<tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x7f5e7de38470> False
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f5e7de37588> False
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f5e7de327f0> False
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f5e7de39860> False
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f5e7de39d30> False
<tensorflow.python.keras.layers.pooling.MaxPooling2D object at 

In [33]:
last = base_model.layers[-1].output
flatten = keras.layers.Flatten()(last)

model_vgg = keras.Model(inputs=base_model.input, outputs=flatten)

model_vgg.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model_vgg.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 64, 64, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0   

In [34]:
# ------------------------------------------------------------------------------
# -- Preparing Data Generators for training and validation set
# ------------------------------------------------------------------------------

# data augmentation only for the training istances:
datagen = ImageDataGenerator(rescale=1./255)

# get training and validation set:
train_split = celeba.split('training'  , drop_zero=False)
val_split = celeba.split('validation', drop_zero=False)
test_split = celeba.split('test', drop_zero = False)

train_split_woman = train_split[train_split['Male'] == 0].drop(columns = ['Male'])
train_split_man = train_split[train_split['Male'] == 1 ].drop(columns = ['Male'])
val_split_woman = val_split[val_split['Male'] == 0 ].drop(columns = ['Male'])
val_split_man = val_split[val_split['Male'] == 1 ].drop(columns = ['Male'])
test_split_woman = test_split[test_split['Male'] == 0 ].drop(columns = ['Male'])
test_split_man = test_split[test_split['Male'] == 1 ].drop(columns = ['Male'])

# data generators:
train_generator = datagen.flow_from_dataframe(
    dataframe=train_split,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = datagen.flow_from_dataframe(
    dataframe=val_split,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = datagen.flow_from_dataframe(
    dataframe=test_split,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)

# data generators:
train_generator_woman = datagen.flow_from_dataframe(
    dataframe=train_split_woman,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator_woman = datagen.flow_from_dataframe(
    dataframe=val_split_woman,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator_woman = datagen.flow_from_dataframe(
    dataframe=test_split_woman,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)


train_generator_man = datagen.flow_from_dataframe(
    dataframe=train_split_man,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator_man = datagen.flow_from_dataframe(
    dataframe=val_split_man,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator_man = datagen.flow_from_dataframe(
    dataframe=test_split_man,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col='Attractive',
    target_size=(64,64),
    batch_size=batch_size,
    class_mode='categorical'
)


Found 162770 validated image filenames belonging to 2 classes.
Found 19867 validated image filenames belonging to 2 classes.
Found 19962 validated image filenames belonging to 2 classes.
Found 94509 validated image filenames belonging to 2 classes.
Found 11409 validated image filenames belonging to 2 classes.
Found 12247 validated image filenames belonging to 2 classes.
Found 68261 validated image filenames belonging to 2 classes.
Found 8458 validated image filenames belonging to 2 classes.
Found 7715 validated image filenames belonging to 2 classes.


In [26]:
 def get_sets(generator, model):
  X_batch, y = generator.next()
  X = model.predict(X_batch)
  print(range(len(generator)))
  for i in range(len(generator)-1):
    X_batch, y_batch = generator.next()
    x_out = model.predict(X_batch)
    X = np.concatenate((X,x_out))
    y = np.concatenate((y, y_batch))
  return(X,y)

In [27]:
np_woman_train_vgg, y_train_woman = get_sets(train_generator_woman, model_vgg)
np_woman_val_vgg, y_val_woman = get_sets(val_generator_woman, model_vgg) 
np_woman_test_vgg, y_test_woman = get_sets(test_generator_woman, model_vgg) 
print(np_woman_train_vgg.shape)
print(np_woman_val_vgg.shape)

np_man_train_vgg, y_train_man = get_sets(train_generator_man, model_vgg)
np_man_val_vgg, y_val_man = get_sets(val_generator_man, model_vgg) 
np_man_test_vgg,y_test_man = get_sets(test_generator_man, model_vgg) 
print(np_man_val_vgg.shape)

range(0, 185)
range(0, 23)
range(0, 24)
(94509, 2048)
(11409, 2048)
range(0, 134)
range(0, 17)
range(0, 16)
(8458, 2048)


In [None]:
np_train_vgg, y_train = get_sets(train_generator, model_vgg)
np_val_vgg, y_val = get_sets(val_generator, model_vgg) 
np_test_vgg, y_test = get_sets(test_generator, model_vgg) 
print(np_train_vgg.shape)
print(np_val_vgg.shape)

range(0, 318)
range(0, 39)
range(0, 39)
(162770, 2048)
(19867, 2048)


In [None]:
from numpy import save

save('drive/My Drive/Data/np_train_vgg_wo.npy', np_train_vgg)
save('drive/My Drive/Data/np_val_vgg_wo.npy', np_val_vgg)
save('drive/My Drive/Data/np_test_vgg_wo.npy', np_test_vgg)

In [28]:
from numpy import save
save('drive/My Drive/Data/np_woman_train_vgg_wo.npy', np_woman_train_vgg)
save('drive/My Drive/Data/np_woman_val_vgg_wo.npy', np_woman_val_vgg)
save('drive/My Drive/Data/np_woman_test_vgg_wo.npy', np_woman_test_vgg)
save('drive/My Drive/Data/np_man_train_vgg_wo.npy', np_man_train_vgg)
save('drive/My Drive/Data/np_man_val_vgg_wo.npy', np_man_val_vgg)
save('drive/My Drive/Data/np_man_test_vgg_wo.npy', np_man_test_vgg)

In [None]:
from numpy import save

save('drive/My Drive/Data/y_train_vgg_wo.npy', y_train)
save('drive/My Drive/Data/y_val_vgg_wo.npy', y_val)
save('drive/My Drive/Data/y_test_vgg_wo.npy', y_test)

In [29]:
from numpy import save
save('drive/My Drive/Data/y_woman_train_vgg1_wo.npy', y_train_woman)
save('drive/My Drive/Data/y_woman_val_vgg1_wo.npy', y_val_woman)
save('drive/My Drive/Data/y_woman_test_vgg1_wo.npy', y_test_woman)
save('drive/My Drive/Data/y_man_train_vgg1_wo.npy', y_train_man)
save('drive/My Drive/Data/y_man_val_vgg1_wo.npy', y_val_man)
save('drive/My Drive/Data/y_man_test_vgg1_wo.npy', y_test_man)