In [7]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, Dense, BatchNormalization, Dropout, Flatten, Activation
from keras.layers.convolutional import ZeroPadding2D
from keras.models import Sequential
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.layers.core import Lambda
from keras import initializers
from keras.utils import get_file
import tensorflow as tf
import numpy as np
import os

Using TensorFlow backend.


In [8]:
# Root directory of the dataset; the cells below append 'train/F', 'train/M',
# 'valid/F', 'valid/M' and the weights file name to it by plain string
# concatenation, so it must end with a path separator.
# The location can be overridden with the YEARBOOK_DATA_DIR environment
# variable; os.path.join(..., '') appends the trailing separator when missing.
path = os.path.join(
    os.environ.get('YEARBOOK_DATA_DIR',
                   '/work/04381/ymarathe/maverick/yearbook/keras_yearbook/'),
    '')

In [25]:
def gen_batches(path, gen=None, shuffle=True, class_mode="categorical", batch_size=32,
                target_size=(171, 186)):
    """Create a directory-backed batch iterator.

    Args:
        path: Directory handed to ``gen.flow_from_directory``.
        gen: Object providing ``flow_from_directory`` (normally an
            ``ImageDataGenerator``). When ``None`` a fresh
            ``ImageDataGenerator()`` is created for this call, instead of one
            instance being built at definition time and shared by every call.
        shuffle: Forwarded to ``flow_from_directory``.
        class_mode: Forwarded to ``flow_from_directory``.
        batch_size: Forwarded to ``flow_from_directory``.
        target_size: Forwarded to ``flow_from_directory``.

    Returns:
        Whatever ``gen.flow_from_directory`` returns.
    """
    # NOTE(review): the default generator applies no rescaling or
    # preprocessing function -- confirm this matches what the pretrained
    # VGG19 weights used elsewhere in this notebook expect.
    if gen is None:
        gen = ImageDataGenerator()
    return gen.flow_from_directory(path, shuffle=shuffle, batch_size=batch_size, target_size=target_size,
                                   class_mode=class_mode)

def gen_batches_flow(path, gen=None, shuffle=True, batch_size=32):
    """Create a batch iterator through ``gen.flow``.

    Args:
        path: First positional argument handed to ``gen.flow``.
            NOTE(review): ``ImageDataGenerator.flow`` takes in-memory array
            data rather than a directory; the parameter name is kept only so
            existing keyword callers keep working.
        gen: Object providing ``flow`` (normally an ``ImageDataGenerator``).
            When ``None`` a fresh ``ImageDataGenerator()`` is created for this
            call, instead of one instance being built at definition time and
            shared by every call.
        shuffle: Forwarded to ``flow``.
        batch_size: Forwarded to ``flow``.

    Returns:
        Whatever ``gen.flow`` returns.
    """
    if gen is None:
        gen = ImageDataGenerator()
    return gen.flow(path, shuffle=shuffle, batch_size=batch_size)

In [10]:
# Training batches read from the 'train/F' subdirectory of the dataset root,
# using the default gen_batches settings.
female_train = gen_batches(path + 'train/F')

Found 12149 images belonging to 104 classes.


In [11]:
# Training batches read from the 'train/M' subdirectory of the dataset root,
# using the default gen_batches settings.
male_train = gen_batches(path + 'train/M')

Found 10691 images belonging to 104 classes.


In [12]:
# Validation batches read from the 'valid/F' subdirectory of the dataset root.
# NOTE(review): gen_batches shuffles by default -- confirm that is wanted for validation data.
female_valid = gen_batches(path + 'valid/F')

Found 2797 images belonging to 104 classes.


In [13]:
# Validation batches read from the 'valid/M' subdirectory of the dataset root.
# NOTE(review): gen_batches shuffles by default -- confirm that is wanted for validation data.
male_valid = gen_batches(path + 'valid/M')


Found 2212 images belonging to 104 classes.


In [14]:
def vgg19_conv_layers_sequential_bn():
    """Build the VGG19 convolutional stack with batch normalization after every convolution.

    The network takes (171, 186, 3) inputs. Each of the five blocks is a run
    of ZeroPadding2D -> Conv2D(3x3) -> BatchNormalization -> ReLU groups,
    followed by 2x2 max pooling with stride 2.

    The pretrained 'notop' VGG19 weight file only contains the convolution
    layers, so it cannot be matched layer-by-layer against a model that also
    has BatchNormalization layers carrying weights. The convolutions are
    therefore named ``block<b>_conv<c>`` and the file is loaded with
    ``by_name=True``, which fills the layers whose names match and leaves the
    BatchNormalization layers at their initial values.

    NOTE(review): the original author remarked that adding batch
    normalization is "not possible, because ALL the weights will change";
    the convolution kernels do load with this approach, but whether the
    pretrained features remain useful behind freshly initialised
    normalization layers needs to be confirmed experimentally.

    Returns:
        The ``Sequential`` model after the matching pretrained weights have
        been loaded.
    """
    # (number of filters, number of convolution layers) for each block, in order.
    block_layout = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    model = Sequential()
    # Only the very first layer of the model declares the input shape.
    pad_options = {'input_shape': (171, 186, 3)}
    for block_idx, (n_filters, n_convs) in enumerate(block_layout, 1):
        for conv_idx in range(1, n_convs + 1):
            model.add(ZeroPadding2D((1, 1), **pad_options))
            pad_options = {}
            # Name chosen to match the layer names stored in the weight file.
            model.add(Conv2D(n_filters, (3, 3),
                             name='block%d_conv%d' % (block_idx, conv_idx)))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
        model.add(MaxPooling2D((2, 2), strides = (2, 2)))

    vgg_pretrain_weights = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
                                    "https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5",
                                    cache_subdir='models')

    # Load by layer name so the BatchNormalization layers are skipped.
    model.load_weights(vgg_pretrain_weights, by_name=True)

    return model

In [15]:
def vgg19_conv_layers_sequential():
    """Build the VGG19 convolutional stack and load the pretrained 'notop' weights.

    The network takes (171, 186, 3) inputs and is made of five blocks; each
    block is a run of zero-padded 3x3 ReLU convolutions followed by 2x2 max
    pooling with stride 2.

    Returns:
        The ``Sequential`` model after ``load_weights`` has been called with
        the downloaded (or cached) weight file.
    """
    # (number of filters, number of convolution layers) for each block, in order.
    block_layout = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    net = Sequential()
    # Only the very first layer of the model declares the input shape.
    pad_options = {'input_shape': (171, 186, 3)}
    for n_filters, n_convs in block_layout:
        for _ in range(n_convs):
            net.add(ZeroPadding2D((1, 1), **pad_options))
            pad_options = {}
            net.add(Conv2D(n_filters, (3, 3), activation='relu'))
        # Each block ends by halving the spatial resolution.
        net.add(MaxPooling2D((2, 2), strides=(2, 2)))

    weights_file = get_file(
        'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
        "https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5",
        cache_subdir='models')
    net.load_weights(weights_file)

    return net

In [16]:
# Classifier head stacked on the pretrained convolutional base: flatten, two
# 4096-unit ReLU layers (Glorot-normal kernels, biases initialised to one),
# each followed by 70% dropout, then a 104-way softmax.
# A BatchNormalization layer after each Dropout is currently disabled.
model = vgg19_conv_layers_sequential()
for head_layer in (
        Flatten(),
        Dense(4096, activation='relu', kernel_initializer='glorot_normal',
              bias_initializer=keras.initializers.Ones()),
        Dropout(0.7),
        Dense(4096, activation='relu', kernel_initializer='glorot_normal',
              bias_initializer=keras.initializers.Ones()),
        Dropout(0.7),
        Dense(104, activation='softmax')):
    model.add(head_layer)

In [17]:
# Freeze every layer whose type is not exactly Dense, so only the classifier
# head is updated during training.
for layer in (candidate for candidate in model.layers if type(candidate) is not Dense):
    layer.trainable = False

# Compile after changing the trainable flags.
model.compile(
    Adam(lr=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer table (layer type, output shape, parameter count) for the compiled model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d_1 (ZeroPaddin (None, 173, 188, 3)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 171, 186, 64)      1792      
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 173, 188, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 171, 186, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 85, 93, 64)        0         
_________________________________________________________________
zero_padding2d_3 (ZeroPaddin (None, 87, 95, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 85, 93, 128)       73856     
__________

In [None]:
# Five rounds of training; each round runs one epoch on the female generator
# and then one epoch on the male generator, validating against the matching
# validation generator. The weights are saved once, after all rounds.
with tf.device('/gpu:0'):
    for i in range(5):
        for train_batches, valid_batches in ((female_train, female_valid),
                                             (male_train, male_valid)):
            # Floor division keeps the step counts integral on both Python 2
            # and Python 3 (379 and 334 training steps, as in the logged runs).
            model.fit_generator(train_batches,
                                steps_per_epoch=train_batches.samples // train_batches.batch_size,
                                epochs=1,
                                validation_data=valid_batches,
                                validation_steps=valid_batches.samples // valid_batches.batch_size)
    model.save_weights(path + 'weights_exp6.h5')

Epoch 1/1


**Experiment 1**
- All layers trained
- lr = 1e-4
- Adam
- Dense layer weights explicitly initialized
- OVERFITTING is apparent
- Epochs = 1

Total params: 89,664,680

Trainable params: 89,664,680

Non-trainable params: 0

Epoch 1/1

379/379 [==============================] - 668s - loss: 5.1517 - acc: 0.0237 - val_loss: 4.7387 - val_acc: 0.0223

Epoch 1/1

334/334 [==============================] - 578s - loss: 4.1187 - acc: 0.0723 - val_loss: 4.4772 - val_acc: 0.0245

**Experiment 2**
- Motivation: loss function keeps decreasing in Experiment 1, maybe more epochs will increase accuracy?
- Dense layers trained
- lr = 1e-5
- Dense layer weights explicitly initialized
- Metrics improve on alternating passes (the female and male generators are trained in turn)
- Not clear whether the model is overfitting
- Epochs = 5

=================================================================

Total params: 89,664,680

Trainable params: 69,640,296

Non-trainable params: 20,024,384

_________________________________________________________________

Epoch 1/1

379/379 [==============================] - 240s - loss: 13.6884 - acc: 0.0358 - val_loss: 11.4914 - val_acc: 0.0251

Epoch 1/1

334/334 [==============================] - 203s - loss: 11.5692 - acc: 0.0666 - val_loss: 9.2627 - val_acc: 0.0127

Epoch 1/1

379/379 [==============================] - 237s - loss: 8.4670 - acc: 0.1118 - val_loss: 6.3364 - val_acc: 0.0365

Epoch 1/1

334/334 [==============================] - 204s - loss: 6.4200 - acc: 0.1361 - val_loss: 5.8004 - val_acc: 0.0133

Epoch 1/1

379/379 [==============================] - 234s - loss: 5.2417 - acc: 0.1739 - val_loss: 5.1995 - val_acc: 0.0289

Epoch 1/1

334/334 [==============================] - 203s - loss: 4.6242 - acc: 0.1942 - val_loss: 5.1862 - val_acc: 0.0220

Epoch 1/1

379/379 [==============================] - 234s - loss: 4.0165 - acc: 0.2328 - val_loss: 4.8735 - val_acc: 0.0351

Epoch 1/1

334/334 [==============================] - 202s - loss: 3.7839 - acc: 0.2412 - val_loss: 5.0708 - val_acc: 0.0202

Epoch 1/1

379/379 [==============================] - 234s - loss: 3.4204 - acc: 0.2830 - val_loss: 4.7858 - val_acc: 0.0347

Epoch 1/1

334/334 [==============================] - 202s - loss: 3.2999 - acc: 0.2905 - val_loss: 5.0043 - val_acc: 0.0202


**Experiment 3**
- Motivation: will batch normalization help, since there is no improvement in validation accuracy?
- Weight initialized dense layers
- Only dense layers trainable
- Batchnormalized dense layers
- Dropout = 0.5
- lr = 1e-4
- Overfits

=================================================================

Total params: 89,697,448

Trainable params: 69,640,296

Non-trainable params: 20,057,152

_________________________________________________________________

Epoch 1/1

379/379 [==============================] - 242s - loss: 5.0252 - acc: 0.0388 - val_loss: 4.4513 - val_acc: 0.0190

Epoch 1/1

334/334 [==============================] - 205s - loss: 4.5141 - acc: 0.0941 - val_loss: 4.6469 - val_acc: 0.0136


**Experiment 4**
- Motivation: overfitting in Experiment 3, so increase dropout
- Weight initialized dense layers
- Only dense layers trainable
- Batchnormalization with dense layers
- Dropout = 0.7
- lr = 1e-4
- Not much improvement

Total params: 89,697,448

Trainable params: 69,640,296

Non-trainable params: 20,057,152

Epoch 1/1

379/379 [==============================] - 240s - loss: 5.4357 - acc: 0.0119 - val_loss: 4.5188 - val_acc: 0.0255

Epoch 1/1

334/334 [==============================] - 204s - loss: 5.2047 - acc: 0.0261 - val_loss: 4.5505 - val_acc: 0.0127



**Experiment 5**
- Motivation: loss keeps decreasing in Experiment 4, so train for more epochs
- Weight initialized dense layers
- Only dense layers trainable
- Batchnormalization with dense layers
- Dropout = 0.7
- lr = 1e-4
- Validation loss is not stable across epochs

Total params: 89,697,448

Trainable params: 69,640,296

Non-trainable params: 20,057,152

_________________________________________________________________

Epoch 1/1

379/379 [==============================] - 240s - loss: 5.4000 - acc: 0.0125 - val_loss: 4.4933 - val_acc: 0.0381

Epoch 1/1

334/334 [==============================] - 205s - loss: 5.2335 - acc: 0.0199 - val_loss: 4.5120 - val_acc: 0.0213

Epoch 1/1

379/379 [==============================] - 239s - loss: 4.8773 - acc: 0.0416 - val_loss: 4.4056 - val_acc: 0.0206

Epoch 1/1

334/334 [==============================] - 205s - loss: 4.7485 - acc: 0.0563 - val_loss: 4.4966 - val_acc: 0.0138

Epoch 1/1

379/379 [==============================] - 236s - loss: 4.4608 - acc: 0.0814 - val_loss: 4.3931 - val_acc: 0.0184

Epoch 1/1

334/334 [==============================] - 204s - loss: 4.3454 - acc: 0.1029 - val_loss: 4.5639 - val_acc: 0.0119

Epoch 1/1

379/379 [==============================] - 236s - loss: 4.1044 - acc: 0.1265 - val_loss: 4.4471 - val_acc: 0.0217

Epoch 1/1

334/334 [==============================] - 204s - loss: 4.0820 - acc: 0.1358 - val_loss: 4.6092 - val_acc: 0.0133

Epoch 1/1

379/379 [==============================] - 236s - loss: 3.8478 - acc: 0.1620 - val_loss: 4.4567 - val_acc: 0.0231

Epoch 1/1

334/334 [==============================] - 204s - loss: 3.9038 - acc: 0.1616 - val_loss: 4.6779 - val_acc: 0.0119


**Experiment 6**
- Use mixiterator to mix and shuffle training examples (males and females)