# Age Estimation CNN (VGG16, VGG-Face Transfer Learning)

## Import Dependencies

In [10]:
import os
import numpy as np
from numpy import array
from numpy import argmax
import imageio
import matplotlib.pyplot as plt
import pandas as pd
import glob
import keras
import time
from keras import backend as K
from keras.layers.core import Dense
from keras.layers import Convolution2D, MaxPooling2D, Dropout
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.utils import np_utils
from keras.applications import imagenet_utils
from keras.optimizers import SGD
from keras.callbacks import CSVLogger, EarlyStopping, TensorBoard, ModelCheckpoint 
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from scipy.misc import imread, imresize

from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.layers import Input, Flatten, Dense
from keras.models import Model
from keras import backend as K
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split

%matplotlib inline



## Preprocessing

In [2]:
# Load the label table; later cells read its 'Id' column (used as the image
# file name under train/) and its 'Age' column.
image_labels = pd.read_csv('train_target.csv')

### Data Augmentation Preprocessing (Canceled)

In [None]:
# Prepare data for flow_from_dir: copy every labelled image into
#   data/train/<age>/<file>       (first 80% of the rows, in CSV order)
#   data/validation/<age>/<file>  (remaining rows)
from shutil import copyfile

data_size = len(image_labels.index)
train_size = data_size - (data_size * 0.2)

for count, (_, row) in enumerate(image_labels.iterrows()):
    name = row['Id']
    age = str(row['Age'])

    # Rows before the 80% mark feed the training tree, the rest validation.
    subset = 'train' if count < train_size else 'validation'
    target_dir = os.path.join('data', subset, age)

    # exist_ok=True creates the per-age folder on first use and is a no-op
    # afterwards, so no separate existence check is needed.
    os.makedirs(target_dir, exist_ok=True)
    copyfile(os.path.join('train', name), os.path.join(target_dir, name))
    

In [3]:
#Define Helper Function to Plot Image
def plotImage(image):
    """Display one image with a grid overlay and the title 'Image'.

    Args:
        image: Image data in any form accepted by ``Axes.imshow``.

    Returns:
        None. The figure is drawn through matplotlib's current backend.
    """
    # A single axes is enough: only one panel is ever drawn, so allocating a
    # 1x2 grid would leave an empty frame next to the image.
    _, ax = plt.subplots(1, 1)
    ax.imshow(image)
    ax.grid()
    ax.set_title('Image')

### Read images and prepare dataset

In [None]:
# Load every labelled training image as RGB, resize it, and collect the pixel
# arrays (X_train) together with their labels (Y_train).
# NOTE(review): images are resized to 244x244 here, while the "VGG-Face Model 1"
# cell declares Input(shape=(224,224,3)) and the test images are resized to
# 224x224 -- confirm which size is intended.
X_train = []
Y_train = []
for _, row in image_labels.iterrows():
    # .iloc gives explicit positional access: column 0 is used as the file
    # name, column 1 as the label. The loop variable is deliberately not
    # called `image`, which would overwrite the keras.preprocessing `image`
    # module imported at the top of the notebook.
    pixels = imread('train/' + row.iloc[0], mode='RGB')
    pixels = imresize(pixels, (244,244,3))
    X_train.append(np.array(pixels))
    Y_train.append(row.iloc[1])

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  after removing the cwd from sys.path.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  """


In [None]:
# Stack the list of per-image arrays into a single ndarray.
# `images` keeps a reference to the complete set: the train/test split cell
# rebinds X_train to the training subset only, while the later
# `img_data = np.asarray( images )` cell needs every image (img_data is paired
# with the full label matrix Y_train).
images = np.array(X_train)
X_train = images

In [None]:
# Convert the label list into a one-hot matrix; its second dimension is later
# read as the number of classes.
Y_train = to_categorical(Y_train)

In [None]:
#Split data into training and testing
# 25% of the samples are held out; random_state=3 fixes the shuffle.
# X_train is rebound to the training subset here, whereas Y_train (capital Y)
# keeps the labels of all samples -- the split labels are y_train / y_test.
X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.25, random_state=3)

In [None]:
#Image and Ages
# Show one training sample and print the class index encoded by its one-hot
# label row (argmax of that row).
num = 1043
plotImage(X_train[num])
print(argmax(y_train[num]))

In [None]:
# Array view of the image collection `images`; img_data is later passed to
# evaluate()/fit() together with the full label matrix Y_train, so `images`
# is expected to hold the complete, unsplit image set -- confirm it is
# defined before running this cell.
img_data = np.asarray( images )

In [None]:
# Number of classes = width of the one-hot label matrix.
num_classes = Y_train.shape[1]

## VGG-Face Model 1

In [None]:
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras_vggface.vggface import VGGFace

# Input tensor for 224x224 three-channel images; it is handed to the backbone
# as its entry point.
image_input = Input(shape=(224,224,3))

# VGGFace backbone ('resnet50' variant, 'vggface' weights) built without its
# classification top.
vgg_face_model = VGGFace(
    model='resnet50',
    weights='vggface',
    include_top=False,
    input_tensor=image_input,
    input_shape=(224,224,3),
)

vgg_face_model.summary()

In [None]:
# Freeze every layer except the last 31, so only the tail of the backbone
# remains trainable.
for frozen_layer in vgg_face_model.layers[:-31]:
    frozen_layer.trainable = False

In [None]:
# Classification head stacked on the backbone's 'avg_pool' output:
# flatten -> Dense(5000) -> Dropout(0.5) -> Dense(4096) -> softmax over classes.
# NOTE(review): 'fc8' and 'fc9' are created without an activation argument --
# confirm that leaving them without a non-linearity is intended.
pooled = vgg_face_model.get_layer('avg_pool').output
head = Flatten(name='flatten')(pooled)
head = Dense(5000, name='fc8')(head)
head = Dropout(0.5)(head)
head = Dense(4096, name='fc9')(head)
out = Dense(num_classes, name='classifier', activation='softmax')(head)

# Full model: backbone input -> new classifier output.
custom_vgg_face_model = Model(vgg_face_model.input, out)

In [None]:
# Print the layer-by-layer summary of the assembled model (backbone + new head).
custom_vgg_face_model.summary()

In [None]:
# SGD with momentum and learning-rate decay; categorical cross-entropy loss
# with accuracy reported as a metric.
sgd = SGD(lr=1e-4, momentum=0.9, decay=1e-3)
custom_vgg_face_model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

In [None]:
# Train model 1 on the split data, validating on the held-out part.
t = time.time()  # start timestamp; the timing report at the bottom is disabled

# Early stopping watches val_loss with a patience of 3 epochs.
early_stop = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=3,
    verbose=0,
    mode='auto',
)
callback_list = [early_stop]

hist = custom_vgg_face_model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=16,
    epochs=75,
    callbacks=callback_list,
    verbose=1,
)
#print('Training time: %s' % (t - time.time()))


In [None]:
# Evaluate model 1 on img_data against the full one-hot label matrix Y_train.
# NOTE(review): img_data presumably holds all images, i.e. including the
# samples used for training -- confirm this is the intended evaluation set.
custom_vgg_face_model.evaluate(img_data, Y_train)

In [None]:
from keras.models import model_from_json

# Persist model 1 in two parts: the architecture as JSON text and the
# weights as an HDF5 file.
architecture_json = custom_vgg_face_model.to_json()
with open("vgg_face_trained.json", "w") as arch_file:
    arch_file.write(architecture_json)

# serialize weights to HDF5
custom_vgg_face_model.save_weights("vgg_face_model_weights.h5")
print("Saved model to disk")

In [None]:
# Run model 1 on the test images.
# NOTE: testing_data is assigned in a cell further down the notebook, so that
# cell has to be executed before this one.
predictions = custom_vgg_face_model.predict(testing_data)

In [None]:
# Spot-check one test sample: print its predicted class index, dump the whole
# prediction array, and show the corresponding image.
index = 208
print(argmax(predictions[index]))
print(predictions)
plotImage(testing_data[index])

In [None]:
from keras.models import load_model
from keras.models import model_from_json

# Rebuild the model architecture from the JSON file 'vgg_face_trained.json'.
# The context manager closes the file handle even if reading raises.
with open('vgg_face_trained.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

In [None]:
# Restore the trained weights from the HDF5 file into the rebuilt architecture.
loaded_model.load_weights("vgg_face_model_weights.h5")
print("Loaded model from disk")

In [None]:
# Predict on the test images with the model reloaded from disk.
pred = loaded_model.predict(testing_data)

In [None]:
# Spot-check one test sample of the reloaded model: predicted class index
# plus the image itself.
s = 4010
print(argmax(pred[s]))
plotImage(testing_data[s])

## VGG-Face Experiment 2

In [None]:
from keras_vggface.vggface import VGGFace

# Second experiment: VGGFace backbone ('resnet50' variant, 'vggface' weights)
# without its top, declared for 244x244 three-channel inputs.
vgg_face_model_2 = VGGFace(
    model='resnet50',
    weights='vggface',
    include_top=False,
    input_shape=(244,244,3),
)

# Freeze the entire backbone; only layers added afterwards will be trainable.
for backbone_layer in vgg_face_model_2.layers:
    backbone_layer.trainable = False

vgg_face_model_2.summary()

In [None]:
# Classification head for experiment 2, stacked on the backbone's 'avg_pool'
# output: two ReLU Dense layers with dropout, one Dense layer without an
# activation argument ('fc9'), then softmax over the classes.
pooled = vgg_face_model_2.get_layer('avg_pool').output
head = Flatten(name='flatten')(pooled)
head = Dense(5000, name='fc6', activation='relu')(head)
head = Dropout(0.5)(head)
head = Dense(4096, name='fc8', activation='relu')(head)
head = Dropout(0.5)(head)
head = Dense(4096, name='fc9')(head)
out = Dense(num_classes, name='classifier', activation='softmax')(head)

# The name vgg_face_model_2 is rebound from the bare backbone to the full model.
vgg_face_model_2 = Model(vgg_face_model_2.input, out)

In [None]:
# Print the layer-by-layer summary of the experiment-2 model (backbone + head).
vgg_face_model_2.summary()

In [None]:
# SGD with momentum and learning-rate decay; categorical cross-entropy loss
# with accuracy reported as a metric.
sgd = SGD(lr=0.0005, momentum=0.95, decay=1e-3)
vgg_face_model_2.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

In [None]:
# Train the experiment-2 model and report the elapsed wall-clock time.
t = time.time()  # start timestamp

#X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.25, random_state=42)

#ckpt_save = "ckpt_vgg_2_weights-{epoch:02d}.hdf5"
#checkpoint = ModelCheckpoint(ckpt_save, monitor='val_loss', verbose=1, save_best_only=True)

# Checkpoint file name pattern; epoch number and validation accuracy are
# filled in when a checkpoint is written.
# NOTE(review): the metric key is 'val_acc' in older Keras releases and
# 'val_accuracy' in newer ones -- confirm for the installed version.
filepath="vgg_face_weights_improvment-{epoch:02d}-{val_acc:.2f}.hdf5"
early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
#tensorboard = TensorBoard(log_dir=".", histogram_freq=2000, write_graph=True, write_images=False)
callback_list = [early_stop, checkpoint]

hist = vgg_face_model_2.fit(X_train, y_train, batch_size=16, validation_data=(X_test, y_test), epochs=75, callbacks = callback_list, verbose=1)

# Elapsed time is "now minus start", so the reported duration is positive.
print('Training time: %s' % (time.time() - t))

In [None]:
# Release the experiment-2 model: reset the Keras backend session, then drop
# the Python reference. The experiment-2 model is bound to the name
# vgg_face_model_2, so that is the name to delete.
K.clear_session()
del vgg_face_model_2

## VGG16 Model

In [None]:

# Attach a new softmax layer with num_classes outputs to the output of the
# base model's second-to-last layer and wrap the result as custom_vgg_model.
# NOTE(review): `model` is not assigned in the cells visible here -- presumably
# a VGG16 instance built on image_input; confirm before running.
# NOTE(review): last_layer is assigned but never used; the Dense layer takes
# model.layers[-2].output instead -- verify both refer to the same layer.
last_layer = model.get_layer('fc2').output
out = Dense(num_classes, activation='softmax', name='predictions')(model.layers[-2].output)
custom_vgg_model = Model(image_input, out)
custom_vgg_model.summary()

In [None]:
# Print the compute devices reported by TensorFlow's device_lib.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
# Train the VGG16-based classifier on img_data / Y_train (no validation data
# is passed) and report the elapsed wall-clock time.
t = time.time()  # start timestamp
hist = custom_vgg_model.fit(img_data, Y_train, batch_size=50, epochs=35, verbose=1)

# Elapsed time is "now minus start", so the reported duration is positive.
print('Training time: %s' % (time.time() - t))

In [None]:
import re

# Collect the test image paths. They are kept under their own name so the
# validation split X_test, which the fit() cells pass as validation_data, is
# not overwritten by a list of file paths.
test_paths = glob.glob('test/*.jpg')

# Order the paths by the number formed from all digits in the path (ties
# broken by the path itself). The raw string keeps the regex class \D intact
# instead of relying on an invalid string escape.
X_test_sorted = sorted(test_paths, key=lambda x: (int(re.sub(r'\D', '', x)), x))

# Load each image as RGB and resize it to 224x224, preserving the sorted order.
testing_images = []
for path in X_test_sorted:
    pixels = imread(path, mode='RGB')
    pixels = imresize(pixels, (224,224,3))
    testing_images.append(np.asarray(pixels))

In [None]:
# Stack the loaded test images into a single array for predict().
testing_data = np.asarray(testing_images)

In [None]:
# Run the VGG16-based classifier on the test images; this rebinds the
# notebook-wide name `predictions`.
predictions = custom_vgg_model.predict(testing_data)

In [None]:
# Spot-check test sample 1619: predicted class index, the full prediction
# array, and the image itself.
print(argmax(predictions[1619]))
print(predictions)
plotImage(testing_data[1619])

In [None]:
# Print the class index encoded by the one-hot label row 4 of the full label matrix.
print(argmax(Y_train[4]))

In [None]:
from keras.models import model_from_json

# Persist the VGG16-based classifier: architecture as JSON, weights as HDF5.
# Both files are written from custom_vgg_model so that the saved weights
# match the saved architecture when they are loaded back together.
model_json = custom_vgg_model.to_json()
with open("model_correct_trained.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
custom_vgg_model.save_weights("custom_vgg_model.h5")
print("Saved model to disk")

In [None]:
# Show the dimensions of the stacked test image array.
print(testing_data.shape)

In [None]:
# Imported here so the cell does not depend on another cell having run first.
import re

# Build the two columns of the submission: test file names and predicted
# class indices.
# NOTE(review): assumes 'test' contains exactly the *.jpg files that were fed
# to predict(); extra files would shift or break the pairing -- confirm.
file_names = [os.path.basename(entry) for entry in os.listdir('test')]

# `predictions` was computed on the images ordered by the number in their
# name, so the names are sorted with the same key: row i of `predictions`
# then belongs to ordered_files[i]. The raw string keeps the regex class \D
# intact instead of relying on an invalid string escape.
ordered_files = sorted(file_names, key=lambda x: (int(re.sub(r'\D', '', x)), x))

# One predicted class index per listed file, taken in prediction order.
ages = [argmax(predictions[position]) for position in range(len(ordered_files))]

files = pd.DataFrame(ordered_files)
age = pd.DataFrame(ages)

In [None]:
# Join file names and predicted class indices side by side and name the two columns.
result = pd.concat([files, age],axis=1)
result.columns = ['Id', 'Expected']

In [None]:
# Write the result table to CSV.
# NOTE(review): with the default arguments the DataFrame index is written as
# an extra first column -- confirm the consumer of this file expects that.
result.to_csv('answers_vgg_face_1.csv')

In [None]:
# Print the class index encoded by the one-hot label row 2 of the full label matrix.
print(argmax(Y_train[2]))

In [None]:
# Display the first test image.
plotImage(testing_data[0])