# In [0]:
### Hunter Mitchell - 06/18/2020

# Description: This code predicts either the gender or the age of a person from a picture (see PREDICT_AGE).
# It is trained on up to 3000 images from the IMDB-WIKI dataset and uses a DenseNet CNN
# (DenseNet-201 for gender, DenseNet-121 for age).



#!pip install -q efficientnet


import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import date

import cv2
import os

from tensorflow.keras.applications import DenseNet201, DenseNet121
from tensorflow.keras.models import load_model
import tensorflow.keras.layers as L

from sklearn.model_selection import train_test_split

#import efficientnet.tfkeras as efn






############# SETTINGS ################


# Log the library versions so a run can be reproduced later.
print(tf.__version__)
print(tf.keras.__version__)


# True  -> train the age regression model (get_age_model)
# False -> train the gender classification model (get_gender_model)
PREDICT_AGE = True

# When TESTING is True only the first TESTING_SIZE label rows are used.
TESTING = True
TESTING_SIZE = 3000

# SEED is the random_state of the train/validation split;
# IMG_SIZE is the side length (pixels) every image is resized to.
SEED = 2016
IMG_SIZE = 256

# Training length and mini-batch size passed to model.fit.
EPOCHS = 15
BATCH_SIZE = 32

# Input locations: label CSV, directory of training images, and the two
# pictures used for a quick prediction check after training.
LABELS_PATH = "/kaggle/input/facelabels/faceLabels.csv"
IMAGES_PATH = "/kaggle/input/faceimages/wiki_crop_stuff"
PRED_PIC_PATH = '/kaggle/input/predimages2/hunterpic05.jpg'
PRED_PIC_PATH2 = '/kaggle/input/predimages/kyla.jpeg'


############# FUNCTIONS ###############



def format_image(path):
    """Load one training image and resize it to IMG_SIZE x IMG_SIZE.

    Args:
        path: Image path relative to IMAGES_PATH.

    Returns:
        The resized image as returned by cv2.resize. Pixel values are not
        rescaled (the ``/ 255.0`` normalization is deliberately left off).

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    full_path = IMAGES_PATH + '/' + path
    img = cv2.imread(full_path)
    # cv2.imread signals a missing/corrupt file by returning None instead of
    # raising, which would otherwise surface as an opaque error in cv2.resize.
    if img is None:
        raise FileNotFoundError('Could not read image: ' + full_path)
    return cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    

def show_image(img):
    """Display an OpenCV (BGR) image with matplotlib.

    The channels are converted from BGR to RGB first because that is the
    order plt.imshow expects.
    """
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()


def reshape_list(old_list):
    """Stack a sequence of equally-shaped items into a single numpy array.

    Args:
        old_list: Any iterable of items (list, pandas Series of image
            arrays, ...).

    Returns:
        numpy.ndarray built from the items in iteration order; a sequence of
        n arrays of shape S becomes one array of shape (n, *S).
    """
    # Iterate instead of indexing with old_list[i]: on a pandas Series the
    # latter is a label lookup and fails unless the index is exactly 0..n-1.
    return np.asarray(list(old_list))


### learning rate schedule
def lrfn(epoch, *, lr_start=0.0001, lr_max=0.0003, lr_min=0.00001,
         lr_rampup_epochs=5, lr_sustain_epochs=2, lr_exp_decay=.7):
    """Return the learning rate for a given epoch.

    The schedule has three phases: a linear ramp from lr_start to lr_max,
    a plateau at lr_max, then an exponential decay towards lr_min.

    Args:
        epoch: Zero-based epoch index.
        lr_start: Learning rate at epoch 0.
        lr_max: Peak learning rate reached after the ramp-up.
        lr_min: Floor the decay converges to.
        lr_rampup_epochs: Number of epochs of linear ramp-up.
        lr_sustain_epochs: Number of epochs held at lr_max.
        lr_exp_decay: Per-epoch multiplicative decay factor.

    Returns:
        The learning rate (float) to use for this epoch.

    The tuning parameters are keyword-only on purpose: a second positional
    argument (e.g. the current learning rate a scheduler callback may try to
    pass) is still rejected with TypeError, exactly as with the original
    single-argument signature, instead of silently overriding lr_start.
    """
    if epoch < lr_rampup_epochs:
        return (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start

    decay_start = lr_rampup_epochs + lr_sustain_epochs
    if epoch < decay_start:
        return lr_max

    return (lr_max - lr_min) * lr_exp_decay**(epoch - decay_start) + lr_min


def get_gender_model():
    """Build and compile the binary gender classifier.

    An ImageNet-initialised DenseNet201 backbone (without its classification
    top) is followed by global average pooling and a single sigmoid unit.
    The model is compiled with Adam, binary cross-entropy loss and accuracy
    as the reported metric.

    Returns:
        The compiled tf.keras.Sequential model.
    """
    backbone = DenseNet201(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    pooling = L.GlobalAveragePooling2D()
    output = L.Dense(1, activation='sigmoid')

    gender_model = tf.keras.Sequential([backbone, pooling, output])
    gender_model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return gender_model



def get_age_model():
    """Build and compile the age regression model.

    An ImageNet-initialised DenseNet121 backbone (without its classification
    top) is followed by global average pooling and a single linear output
    unit. The model is compiled with Adam and mean absolute error as the
    loss; MAE and MSE are reported as metrics.

    Returns:
        The compiled tf.keras.Sequential model.
    """
    backbone = DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    pooling = L.GlobalAveragePooling2D()
    output = L.Dense(1)

    age_model = tf.keras.Sequential([backbone, pooling, output])
    age_model.compile(
        loss='mae',
        optimizer='adam',
        metrics=['mae', 'mse'],
    )
    return age_model



########## LOAD THE DATA ##############



# from https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/
# Label table, ordered by image path and re-indexed 0..n-1 so positional and
# label-based row selection agree further down.
data = pd.read_csv(LABELS_PATH).sort_values(by='full_path').reset_index(drop=True)

print(data.head())
#print(data.describe())



# Two hand-picked pictures used to sanity-check the trained model.
# cv2.imread returns None (it does not raise) when a file is missing or
# unreadable, so fail early with a message that names the offending path.
temp_pred_img = cv2.imread(PRED_PIC_PATH)
if temp_pred_img is None:
    raise FileNotFoundError('Could not read prediction image: ' + PRED_PIC_PATH)
pred_img = cv2.resize(temp_pred_img, (IMG_SIZE, IMG_SIZE))
pred_img = pred_img.reshape(1, IMG_SIZE, IMG_SIZE, 3)  # add a batch axis for model.predict

temp_pred_img2 = cv2.imread(PRED_PIC_PATH2)
if temp_pred_img2 is None:
    raise FileNotFoundError('Could not read prediction image: ' + PRED_PIC_PATH2)
pred_img2 = cv2.resize(temp_pred_img2, (IMG_SIZE, IMG_SIZE))
pred_img2 = pred_img2.reshape(1, IMG_SIZE, IMG_SIZE, 3)  # add a batch axis for model.predict



########## CLEAN AND PREPROCESS DATA ##########



# get labels
# Age mode needs the birth date and the photo year to derive an age;
# gender mode uses the gender column directly (also data.dob or data.name).
if PREDICT_AGE:
    train_labels = data[['dob', 'photo_taken']]
else:
    train_labels = data.gender

# In TESTING mode only the first TESTING_SIZE rows are kept to shorten runs.
if TESTING:
    train_labels = train_labels[:TESTING_SIZE]

# Detach from `data` so that adding columns to train_labels later does not
# write into a slice of the original frame (SettingWithCopyWarning).
train_labels = train_labels.copy()

        

        


# preprocess age column, specify ages, and set up labels
if PREDICT_AGE:

    # Only the year a photo was taken is known; July 1st (mid-year) is used
    # as the capture date and expressed as a day ordinal.
    photo_taken_date = train_labels.photo_taken.apply(lambda year: date(year, 7, 1).toordinal())

    # NOTE(review): the IMDB-WIKI metadata describes dob as a MATLAB serial
    # date number, which is offset by 366 days from Python's date ordinal.
    # If faceLabels.csv stores the raw value, ages computed here are about
    # one year too low - confirm how the CSV was generated.
    age_days = photo_taken_date - train_labels.dob

    # Whole years, truncated (365-day years, leap days ignored).
    age = age_days.apply(lambda days: int(days / 365))

    # Keep plausible ages only: 10 <= age < 80.
    in_range = (age >= 10) & (age < 80)
    #age = age.sort_values(ascending=False) # if I want the labels to be sorted ascending or descending

    # The labels end up as a one-column frame ('age') that keeps the original
    # row index, so the matching image paths can be looked up afterwards.
    train_labels = age[in_range].to_frame(name='age')




# drop null values 
# Gender labels can be missing; keep only the rows that have one.
if not PREDICT_AGE:
    train_labels = train_labels.dropna()


# Select the image paths of exactly the rows that still have a label, then
# renumber both paths and labels 0..n-1 so they stay aligned position-wise.
labelled_rows = train_labels.index
train_paths = data.full_path[labelled_rows].reset_index(drop=True)
train_labels = train_labels.reset_index(drop=True)


# Quick summary of what is about to be loaded.
for summary_source in (train_paths, train_labels):
    print(summary_source.describe())


# Read and resize every image; the result is a Series holding one array per row.
train_images = train_paths.apply(format_image)
print(train_images.shape)

# Stack the per-row arrays into one numpy array that Keras can consume.
train_images = reshape_list(train_images)
print(train_images.shape)




print(train_labels.head())
#for i in range(5):
#    show_image(train_images[i])



#wait = input('PAUSING')




# Hold out 15% of the samples for validation; SEED makes the split repeatable.
split_parts = train_test_split(
    train_images,
    train_labels,
    test_size=0.15,
    random_state=SEED,
)
train_images, valid_images, train_labels, valid_labels = split_parts




########### DEFINE THE MODEL ################



# Callback that sets the learning rate from lrfn at the start of each epoch.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)


# Pick the architecture matching the selected task.
build_model = get_age_model if PREDICT_AGE else get_gender_model
model = build_model()


model.summary()



######## TRAIN THE MODEL ##########



# Training options gathered in one place; verbose=2 prints one line per epoch.
fit_options = dict(
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    #steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[lr_schedule],
    validation_data=(valid_images, valid_labels),
    verbose=2,
)

history = model.fit(train_images, train_labels, **fit_options)



####### SAVE THE MODEL ###########


#model.save('/kaggle/output/kaggle/working/current_model_kaggle')
# NOTE(review): the target has no file extension; newer Keras releases may
# require a '.keras' or '.h5' suffix here - confirm against the installed version.
model.save('current_model_kaggle')






# check predictions: run the model on each sample picture, print the raw
# output and display the picture it belongs to.
sample_outputs = []
for sample_batch in (pred_img, pred_img2):
    sample_output = model.predict(sample_batch)

    print(sample_output)
    show_image(sample_batch[0])

    sample_outputs.append(sample_output)

prediction, prediction2 = sample_outputs





### load model

#model2 = load_model("/kaggle/working/current_model_kaggle")


#prediction = model2.predict(pred_img)

#print(prediction)
#show_image(pred_img[0])


#prediction2 = model2.predict(pred_img2)

#print(prediction2)
#show_image(pred_img2[0])



