In [1]:
import logging
 
import datetime
import pandas as pd
import os

from PIL import Image
import numpy as np
from numpy import save, load

import keras
from keras.optimizers import SGD, Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras import optimizers, losses, activations, models
from keras.layers import Convolution2D, Dense, Input, Flatten, Dropout, MaxPooling2D, BatchNormalization, \
    GlobalMaxPool2D, Concatenate, GlobalMaxPooling2D, GlobalAveragePooling2D, Lambda, Conv2D
from keras.applications.resnet50 import ResNet50
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model, load_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras import backend as K
from keras.callbacks import TensorBoard

from tqdm import tqdm
from collections import Counter

Using TensorFlow backend.


In [2]:
# Configure the root logger once so DEBUG-and-above records are emitted.
logging.basicConfig(level=logging.DEBUG)

In [3]:
def read_and_resize(filepath, input_shape=(224, 224)):
    """Load an image file as a max-normalized ``float32`` RGB array.

    Args:
        filepath: Anything ``PIL.Image.open`` accepts (path or file object).
        input_shape: Size tuple passed unchanged to ``Image.resize``, which
            interprets it as ``(width, height)``.

    Returns:
        numpy.ndarray: The resized RGB pixels as ``float32``, divided by the
        image's own maximum pixel value plus 0.001, so every value lies in
        ``[0, 1)``.
    """
    # The context manager releases the underlying file handle as soon as
    # ``convert`` has produced an independent RGB copy, even if decoding fails.
    with Image.open(filepath) as source:
        rgb = source.convert('RGB')
    resized = rgb.resize(input_shape)
    pixels = np.array(resized, dtype="uint8")
    # Scaling is per image (by its own maximum); the 0.001 offset keeps the
    # division defined for an all-black image.
    return np.array(pixels / (np.max(pixels) + 0.001), dtype="float32")

# Random augmentation settings shared by every call to ``augment``.
_AUGMENTATION_OPTIONS = {
    "rotation_range": 20,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "horizontal_flip": True,
    "brightness_range": [0.2, 1.5],
    "zoom_range": 0.2,
}

datagen = ImageDataGenerator(**_AUGMENTATION_OPTIONS)

def augment(im_array):
    """Apply one random ``datagen`` transformation to a single image array.

    ``datagen`` is configured with ``brightness_range``. Keras implements the
    brightness shift by converting the array to a PIL image and back, which
    returns pixel values on a 0-255 scale regardless of the input scale.
    Images from ``read_and_resize`` are in ``[0, 1)``, so without correction
    augmented batches would be roughly 255x larger than unaugmented ones.

    Args:
        im_array: Image array to transform.

    Returns:
        The transformed array. If the input was unit-scaled (max <= 1) and the
        transform pushed it above 1, the result is divided by 255 and returned
        as ``float32``; otherwise the transform output is returned untouched.
    """
    source = np.asarray(im_array)
    # Only inputs that were already unit-scaled are corrected, so callers that
    # deliberately pass 0-255 images keep the previous behaviour.
    unit_scaled_input = source.size > 0 and np.max(source) <= 1.0

    augmented = datagen.random_transform(im_array)

    if unit_scaled_input and np.max(augmented) > 1.0:
        augmented = np.asarray(augmented, dtype="float32") / np.float32(255.0)
    return augmented

In [4]:
def gen(df, batch_size=8, aug=False):
    """Yield ``(images, labels)`` batches from ``df`` forever.

    Args:
        df: DataFrame with a ``path`` column (image files to load) and an
            ``out_ages`` column (used as the labels).
        batch_size: Maximum number of rows per batch; the last batch of each
            pass may be smaller.
        aug: When true, every loaded image is passed through ``augment``.

    Yields:
        tuple: ``(images, labels)`` where ``images`` stacks the
        ``read_and_resize`` result of each row and ``labels`` is the batch's
        ``out_ages`` values with a trailing axis of length 1 appended.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``df`` has no rows.
            Without this check an empty frame would spin in the endless loop
            without ever yielding. Because this is a generator, the error
            surfaces on the first ``next()`` call.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))
    n_rows = df.shape[0]
    if n_rows == 0:
        raise ValueError("gen() needs a non-empty DataFrame")

    # Shuffled once up front; every subsequent pass replays the same order.
    shuffled = df.sample(frac=1)

    def load(file_path):
        image = read_and_resize(file_path)
        return augment(image) if aug else image

    while True:
        for start in range(0, n_rows, batch_size):
            batch = shuffled[start:start + batch_size]
            images = np.array([load(file_path) for file_path in batch.path.values])
            # Trailing axis gives one label per row as a column.
            labels = np.array(batch.out_ages.values)[..., np.newaxis]
            yield images, labels

In [5]:
def get_model(optimizer,n_classes=1):
    """Build and compile a ResNet50 backbone with a dense classification head.

    Args:
        optimizer: Optimizer handed to ``Model.compile``.
        n_classes: Width of the output layer. ``1`` selects a sigmoid output
            trained with binary cross-entropy; any other value selects a
            softmax output trained with sparse categorical cross-entropy.

    Returns:
        The compiled model, named ``"base_model"``, tracking the ``acc`` metric.
    """
    backbone = ResNet50(weights="./resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5", include_top=False)

    # Head: pooled backbone features -> 1000 -> 750 -> 350 -> 100 -> output,
    # with dropout after pooling, after the first dense layer and before the
    # output layer.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dropout(0.2)(features)
    features = Dense(1000, activation="relu")(features)
    features = Dropout(0.2)(features)
    for units in (750, 350, 100):
        features = Dense(units, activation="relu")(features)
    features = Dropout(0.2)(features)

    single_output = n_classes == 1
    output_activation = "sigmoid" if single_output else "softmax"
    loss_name = "binary_crossentropy" if single_output else "sparse_categorical_crossentropy"

    outputs = Dense(n_classes, activation=output_activation)(features)

    model = Model(backbone.input, outputs, name="base_model")
    model.compile(loss=loss_name, metrics=['acc'], optimizer=optimizer)
    return model

In [6]:
# def get_model(optimizer,n_classes=1):
#     model = keras.Sequential()
#     model.add(Conv2D(filters=32, kernel_size=(5,5), input_shape=(256,256,3),padding='same', activation='relu'))
#     model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))
#     model.add(Conv2D(filters=64, kernel_size=(5,5), activation='relu', padding='same'))
#     model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))
#     model.add(Conv2D(filters=128, kernel_size=(5,5), activation='relu', padding='same'))
#     model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))
#     model.add(Conv2D(filters=128, kernel_size=(5,5), activation='relu', padding='same'))
#     model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))
#     model.add(Conv2D(filters=64, kernel_size=(5,5), activation='relu', padding='same'))
#     model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))
#     model.add(GlobalAveragePooling2D())
#     #model.add()
#     #model.add(Dense(47150, activation = 'relu'))
#     #model.add(Dense(47150/2, activation = 'relu'))
#     #model.add(Dense(47150/4, activation = 'relu'))
#     #model.add(Dense(47150/8, activation = 'relu'))
#     #model.add(Dense(n_classes, activation = 'softmax'))
        
#     if n_classes == 1:
#         model.add(Dense(n_classes, activation="sigmoid"))#(x)
#     else:
#         model.add(Dense(n_classes, activation="softmax"))#(x)
#     #base_model = Model(base_model.input, x, name="base_model")
#     if n_classes == 1:
#         model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=optimizer)
#     else:
#         model.compile(loss="sparse_categorical_crossentropy", metrics=['acc'], optimizer=optimizer)

#     return model

In [None]:
if __name__ == "__main__":
    # Training entry point: split the labelled CSV into train/validation,
    # build the ResNet50-based classifier and fit it from the batch
    # generators, checkpointing the best model and lowering the learning rate
    # when validation loss plateaus.
    base_path = "./Dataset-copy/"

    dict_age = {'(0, 2)' : 0,
                '(3, 5)' : 1,
                '(6, 10)' : 2,
                '(11, 15)' : 3,
                '(16, 20)' : 4,
                '(21, 30)' : 5,
                '(31, 40)' : 6,
                '(41, 50)' : 7,
                '(51, 60)' : 8,
                '(61, 70)' : 9,
                '(71, 80)' : 10,
                '(81, 90)' : 11,
                '(91, 100)' : 12}

    # base_path, dict_age and the bookkeeping names below (bag, all_indexes,
    # accuracies, cnt_ave, predictions) are not read in this cell. They are
    # kept because the cell runs at notebook scope and other cells may use them.
    bag = 3
    all_indexes = list(range(5))
    accuracies = []

    print("Reading train and test CSV files ")
    train_df = pd.read_csv("expanded_data_shuffled.csv")
    # Fixed seed (the same value the later split cells use) so the
    # train/validation partition is reproducible across kernel restarts.
    tr_tr, tr_val = train_test_split(train_df, test_size=0.1, random_state=100)
    print("Reading Done.")
    cnt_ave = 0
    predictions = 0

    file_path = "imdb_age_recog_weights.h5"

    print("Generating callback_list")

    # Keep only the best model (by validation accuracy) on disk.
    checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

    # Cut the learning rate 10x after 3 epochs without val_loss improvement,
    # never going below 1e-5.
    reduce_on_plateau = ReduceLROnPlateau(monitor="val_loss",
                                          mode="min",
                                          factor=0.1,
                                          patience=3,
                                          verbose=1,
                                          min_lr=0.00001)

    callbacks_list = [checkpoint, reduce_on_plateau]

    print("Done Generating callbacklist.")
    print("generating Model")
    optimizer = Adam(lr=0.001)
    # NOTE(review): 99 softmax outputs with sparse categorical cross-entropy
    # presumably means out_ages holds integer ages 0-98 - confirm against the CSV.
    model = get_model( optimizer,n_classes=99)
    print("Done generating model")
    print("Running Fit_generator")
    batch_size = 8
    # gen() is a plain Python generator, not a keras.utils.Sequence. Keras
    # documents that combining such a generator with several workers and
    # use_multiprocessing=True duplicates data across processes, so it is
    # consumed by a single worker instead.
    model.fit_generator(gen(tr_tr, batch_size=batch_size, aug=True),
                        validation_data=gen(tr_val, batch_size=batch_size),
                        epochs=200,
                        verbose=1,
                        callbacks=callbacks_list,
                        steps_per_epoch=500,
                        validation_steps=100,
                        workers=1,
                        use_multiprocessing=False)
    # The only file written is the ModelCheckpoint's best-so-far model.
    print("Trained Model saved to disk")

Reading train and test CSV files 
Reading Done.
Generating callback_list
Done Generating callbacklist.
generating Model
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.








Done generating model
Running Fit_generator






Epoch 1/200



In [None]:

# Carve a held-out test set out of the IMDB metadata with a fixed seed:
# 10% of the rows are set aside, and 10% of that slice becomes the test split.
train_df = pd.read_csv("imdb_dataset.csv")
tr_tr, holdout_df = train_test_split(train_df, test_size=0.1, random_state=100)
tr_val, tr_test = train_test_split(holdout_df, test_size=0.1, random_state=100)

test_paths = tr_test.path.values
test_images = np.array([read_and_resize(image_path) for image_path in test_paths])
# NOTE(review): despite its name this holds the image file paths, not labels;
# a later cell reassigns test_labels.
test_labels = np.array(list(test_paths))

In [None]:
# Persist the preprocessed test images so later runs can reload them.
np.save("imdb_test_images.npy", test_images)

In [None]:
# Use the age values themselves as labels, exactly as gen() does for training.
# The previous `int(g == "m")` comparison is a gender encoding; applied to an
# age column it yields 0 for every row.
# NOTE(review): assumes out_ages in imdb_dataset.csv is already in the label
# form the model was trained on - confirm.
test_labels = np.array(tr_test.out_ages.values)

In [None]:
# Write the labels (not the image tensor, as before) to the labels file.
save("imdb_test_labels.npy",test_labels)

In [None]:
# Number of labels built from the tr_test rows.
len(test_labels)

In [None]:
# Reload the model from the file the training cell's ModelCheckpoint writes to.
model = load_model("imdb_age_recog_weights.h5")

In [None]:
# Run the loaded model over every image in test_images.
y_predict = model.predict(test_images)

In [None]:
# Locate the highest-scoring class for one test sample.
index = 0
sample_scores = y_predict[index]
prob_max_class = max(sample_scores)
# nonzero() on the boolean mask returns a tuple of index arrays (the same
# result single-argument np.where gives), so the class id is age_class[0][0].
age_class = np.nonzero(sample_scores == prob_max_class)

In [None]:
# First index at which the selected sample's score equals its maximum.
age_class[0][0]

In [None]:
# Age-range label -> class index; indices follow the order of the list below.
dict_age = {
    age_range: class_idx
    for class_idx, age_range in enumerate([
        '(0, 2)',
        '(3, 5)',
        '(6, 10)',
        '(11, 15)',
        '(16, 20)',
        '(21, 30)',
        '(31, 40)',
        '(41, 50)',
        '(51, 60)',
        '(61, 70)',
        '(71, 80)',
        '(81, 90)',
        '(91, 100)',
    ])
}

In [None]:
# dict_age maps range label -> class index, so indexing it with a predicted
# (integer) class always raised KeyError; look the index up in the inverse map.
# NOTE(review): this assumes the loaded model predicts the 13 bucket indices of
# dict_age. The training cell builds a 99-class model, for which indices above
# 12 have no entry here - confirm which labelling the saved weights use.
index_to_age_range = {class_idx: age_range for age_range, class_idx in dict_age.items()}
index_to_age_range[int(age_class[0][0])]