In [58]:
import tensorflow as tf
import keras
import cv2
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16
from keras.models import Sequential
from keras.layers import Conv2D , Dense, Activation, MaxPooling2D,Flatten
from keras import optimizers
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

import argparse

from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np
import cv2

In [59]:
def load_dataset(file_train, file_test, size=1.0):
    # loadtxt gives results with that b' char
    # data_train = np.loadtxt(file_train, dtype='str', delimiter=',')
    # data_test = np.loadtxt(file_test, dtype='str', delimiter=',')

    data_train = np.genfromtxt(file_train, dtype='str', delimiter=',')
    data_test = np.genfromtxt(file_test, dtype='str', delimiter=',')

    xtr, ytr = data_train[:,1], data_train[:,0].astype(int)
    xva, yva = data_test[:,1], data_test[:,0].astype(int)

    # use 15% of train data for testing
    xtr, xte, ytr, yte = train_test_split(xtr, ytr, test_size=0.30)

    if size >= 1.0 and size <=0:
        return xtr, ytr, xva, yva, xte, yte
    
    discard_size = 1.0 - size
    xtr, _, ytr, _ = train_test_split(xtr, ytr, test_size=discard_size)
    #xva, _, yva, _ = train_test_split(xva, yva, test_size=discard_size)
    xte, _, yte, _ = train_test_split(xte, yte, test_size=discard_size)
    
    xtr, ytr = reshape_dataset(xtr, ytr.ravel())
    xva, yva = reshape_dataset(xva, yva.ravel())
    xte, yte = reshape_dataset(xte, yte.ravel())

    return xtr, ytr, xva, yva, xte, yte

def reshape_dataset(x, y):
    # make sure classes are between 0 and num_classes
    new_y = np.ones_like(y)*(-1)
    num_classes = np.unique(y)
    for idx, label in enumerate(num_classes):
        new_y[y == label] = idx
    
    y = to_categorical(new_y)
    
    return x, y

def get_model_1(learning_rate):
    VGG=VGG16(include_top=False, weights='imagenet', input_tensor=None, input_shape=(64,64,3), pooling=None)
    model = Sequential()
    for l in VGG.layers:
        model.add(l)
    #model.add(Flatten(input_shape=VGG.output_shape[1:]))    
    model.add(Flatten())    
    model.add(Dense(7, activation='softmax'))

    for layer in model.layers:
        layer.trainable = False

    for layer in model.layers[19:20]:
        layer.trainable = True

    optimizers.SGD(lr=learning_rate, momentum=0.0, decay=0.0, nesterov=False )
    model.compile(optimizer='SGD', loss="categorical_crossentropy", metrics=["accuracy"])


    return model


def save_plot_metrics(log_file_name, history):
    keys = history.history.keys()

    f, ax = plt.subplots(len(keys), 1, figsize=(5, 22))

    for idx, k in enumerate(keys):
        ax[idx].plot(history.history[k])
        ax[idx].set_title("model " + k)
        ax[idx].set_ylabel(k)
        ax[idx].set_xlabel('epoch')
    
    f.savefig("{:s}.png".format(log_file_name), dpi=90)

def save_log_metrics(log_file_name, hyper, history):
    header = ""

    for key in hyper:
        header = header + ", " + key + ": " + str(hyper[key])

    header = header[2:]

    with open(log_file_name + ".txt", "w+") as log_file:
        log_file.write(header+"\n")
        
        keys = history.history.keys()
        head = ""
        
        c = 0
        for k in keys:
            if c == 0:
                l = len(history.history[k]) # number of epochs
                h = np.zeros(l)
            head = head + k + ","
            h = np.vstack((h, history.history[k]))
            c = c + 1

        head = head[:-1]
        head = head + "\n"
        log_file.write(head)

        h = h[1:,:]
        h = h.T

        for row in h:
            new_line = ""
            for value in row:
                new_line = new_line + "{:.8f},".format(value)
            new_line = new_line[:-1]
            new_line = new_line + "\n"
            log_file.write(new_line)

    log_file.close()

def get_images(paths):

    images = []

    for image_path in paths:
        image_path = "../data"+image_path[1:]
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        images.append(image)

    return np.array(images)

def load_clothes(size=1.0):
    return load_dataset("../data/clothes_train.txt", "../data/clothes_test.txt", size)
    
def load_faces(size=1.0):
    return load_dataset("../data/faces_train.txt", "../data/face_test.txt", size)


In [60]:
def train_model(model, hyper):
    training_size = hyper["training_size"]
    batch_size = hyper["batch_size"]
    num_epochs = hyper["num_epochs"]
    dataset = hyper["dataset_type"]
    dataset_size = hyper["dataset_size"]

    log_file_name = generate_log_file_name(hyper)

    if dataset == 1:
        xtr, ytr, xva, yva, xte, yte = load_clothes(dataset_size / 100.0)
    elif dataset_type == 2:
        xtr, ytr, xva, yva, xte, yte = load_faces(dataset_size / 100.0)
    else:
        raise NotImplementedError

    xtr = get_images(xtr)
    xte = get_images(xte)
    xva = get_images(xva)
    
    history = model.fit(
        xtr,
        ytr,
        batch_size,
        validation_data = (xva, yva),
        epochs=num_epochs
    )

    eval_ = model.evaluate(xte, yte)
    for val, key in zip(eval_, model.metrics_names):
        hyper[key] = val

    save_log_metrics(log_file_name, hyper, history)
    save_plot_metrics(log_file_name, history)
    model.save_weights(log_file_name + ".hdf")

def generate_log_file_name(hyper):
    exp_name = hyper["exp_name"]
    model_type = hyper["model_type"]
    
    dataset_type = hyper["dataset_type"]
    if dataset_type == 1:
        dataset_name = "clothes"
    elif dataset_type == 2:
        dataset_name = "faces"
    else:
        raise NotImplementedError

    dataset_size = hyper["dataset_size"]
    return "{:s}_model_{:d}_{:s}_{:d}".format(exp_name, model_type, dataset_name, dataset_size)

def train_networks(exp_name, model_type, learning_rate, training_size, batch_size, num_epochs, dataset_size, dataset_type):
    model = None

    if model_type == 1:
        model = get_model_1(learning_rate)
        model.summary()
    else:
        raise NotImplementedError

    hyper = OrderedDict()
    hyper["learning_rate"] = learning_rate
    hyper["training_size"] = training_size
    hyper["batch_size"] = batch_size
    hyper["num_epochs"] = num_epochs
    hyper["dataset_type"] = dataset_type
    hyper["dataset_size"] = dataset_size
    hyper["model_type"] = model_type
    hyper["exp_name"] = exp_name

    train_model(model, hyper)

In [None]:
train_networks("test", 1, 0.001, 2000, 50, 10, 1, 1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_19 (InputLayer)        (None, 64, 64, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0         
__________

In [None]:
ImgGen=ImageDataGenerator()
train_gen=ImgGen.flow_from_directory( '../baseline/face/train_split_0.85',target_size=(64,64),batch_size=32,color_mode="grayscale",
                                    shuffle=False)
valid_gen=ImgGen.flow_from_directory( '../baseline/face/valid_split_0.15',target_size=(64,64),batch_size=32,color_mode="grayscale",
                                    shuffle=False)
tf.image.grayscale_to_rgb(
    train_gen,
    name=None
)

Found 35693 images belonging to 7 classes.
Found 6307 images belonging to 7 classes.


In [None]:
train_gen[1]

In [27]:

#model = Sequential()
#model.add(Conv2D(10,5,strides=(1,1) ,padding="valid",input_shape=(64,64,1), kernel_initializer='random_uniform', bias_initializer='zeros' ))
#model.add(MaxPooling2D(pool_size=(3, 3),padding='valid'))
#model.add(Activation('relu'))
#model.add(Conv2D(5,5,strides=1,padding='valid'))
#model.add(MaxPooling2D(pool_size=(3, 3),padding='valid'))
#model.add(Activation('relu'))
#model.add(Flatten())
#model.add(Dense(7, activation='softmax'))
#optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)  
#model.compile(optimizer='Adam', loss="categorical_crossentropy", metrics=["accuracy"])

VGG=VGG16(include_top=False, weights='imagenet', input_tensor=None, input_shape=(64,64,1), pooling=None)
model = Sequential()
for l in VGG.layers:
    model.add(l)
model.add(Flatten(input_shape=VGG.output_shape[1:]))
model.add(Dense(7, activation='softmax'))
for layer in model.layers:
    layer.trainable = False
for layer in model.layers[19:20]:
    layer.trainable = True
optimizers.SGD(lr=0.0001, momentum=0.0, decay=0.0, nesterov=False )
model.compile(optimizer='SGD', loss="categorical_crossentropy", metrics=["accuracy"])
model.fit_generator(
        train_gen,
        steps_per_epoch=1000,
        epochs=10,
        validation_data=valid_gen,
        verbose=1)


ValueError: The input must have 3 channels; got `input_shape=(64, 64, 1)`

In [6]:
# 40% for training and 60% for validation 
#Epoch 1/10
#2000/2000 [==============================] - 156s 78ms/step - loss: 0.6978 - acc: 0.7422 - val_loss: 0.0068 - val_acc: 0.9997
#Epoch 2/10
#2000/2000 [==============================] - 152s 76ms/step - loss: 0.0456 - acc: 0.9879 - val_loss: 0.0046 - val_acc: 0.9995
#Epoch 3/10
# 157/2000 [=>............................] - ETA: 1:55 - loss: 0.0036 - acc: 0.9998
#
for i, layer in enumerate(model.layers):
   print(i, layer.name)

0 input_4
1 block1_conv1
2 block1_conv2
3 block1_pool
4 block2_conv1
5 block2_conv2
6 block2_pool
7 block3_conv1
8 block3_conv2
9 block3_conv3
10 block3_pool
11 block4_conv1
12 block4_conv2
13 block4_conv3
14 block4_pool
15 block5_conv1
16 block5_conv2
17 block5_conv3
18 block5_pool


In [None]:
optimizers.SGD(lr=0.0001, momentum=0.0, decay=0.0, nesterov=False )
model.compile(optimizer='SGD', loss="categorical_crossentropy", metrics=["accuracy"])
model.fit_generator(
        train_gen,
        steps_per_epoch=1000,
        epochs=10,
        validation_data=valid_gen,
        verbose=1)

In [3]:
keras.__version__

'2.1.3'