In [1]:
import tensorflow as tf
import numpy as np
import h5py
import sys

In [2]:
def average_gradients(tower_grads):
    """Average the per-variable gradients computed by several towers.

    Args:
        tower_grads: list with one entry per tower; each entry is a list of
            (gradient, variable) pairs as returned by
            ``Optimizer.compute_gradients``. Every tower must list the
            variables in the same order, because pairs are matched by
            position.

    Returns:
        A list of (averaged_gradient, variable) pairs. The variable is taken
        from the first tower, since variables are shared across towers.
        Variables for which no tower produced a gradient are omitted.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Each grad_and_vars looks like:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        # compute_gradients may report None for a variable that does not
        # influence the loss; expand_dims would fail on it, so skip those.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        if not grads:
            # No tower has a gradient for this variable: nothing to apply.
            continue

        # Stack along the new leading 'tower' axis and average over it.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # The variables are redundant because they are shared across towers,
        # so return the first tower's pointer to the variable.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    return average_grads

In [3]:
def identity_block(input_tensor, filters, stage, block, is_training):
    """Residual block whose shortcut is the unmodified input.

    The main path is 1x1 conv -> 3x3 conv -> 1x1 conv, each followed by batch
    normalization; the first two are also followed by ReLU. The first two
    convolutions use ``filters[0]`` output channels and the last one uses
    ``filters[1]``. The input is then added to the main path and a final ReLU
    is applied, so ``filters[1]`` has to be compatible with the channel count
    of ``input_tensor`` for the addition.

    Args:
        input_tensor: input feature map.
        filters: sequence indexed at [0] and [1] (see above).
        stage, block: labels formatted into the variable-scope name.
        is_training: passed as ``training`` to every batch normalization.

    Returns:
        The output tensor of the block.
    """
    scope_name = "stage-{}_block-{}".format(stage, block)
    # (index into `filters`, kernel size, apply ReLU afterwards?)
    layer_specs = ((0, 1, True), (0, 3, True), (1, 1, False))

    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
        residual = input_tensor
        for filter_index, kernel_size, activate in layer_specs:
            residual = tf.layers.conv2d(residual, filters[filter_index], kernel_size, padding="same")
            residual = tf.layers.batch_normalization(residual, training=is_training)
            if activate:
                residual = tf.nn.relu(residual)

        merged = tf.math.add(residual, input_tensor)
        output = tf.nn.relu(merged)
    return output

def conv_block(input_tensor, filters, stage, block, is_training, strides=2):
    """Residual block with a projection (1x1 conv + batch norm) shortcut.

    Main path: 1x1 conv (with ``strides``) -> 3x3 conv -> 1x1 conv, each
    followed by batch normalization, with ReLU after the first two. The
    shortcut applies a 1x1 conv with the same ``strides`` and ``filters[1]``
    channels plus batch normalization to the block input. Both paths are
    added and passed through a final ReLU.

    Args:
        input_tensor: input feature map.
        filters: sequence; ``filters[0]`` is the channel count of the first
            two convolutions, ``filters[1]`` that of the last convolution and
            of the shortcut.
        stage, block: labels formatted into the variable-scope name.
        is_training: passed as ``training`` to every batch normalization.
        strides: stride of the first main-path conv and of the shortcut conv.

    Returns:
        The output tensor of the block.
    """
    scope_name = "stage-{}_block-{}".format(stage, block)
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
        # Main path. Layers are created in the same order as before so that
        # the automatically generated variable names do not change.
        main = tf.layers.conv2d(input_tensor, filters[0], 1, strides=strides, padding="same")
        main = tf.nn.relu(tf.layers.batch_normalization(main, training=is_training))

        main = tf.layers.conv2d(main, filters[0], 3, padding="same")
        main = tf.nn.relu(tf.layers.batch_normalization(main, training=is_training))

        main = tf.layers.conv2d(main, filters[1], 1, padding="same")
        main = tf.layers.batch_normalization(main, training=is_training)

        # Projection shortcut, created after the main path.
        projected = tf.layers.conv2d(input_tensor, filters[1], 1, strides=strides, padding="same")
        projected = tf.layers.batch_normalization(projected, training=is_training)

        output = tf.nn.relu(tf.math.add(main, projected))
    return output

def classifier(inputs, num_classes, num_embeddings, is_training):
    """Build the residual classifier and return its embeddings and logits.

    Structure: a stem (7x7 conv with 16 filters, batch norm, ReLU, 3x3 max
    pool with stride 2), then three stages that each consist of one
    ``conv_block`` followed by one ``identity_block``, then average pooling
    over the full spatial extent, a dense embedding layer, and a ReLU + dense
    layer producing the class logits.

    Args:
        inputs: input image batch; its static shape must have known values
            at positions -3 and -2, which are used as the pooling window.
        num_classes: number of units of the logits layer.
        num_embeddings: number of units of the embedding layer.
        is_training: forwarded to every batch normalization layer.

    Returns:
        (embeddings, pred_logits): the linear embedding layer output and the
        un-normalized class scores.
    """
    with tf.variable_scope("stage-1_block-a", reuse=tf.AUTO_REUSE):
        net = tf.layers.conv2d(inputs, 16, 7, padding="same")
        net = tf.nn.relu(tf.layers.batch_normalization(net, training=is_training))
        net = tf.layers.max_pooling2d(net, 3, strides=2, padding="same")

    # (stage number, [bottleneck filters, output filters]) per residual stage.
    stage_layout = ((2, [16, 64]), (3, [32, 128]), (4, [64, 128]))
    for stage_id, stage_filters in stage_layout:
        net = conv_block(net, stage_filters, stage=stage_id, block='a', is_training=is_training)
        net = identity_block(net, stage_filters, stage=stage_id, block='b', is_training=is_training)

    with tf.variable_scope("embeddings", reuse=tf.AUTO_REUSE):
        # Global average pooling: the window covers the whole feature map.
        static_shape = net.get_shape()
        pool_window = (static_shape[-3], static_shape[-2])
        net = tf.layers.average_pooling2d(net, pool_window, 1)
        net = tf.layers.flatten(net)
        embeddings = tf.layers.dense(net, num_embeddings, activation=None)

    with tf.variable_scope("logits", reuse=tf.AUTO_REUSE):
        activated = tf.nn.relu(embeddings)
        pred_logits = tf.layers.dense(activated, num_classes, activation=None)

    return embeddings, pred_logits

In [4]:
def get_center_loss(features, labels, alpha, num_classes):
    """Compute the center loss and register the op that updates the centers.

    A non-trainable variable ``central_loss/centers`` of shape
    [num_classes, feature_length] (zero-initialized, shared through
    ``AUTO_REUSE``) holds one center per class. The loss is
    ``tf.nn.l2_loss`` of the difference between each feature vector and the
    center of its class. The update op moves each center towards the features
    of its class, scaled by ``alpha`` and divided by (1 + number of
    occurrences of that class in the batch); it is added to the
    ``UPDATE_OPS`` collection rather than returned.

    Args:
        features: tensor whose static dimension 1 is the feature length.
        labels: integer class indices; flattened to 1-D before use.
        alpha: scale factor applied to the center update.
        num_classes: number of rows of the centers variable.

    Returns:
        (loss, centers): the scalar loss tensor and the centers variable.
    """
    feature_length = features.get_shape()[1]
    with tf.variable_scope("central_loss", reuse=tf.AUTO_REUSE):
        centers = tf.get_variable(
            'centers',
            [num_classes, feature_length],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0),
            trainable=False)

    flat_labels = tf.reshape(labels, [-1])

    # Center of the class each sample belongs to.
    batch_centers = tf.gather(centers, flat_labels)
    loss = tf.nn.l2_loss(features - batch_centers)

    # For every sample, how often its label occurs in this batch.
    _, label_positions, label_counts = tf.unique_with_counts(flat_labels)
    occurrences = tf.reshape(tf.gather(label_counts, label_positions), [-1, 1])

    center_step = batch_centers - features
    center_step = center_step / tf.cast((1 + occurrences), tf.float32)
    center_step = alpha * center_step

    centers_update_op = tf.scatter_sub(centers, flat_labels, center_step)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, centers_update_op)

    return loss, centers

In [5]:
def unison_shuffled_copies(a, b):
    """Return copies of ``a`` and ``b`` shuffled with one shared permutation.

    A random permutation of ``range(a.shape[0])`` is drawn from numpy's
    global random state and used to index both arrays along their first axis,
    so rows that were paired before the shuffle stay paired.
    """
    order = np.random.permutation(a.shape[0])
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

def get_batch(data_x, data_y, batch_size):
    """Endlessly yield shuffled, full-size (x, y) mini-batches.

    The data is reshuffled (x and y with the same permutation) at the start
    of every pass. Only complete batches are produced: a trailing remainder
    shorter than ``batch_size`` is dropped for that pass.

    Args:
        data_x: array indexed along axis 0 by sample.
        data_y: array with the matching targets along axis 0.
        batch_size: number of samples per yielded batch; must be positive.

    Yields:
        (x, y) tuples, each with exactly ``batch_size`` rows.

    Raises:
        ValueError: on the first ``next()`` call if ``batch_size`` is not
            positive or exceeds the number of samples. Without this check
            the loop would spin forever without ever yielding a batch.
    """
    num_samples = data_x.shape[0]
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got {}".format(batch_size))
    if num_samples < batch_size:
        raise ValueError(
            "batch_size ({}) is larger than the number of samples ({})".format(
                batch_size, num_samples))

    # Start offsets beyond this one cannot hold a full batch.
    last_full_start = num_samples - batch_size
    while True:
        data_x, data_y = unison_shuffled_copies(data_x, data_y)
        for index in range(0, last_full_start + 1, batch_size):
            yield data_x[index:index+batch_size], data_y[index:index+batch_size]

In [6]:
# Input data, TensorBoard log directory and checkpoint prefix.
data_file =        "/home/kjakkala/neuralwave/data/CSI_50_500.h5"
tensorboard_path = "/home/kjakkala/neuralwave/data/logs/central_loss_50_500/"
weights_path =     "/home/kjakkala/neuralwave/data/weights/central_loss_50_500/central_loss_model.ckpt"

# Hyper-parameters.
num_embeddings = 64   # size of the embedding layer
save_step = 25        # write a checkpoint every `save_step` epochs
decay_rate = 0.999    # learning-rate decay factor, applied once per epoch
batch_size = 16       # samples per GPU tower
num_gpus = 4          # number of towers; the global batch is batch_size * num_gpus
alpha = 0.5           # scale of the center update in the center loss
ratio = 0.9           # weight of the center loss in the combined loss
epochs = 1000
lr = 1e-3             # initial learning rate

# Read everything into memory inside a `with` block so the file handle is
# released even if one of the reads fails. Indexing with hf[...] (instead of
# hf.get(...)) makes a missing dataset fail immediately with a KeyError.
with h5py.File(data_file, 'r') as hf:
    train_classes = np.array(hf['labels']).astype(str)
    num_classes = len(train_classes)
    # Add a trailing channel axis to the samples.
    X_train = np.expand_dims(hf['X_train'][...], axis=-1)
    X_test = np.expand_dims(hf['X_test'][...], axis=-1)
    # One-hot encode the integer class ids.
    y_train = np.eye(num_classes)[hf['y_train'][...]]
    y_test = np.eye(num_classes)[hf['y_test'][...]]

# One step consumes a global batch of batch_size * num_gpus samples.
train_steps = X_train.shape[0]//(batch_size*num_gpus)
test_steps = X_test.shape[0]//(batch_size*num_gpus)
train_data = get_batch(X_train, y_train, batch_size*num_gpus)
test_data = get_batch(X_test, y_test, batch_size*num_gpus)
rows, cols, channels = X_train.shape[1:]

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
sys.stdout.flush()

(1360, 2000, 270, 1) (1360, 40) (240, 2000, 270, 1) (240, 40)


In [7]:
# Build the multi-GPU training graph from scratch. One classifier tower is
# created per GPU; the classifier and center-loss variables are shared between
# towers (their scopes use reuse=tf.AUTO_REUSE), and the per-tower gradients
# are averaged before a single Adam update is applied.
tf.reset_default_graph()

with tf.device('/cpu:0'):
    with tf.variable_scope("Inputs") as scope:
        # X and Y carry the global batch; each tower slices out its own
        # batch_size rows below, so a feed is expected to contain
        # batch_size * num_gpus samples. Y holds one-hot labels.
        X = tf.placeholder(tf.float32, [None, rows, cols, channels])
        Y = tf.placeholder(tf.float32, [None, num_classes])
        
    # Fed as True for training steps and False for evaluation; forwarded to
    # every batch normalization layer.
    is_training = tf.placeholder(tf.bool)

    global_step = tf.train.get_or_create_global_step()

    # Staircase decay: the rate is multiplied by decay_rate once every
    # train_steps global steps, i.e. once per epoch.
    learning_rate = tf.train.exponential_decay(lr,
                                               global_step,
                                               train_steps,
                                               decay_rate,
                                               staircase=True)
    tf.summary.scalar("learning_rate", learning_rate)       

    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)      

    # Calculate the gradients for each model tower.
    tower_grads = []
    center_losses = []
    softmax_losses = []
    combined_losses = []
    accuracies = []
    for i in range(num_gpus):
        with tf.device("/gpu:{}".format(i)):
            with tf.name_scope("resnet_{}".format(i)) as scope:
                # This tower's share of the global batch.
                _x = X[i * batch_size: (i+1) * batch_size]
                _y = Y[i * batch_size: (i+1) * batch_size]

                embeddings, pred_logits = classifier(_x, num_classes, num_embeddings, is_training)

                # The center loss takes integer class ids, recovered from the
                # one-hot labels with argmax.
                center_loss, centers = get_center_loss(embeddings, tf.argmax(_y, axis=1), alpha, num_classes)
                softmax_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=_y, logits=pred_logits))
                # `ratio` weights the center loss against the softmax loss.
                combined_loss = softmax_loss + ratio * center_loss
                grads = opt.compute_gradients(combined_loss) 
                
                accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred_logits, 1), tf.argmax(_y, 1)), tf.float32))

                center_losses.append(center_loss)
                softmax_losses.append(softmax_loss)
                combined_losses.append(combined_loss)
                accuracies.append(accuracy)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)    
    # UPDATE_OPS is read without a scope filter, so it contains the ops
    # registered by every tower, including the center update added inside
    # get_center_loss. Making the train op depend on them means they run on
    # each training step.
    # NOTE(review): tf.layers.batch_normalization presumably also registers
    # its moving-average updates here, once per tower — confirm whether
    # running all towers' updates on the shared statistics is intended.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        apply_gradient_op = opt.apply_gradients(grads, global_step)
        
    # Metrics averaged over the towers, exported as TensorBoard scalars.
    avg_center_loss = tf.reduce_mean(center_losses)
    avg_softmax_loss = tf.reduce_mean(softmax_losses)
    avg_combined_loss = tf.reduce_mean(combined_losses)
    avg_accuracies = tf.reduce_mean(accuracies)

    tf.summary.scalar("avg_center_loss", avg_center_loss)      
    tf.summary.scalar("avg_softmax_loss", avg_softmax_loss)      
    tf.summary.scalar("avg_combined_loss", avg_combined_loss) 
    tf.summary.scalar("avg_accuracies", avg_accuracies) 
    
    # Initializing the variables
    # (created last, after apply_gradients, so the initializer, the merged
    # summary and the saver see everything defined above)
    init = tf.global_variables_initializer()
    merged = tf.summary.merge_all()
    saver = tf.train.Saver(max_to_keep=5)

In [None]:
# Launch the graph
# Train for `epochs` epochs. After every epoch the test set is evaluated and
# logged, and every `save_step` epochs (plus once at the very end) a
# checkpoint is written.
with tf.Session(config=tf.ConfigProto(allow_soft_placement = True)) as sess:
    train_writer = tf.summary.FileWriter(tensorboard_path+"train", sess.graph)
    test_writer = tf.summary.FileWriter(tensorboard_path+"test", sess.graph)
    try:
        sess.run(init)

        # Defined up front so the checkpoint calls below never hit an
        # undefined name when train_steps or epochs is 0.
        curr_step = sess.run(global_step)
        last_saved_step = None

        for epoch in range(1, epochs + 1):
            # One pass over the training data; the train op also advances
            # global_step, which is used as the summary step.
            for step in range(1, train_steps + 1):
                batch_x, batch_y = next(train_data)
                _, summary, curr_step = sess.run([apply_gradient_op, merged, global_step], feed_dict={X: batch_x, Y: batch_y, is_training: True})
                train_writer.add_summary(summary, curr_step)

            # Evaluation only fetches the summaries, so no weights are updated.
            # NOTE(review): test summaries are indexed by
            # epoch*test_steps + step, a different x-axis than the global
            # step used for the training summaries.
            for step in range(1, test_steps+1):
                batch_x, batch_y = next(test_data)
                summary = sess.run(merged, feed_dict={X: batch_x, Y: batch_y, is_training: False})
                test_writer.add_summary(summary, (epoch*test_steps)+step)

            if (epoch % save_step == 0):
                saver.save(sess, weights_path, global_step=curr_step)
                last_saved_step = curr_step

            sys.stdout.write("Current Epoch: {}\r".format(epoch))
            sys.stdout.flush()

        # Final checkpoint, unless the last epoch already wrote this step.
        if curr_step != last_saved_step:
            saver.save(sess, weights_path, global_step=curr_step)
    finally:
        # Flush and release the event files even if training fails or is
        # interrupted; otherwise buffered summaries can be lost.
        train_writer.close()
        test_writer.close()

Current Epoch: 78