In [1]:
import os
import math

import numpy as np

import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def load_and_preprocess_image(filename):
    """Read one PNG file and derive its binary label from the file path.

    Args:
        filename: scalar string tensor holding the path of a PNG image.

    Returns:
        A tuple ``(image, label)``. ``image`` is the decoded 3-channel image
        cropped to its top-left 300x300 region. ``label`` is a float32 scalar
        equal to 1.0 when the character at index 5 of ``filename`` is 'p'
        (the 'p' of 'pressed') and 0.0 otherwise.
    """
    raw_bytes = tf.read_file(filename)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    # Some images were saved as 301x301, so crop everything to a common size.
    cropped = tf.image.crop_to_bounding_box(decoded, 0, 0, 300, 300)

    # Index 5 is the first character after a five-character directory prefix
    # such as "imgs/", i.e. the first character of the file's base name.
    label_char = tf.substr(filename, 5, 1)
    is_pressed = tf.equal(label_char, 'p')
    return cropped, tf.cast(is_pressed, tf.float32)

def mirror_image(image_data, label):
    """Return the horizontally flipped image together with its unchanged label."""
    flipped = tf.image.flip_left_right(image_data)
    return flipped, label

def create_datasets(filenames, batch_size, n_epochs):
    """Build disjoint training and validation input pipelines.

    The file list is permuted once with a fixed seed and split 80/20 at the
    file level, before any image is loaded or augmented. Splitting there keeps
    the two sets disjoint: ``Dataset.shuffle`` draws a new order every time
    the dataset is iterated, so a ``take``/``skip`` split placed after it does
    not select stable subsets, and an image and its mirrored copy could end up
    on opposite sides of the split.

    Args:
        filenames: sequence of image paths accepted by
            ``load_and_preprocess_image``.
        batch_size: number of examples per training batch.
        n_epochs: number of passes over the training set.

    Returns:
        A tuple ``(train_dataset, valid_dataset)``.
        ``train_dataset`` yields shuffled batches drawn from the training
        images plus their horizontal mirrors (so one epoch contains twice as
        many examples as there are training files) and ends after
        ``n_epochs`` passes.
        ``valid_dataset`` yields the whole, un-augmented validation split as a
        single batch and repeats indefinitely.

    Raises:
        ValueError: if fewer than two file names are given, because both
            splits must be non-empty.
    """
    filenames = list(filenames)
    if len(filenames) < 2:
        raise ValueError(
            "create_datasets needs at least two filenames to build both splits")

    train_dataset_portion = 0.8

    # Fixed-seed permutation so that the split is random with respect to the
    # listing order but identical on every run.
    order = np.random.RandomState(0).permutation(len(filenames))
    shuffled_filenames = [filenames[i] for i in order]

    point_of_split = int(len(shuffled_filenames) * train_dataset_portion)
    train_filenames = shuffled_filenames[:point_of_split]
    valid_filenames = shuffled_filenames[point_of_split:]

    # Training split: originals + mirrored copies, reshuffled on each epoch.
    train_dataset = tf.data.Dataset.from_tensor_slices(train_filenames)
    train_dataset = train_dataset.map(load_and_preprocess_image)
    mirror_dataset = train_dataset.map(mirror_image)  # Mirrored copies for augmentation
    train_dataset = train_dataset.concatenate(mirror_dataset)
    train_dataset = train_dataset.shuffle(buffer_size=20000, seed=0)
    train_dataset = train_dataset.batch(batch_size).repeat(n_epochs)

    # Validation split: held-out files only, delivered as one full batch.
    valid_dataset = tf.data.Dataset.from_tensor_slices(valid_filenames)
    valid_dataset = valid_dataset.map(load_and_preprocess_image)
    valid_dataset = valid_dataset.batch(len(valid_filenames)).repeat()

    return train_dataset, valid_dataset

In [11]:
def create_conv_net(image_batch, reuse=False):
    """Build the convolutional binary classifier inside the "conv_net" scope.

    The network scales the input by 1/255, applies five conv + max-pool stages
    (8, 16, 32, 64 and 128 filters), flattens the result and feeds it through
    three ReLU dense layers (5000, 1000 and 256 units) followed by a single
    linear output unit.

    Args:
        image_batch: batch of images to classify (assumed to be the image
            batch produced by the input pipeline).
        reuse: passed to ``tf.variable_scope``; set to True to share the
            weights created by an earlier call.

    Returns:
        A tuple ``(logits, outputs)`` where ``logits`` holds one raw score per
        example and ``outputs`` is the sigmoid of those scores.
    """
    # (number of filters, side of the square conv kernel and pooling window)
    conv_stages = [(8, 5), (16, 3), (32, 3), (64, 3), (128, 3)]
    dense_widths = [5000, 1000, 256]

    with tf.variable_scope("conv_net", reuse=reuse):
        # Normalize image data
        net = image_batch / 255

        for n_filters, window in conv_stages:
            net = tf.layers.conv2d(
                net,
                filters=n_filters,
                kernel_size=[window, window],
                strides=[1, 1],
                padding='SAME',
                kernel_initializer=None,
                bias_initializer=tf.zeros_initializer(),
                activation=tf.nn.relu,
            )
            # Stride-2 pooling halves the spatial resolution after each stage.
            net = tf.layers.max_pooling2d(
                net, pool_size=[window, window], strides=[2, 2], padding='SAME')

        net = tf.layers.flatten(net)

        for n_units in dense_widths:
            net = tf.layers.dense(net, n_units, tf.nn.relu)

        # The final dense layer has one unit; drop that axis to get one logit
        # per example.
        logits = tf.squeeze(tf.layers.dense(net, 1), axis=1)
        outputs = tf.nn.sigmoid(logits)

        return logits, outputs
    

In [36]:
def create_optimizer(logits, labels, learning_rate=0.001):
    """Create the training op and the scalar loss it minimizes.

    Args:
        logits: raw (pre-sigmoid) network scores.
        labels: target values, same shape as ``logits``.
        learning_rate: step size handed to the Adam optimizer.

    Returns:
        A tuple ``(optimizer, loss)``: the op that applies one Adam update and
        the mean sigmoid cross-entropy over the batch.
    """
    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=logits)
    mean_loss = tf.reduce_mean(per_example_loss)

    adam = tf.train.AdamOptimizer(learning_rate)
    train_op = adam.minimize(mean_loss)

    return train_op, mean_loss

In [37]:
def create_accuracy(outputs, labels):
    """Create a streaming accuracy metric for thresholded sigmoid outputs.

    Args:
        outputs: predicted probabilities; values above 0.5 count as the
            positive class.
        labels: ground-truth labels, cast to bool before comparison.

    Returns:
        A tuple ``(accuracy, update_op)`` as returned by
        ``tf.metrics.accuracy``: a tensor holding the accumulated accuracy and
        an op that folds the current batch into it and returns the new value.
    """
    # The comparison already yields a bool tensor, so no extra cast is needed.
    predictions = outputs > 0.5
    labels = tf.cast(labels, tf.bool)

    # tf.metrics.accuracy is declared as (labels, predictions); pass keywords
    # so each tensor reaches the argument it is meant for.
    accuracy, update_op = tf.metrics.accuracy(labels=labels, predictions=predictions)

    return accuracy, update_op

def reset_accuracy_variables():
    """Return assign ops that zero every local variable whose name contains 'accuracy'.

    Only variables that already exist in the default graph when this function
    is called are covered, so call it after the metric has been created.
    """
    reset_ops = []
    for variable in tf.local_variables():
        if 'accuracy' not in variable.name:
            continue
        reset_ops.append(tf.assign(variable, 0))
    return reset_ops

In [None]:
# --- Configuration -------------------------------------------------------#

N_EPOCHS = 1

BATCH_SIZE = 8

MAX_FILES = 10000          # Upper bound on the number of images used
LOG_EVERY_N_STEPS = 100    # How often loss and validation accuracy are printed

# os.listdir() returns entries in arbitrary order, so sort first and then
# apply a fixed-seed shuffle: the subset kept by the MAX_FILES cut is then the
# same on every run and is not biased towards names that sort first.
image_names = sorted(os.listdir("imgs"))
np.random.RandomState(0).shuffle(image_names)

# The "imgs/" prefix is exactly five characters long; load_and_preprocess_image
# reads the label from the character at index 5 of the path.
filenames = ["imgs/" + filename for filename in image_names[:MAX_FILES]]

# Expected number of training steps, assuming 80% of the files are used for
# training. Only used in the progress message; the loop below stops when the
# training iterator is exhausted.
N_STEPS = math.ceil(len(filenames) * 0.8 * N_EPOCHS / BATCH_SIZE)

#-------------------------------------------------#

tf.reset_default_graph()

train_dataset, valid_dataset = create_datasets(filenames, BATCH_SIZE, N_EPOCHS)

train_dataset_x_batch, train_dataset_y_batch = train_dataset.make_one_shot_iterator().get_next()
valid_dataset_x_batch, valid_dataset_y_batch = valid_dataset.make_one_shot_iterator().get_next()

# Training graph.
logits, outputs = create_conv_net(train_dataset_x_batch)
optimizer, loss = create_optimizer(logits, train_dataset_y_batch, 0.001)

# Validation graph: same weights (reuse=True), fed from the validation iterator.
valid_logits, valid_outputs = create_conv_net(valid_dataset_x_batch, reuse=True)
acc, update_acc = create_accuracy(valid_outputs, valid_dataset_y_batch)
# Must be built after create_accuracy so the metric's local variables exist.
reset_acc = reset_accuracy_variables()

print("Initializing session...")
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    i_step = 0

    while True:
        i_step += 1

        try:
            _, loss_value = sess.run([optimizer, loss])

            if i_step % LOG_EVERY_N_STEPS == 0:
                print("Step {}/{} \t Loss: {}".format(i_step, N_STEPS, loss_value))

                # Clear the streaming metric so the printed value reflects
                # only the validation batch evaluated right now.
                sess.run(reset_acc)
                acc_value = sess.run(update_acc)
                print("Validation accuracy: {}".format(acc_value))

        except tf.errors.OutOfRangeError:
            # Raised once the training dataset has been consumed N_EPOCHS times.
            print("End of dataset")
            break

Initializing session...
Step 100/1000 	 Loss: 0.38526108860969543
