In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import tensorflow as tf
from PIL import Image
import glob
import os
import re

%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
filepath = "/data/asl_alphabet_{0}/"
cwd = os.getcwd()

In [3]:
num_classes = 26
batch_size = 117
train_size = 70200
val_size = 7800

# Data Exploration and Preprocessing

In [4]:
def get_imagepaths(type="train"):
    imagepaths = []

    for dir in glob.glob(cwd + filepath.format(type) + "*/"):
        for imagepath in glob.glob(dir + "*.jpg"):
            imagepaths.append(imagepath)
    imagepaths = np.array(imagepaths)
    np.random.shuffle(imagepaths)
    
    return imagepaths

In [5]:
def build_data(imagepaths):
    data = []
    for p in imagepaths:
        label = re.search(r"\/([A-Z])\/", p).group(0)[1]
        label = int(ord(label) - ord("A"))
        data.append((p, label))

    return np.array(data, dtype=object)

In [6]:
def train_val_split(imagepaths, val_amt):
    data = build_data(imagepaths)
    amt = data.shape[0]
    train = data[:int(amt*(1-val_amt))]
    val = data[int(amt*(1-val_amt)):]
    
    return train, val

In [7]:
def create_tfrecord(data, type="train"):
    np.random.shuffle(data)
    tfr_dir = "/data/tfrecords/"
    if not os.path.exists(cwd + tfr_dir):
        os.makedirs(cwd + tfr_dir)
    tfr_filename = "{0}.tfrecords".format(type)
    if os.path.isfile(cwd + tfr_dir + tfr_filename):
        os.remove(cwd + tfr_dir + tfr_filename)
    writer = tf.python_io.TFRecordWriter(cwd + tfr_dir + tfr_filename)
    
    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
    
    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
    
    for p in data:
        feature = {
            "image": _bytes_feature(tf.compat.as_bytes(p[0])),
            "label": _int64_feature(p[1])
        }

        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())
            
    print(tfr_filename + " Complete")
    writer.close()
    
    return cwd + tfr_dir + tfr_filename

In [8]:
def _parse(proto):
    features = {"image": tf.FixedLenFeature((), tf.string, default_value=""),
                "label": tf.FixedLenFeature((), tf.int64, default_value=0)}
    parsed_features = tf.parse_single_example(proto, features)
    image_file = tf.read_file(parsed_features["image"])
    image = tf.image.decode_image(image_file)
    label = parsed_features["label"]
    
    return image, label

In [9]:
def conv_layer(input, size_in, size_out, k_size, name):
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable("weights", shape=[k_size, k_size, size_in, size_out])
        biases = tf.get_variable("biases", shape=[size_out], initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(input, weights, strides=[1,1,1,1], padding="SAME")
        conv = tf.nn.bias_add(conv, biases)
        act = tf.nn.relu(conv, scope.name)
        
        tf.summary.histogram("weights", weights)
        tf.summary.histogram("biases", biases)
        tf.summary.histogram("activations", act)
        
    return act

def batch_norm_layer(input, is_training, eps, name):
    with tf.variable_scope(name) as scope:
        gamma = tf.get_variable("gamma", shape=[input.shape[0]], initializer=tf.constant_initializer(1.0))
        beta = tf.get_variable("beta", shape=[input.shape[0]], initializer=tf.constant_initializer(0.0))
        mean, var = tf.nn.moments(input, [0,1,2])
        ema = tf.nn.ExponentialMovingAverage(decay=0.99)
        
        def mean_var_with_update():
            ema_apply_op = ema.apply([mean, var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(mean), tf.identity(var)
            
        mean, var = tf.cond(is_training, mean_var_with_update, lambda: (ema.average(mean), ema.average(var)))
        bn = tf.nn.batch_normalization(input, mean, var, gamma, beta, eps)
        
        tf.summary.histogram("gamma", gamma)
        tf.summary.histogram("beta", beta)
        
    return bn
        
def dropout_layer(input, dropout):
    do = tf.nn.dropout(input, dropout)
    
    return do

def maxpool_layer(input, name):
    return tf.nn.max_pool(input, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME", name=name)

def fc_layer(input, size_in, size_out, name):
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable("weights", shape=[size_in, size_out])
        biases = tf.get_variable("biases", shape=[size_out])
        act = tf.matmul(input, weights) + biases
        
        tf.summary.histogram("weights", weights)
        tf.summary.histogram("biases", biases)
        tf.summary.histogram("activations", act)
        
    return act

In [10]:
def model(X, dropout):
    
    tf.summary.image("input", X, 10)

    conv1 = conv_layer(X, 3, 64, 3, "conv1")
    conv2 = conv_layer(conv1, 64, 64, 3, "conv2")
    maxpool1 = maxpool_layer(conv2, "maxpool1")
    conv3 = conv_layer(maxpool1, 64, 64, 3, "conv3")
    conv4 = conv_layer(conv3, 64, 64, 3, "conv4")
    maxpool2 = maxpool_layer(conv4, "maxpool2")
    flatten = tf.reshape(maxpool2, shape=[-1, tf.reduce_prod(maxpool2.shape[1:])])
    fc1 = fc_layer(flatten, 160000, 1024, "fc1")
    dropout1 = dropout_layer(fc1, dropout)
    fc2 = fc_layer(dropout1, dropout1.shape[1], 1024, "fc2")
    dropout2 = dropout_layer(fc2, dropout)
    fc3 = fc_layer(dropout2, dropout2.shape[1], 26, "fc3")

    return fc3

In [11]:
def loss(logits, labels):
    with tf.name_scope("loss"):
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name="cross_entropy")

    tf.summary.scalar("cross_entropy", cross_entropy_mean)
        
    return cross_entropy_mean

In [12]:
def training(loss, learning_rate):
    with tf.name_scope("training"):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)

    return train_op

In [13]:
def evaluation(y_pred, y):
    correct = tf.equal(tf.argmax(tf.nn.softmax(y_pred), 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
    tf.summary.scalar("accuracy", accuracy)
    
    return accuracy

In [14]:
imagepaths = get_imagepaths("train")
train, val = train_val_split(imagepaths, 0.1)
train_filepath = tf.placeholder(tf.string, name="train_filepath")
val_filepath = tf.placeholder(tf.string, name="val_filepath")

train_dataset = tf.data.TFRecordDataset(train_filepath).map(_parse)
train_dataset = train_dataset.batch(10)
val_dataset = tf.data.TFRecordDataset(val_filepath).map(_parse)
val_dataset = val_dataset.batch(100)

train_iter = train_dataset.make_initializable_iterator()
val_iter = val_dataset.make_initializable_iterator()
next_train = train_iter.get_next()
next_val = val_iter.get_next()

X = tf.placeholder(tf.float32, shape=[None, 200, 200, 3], name="X")
Y = tf.placeholder(tf.int64 , shape=[None, num_classes], name="Y")
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
logits = model(X, keep_prob)
loss = loss(logits, Y)
train_op = training(loss, 0.0001)
accuracy = evaluation(logits, Y)

In [17]:
with tf.Session() as sess:
    
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(cwd + "/checkpoints/")
    writer.add_graph(sess.graph)
    summary = tf.summary.merge_all()

    for e in range(10):
        train_fp = create_tfrecord(train, "train")
        sess.run(train_iter.initializer, feed_dict={train_filepath: train_fp})
        while True:
            try:
                image, label = sess.run(next_train)
                label = tf.keras.utils.to_categorical(label, num_classes)
                sess.run(train_op, feed_dict={X: image, Y: label, keep_prob: 0.5})
                break
            except tf.errors.OutOfRangeError:
                break
        l, acc, s= sess.run([loss, accuracy, summary], feed_dict={X: image, Y: label, keep_prob: 1.0})
        print("Epoch: {0}, Loss: {1}, Accuracy: {2}".format(e, l, acc))
        writer.add_summary(s, tf.train.get_global_step(graph=sess.graph))

train.tfrecords Complete
Epoch: 0, Loss: 199.20852661132812, Accuracy: 0.4000000059604645
train.tfrecords Complete
Epoch: 1, Loss: 930.8367309570312, Accuracy: 0.20000000298023224
train.tfrecords Complete
Epoch: 2, Loss: 409.23272705078125, Accuracy: 0.10000000149011612
train.tfrecords Complete
Epoch: 3, Loss: 509.79150390625, Accuracy: 0.30000001192092896
train.tfrecords Complete
Epoch: 4, Loss: 484.56219482421875, Accuracy: 0.10000000149011612
train.tfrecords Complete
Epoch: 5, Loss: 379.6321716308594, Accuracy: 0.0
train.tfrecords Complete
Epoch: 6, Loss: 366.2168884277344, Accuracy: 0.10000000149011612
train.tfrecords Complete
Epoch: 7, Loss: 316.76177978515625, Accuracy: 0.10000000149011612
train.tfrecords Complete
Epoch: 8, Loss: 439.8507385253906, Accuracy: 0.10000000149011612
train.tfrecords Complete
Epoch: 9, Loss: 334.611083984375, Accuracy: 0.10000000149011612
