In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import tensorflow as tf
from PIL import Image
import glob
import os
import re

%matplotlib inline

In [3]:
# Dataset directory template, appended to `cwd`; get_imagepaths fills "{0}"
# with the split name (e.g. "train").
filepath = "/data/asl_alphabet_{0}/"
# Working directory captured when this cell runs. get_imagepaths and
# create_tfrecord build their paths by string concatenation onto it.
cwd = os.getcwd()

In [4]:
# Number of label classes; referenced by model().
num_classes = 26
# NOTE(review): batch_size, train_size and val_size are not referenced by any
# cell visible in this notebook export - presumably meant for the training
# loop; confirm before relying on them.
batch_size = 117
# These two equal the row counts in the recorded "train shape" / "val shape"
# output of the split cell.
train_size = 70200
val_size = 7800

# Data Exploration and Preprocessing

In [5]:
def get_imagepaths(type="train"):
    """Return the paths of every ``*.jpg`` in the split's class folders, shuffled.

    The search root is ``cwd + filepath.format(type)``; images are looked up
    exactly one directory level below it.

    Args:
        type: split name substituted into the ``filepath`` template.

    Returns:
        1-D NumPy array of path strings, shuffled in place with the global
        NumPy random state.
    """
    class_dirs = glob.glob(cwd + filepath.format(type) + "*/")
    found = [jpg for class_dir in class_dirs for jpg in glob.glob(class_dir + "*.jpg")]

    imagepaths = np.array(found)
    np.random.shuffle(imagepaths)
    return imagepaths

In [6]:
def build_data(imagepaths):
    """Pair every image path with its class label.

    The label is the name of the directory that directly contains the image,
    which must be a single uppercase letter A-Z.

    Args:
        imagepaths: iterable of image path strings.

    Returns:
        2-D string array of shape ``(len(imagepaths), 2)`` whose rows are
        ``(path, label)``. An empty input yields shape ``(0, 2)``.

    Raises:
        ValueError: if an image's parent directory is not a single uppercase
            letter.
    """
    # Anchor on the LAST directory component. Searching for the first "/X/"
    # anywhere in the path picks up single-letter ancestor directories
    # (e.g. "/home/A/project/.../B/img.jpg" would be labelled "A").
    # Both separators are accepted so Windows-style paths also work.
    parent_letter = re.compile(r"[\\/]([A-Z])[\\/][^\\/]*$")

    data = []
    for p in imagepaths:
        match = parent_letter.search(p)
        if match is None:
            raise ValueError(
                "cannot derive an A-Z label from the parent directory of {0!r}".format(p)
            )
        data.append((p, match.group(1)))

    # dtype/reshape keep the (n, 2) string layout even when there are no rows.
    return np.array(data, dtype=str).reshape(-1, 2)

In [7]:
def train_val_split(imagepaths, val_amt):
    """Label the image paths and cut them into a training and a validation part.

    Args:
        imagepaths: image paths, passed to build_data to obtain (path, label) rows.
        val_amt: fraction of the rows (0..1) that goes to the validation part.

    Returns:
        ``(train, val)``: the leading ``int(n * (1 - val_amt))`` rows and the
        remaining rows. No shuffling happens here; row order is whatever
        build_data returned.
    """
    labelled = build_data(imagepaths)
    cut = int(labelled.shape[0] * (1 - val_amt))

    return labelled[:cut], labelled[cut:]

In [9]:
def create_tfrecord(data, type="train"):
    """Write (image path, label) rows to ``<cwd>/data/tfrecords/<type>.tfrecords``.

    Every record is a ``tf.train.Example`` with two bytes features: "image"
    holds the image file *path* (not the pixel data) and "label" holds the
    label string. An existing file of the same name is replaced.

    Args:
        data: 2-D array of (path, label) rows. It is shuffled IN PLACE before
            writing, so the caller's array order changes on every call.
        type: split name, used as the file stem.

    Returns:
        Path of the written TFRecord file.
    """
    np.random.shuffle(data)

    tfr_dir = "/data/tfrecords/"
    tfr_filename = "{0}.tfrecords".format(type)
    tfr_path = cwd + tfr_dir + tfr_filename

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(cwd + tfr_dir, exist_ok=True)
    if os.path.isfile(tfr_path):
        os.remove(tfr_path)

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    # The context manager closes (and flushes) the writer even when building
    # or writing a record raises, so no file handle is leaked.
    with tf.python_io.TFRecordWriter(tfr_path) as writer:
        for p in data:
            feature = {
                "image": _bytes_feature(tf.compat.as_bytes(p[0])),
                "label": _bytes_feature(tf.compat.as_bytes(p[1]))
            }

            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())

    # Reported only after the writer is closed, i.e. the file really is complete.
    print(tfr_filename + " Complete")

    return tfr_path

In [10]:
def _parse(proto):
    """Turn one serialized Example into an ``(image, label)`` tensor pair.

    The "image" feature stores a file path; the file is read and decoded here.
    The "label" feature is returned as the scalar string tensor it was stored as.
    Both features default to the empty string when absent.
    """
    schema = {
        "image": tf.FixedLenFeature((), tf.string, default_value=""),
        "label": tf.FixedLenFeature((), tf.string, default_value=""),
    }
    example = tf.parse_single_example(proto, schema)

    raw_bytes = tf.read_file(example["image"])
    return tf.image.decode_image(raw_bytes), example["label"]

In [None]:
def conv_layer(input, size_in, size_out, k_size, name):
    """Convolution (stride 1, SAME padding) + bias + ReLU inside variable scope `name`.

    Args:
        input: input tensor handed to tf.nn.conv2d.
        size_in: number of input channels.
        size_out: number of output channels (filters).
        k_size: side length of the square kernel.
        name: variable-scope name; also used to name the ReLU op.

    Returns:
        The ReLU activation tensor.
    """
    with tf.variable_scope(name) as scope:
        # No initializer is passed for the weights, so tf.get_variable's default applies.
        weights = tf.get_variable("weights", shape=[k_size, k_size, size_in, size_out])
        biases = tf.get_variable("biases", shape=[size_out], initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(input, weights, strides=[1,1,1,1], padding="SAME")
        # bias_add lives in tf.nn; there is no top-level tf.bias_add.
        conv = tf.nn.bias_add(conv, biases)
        act = tf.nn.relu(conv, name=scope.name)

        tf.summary.histogram("weights", weights)
        tf.summary.histogram("biases", biases)
        tf.summary.histogram("activations", act)

    return act

def batch_norm_layer(input, is_training, eps, name):
    """Batch normalization over axes [0, 1, 2] with moving-average statistics.

    While training, the batch mean/variance are used and their exponential
    moving averages (decay 0.99) are updated; otherwise the moving averages
    are used.

    Args:
        input: tensor whose last axis is the channel axis. The moments are
            reduced over axes 0, 1 and 2, so a 4-D tensor is assumed.
        is_training: boolean tensor used as the tf.cond predicate.
        eps: variance epsilon passed to tf.nn.batch_normalization.
        name: variable-scope name.

    Returns:
        The normalized tensor.
    """
    with tf.variable_scope(name) as scope:
        # Scale/offset are per channel (last axis), matching the shape of the
        # moments below; the leading axis is the batch axis.
        channels = input.get_shape().as_list()[-1]
        gamma = tf.get_variable("gamma", shape=[channels], initializer=tf.constant_initializer(1.0))
        beta = tf.get_variable("beta", shape=[channels], initializer=tf.constant_initializer(0.0))
        batch_mean, batch_var = tf.nn.moments(input, [0,1,2])
        # ExponentialMovingAverage is part of tf.train, not tf.nn.
        ema = tf.train.ExponentialMovingAverage(decay=0.99)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            # Force the moving-average update to run whenever the batch statistics are used.
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(
            is_training,
            mean_var_with_update,
            lambda: (ema.average(batch_mean), ema.average(batch_var)),
        )
        # Keywords make the order explicit: the positional order is
        # (offset, scale), i.e. beta comes BEFORE gamma.
        bn = tf.nn.batch_normalization(
            input, mean, var, offset=beta, scale=gamma, variance_epsilon=eps
        )

        tf.summary.histogram("gamma", gamma)
        tf.summary.histogram("beta", beta)

    return bn
        
def dropout_layer(input, dropout):
    """Apply tf.nn.dropout to `input`, forwarding `dropout` as its second positional argument.

    NOTE(review): in the TF 1.x signature that position is `keep_prob` (the
    probability of KEEPING a unit), not a drop rate - confirm callers pass
    the intended quantity.
    """
    return tf.nn.dropout(input, dropout)

def maxpool_layer(input, name):
    """Max-pool `input` with a 2x2 window, stride 2 and SAME padding; the op is named `name`."""
    window = [1,2,2,1]
    step = [1,2,2,1]
    pooled = tf.nn.max_pool(input, ksize=window, strides=step, padding="SAME", name=name)
    return pooled

def fc_layer(input, size_in, size_out, name):
    """Fully connected layer ``input @ weights + biases`` inside variable scope `name`.

    No non-linearity is applied; the returned tensor is the affine output.

    Args:
        input: 2-D tensor with `size_in` columns.
        size_in: number of input features.
        size_out: number of output units.
        name: variable-scope name.

    Returns:
        2-D tensor with `size_out` columns.
    """
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable("weights", shape=[size_in, size_out])
        # Zero-initialised biases, consistent with conv_layer.
        biases = tf.get_variable("biases", shape=[size_out], initializer=tf.constant_initializer(0.0))
        # Multiply the layer's own input (the former `reshape` name was undefined).
        act = tf.matmul(input, weights) + biases

        tf.summary.histogram("weights", weights)
        tf.summary.histogram("biases", biases)
        tf.summary.histogram("activations", act)

    return act

In [None]:
def model(X, dropout):
    """Build the CNN inference graph on `X` and return the class logits.

    Layout: conv1 -> conv2 -> maxpool -> conv3 -> conv4 -> maxpool -> flatten
    -> fc1(1024) -> dropout -> fc2(1024) -> dropout -> fc3(num_classes).

    The graph is built in the current default graph, on the tensor passed in.
    Labels are not part of this function; they are supplied to loss().

    Args:
        X: float32 image batch of shape [batch, height, width, 3]. Height and
            width must be statically known (e.g. 30x30) so the flattened
            feature size can be computed at graph-construction time.
        dropout: value forwarded to dropout_layer for both dropout stages.

    Returns:
        Logits tensor of shape [batch, num_classes].
    """
    # The input is used as given. Resetting the default graph or replacing X
    # with a new placeholder here would orphan the caller's tensors.
    tf.summary.image("input", X, 10)

    conv1 = conv_layer(X, 3, 64, 3, "conv1")
    conv2 = conv_layer(conv1, 64, 64, 3, "conv2")
    maxpool1 = maxpool_layer(conv2, "maxpool1")
    conv3 = conv_layer(maxpool1, 64, 64, 3, "conv3")
    conv4 = conv_layer(conv3, 64, 64, 3, "conv4")
    maxpool2 = maxpool_layer(conv4, "maxpool")

    # Static feature count per example: product of every non-batch dimension.
    flat_dim = int(np.prod(maxpool2.get_shape().as_list()[1:]))
    flatten = tf.reshape(maxpool2, shape=[-1, flat_dim])

    # NOTE(review): fc_layer applies no activation, so fc1..fc3 are stacked
    # affine maps separated only by dropout - confirm this is intended.
    fc1 = fc_layer(flatten, flat_dim, 1024, "fc1")
    dropout1 = dropout_layer(fc1, dropout)
    fc2 = fc_layer(dropout1, 1024, 1024, "fc2")
    dropout2 = dropout_layer(fc2, dropout)
    fc3 = fc_layer(dropout2, 1024, num_classes, "fc3")

    return fc3

In [None]:
def loss(logits, labels):
    """Mean softmax cross-entropy between `logits` and dense (one-hot or soft) `labels`.

    Args:
        logits: unnormalized class scores, shape [batch, num_classes].
        labels: per-class target distribution with the same shape as `logits`.

    Returns:
        Scalar tensor holding the batch-mean cross-entropy.
    """
    with tf.name_scope("loss"):
        # The dense cross-entropy op takes labels in the logits' float dtype.
        # Casting to an integer type would truncate non one-hot (soft) labels to 0.
        labels = tf.cast(labels, logits.dtype)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name="cross_entropy")

        tf.summary.scalar("cross entropy", cross_entropy_mean)

    return cross_entropy_mean

In [None]:
def training(loss, learning_rate):
    """Create the Adam training op that minimizes `loss`.

    A non-trainable ``global_step`` variable is created inside the "training"
    name scope and is incremented by every run of the returned op.

    Args:
        loss: scalar loss tensor to minimize.
        learning_rate: learning rate handed to tf.train.AdamOptimizer.

    Returns:
        The op produced by ``minimize``.
    """
    with tf.name_scope("training"):
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, name="global_step", trainable=False)
        return adam.minimize(loss, global_step=step_counter)

In [None]:
def evaluation(logits, labels):
    """Build the accuracy metric for dense (one-hot) labels.

    Takes the same ``(logits, labels)`` pair as loss(). A prediction counts as
    correct when the arg-max class of the logits equals the arg-max class of
    the labels.

    Args:
        logits: class scores, shape [batch, num_classes].
        labels: one-hot targets with the same shape as `logits`.

    Returns:
        Scalar float32 tensor: fraction of correct predictions in the batch.
    """
    with tf.name_scope("accuracy"):
        correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        tf.summary.scalar("accuracy", accuracy)

    return accuracy

In [15]:
# Smoke test of the input pipeline: label and split the training images, then
# repeatedly write both splits to TFRecord files and print the labels of a
# few batches read back through tf.data.
imagepaths = get_imagepaths("train")
train, val = train_val_split(imagepaths, 0.1)
print("train shape: {0}".format(train.shape))
print("val shape: {0}".format(val.shape))

# The record file path is fed at iterator-initialisation time, so the same
# graph can be re-pointed at a freshly written file on every round.
train_filepath = tf.placeholder(tf.string)
val_filepath = tf.placeholder(tf.string)

train_dataset = tf.data.TFRecordDataset(train_filepath).map(_parse).batch(5)
val_dataset = tf.data.TFRecordDataset(val_filepath).map(_parse).batch(10)

train_iter = train_dataset.make_initializable_iterator()
val_iter = val_dataset.make_initializable_iterator()
next_train = train_iter.get_next()
next_val = val_iter.get_next()

with tf.Session() as sess:
    for round_idx in range(10):
        # create_tfrecord reshuffles the rows and rewrites the file each round.
        train_fp = create_tfrecord(train, "train")
        sess.run(train_iter.initializer, feed_dict={train_filepath: train_fp})

        # One training batch per round; labels come back as bytes, hence the cast.
        image, label = sess.run(next_train)
        label = label.astype("U1")
        print(label)
        print("\n")

        val_fp = create_tfrecord(val, "val")
        sess.run(val_iter.initializer, feed_dict={val_filepath: val_fp})

        # Ten validation batches per round.
        for batch_idx in range(10):
            image1, label1 = sess.run(next_val)
            label1 = label1.astype("U1")
            print(label1)
        print("\n")


train shape: (70200, 2)
val shape: (7800, 2)
train.tfrecords Complete
['K' 'E' 'A' 'X' 'P']


val.tfrecords Complete
['D' 'I' 'K' 'G' 'V' 'L' 'Z' 'Y' 'L' 'T']
['A' 'K' 'K' 'J' 'V' 'O' 'J' 'L' 'Z' 'O']
['K' 'H' 'O' 'R' 'Z' 'Z' 'E' 'Q' 'B' 'K']
['N' 'D' 'Q' 'E' 'B' 'M' 'S' 'M' 'Z' 'O']
['F' 'V' 'R' 'Y' 'X' 'V' 'F' 'J' 'Z' 'V']
['H' 'C' 'G' 'L' 'L' 'T' 'Y' 'A' 'P' 'R']
['S' 'W' 'L' 'Q' 'L' 'A' 'C' 'I' 'V' 'I']
['V' 'N' 'W' 'D' 'Q' 'T' 'F' 'K' 'W' 'F']
['S' 'P' 'I' 'D' 'U' 'D' 'Q' 'I' 'F' 'Z']
['I' 'C' 'U' 'Y' 'Q' 'H' 'D' 'R' 'I' 'X']


train.tfrecords Complete
['T' 'R' 'O' 'H' 'T']


val.tfrecords Complete
['J' 'N' 'C' 'T' 'O' 'Y' 'Y' 'J' 'C' 'N']
['G' 'S' 'B' 'A' 'M' 'F' 'J' 'W' 'O' 'U']
['C' 'O' 'X' 'S' 'N' 'J' 'F' 'K' 'I' 'S']
['K' 'Q' 'K' 'T' 'T' 'U' 'Y' 'S' 'I' 'A']
['V' 'M' 'B' 'O' 'H' 'Y' 'N' 'P' 'B' 'A']
['F' 'D' 'C' 'L' 'F' 'T' 'W' 'Z' 'D' 'Z']
['P' 'Y' 'V' 'D' 'L' 'K' 'K' 'Q' 'Y' 'H']
['D' 'D' 'W' 'S' 'W' 'J' 'D' 'W' 'D' 'S']
['X' 'O' 'N' 'S' 'P' 'Z' 'E' 'B' 'I' 'J']
['E' 'V' 'V