In [59]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import tensorflow as tf
from PIL import Image
import glob
import shutil
import os
import re

%matplotlib inline

In [3]:
# Directory the notebook was launched from; the data paths in this notebook are
# built by string-concatenating onto it.
cwd = os.getcwd()
# Dataset directory template, appended to `cwd`. `{0}` is filled with the split
# name (e.g. "train") by get_imagepaths().
filepath = "/data/asl_alphabet_{0}/"

In [75]:
# Number of target classes; also the second dimension of the label placeholder `Y`.
num_classes = 26
# Mini-batch size. 117 divides the 70200-example training split evenly
# (600 batches) -- presumably why it was chosen; confirm before changing.
batch_size = 117
# Learning rate. The placeholder cell further down rebinds the name `lr` to a
# tf.placeholder, which makes the numeric value unreachable through `lr` once
# that cell has run. Keep the number under a constant that is never shadowed
# so it can still be fed; `lr` stays defined for backward compatibility.
LEARNING_RATE = 0.0001
lr = LEARNING_RATE

# Data Exploration and Preprocessing

In [51]:
def get_imagepaths(type="train"):
    """Collect the paths of all ``.jpg`` images for one dataset split.

    Images are looked up exactly one directory level below
    ``cwd + filepath.format(type)`` (one sub-directory per class).

    Args:
        type: split name substituted into the ``filepath`` template.

    Returns:
        1-D ``np.ndarray`` of path strings in random order. The shuffle uses
        NumPy's global RNG, so the order changes between runs unless the
        caller seeds it.
    """
    class_dirs = glob.glob(cwd + filepath.format(type) + "*/")
    found = [
        jpg_path
        for class_dir in class_dirs
        for jpg_path in glob.glob(class_dir + "*.jpg")
    ]
    shuffled = np.array(found)
    np.random.shuffle(shuffled)  # in-place shuffle
    return shuffled

In [34]:
def build_data(imagepaths):
    """Pair every image path with the class letter of its parent directory.

    The label is the single upper-case letter naming the directory that
    directly contains the image (``.../B/img.jpg`` -> ``"B"``). The pattern is
    anchored to the end of the path so that a one-letter directory higher up
    (for example ``/home/A/...``) is not mistaken for the label. Both forward
    slashes and backslashes are accepted as separators.

    Args:
        imagepaths: iterable of image path strings.

    Returns:
        ``np.ndarray`` of strings with one ``(path, label)`` row per input path.

    Raises:
        ValueError: if an image's parent directory is not a single A-Z letter.
    """
    # separator, LETTER, separator, then the file name up to end-of-string
    label_pattern = re.compile(r"[\\/]([A-Z])[\\/][^\\/]*$")

    data = []
    for p in imagepaths:
        match = label_pattern.search(p)
        if match is None:
            raise ValueError(
                "Cannot infer class label from path: {0}".format(p))
        data.append((p, match.group(1)))

    return np.array(data)

In [69]:
def train_val_split(imagepaths, val_amt):
    """Label the image paths and cut them into a training and a validation part.

    No shuffling happens here: the first ``int(n * (1 - val_amt))`` rows go to
    training and the remaining rows go to validation.

    Args:
        imagepaths: image paths, passed straight to ``build_data``.
        val_amt: fraction of the rows to hold out for validation.

    Returns:
        Tuple ``(train, val)`` of arrays of ``(path, label)`` rows.
    """
    labelled = build_data(imagepaths)
    cut = int(labelled.shape[0] * (1 - val_amt))
    return labelled[:cut], labelled[cut:]

In [35]:
def _bytes_feature(value):
    """Wrap one bytes value in a ``tf.train.Feature`` holding a one-item BytesList."""
    wrapped = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)

In [72]:
def create_tfrecord(data, type="train"):
    """Serialise ``(path, label)`` rows to ``<cwd>/data/tfrecords/<type>.tfrecords``.

    The output directory is created when missing and an existing file of the
    same name is removed first. Each row becomes one ``tf.train.Example``
    with two bytes features:

    * ``"train/image"`` -- the encoded image *path* string (not pixel data)
    * ``"train/label"`` -- the encoded label string

    The feature keys are ``train/...`` for every split, whatever ``type`` is.

    Args:
        data: iterable of rows where ``row[0]`` is the image path and
            ``row[1]`` the label, both strings.
        type: split name, used as the file stem.
    """
    tfr_dir = cwd + "/data/tfrecords/"
    if not os.path.exists(tfr_dir):
        os.makedirs(tfr_dir)

    tfr_filename = "{0}.tfrecords".format(type)
    tfr_path = tfr_dir + tfr_filename
    if os.path.isfile(tfr_path):
        os.remove(tfr_path)

    # The context manager closes (and flushes) the writer even when encoding
    # or serialising a row raises, so no file handle is leaked.
    with tf.python_io.TFRecordWriter(tfr_path) as writer:
        for p in data:
            feature = {
                "train/image": _bytes_feature(p[0].encode()),
                "train/label": _bytes_feature(p[1].encode())
            }
            example = tf.train.Example(
                features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())

    # Report only after the file has actually been closed.
    print(tfr_filename + " Complete")

In [73]:
# Gather every training image, hold out 10% of them for validation, report the
# split sizes, then write each split to its own TFRecord file.
imagepaths = get_imagepaths("train")
train, val = train_val_split(imagepaths, 0.1)

print("Train data size: {0}".format(len(train)))
print("Validation data size: {0}".format(len(val)))

create_tfrecord(train, "train")
create_tfrecord(val, "validation")

Train data size: 70200
Validation data size: 7800
train.tfrecords Complete
validation.tfrecords Complete


In [79]:
# Graph inputs (TF1 placeholders).
# Image batch: float32, shape [None, 30, 30, 3].
X = tf.placeholder(tf.float32, [None, 30, 30, 3], name="input")
# Label batch: int32, shape [None, num_classes].
Y = tf.placeholder(tf.int32, [None, num_classes], name="output")
# NOTE(review): this rebinds the module-level name `lr`, which the
# hyper-parameter cell set to 0.0001. After this cell runs the float value is
# no longer reachable through `lr`, and re-running the hyper-parameter cell
# replaces this placeholder again.
lr = tf.placeholder(tf.float64, name="learning_rate")
# Batch size passed to Dataset.batch() in setup_datasets().
batch = tf.placeholder(tf.int64, name="batch_size")
# Locations of the TFRecord files opened in setup_datasets().
train_filepath = tf.placeholder(tf.string, name="train_filepath")
val_filepath = tf.placeholder(tf.string, name="val_filepath")

In [80]:
def setup_datasets():
    """Build the train/validation input pipelines behind one reinitializable iterator.

    Both datasets read the TFRecord files named by the ``train_filepath`` and
    ``val_filepath`` placeholders and are batched with the ``batch``
    placeholder. The records are not parsed here, so the iterator yields
    batches of serialised ``tf.train.Example`` strings.

    Returns:
        Tuple ``(next_elem, train_init_op, val_init_op)``:
        the tensor producing the next batch, and the ops that point the
        shared iterator at the training or the validation dataset.
    """
    train_dataset = tf.data.TFRecordDataset(train_filepath).batch(batch)
    # The validation data must be batched too. The iterator below takes its
    # structure from the batched training dataset, and an unbatched dataset
    # of scalar strings is not shape-compatible with that structure.
    val_dataset = tf.data.TFRecordDataset(val_filepath).batch(batch)

    iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                               train_dataset.output_shapes)

    next_elem = iterator.get_next()
    train_init_op = iterator.make_initializer(train_dataset)
    val_init_op = iterator.make_initializer(val_dataset)

    # Hand the graph handles back; without this they are discarded when the
    # function returns.
    return next_elem, train_init_op, val_init_op

In [81]:
def model(input):
    """Stub for the network definition: not implemented yet, returns ``None``.

    Args:
        input: currently unused.
    """