In [26]:
import io
import numpy as np
import tensorflow as tf

tf.enable_eager_execution()

In [15]:
filename = '/tmp/data/mnist/tmpbjd0WU/train_shuffled-00000-of-00001.gz'

In [16]:
filenames = [filename]
raw_dataset = tf.data.TFRecordDataset(filenames, compression_type='GZIP')

In [17]:
FEATURE_DESCRIPTION = {
    'image': tf.FixedLenFeature([], tf.string, default_value=''),
    'label': tf.FixedLenFeature([], tf.int64, default_value=0),
}

def _parse_function(example_proto):
    # Parse the input tf.Example proto using the dictionary above.
    return tf.parse_single_example(example_proto, FEATURE_DESCRIPTION)

parsed_dataset = raw_dataset.map(_parse_function)

In [27]:
record = next(iter(parsed_dataset.take(1)))

### decode to `np.ndarray`

In [19]:
def convert_parsed_record_to_ndarray(parsed_record):
    x = parsed_record['image']
    x_np = x.numpy()
    bytestream = io.BytesIO(x_np)
    rows = 28
    cols = 28
    num_images = 1
    buf = bytestream.read(rows * cols * num_images)
    data = np.frombuffer(buf, dtype=np.uint8)
    shape = (rows, cols, num_images)
    data = data.reshape(*shape)
    assert isinstance(data, np.ndarray), type(data)
    assert data.shape == shape
    return data

img = convert_parsed_record_to_ndarray(record)
type(img), img.shape

(numpy.ndarray, (28, 28, 1))

In [20]:
record['label']

<tf.Tensor: id=71, shape=(), dtype=int64, numpy=4>

### convert byte string tensor to array

In [21]:
IMAGE_KEY = 'image'
LABEL_KEY = 'label'
IMAGE_PIXELS = 784
features = record

image = tf.decode_raw(features[IMAGE_KEY], tf.uint8)
image.set_shape((IMAGE_PIXELS))  # 784

# Convert label from a scalar uint8 tensor to an int32 scalar.
label = tf.cast(features[LABEL_KEY], tf.int32)

In [22]:
image.shape

TensorShape([Dimension(784)])

In [23]:
label

<tf.Tensor: id=77, shape=(), dtype=int32, numpy=4>

In [24]:
!pwd

/Users/aaron/Documents/github/transform


In [28]:
HEIGHT = WIDTH = 28
image = tf.reshape(image, (HEIGHT, WIDTH))
image.shape

TensorShape([Dimension(28), Dimension(28)])

In [29]:
from examples.aaron_simple_example import *

In [42]:
from logzero import logger

TRAIN_NUM_EPOCHS = 10

def get_feature_columns():
    image_column = tf.feature_column.numeric_column(IMAGE_KEY, shape=[HEIGHT, WIDTH])
    return [image_column]

def make_input_fn(filename, batch_size):
    def input_fn():
        dataset = tf.data.TFRecordDataset([filename],
                                      compression_type='GZIP')
        dataset = dataset.map(decode)
        dataset = dataset.map(augment)
        dataset = dataset.batch(batch_size)
        steps = NUM_TRAIN_INSTANCES // TRAIN_BATCH_SIZE
        dataset = dataset.repeat(TRAIN_NUM_EPOCHS * steps)
        image, label = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()
        return {IMAGE_KEY: image}, label
    return input_fn

estimator = tf.estimator.DNNClassifier(
    feature_columns=get_feature_columns(),
    hidden_units=[256, 32],
    optimizer=tf.train.AdamOptimizer(1e-4),
    n_classes=10,
    dropout=0.1,
)

# train
train_steps = TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES / TRAIN_BATCH_SIZE
logger.info('train_steps: %s', train_steps)

estimator.train(
    input_fn=make_input_fn(
        filename='/tmp/data/mnist/tmpbjd0WU/train_shuffled-00000-of-00001.gz',
        batch_size=28
    ),
    max_steps=train_steps)

# eval
result = estimator.evaluate(
    input_fn=make_input_fn(
        filename='/tmp/data/mnist/tmpbjd0WU/test_shuffled-00000-of-00001.gz',
        batch_size=1
    ),
    steps=NUM_TEST_INSTANCES)

result

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1278b9890>, '_model_dir': '/var/folders/k5/bp3zwqms0bx9sp3p4mfl4vf80000gn/T/tmpvzt8yz', '_protocol': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_device_fn': None, '_experimental_distribute': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_evaluation_master': '', '_eval_distribute': None, '_train_distribute': None, '_master': ''}


[I 190515 06:15:01 <ipython-input-42-12c3145a1832>:32] train_steps: 4687


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/k5/bp3zwqms0bx9sp3p4mfl4vf80000gn/T/tmpvzt8yz/model.ckpt.
INFO:tensorflow:loss = 127.95053, step = 1
INFO:tensorflow:global_step/sec: 101.243
INFO:tensorflow:loss = 33.263496, step = 101 (0.990 sec)
INFO:tensorflow:global_step/sec: 218.671
INFO:tensorflow:loss = 11.902019, step = 201 (0.457 sec)
INFO:tensorflow:global_step/sec: 241.799
INFO:tensorflow:loss = 5.7286205, step = 301 (0.414 sec)
INFO:tensorflow:global_step/sec: 244.35
INFO:tensorflow:loss = 13.468685, step = 401 (0.410 sec)
INFO:tensorflow:global_step/sec: 244.571
INFO:tensorflow:loss = 2.9759612, step = 501 (0.409 sec)
INFO:tensorflow:global_step/sec: 241.024
INFO:tensorflow:loss = 6.0545306, step = 601 (0.415 sec)
INFO:tensorflow:

{'accuracy': 0.4,
 'average_loss': 5.4092455,
 'global_step': 4680,
 'loss': 5.4092455}