In [1]:
import tensorflow as tf
import csv

# Input pipeline functions
Functions needed for processing train data

In [2]:
def get_csv_columns(file):
    
    with open(file, 'r') as f:
        reader = csv.reader(f)
        columns = next(reader)
        
    return columns

def create_array_types(file, type):
    return [type for c in get_csv_columns(file)]

train_record_defaults = create_array_types('train.csv', [1])
test_record_defaults = create_array_types('test.csv', [1])


def get_train_record(record):
    vector = tf.decode_csv(record, train_record_defaults)
    return vector[1:], vector[0]

def get_test_record(record):
    return tf.decode_csv(record, test_record_defaults)

def reshape_train_image(feature, label):
    return tf.reshape(feature, [28, 28, 1]), label

def reshape_test_image(feature):
    return tf.reshape(feature, [28, 28, 1])

## Input pipeline

In [3]:
def train_input_fn():
    filenames = ['train.csv']

    dataset = tf.contrib.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset \
                .flat_map(lambda filename: (tf.contrib.data.TextLineDataset(filename).skip(1))) \
                .map(get_train_record) \
                .map(reshape_train_image) \
                .shuffle(buffer_size=10000)

    batch = dataset.batch(256)

    iterator = batch.make_one_shot_iterator()
    features, labels = iterator.get_next()
    return features, tf.one_hot(tf.cast(labels, tf.int32), 10)

def test_input_fn():
    filenames = ['test.csv']

    dataset = tf.contrib.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset \
                .flat_map(lambda filename: (tf.contrib.data.TextLineDataset(filename).skip(1))) \
                .map(get_test_record) \
                .map(reshape_test_image) \

    batch = dataset.batch(256)

    iterator = batch.make_one_shot_iterator()
    return iterator.get_next()

# Creating Convolutional 2D Network



## Classifier model

In [4]:
def cnn_model_fn(features, labels, mode):

    float_features = tf.cast(features, tf.float32)
    normalized_features = tf.layers.batch_normalization(float_features)
    
    conv1 = tf.layers.conv2d( \
                inputs=normalized_features, \
                filters=32, \
                kernel_size=[5, 5], \
                padding="same", \
                activation=tf.nn.relu \
            )

    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    
    conv2 = tf.layers.conv2d( \
                inputs=pool1, \
                filters=64, \
                kernel_size=[5, 5], \
                padding="same", \
                activation=tf.nn.relu \
            )
    
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
    
    logits = tf.layers.dense(inputs=dropout, units=10)
    
    predictions = {
        'classes': tf.argmax(input=logits, axis = 1),
        'probabilities' : tf.nn.softmax(logits, name="softmax_tensor")
    }
    
    ## PREDICT MODE
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    
    ## FOR TRAINING AND EVALUATION MODE
    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
    
    
    ## TRAIN MODE
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    
    ## EVAL MODE
    eval_metric_ops = {
        'accuracy': tf.metrics.accuracy(labels=tf.argmax(labels, axis = 1), predictions=predictions["classes"])
    }
    return  tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


## Second convolution

In [5]:
mnist_classifier = tf.estimator.Estimator(model_fn = cnn_model_fn, model_dir='/tmp/mnist_cnvnet_model')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_model_dir': '/tmp/mnist_cnvnet_model', '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_tf_random_seed': 1, '_session_config': None}


In [6]:
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)

In [7]:
mnist_classifier.train( \
    input_fn=train_input_fn, \
    steps=200000, \
    hooks=[])

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/mnist_cnvnet_model\model.ckpt-2146
INFO:tensorflow:Saving checkpoints for 2147 into /tmp/mnist_cnvnet_model\model.ckpt.
INFO:tensorflow:loss = 0.0752455, step = 2147
INFO:tensorflow:global_step/sec: 4.59191
INFO:tensorflow:loss = 0.0722309, step = 2247 (21.777 sec)
INFO:tensorflow:Saving checkpoints for 2311 into /tmp/mnist_cnvnet_model\model.ckpt.
INFO:tensorflow:Loss for final step: 0.0255472.


<tensorflow.python.estimator.estimator.Estimator at 0x15f1da85eb8>

In [8]:
mnist_classifier.evaluate(input_fn=train_input_fn)

INFO:tensorflow:Starting evaluation at 2017-08-30-22:23:05
INFO:tensorflow:Restoring parameters from /tmp/mnist_cnvnet_model\model.ckpt-2311
INFO:tensorflow:Finished evaluation at 2017-08-30-22:23:33
INFO:tensorflow:Saving dict for global step 2311: accuracy = 0.978071, global_step = 2311, loss = 0.0722973


{'accuracy': 0.97807145, 'global_step': 2311, 'loss': 0.07229735}

In [40]:
def print_predictions(predictions, batch):
    with open('output.csv', 'a') as f:
        for i, p in enumerate(predictions):
            f.write('%s,%s\n' % (batch* i + 1, p['classes']))
        f.close()

In [None]:
with open('output.csv', 'w') as f:
    f.write('ImageId,Label\n')
    f.close()
    
for batch in range(300):
    predictions = mnist_classifier.predict(input_fn=test_input_fn)
    while True:
        try:
            print_predictions(predictions, batch+1)
        except tf.errors.OutOfRangeError:
            break

INFO:tensorflow:Restoring parameters from /tmp/mnist_cnvnet_model\model.ckpt-2146
