In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline

tf.logging.set_verbosity(tf.logging.INFO)

In [2]:
# reading the Kaggle datasets
train = pd.read_csv('../input/train.csv') # training dataset
infer = pd.read_csv('../input/test.csv')  # prediction dataset

In [3]:
# summary
print('Size of the training dataset {}'.format(train.shape))
print('Size of the inference dataset {}'.format(infer.shape))

Size of the training dataset (42000, 785)
Size of the inference dataset (28000, 784)


In [4]:
X = train.drop('label',axis=1)
y = train['label']

# normalizing the images
X = X / 255.0
infer = infer / 255.0

X = X.values.tolist()
infer = infer.values.tolist()

X = np.asarray(X,dtype=np.float32)
y = np.asarray(y,dtype=np.int32)
infer = np.asarray(infer,dtype=np.float32)

In [5]:
# defining the structure of the CNN

def cnn_model_function(features, labels, mode):
    
    # input layer
    # reshaping x to 4-D tensor: [batch_size, width, height, channels]
    layer_1 = tf.reshape(features['x'], [-1, 28, 28, 1]) 
    
    # convolution layer 1
    # computes 32 features using 10x10 filter with ReLU activation.
    # input tensor: [batch_size, 28, 28, 1]
    # output tensor: [batch_size, 28, 28, 32]
    layer_2 = tf.layers.conv2d(                          
                inputs=layer_1,
                filters=32,
                kernel_size=[10, 10],
                padding="same",
                activation=tf.nn.relu)
    
    # convolution layer 2
    # computes 32 features using 5x5 filter with ReLU activation.
    # input tensor: [batch_size, 28, 28, 32]
    # output tensor: [batch_size, 28, 28, 64]
    layer_3 = tf.layers.conv2d(
                inputs=layer_2,
                filters=64,
                kernel_size=[5, 5],
                padding="same",
                activation=tf.nn.relu)
    
    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 28, 28, 64]
    # Output Tensor Shape: [batch_size, 28 * 28 * 64]
    layer_4 = tf.reshape(layer_3, [-1, 28 * 28 * 64])
    
    # Dense Layer
    # Densely connected layer with 1024 neurons
    # Input Tensor Shape: [batch_size, 28 * 28 * 64]
    # Output Tensor Shape: [batch_size, 1024]
    layer_5 = tf.layers.dense(inputs=layer_4, units=1024, activation=tf.nn.relu)
    
    # Dropout operation; 0.6 probability that element will be kept
    layer_6 = tf.layers.dropout(inputs=layer_5, rate=0.4, training=(mode == tf.estimator.ModeKeys.TRAIN))
    
    # Logits layer
    # Input Tensor Shape: [batch_size, 1024]
    # Output Tensor Shape: [batch_size, 10]
    logits = tf.layers.dense(inputs=layer_6, units=10)
    
    predictions = {
                "classes": tf.argmax(input=logits, axis=1),
                "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions['classes'], name='acc_op')
    tf.summary.scalar('accuracy', accuracy[1])
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    
    eval_metric_ops = {
                    "accuracy": tf.metrics.accuracy(
                    labels=labels, predictions=predictions["classes"])}

    return tf.estimator.EstimatorSpec(
                mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [6]:
# defining input function for training

train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={'x':X},
      y=y,
      shuffle=True,
      num_epochs=1,
      batch_size=80)

In [7]:
# defining the input function for evaluating the model performance

eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x':X},
    y=y,
    shuffle=False,
    num_epochs=1)

In [8]:
#tensors_to_log = {"probabilities": "softmax_tensor"}
#logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)

In [56]:
OUTDIR = './CNN_CLASSIFIER'

cnn_classifier = tf.estimator.Estimator(model_fn=cnn_model_function, model_dir=OUTDIR)

file_writer = tf.summary.FileWriter(OUTDIR)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './CNN_CLASSIFIER', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001130579E160>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [57]:
def train_and_evaluate(estimator, epochs=10):
    for i in range(epochs):
        estimator.train(input_fn=train_input_fn)
        estimator.evaluate(input_fn=eval_input_fn)

In [59]:
train_and_evaluate(cnn_classifier)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./CNN_CLASSIFIER\model.ckpt-3150
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 3150 into ./CNN_CLASSIFIER\model.ckpt.
INFO:tensorflow:loss = 0.012369426, step = 3151


KeyboardInterrupt: 

In [62]:
# prediction input function
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x':infer},
    shuffle=False,
    num_epochs=1)

# creating a generator to iterate through the predictions
generator = cnn_classifier.predict(input_fn=predict_input_fn)

# storing all predictions (plus probabilities into an array of dictionaries)
predictions = [next(generator) for i in range(len(infer))]

# storing the class predictions in a separate array
classes = [predictions[i]['classes'] for i in range(len(predictions))]

ERROR:tensorflow:Couldn't match files for checkpoint ./CNN_CLASSIFIER\model.ckpt-3150
INFO:tensorflow:Could not find trained model in model_dir: ./CNN_CLASSIFIER, running initialization to predict.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [63]:
def generate_submission_file(predictions, fileName):
    submission = pd.DataFrame()
    
    submission['ImageId'] = range(1,28001,1)
    submission['Label'] = predictions
    submission.set_index('ImageId', inplace=True)
    submission.to_csv('../output/' + fileName)
    
generate_submission_file(classes, 'submission-07.csv')