### Importing necessary stuff !

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


 ### Reading in data (and defining necessary constants ) !

In [2]:
full_data = pd.read_csv('../input/train.csv')

In [3]:
LABEL = 'label'

In [4]:
NUM_PREDICTOR_COLS = 784
PREDICTOR_COLS = ['pixel' + str(i) for i in range(NUM_PREDICTOR_COLS)]

In [5]:
REL_COLUMNS = PREDICTOR_COLS.copy()
REL_COLUMNS.append('label')

### Checking for null values.

In [6]:
full_data.isnull().values.any()

False

### Split into training and validation sets

In [7]:
LEN_TRAIN_SET = int(0.8 * len(full_data))
train_data = full_data[0:LEN_TRAIN_SET]
validation_data = full_data[LEN_TRAIN_SET:len(full_data)]

In [8]:
assert(len(train_data) + len(validation_data) == len(full_data))

In [9]:
len(full_data)

42000

In [10]:
len(train_data)

33600

In [11]:
len(validation_data) 

8400

### Fit model on training data


In [12]:
def make_train_input_fn(df, num_epochs):
    return tf.estimator.inputs.pandas_input_fn(
        x=df,
        y=df[LABEL],
        shuffle=True,
        num_epochs=num_epochs)

In [13]:
def make_eval_input_fn(df):
    return tf.estimator.inputs.pandas_input_fn(
        x=df,
        y=df[LABEL],
        shuffle=False)

In [14]:
def make_prediction_input_fn(df):
    return tf.estimator.inputs.pandas_input_fn(
        x=df,
        y=None,
        shuffle=False)    

In [15]:
def make_features():
    input_cols = [tf.feature_column.numeric_column(col) for col in PREDICTOR_COLS]
    return input_cols

In [16]:
X_train = train_data[PREDICTOR_COLS]
Y_train = np.ravel(train_data[['label']])

In [17]:
OUTDIR = 'classification_outputs'
import shutil
shutil.rmtree(OUTDIR, ignore_errors=True)

In [18]:
N_CLASSES = 10

In [19]:
HIDDEN_UNITS = [256, 32]

In [20]:
model = tf.estimator.DNNClassifier(feature_columns=make_features(),
                                   hidden_units=HIDDEN_UNITS,
                                   n_classes=N_CLASSES,
                                   dropout=0.2,
                                   optimizer=tf.train.AdamOptimizer(1e-3),
                                   model_dir=OUTDIR)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'classification_outputs', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x10284e748>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [21]:
NUM_EPOCHS = 100
model.train(input_fn=make_train_input_fn(train_data[REL_COLUMNS], num_epochs=NUM_EPOCHS))    

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into classification_outputs/model.ckpt.
INFO:tensorflow:loss = 18152.53, step = 1
INFO:tensorflow:global_step/sec: 6.45309
INFO:tensorflow:loss = 315.3837, step = 101 (15.493 sec)
INFO:tensorflow:global_step/sec: 8.1686
INFO:tensorflow:loss = 260.54797, step = 201 (12.242 sec)
INFO:tensorflow:Saving checkpoints for 263 into classification_outputs/model.ckpt.
INFO:tensorflow:Loss for final step: 126.65021.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1814e11f98>

In [22]:
metrics = model.evaluate(input_fn = make_eval_input_fn(train_data[REL_COLUMNS]))
print(metrics['accuracy'])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-18-16:20:16
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from classification_outputs/model.ckpt-263
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-18-16:20:56
INFO:tensorflow:Saving dict for global step 263: accuracy = 0.24056548, average_loss = 1.9816853, global_step = 263, loss = 253.17348
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 263: classification_outputs/model.ckpt-263
0.24056548


In [23]:
metrics = model.evaluate(input_fn = make_eval_input_fn(validation_data[REL_COLUMNS]))
print(metrics['accuracy'])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-18-16:21:03
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from classification_outputs/model.ckpt-263
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-18-16:21:16
INFO:tensorflow:Saving dict for global step 263: accuracy = 0.24345239, average_loss = 1.9717286, global_step = 263, loss = 250.94727
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 263: classification_outputs/model.ckpt-263
0.24345239


### Making predictions on validation data

In [24]:
validation_predictions = model.predict(input_fn=make_prediction_input_fn(validation_data[PREDICTOR_COLS]))

validation_data = validation_data.assign(prediction=[int(i['classes'][0]) for i in validation_predictions])
validation_data['ImageId'] = validation_data.index
validation_data[['ImageId', 'label', 'prediction']].to_csv('validation_dnn_tensorflow.csv', index=False)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from classification_outputs/model.ckpt-263
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


### Making predictions on test data

In [25]:
test_data = pd.read_csv('../input/test.csv')

In [26]:
OUTDIR = 'classification_outputs'
import shutil
shutil.rmtree(OUTDIR, ignore_errors=True)

In [27]:
model = tf.estimator.DNNClassifier(feature_columns=make_features(),
                                   hidden_units=HIDDEN_UNITS,
                                   n_classes=N_CLASSES,
                                   dropout=0.2,
                                   optimizer=tf.train.AdamOptimizer(1e-3),
                                   model_dir=OUTDIR)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'classification_outputs', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x18386370b8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [28]:
model.train(input_fn=make_train_input_fn(full_data[REL_COLUMNS], num_epochs=NUM_EPOCHS))    

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into classification_outputs/model.ckpt.
INFO:tensorflow:loss = 18480.39, step = 1
INFO:tensorflow:global_step/sec: 6.2345
INFO:tensorflow:loss = 267.40735, step = 101 (16.035 sec)
INFO:tensorflow:global_step/sec: 8.25883
INFO:tensorflow:loss = 272.87494, step = 201 (12.108 sec)
INFO:tensorflow:global_step/sec: 8.32126
INFO:tensorflow:loss = 273.00043, step = 301 (12.017 sec)
INFO:tensorflow:Saving checkpoints for 329 into classification_outputs/model.ckpt.
INFO:tensorflow:Loss for final step: 34.06228.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1851b53d68>

In [29]:
test_predictions = model.predict(input_fn=make_prediction_input_fn(test_data[PREDICTOR_COLS]))

In [30]:
test_data['Label'] = [int(i['classes'][0]) for i in test_predictions]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from classification_outputs/model.ckpt-329
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [31]:
test_data['ImageId'] = np.arange(1, len(test_data)+1)

In [32]:
test_data[['ImageId', 'Label']].to_csv('submission_dnn_tensorflow.csv', index=False)