In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shutil
import os

In [2]:
validation_set_ratio = 0.3
learning_rate = 0.1
summery_dir = './logs_mlp'

In [3]:
all_train_csv = pd.read_csv('./train.csv')
test_csv = pd.read_csv('./test.csv')
all_train_csv.head(7)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
all_train_m = all_train_csv.as_matrix()

# Take first column for label, others for data
all_train_label = all_train_m[:, 0]
all_train_data = all_train_m[:, 1:]

test_data = test_csv.as_matrix()

In [5]:
# Preprocessing

def preprocess_data(data):
    # Data : int?(0~255) to float16(0.0~1.0)
    data = data.astype(np.float16) / 255

    # Reshape : [784] to [28][28]
    data = data.reshape([-1, 28, 28])
    
    return data

all_train_data = preprocess_data(all_train_data)
test_data = preprocess_data(test_data)

In [6]:
# Split train & validation datasets
num_all_train = len(all_train_label)
num_train_set = int(num_all_train * (1 - validation_set_ratio))
num_validate_set = num_all_train - num_train_set

train_label = all_train_label[:num_train_set]
train_data = all_train_data[:num_train_set]
validate_label = all_train_label[num_train_set:]
validate_data = all_train_data[num_train_set:]

'train', num_train_set, 'validate', num_validate_set

('train', 29399, 'validate', 12601)

In [7]:
# Placeholders
X = tf.placeholder(tf.float32, [None, 28, 28])
Y = tf.placeholder(tf.int64, [None])
Y_onehot = tf.one_hot(Y, 10)

In [8]:
# Defining model
X_flat = tf.layers.flatten(X)
L1 = tf.layers.dense(X_flat, 128, activation=tf.nn.relu, name='dense_1')
D1 = tf.layers.dropout(L1, 0.5)
L2 = tf.layers.dense(D1, 32, activation=tf.nn.relu, name='dense_2')
D2 = tf.layers.dropout(L2, 0.5)
L3 = tf.layers.dense(D2, 10, name='dense_3')
Y_ = tf.nn.softmax(L3)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y_onehot, logits=L3))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)
prediction = tf.argmax(Y_, 1)
correct_prediction = tf.equal(prediction, Y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [9]:
# Summery
tf.summary.scalar('cost', cost)
tf.summary.scalar('accuracy', accuracy)

summaries = tf.summary.merge_all()

In [10]:
# Start session
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Purge and write new summaries
try:
    shutil.rmtree(summery_dir)
except:
    pass
os.mkdir(summery_dir)
summary_writer = tf.summary.FileWriter(summery_dir + '/train', sess.graph)
summary_writer_validation = tf.summary.FileWriter(summery_dir + '/validation', sess.graph)

In [11]:
steps = 0

In [12]:
for n in range(500):
    _, step_cost, train_acc, summ = sess.run([train_step, cost, accuracy, summaries], feed_dict={
             X: train_data,
             Y: train_label
            })
    validate_acc, summ_validation = sess.run([accuracy, summaries], feed_dict={
        X: validate_data,
        Y: validate_label
    })
    
    summary_writer.add_summary(summ, steps)
    summary_writer_validation.add_summary(summ_validation, steps)
    
    if steps % 20 == 0:
        print('.', end='')
        print('step %d cost %.05f t_acc %.02f v_acc %.02f'
                % (steps, step_cost, train_acc, validate_acc))
        
    steps += 1

.step 0 cost 2.40397 t_acc 0.07 v_acc 0.22
.step 20 cost 1.22040 t_acc 0.59 v_acc 0.61
.step 40 cost 0.64515 t_acc 0.83 v_acc 0.82
.step 60 cost 0.41148 t_acc 0.88 v_acc 0.88
.step 80 cost 0.29746 t_acc 0.92 v_acc 0.90
.step 100 cost 0.24213 t_acc 0.93 v_acc 0.91
.step 120 cost 0.20485 t_acc 0.94 v_acc 0.92
.step 140 cost 0.17935 t_acc 0.95 v_acc 0.92
.step 160 cost 0.16584 t_acc 0.95 v_acc 0.93
.step 180 cost 0.14707 t_acc 0.96 v_acc 0.93
.step 200 cost 0.14393 t_acc 0.96 v_acc 0.92
.step 220 cost 0.12844 t_acc 0.96 v_acc 0.93
.step 240 cost 0.11396 t_acc 0.97 v_acc 0.93
.step 260 cost 0.10496 t_acc 0.97 v_acc 0.93
.step 280 cost 0.10860 t_acc 0.97 v_acc 0.93
.step 300 cost 0.09228 t_acc 0.97 v_acc 0.93
.step 320 cost 0.09047 t_acc 0.97 v_acc 0.92
.step 340 cost 0.15224 t_acc 0.96 v_acc 0.92
.step 360 cost 0.09724 t_acc 0.97 v_acc 0.93
.step 380 cost 0.07858 t_acc 0.98 v_acc 0.93
.step 400 cost 0.06982 t_acc 0.98 v_acc 0.93
.step 420 cost 0.06621 t_acc 0.98 v_acc 0.93
.step 440 cost 0

In [13]:
test_predictions = sess.run(prediction, feed_dict={
    X: test_data
})

In [14]:
# Generate a submit file

p = pd.DataFrame({
    'ImageId': range(1, len(test_predictions) + 1,),
    'Label': test_predictions
})

p.to_csv('./my_submit.csv', index=False)