Load Dataset

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf

import warnings
warnings.filterwarnings('ignore')

In [3]:
def make_val(train_data, val_size):

    val_idx = np.random.choice(range(len(train_data)), val_size, replace=False)
    val_data = train_data[val_idx]
    train_data = np.delete(train_data, val_idx, 0)

    return train_data, val_data

def make_onehot(dataset, cate_num):

    dataset = np.hstack((np.squeeze(np.eye(cate_num)[dataset[:,0]]),dataset[:,1:]))

    return dataset

In [4]:
def next_batch(dataset, N, i):
    left = i*N % len(dataset)
    right = (i+1)*N % len(dataset)
    
    if left < right :
        return dataset[left:right]
    else:
        return np.vstack((dataset[left:],dataset[:right]))

In [5]:
train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")
print(train.shape, test.shape)

(42000, 785) (28000, 784)


In [6]:
train_data = train[train.columns[:]]
train_data = np.array(train_data)
test_data = test[test.columns[:]]
test_data = np.array(test_data)

In [7]:
train_data = make_onehot(train_data,10)

In [8]:
train_data.shape

(42000, 794)

Build model

In [9]:
# hyper parameters
learning_rate = 0.001
training_epochs = 10
batch_size = 32

In [10]:
# input place holders
X = tf.placeholder(tf.float32, [None, 784])
X_img = tf.reshape(X, [-1, 28, 28, 1])   # img 28x28x1 (black/white)
Y = tf.placeholder(tf.float32, [None, 10])

In [11]:
# L1 ImgIn shape=(?, 28, 28, 1)
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
#    Conv     -> (?, 28, 28, 32)
#    Pool     -> (?, 14, 14, 32)
L1 = tf.nn.conv2d(X_img, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.leaky_relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1], padding='SAME')

# L2 ImgIn shape=(?, 14, 14, 32)
W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
#    Conv      ->(?, 14, 14, 64)
#    Pool      ->(?, 7, 7, 64)
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.nn.leaky_relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1], padding='SAME')

# L3 ImgIn shape=(?, 7, 7, 64)
W3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
#    Conv      ->(?, 7, 7, 128)
#    Pool      ->(?, 4, 4, 128)
#    Reshape   ->(?, 4 * 4 * 128) # Flatten them for FC
L3 = tf.nn.conv2d(L2, W3, strides=[1, 1, 1, 1], padding='SAME')
L3 = tf.nn.leaky_relu(L3)
L3 = tf.nn.max_pool(L3, ksize=[1, 2, 2, 1], strides=[
                    1, 2, 2, 1], padding='SAME')
L3_flat = tf.reshape(L3, [-1, 128 * 4 * 4])

# L4 FC 4x4x128 inputs -> 625 outputs
W4 = tf.get_variable("W4", shape=[128 * 4 * 4, 625],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([625]))
L4 = tf.nn.leaky_relu(tf.matmul(L3_flat, W4) + b4)

# L5 Final FC 625 inputs -> 10 outputs
W5 = tf.get_variable("W5", shape=[625, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
logits = tf.matmul(L4, W5) + b5

In [12]:
# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



In [13]:
# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [14]:
# train my model
for epoch in range(training_epochs):
    print('{} Epoch Start!'.format(epoch + 1))
    avg_cost = 0
    avg_accu = 0
    total_batch = int(len(train_data)/ batch_size)

    for i in range(total_batch):
        batchs = next_batch(train_data, batch_size, i)
        train_batchs, val_batchs = make_val(batchs, int((len(batchs)*3)//10))
        batch_xs, batch_ys = train_batchs[:,10:], train_batchs[:,:10]
        c, accu, _ = sess.run([cost, accuracy, optimizer], feed_dict={X: batch_xs, Y: batch_ys})
        avg_cost += c / total_batch
        avg_accu += accu / total_batch
 
    val_accu = sess.run(accuracy, feed_dict={X: val_batchs[:,10:], Y: val_batchs[:,:10]})
    
    print('Epoch:', '%04d' % (epoch + 1), 'cost = {} , train Accu = {}, val Accu = {}'.format(avg_cost, avg_accu, val_accu))


print('Learning Finished!')

1 Epoch Start!
Epoch: 0001 cost = 0.15498827482195615 , train Accu = 0.9523462401390472, val Accu = 1.0
2 Epoch Start!
Epoch: 0002 cost = 0.092885959740044 , train Accu = 0.9742179260384541, val Accu = 1.0
3 Epoch Start!
Epoch: 0003 cost = 0.08634625114214764 , train Accu = 0.9778632069596941, val Accu = 1.0
4 Epoch Start!
Epoch: 0004 cost = 0.11569232184355137 , train Accu = 0.9771672894769101, val Accu = 1.0
5 Epoch Start!
Epoch: 0005 cost = 0.09467623544587034 , train Accu = 0.9803486255156338, val Accu = 1.0
6 Epoch Start!
Epoch: 0006 cost = 0.10919176381808232 , train Accu = 0.9803486252430509, val Accu = 1.0
7 Epoch Start!
Epoch: 0007 cost = 0.13621000698603433 , train Accu = 0.9792881803359709, val Accu = 1.0
8 Epoch Start!
Epoch: 0008 cost = 0.1049766987236025 , train Accu = 0.9858496848766162, val Accu = 1.0
9 Epoch Start!
Epoch: 0009 cost = 0.13273415300430474 , train Accu = 0.9836625167418316, val Accu = 1.0
10 Epoch Start!
Epoch: 0010 cost = 0.1453674788985974 , train Accu 

In [26]:
pred1 = sess.run(tf.argmax(logits, 1), feed_dict={X: test_data[:14000]})
pred2 = sess.run(tf.argmax(logits, 1), feed_dict={X: test_data[14000:]})

In [33]:
pred= np.hstack((pred1,pred2))

In [36]:
pred

array([2, 0, 9, ..., 3, 9, 2], dtype=int64)

Submit

In [39]:
submission = pd.read_csv("data/sample_submission.csv")

In [42]:
submission["Label"] = pred

In [45]:
submission.to_csv("1st_Trial.csv", index=False)