In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import time

class Model:

    def __init__(self, sess, name):
        self.sess = sess
        self.name = name
        self._build_net()

    def _build_net(self):
        with tf.variable_scope(self.name):
            # dropout (keep_prob) rate  0.7~0.5 on training, but should be 1
            # for testing
            self.training = tf.placeholder(tf.bool)

            # input place holders
            self.X = tf.placeholder(tf.float32, [None, 784])

            # img 28x28x1 (black/white), Input Layer
            X_img = tf.reshape(self.X, [-1, 28, 28, 1])
            self.Y = tf.placeholder(tf.float32, [None, 10])

            # Convolutional Layer #1
            conv1_1 = tf.layers.conv2d(inputs=X_img, filters=64, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv1_2 = tf.layers.conv2d(inputs=conv1_1, filters=64, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1_2, pool_size=[2, 2], padding="SAME", strides=2)
            #dropout1 = tf.layers.dropout(inputs=pool1, rate=0.7, training=self.training)

            # Convolutional Layer #2 and Pooling Layer #2
            conv2_1 = tf.layers.conv2d(inputs=pool1, filters=128, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv2_2 = tf.layers.conv2d(inputs=conv2_1, filters=128, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2_2, pool_size=[2, 2], padding="SAME", strides=2)
            #dropout2 = tf.layers.dropout(inputs=pool2, rate=0.7, training=self.training)

            # Convolutional Layer #3 and Pooling Layer #3
            conv3_1 = tf.layers.conv2d(inputs=pool2, filters=256, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv3_2 = tf.layers.conv2d(inputs=conv3_1, filters=256, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv3_3 = tf.layers.conv2d(inputs=conv3_2, filters=256, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv3_4 = tf.layers.conv2d(inputs=conv3_3, filters=256, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool3 = tf.layers.max_pooling2d(inputs=conv3_4, pool_size=[2, 2], padding="SAME", strides=2)
            #dropout3 = tf.layers.dropout(inputs=pool3, rate=0.7, training=self.training)

            # Dense Layer with Relu
            flat = tf.reshape(pool3, [-1, 256 * 4 * 4])
            dense4 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
            dropout4 = tf.layers.dropout(inputs=dense4, rate=0.5, training=self.training)

            # Logits (no activation) Layer: L5 Final FC 625 inputs -> 10 outputs
            self.logits = tf.layers.dense(inputs=dropout4, units=10)

        # define cost/loss & optimizer
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)

        correct_prediction = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
        self.correct_count = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def predict(self, x_test, training=False):
        return self.sess.run(self.logits, feed_dict={self.X: x_test, self.training: training})

    def get_accuracy(self, x_test, y_test, training=False):
        return self.sess.run(self.accuracy, feed_dict={self.X: x_test, self.Y: y_test, self.training: training})
    
    def get_accuracy2(self, x_test, y_test, training=False):
        batch_size = 100 
        total_train_rows = y_test.shape[0]
        total_batch = int(total_train_rows / batch_size)
        total_correct_count = 0;
        for i in range(total_batch):
                begin = i * batch_size
                end = begin + batch_size
                batch_xs = train_x[begin : end].values # pandas 데이터에서 values로 값을 가져온다.
                batch_ys = train_y_onehot[begin : end].values
                cur_count = sess.run(self.correct_count, feed_dict={self.X: batch_xs, self.Y: batch_ys, self.training: training})
                total_correct_count += cur_count
        
        accuracy = total_correct_count / total_train_rows
        return accuracy

    def train(self, x_data, y_data, training=True):
        return self.sess.run([self.cost, self.optimizer], feed_dict={self.X: x_data, self.Y: y_data, self.training: training})


  from ._conv import register_converters as _register_converters


In [2]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

# feature 와 label 분리
train_y = train['label']
train_x = train.drop(labels='label', axis=1)

# label을 one-hot encoding
train_y_onehot = pd.get_dummies(train_y)

# train data count
total_train_rows = train_y_onehot.shape[0]

In [3]:
tf.set_random_seed(777)  # reproducibility

# hyper parameters
learning_rate = 0.001
training_epochs = 50
batch_size = 100

# initialize
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 

models = []
num_models = 10
for m in range(num_models):
    models.append(Model(sess, "model" + str(m)))
    
sess.run(tf.global_variables_initializer())

print('Learning Started!')

start_time = time.time()

# train my model
for epoch in range(training_epochs):
    avg_cost_list = np.zeros(len(models))
    total_batch = int(total_train_rows / batch_size)
    start_time_epoch = time.time()
    for i in range(total_batch):
        begin = i * batch_size
        end = begin + batch_size
        batch_xs = train_x[begin : end].values
        batch_ys = train_y_onehot[begin : end].values

        # train each model
        for m_idx, m in enumerate(models):
            c, _ = m.train(batch_xs, batch_ys)
            avg_cost_list[m_idx] += c / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', avg_cost_list, " %.2f seconds" % (time.time() - start_time_epoch))

print('Learning Finished!')
print("--- %.2f seconds ---" %(time.time() - start_time))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Learning Started!
Epoch: 0001 cost = [0.33361966 0.30564576 0.28958323 0.32117981 0.3370779  0.29789731
 0.28877228 0.33582618 0.31967997 0.32934264]  230.63 seconds
Epoch: 0002 cost = [0.0662393  0.0662567  0.06156418 0.06846898 0.06405795 0.06378836
 0.06132336 0.06782933 0.06581233 0.06184477]  228.08 seconds
Epoch: 0003 cost = [0.04699357 0.04742366 0.04899674 0.05074114 0.04702732 0.04886975
 0.04769674 0.04753123 0.0520472  0.0472246 ]  227.56 seconds
Epoch: 0004 cost = [0.03850046 0.03988442 0.04125413 0.03759629 0.03816772 0.03552198
 0.03774074 0.04425926 0.03634452 0.03811041]  227.49 seconds
Epoch: 0005 cost = [0.0357471  0.03264071 0.03392928 0.03677256 0.03214378 0.0339181
 0.03386672 0.03566638 0.03487127 0.03485209]  227.46 seconds
Epoch: 0006 cost = [0.03314746 0.03017155 0.02898727 0

In [4]:
# 학습 데이터로 predict
test_size = total_train_rows
predictions = np.zeros([test_size, 10])

ensemble_correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(train_y_onehot, 1))
ensemble_accuracy = tf.reduce_mean(tf.cast(ensemble_correct_prediction, tf.float32))

total_correct_count = np.zeros([num_models])

for m_idx, m in enumerate(models):
    correct_prediction = tf.equal(tf.argmax(m.logits, 1), tf.argmax(m.Y, 1))
    correct_count = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
    
    for i in range(total_batch):
        begin = i * batch_size
        end = begin + batch_size
        batch_xs = train_x[begin : end].values # pandas 데이터에서 values로 값을 가져온다.
        batch_ys = train_y_onehot[begin : end].values
        
        cur_count = sess.run(correct_count, feed_dict={m.X: batch_xs, m.Y: batch_ys, m.training: False})
        total_correct_count[m_idx] += cur_count

        p = sess.run(m.logits, feed_dict={m.X: batch_xs, m.training: False})
        predictions[begin:end] += p
        
for  m_idx, m in enumerate(models):
    accuracy = total_correct_count[m_idx] / total_train_rows
    print(m_idx, 'Accuracy:', accuracy)
    
print('Ensemble accuracy:', sess.run(ensemble_accuracy))

0 Accuracy: 0.998
1 Accuracy: 0.9970476190476191
2 Accuracy: 0.9970238095238095
3 Accuracy: 0.9968095238095238
4 Accuracy: 0.9990714285714286
5 Accuracy: 0.9984047619047619
6 Accuracy: 0.996547619047619
7 Accuracy: 0.9975238095238095
8 Accuracy: 0.9993333333333333
9 Accuracy: 0.998452380952381
Ensemble accuracy: 0.09838095


In [5]:
# 테스트 데이터로 predict
test_result = []
predictions = np.zeros([28000, 10])

for m_idx, m in enumerate(models):

    for i in range(total_batch):
        begin = i * batch_size
        end = begin + batch_size
        batch_xs = test[begin : end].values
        p = sess.run(m.logits, feed_dict={m.X: batch_xs, m.training: False})
        predictions[begin:end] += p
        
test_prediction = tf.argmax(predictions, 1)
test_result = sess.run(test_prediction)

# pandas 형태로 predict column 생성
pd_test_result = pd.Series(test_result, name="Label")

# ImageId column 생성해서 predict와 결합
submission = pd.concat([pd.Series(range(1,28001),name = "ImageId"),pd_test_result],axis = 1)

# submission 파일 생성
submission.to_csv("submission_mnist_cnn_ensemble.csv",index=False)

In [6]:
# 'Saver' op to save and restore all the variables
saver = tf.train.Saver()

model_path = "./ensemble.ckpt"

# Save model weights to disk
save_path = saver.save(sess, model_path)
print("Model saved in file: %s" % save_path)

# Restore model weights from previously saved model
saver.restore(sess, model_path)
print("Model restored from file: %s" % save_path)

# 학습 데이터로 predict
test_size = total_train_rows
predictions = np.zeros([test_size, 10])

ensemble_correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(train_y_onehot, 1))
ensemble_accuracy = tf.reduce_mean(tf.cast(ensemble_correct_prediction, tf.float32))

total_correct_count = np.zeros([num_models])

for m_idx, m in enumerate(models):
    correct_prediction = tf.equal(tf.argmax(m.logits, 1), tf.argmax(m.Y, 1))
    correct_count = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
    
    for i in range(total_batch):
        begin = i * batch_size
        end = begin + batch_size
        batch_xs = train_x[begin : end].values # pandas 데이터에서 values로 값을 가져온다.
        batch_ys = train_y_onehot[begin : end].values
        
        cur_count = sess.run(correct_count, feed_dict={m.X: batch_xs, m.Y: batch_ys, m.training: False})
        total_correct_count[m_idx] += cur_count

        p = sess.run(m.logits, feed_dict={m.X: batch_xs, m.training: False})
        predictions[begin:end] += p
        
for  m_idx, m in enumerate(models):
    accuracy = total_correct_count[m_idx] / total_train_rows
    print(m_idx, 'Accuracy:', accuracy)
    
print('Ensemble accuracy:', sess.run(ensemble_accuracy))

Model saved in file: ./ensemble.ckpt
INFO:tensorflow:Restoring parameters from ./ensemble.ckpt
Model restored from file: ./ensemble.ckpt
0 Accuracy: 0.998
1 Accuracy: 0.9970476190476191
2 Accuracy: 0.9970238095238095
3 Accuracy: 0.9968095238095238
4 Accuracy: 0.9990714285714286
5 Accuracy: 0.9984047619047619
6 Accuracy: 0.996547619047619
7 Accuracy: 0.9975238095238095
8 Accuracy: 0.9993333333333333
9 Accuracy: 0.998452380952381
Ensemble accuracy: 0.09838095
