# 手書き文字の認識
## MNIST (CNN版)

In [1]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

# Load the MNIST dataset from '.data/' with one-hot encoded labels
mnist = input_data.read_data_sets('.data/', one_hot=True)

# Test split: images and their ground-truth labels
test_images, test_labels = mnist.test.images, mnist.test.labels

# Fetch one mini-batch of 50 training examples (images and labels)
train_images, train_labels = mnist.train.next_batch(50)

Extracting .data/train-images-idx3-ubyte.gz
Extracting .data/train-labels-idx1-ubyte.gz
Extracting .data/t10k-images-idx3-ubyte.gz
Extracting .data/t10k-labels-idx1-ubyte.gz


In [3]:
# Input placeholder: a batch of flattened images, 784 float values per image
x = tf.placeholder(tf.float32, [None, 784])

# Reshape the input to [batch, 28, 28, 1] and register an image summary named 'input_data'
# NOTE(review): no summary merge or FileWriter is visible here, so this summary is only
# registered in the graph; confirm whether it is actually written out elsewhere.
img = tf.reshape(x, [-1, 28, 28, 1]) # the '-1' in reshape is inferred from the sizes of the other dimensions
tf.summary.image('input_data', img, 10)

# Convolution layer 1: 32 filters of size 5x5 over the single input channel, stride 1, 'SAME' padding, then ReLU
f1 = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1)) # [height, width, input channels, number of filters]
conv1 = tf.nn.conv2d(img, f1, strides=[1, 1, 1, 1], padding='SAME')
b1 = tf.Variable(tf.constant(0.1, shape=[32]))
h_conv1 = tf.nn.relu(conv1 + b1)

# Pooling layer 1: 2x2 max pooling with stride 2
h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # conv1 + pool1 overall: [28px, 28px, 1ch] > [14px, 14px, 32ch]

# Convolution layer 2: 64 filters of size 5x5 over the 32 channels of h_pool1, stride 1, 'SAME' padding, then ReLU
f2 = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1))
conv2 = tf.nn.conv2d(h_pool1, f2, strides=[1, 1, 1, 1], padding='SAME')
b2 = tf.Variable(tf.constant(0.1, shape=[64]))
h_conv2 = tf.nn.relu(conv2 + b2)

# Pooling layer 2: 2x2 max pooling with stride 2
h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # conv2 + pool2 overall: [14px, 14px, 32ch] > [7px, 7px, 64ch]

# Flatten the pooled feature maps into one vector of 7 * 7 * 64 values per image
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])

# Fully connected layer: maps the 7 * 7 * 64 flattened features to 1024 units, followed by ReLU
w_fc1 = tf.Variable(tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

# Output layer: maps the 1024 hidden units to 10 class scores (logits).
# `out` keeps the softmax probabilities for evaluation and inference.
w_fc2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1))
b_fc2 = tf.Variable(tf.constant(0.1, shape=[10]))
logits = tf.matmul(h_fc1, w_fc2) + b_fc2
out = tf.nn.softmax(logits)

# Loss function: mean cross-entropy between the label placeholder `y`
# (10 values per example) and the predictions.
# The loss is computed directly from the logits with the fused TensorFlow op
# rather than as -sum(y * log(softmax + epsilon)): the fused form is
# numerically stable and does not bias the loss with an ad-hoc epsilon.
y = tf.placeholder(tf.float32, [None, 10])
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

# Training op: gradient descent with learning rate 0.01, minimizing the loss
optimizer = tf.train.GradientDescentOptimizer(0.01)
train_step = optimizer.minimize(loss)

# Evaluation: fraction of examples whose arg-max prediction matches the arg-max of the label
predicted_class = tf.argmax(out, 1)
true_class = tf.argmax(y, 1)
correct = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# Saver for model checkpoints; keeps only the 3 most recent ones
saver = tf.train.Saver(max_to_keep=3)

# Run training: 1000 mini-batch steps; every 100 steps, report test accuracy and save a checkpoint
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Saving can fail when the parent directory of the checkpoint prefix is
    # missing (e.g. on a fresh checkout), so create it up front.
    # MakeDirs succeeds without error if the directory already exists.
    tf.gfile.MakeDirs('.ckpt')

    # Test split used for the periodic accuracy check
    test_images = mnist.test.images
    test_labels = mnist.test.labels

    for i in range(1000):
        step = i + 1
        train_images, train_labels = mnist.train.next_batch(50) # fetch the next mini-batch of 50 examples
        sess.run(train_step, feed_dict={x:train_images, y:train_labels}) # run one training step

        if step % 100 == 0:
            acc_val = sess.run(accuracy, feed_dict={x:test_images, y:test_labels}) # compute accuracy on the whole test split
            print('Step %d: accuracy = %.2f' % (step, acc_val))
            # write_meta_graph=False: save only the variable values, not the graph definition
            saver.save(sess, '.ckpt/mnist_cnn_model', global_step = step, write_meta_graph = False)

Step 100: accuracy = 0.83
Step 200: accuracy = 0.92
Step 300: accuracy = 0.94
Step 400: accuracy = 0.94
Step 500: accuracy = 0.95
Step 600: accuracy = 0.96
Step 700: accuracy = 0.95
Step 800: accuracy = 0.96
Step 900: accuracy = 0.96
Step 1000: accuracy = 0.97
