# MNIST

## Import

In [8]:
import numpy as np
import pandas as pd
import tensorflow as tf

import tensorflow_datasets as tfds

## データの作成

In [56]:
mnist_dataset, mnist_info = tfds.load(name = 'mnist', with_info = True, as_supervised = True)

In [1]:
# mnistの情報を確認する
# mnist_info

In [94]:
# mnistのtrain data と　test dataを変数に格納する
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

# test data のうち１割を検証用データとする
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples,tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples,tf.int64)

# datasetのfeature scaling
def scale(image, label):
    image = tf.cast(image,tf.float32)
    image /= 255.
    return image, label

scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

# dataのシャッフル
# bufferサイズの指定 -> 指定した数ごとにシャッフルする
BUFFER_SIZE = 10000

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

validation_inputs, validation_targets = next(iter(validation_data))

In [None]:
# train_size = int(0.7 * DATASET_SIZE)
# val_size = int(0.15 * DATASET_SIZE)
# test_size = int(0.15 * DATASET_SIZE)

# full_dataset = tf.data.TFRecordDataset(FLAGS.input_file)
# full_dataset = full_dataset.shuffle()
# train_dataset = full_dataset.take(train_size)
# test_dataset = full_dataset.skip(train_size)
# val_dataset = test_dataset.skip(test_size)
# test_dataset = test_dataset.take(test_size)

#### 入出力データ数の定義

In [100]:
input_size = 28 * 28
output_size = 10
hidden_layer_size = 50 #隠れ層の１層あたりの要素数

#### モデルの作成

In [102]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28,1)),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(output_size,activation='softmax')
])

#### 最適化アルゴリズムと損失関数の決定

In [106]:
model.compile(optimizer = 'adam', loss ='sparse_categorical_crossentropy', metrics=['accuracy'])

#### 訓練

In [107]:
NUM_EPOCHS = 5
VALIDATION_STEPS = num_validation_samples

model.fit(train_data, 
          epochs=NUM_EPOCHS, 
          validation_data = (validation_inputs, validation_targets),
          validation_steps = VALIDATION_STEPS,
          verbose=2)

Epoch 1/5
540/540 - 6s - loss: 0.4174 - accuracy: 0.8810 - val_loss: 0.1900 - val_accuracy: 0.9447
Epoch 2/5
540/540 - 5s - loss: 0.1789 - accuracy: 0.9481 - val_loss: 0.1359 - val_accuracy: 0.9613
Epoch 3/5
540/540 - 5s - loss: 0.1376 - accuracy: 0.9592 - val_loss: 0.1135 - val_accuracy: 0.9670
Epoch 4/5
540/540 - 5s - loss: 0.1154 - accuracy: 0.9660 - val_loss: 0.0968 - val_accuracy: 0.9732
Epoch 5/5
540/540 - 5s - loss: 0.0974 - accuracy: 0.9706 - val_loss: 0.0915 - val_accuracy: 0.9740


<tensorflow.python.keras.callbacks.History at 0x1dd39adc888>

#### モデルのテスト

In [108]:
test_loss, test_accuracy = model.evaluate(test_data)

      1/Unknown - 1s 899ms/step - loss: 0.1106 - accuracy: 0.9653

In [110]:
0.974 - 0.9653

0.00869999999999993