In [5]:
import tensorflow as tf
import tensorflow_datasets as tfds

tf.compat.v1.disable_eager_execution()

tf.debugging.set_log_device_placement(True)

In [6]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 5102425668949235924
xla_global_id: -1
]


데이터 셋 로드 (TFDS에서 가져온 EMNIST)

In [7]:
(etfds_train, etfds_test), etfds_info = tfds.load("emnist", split=['train', 'test'], shuffle_files=True, as_supervised=True, with_info=True)
print(etfds_info)
print(etfds_train)

def normalize_img_ascii_lable(image, label):
    ascii_Lb = label
    if ascii_Lb >= 10:
        ascii_Lb += 8
    elif ascii_Lb >= 43:
        ascii_Lb += 6
    ascii_Lb += 15
    image = tf.transpose(image)
    return tf.cast(image, tf.float32) / 255., ascii_Lb

etfds_train = etfds_train.map(normalize_img_ascii_lable, num_parallel_calls=tf.data.experimental.AUTOTUNE)
etfds_train = etfds_train.cache()
etfds_train = etfds_train.shuffle(etfds_info.splits['train'].num_examples)
etfds_train = etfds_train.batch(128)
etfds_train = etfds_train.prefetch(tf.data.experimental.AUTOTUNE)
print(etfds_train)

etfds_test = etfds_test.map(normalize_img_ascii_lable, num_parallel_calls=tf.data.experimental.AUTOTUNE)
etfds_test = etfds_test.batch(128)
etfds_test = etfds_test.cache()
etfds_test = etfds_test.prefetch(tf.data.experimental.AUTOTUNE)

tfds.core.DatasetInfo(
    name='emnist',
    full_name='emnist/byclass/3.0.0',
    description="""
    The EMNIST dataset is a set of handwritten character digits derived from the NIST Special Database 19 and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset.
    
    Note: Like the original EMNIST data, images provided here are inverted horizontally and rotated 90 anti-clockwise. You can use `tf.transpose` within `ds.map` to convert the images to a human-friendlier format.
    """,
    config_description="""
    EMNIST ByClass
    """,
    homepage='https://www.nist.gov/itl/products-and-services/emnist-dataset',
    data_path='/Users/varzero/tensorflow_datasets/emnist/byclass/3.0.0',
    file_format=tfrecord,
    download_size=535.73 MiB,
    dataset_size=349.16 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=62),
    }),
    supe

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>


직접 만든 데이터 셋 로드

In [8]:
import pathlib

data_dir = pathlib.Path('../dataset')

trainFG = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset='training',
    seed=123,
    image_size=(28,28),
    batch_size=16,
    label_mode='int',
    color_mode="grayscale"
)



testFG = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset='validation',
    seed=123,
    image_size=(28,28),
    batch_size=16,
    label_mode='int',
    color_mode="grayscale"
)

def normalize_myimg_ascii_lable(image, label):
    return tf.cast(image, tf.float32) / 255., tf.cast(int(label), dtype=tf.int64)

trainFG = trainFG.map(normalize_myimg_ascii_lable, num_parallel_calls=tf.data.experimental.AUTOTUNE)
trainFG = trainFG.cache()
trainFG = trainFG.prefetch(tf.data.experimental.AUTOTUNE)
print(trainFG)

testFG = testFG.map(normalize_myimg_ascii_lable, num_parallel_calls=tf.data.experimental.AUTOTUNE)
testFG = testFG.cache()
testFG = testFG.prefetch(tf.data.experimental.AUTOTUNE)


Found 1520 files belonging to 95 classes.
Using 1216 files for training.
Found 1520 files belonging to 95 classes.
Using 304 files for validation.
<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>


모델 세팅

In [9]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(95, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 512)               401920    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 95)                48735     
                                                                 
Total params: 450655 (1.72 MB)
Trainable params: 450655 (1.72 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


훈련시키기

In [11]:
model.fit(etfds_train, epochs=3)
model.fit(trainFG, epochs=25)

TensorSliceDataset: (TensorSliceDataset): /job:localhost/replica:0/task:0/device:CPU:0
ShuffleDataset: (ShuffleDataset): /job:localhost/replica:0/task:0/device:CPU:0
ParallelInterleaveDatasetV4: (ParallelInterleaveDatasetV4): /job:localhost/replica:0/task:0/device:CPU:0
AssertCardinalityDataset: (AssertCardinalityDataset): /job:localhost/replica:0/task:0/device:CPU:0
OptionsDataset: (OptionsDataset): /job:localhost/replica:0/task:0/device:CPU:0
ParallelMapDatasetV2: (ParallelMapDatasetV2): /job:localhost/replica:0/task:0/device:CPU:0
MapDataset: (MapDataset): /job:localhost/replica:0/task:0/device:CPU:0
PrefetchDataset: (PrefetchDataset): /job:localhost/replica:0/task:0/device:CPU:0
TensorSliceDataset_1: (TensorSliceDataset): /job:localhost/replica:0/task:0/device:CPU:0
ShuffleDataset_1: (ShuffleDataset): /job:localhost/replica:0/task:0/device:CPU:0
ParallelInterleaveDatasetV4_1: (ParallelInterleaveDatasetV4): /job:localhost/replica:0/task:0/device:CPU:0
AssertCardinalityDataset_1: (As

2023-07-28 23:18:45.944572: I tensorflow/core/common_runtime/placer.cc:114] TensorSliceDataset: (TensorSliceDataset): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:45.944598: I tensorflow/core/common_runtime/placer.cc:114] ShuffleDataset: (ShuffleDataset): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:45.944606: I tensorflow/core/common_runtime/placer.cc:114] ParallelInterleaveDatasetV4: (ParallelInterleaveDatasetV4): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:45.944612: I tensorflow/core/common_runtime/placer.cc:114] AssertCardinalityDataset: (AssertCardinalityDataset): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:45.944618: I tensorflow/core/common_runtime/placer.cc:114] OptionsDataset: (OptionsDataset): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:45.944623: I tensorflow/core/common_runtime/placer.cc:114] ParallelMapDatasetV2: (ParallelMapDatasetV2): /job:localhost/replica:0/task:0/device:CPU:0
2023

TensorSliceDataset: (TensorSliceDataset): /job:localhost/replica:0/task:0/device:CPU:0
ShuffleDataset: (ShuffleDataset): /job:localhost/replica:0/task:0/device:CPU:0
ParallelInterleaveDatasetV4: (ParallelInterleaveDatasetV4): /job:localhost/replica:0/task:0/device:CPU:0
AssertCardinalityDataset: (AssertCardinalityDataset): /job:localhost/replica:0/task:0/device:CPU:0
OptionsDataset: (OptionsDataset): /job:localhost/replica:0/task:0/device:CPU:0
ParallelMapDatasetV2: (ParallelMapDatasetV2): /job:localhost/replica:0/task:0/device:CPU:0
MapDataset: (MapDataset): /job:localhost/replica:0/task:0/device:CPU:0
PrefetchDataset: (PrefetchDataset): /job:localhost/replica:0/task:0/device:CPU:0
TensorSliceDataset_1: (TensorSliceDataset): /job:localhost/replica:0/task:0/device:CPU:0
ShuffleDataset_1: (ShuffleDataset): /job:localhost/replica:0/task:0/device:CPU:0
ParallelInterleaveDatasetV4_1: (ParallelInterleaveDatasetV4): /job:localhost/replica:0/task:0/device:CPU:0
AssertCardinalityDataset_1: (As

2023-07-28 23:18:46.185114: I tensorflow/core/common_runtime/placer.cc:114] TensorSliceDataset: (TensorSliceDataset): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:46.185141: I tensorflow/core/common_runtime/placer.cc:114] ShuffleDataset: (ShuffleDataset): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:46.185154: I tensorflow/core/common_runtime/placer.cc:114] ParallelInterleaveDatasetV4: (ParallelInterleaveDatasetV4): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:46.185164: I tensorflow/core/common_runtime/placer.cc:114] AssertCardinalityDataset: (AssertCardinalityDataset): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:46.185172: I tensorflow/core/common_runtime/placer.cc:114] OptionsDataset: (OptionsDataset): /job:localhost/replica:0/task:0/device:CPU:0
2023-07-28 23:18:46.185179: I tensorflow/core/common_runtime/placer.cc:114] ParallelMapDatasetV2: (ParallelMapDatasetV2): /job:localhost/replica:0/task:0/device:CPU:0
2023

ValueError: Error when checking input: expected flatten_input to have 3 dimensions, but got array with shape (None, 28, 28, 1)

모델 저장하기

In [None]:
model.save('spemnist.keras')

확인하기

In [None]:
#model.evaluate(etfds_test, verbose=2)
model.evaluate(testFG, verbose=2)

19/19 - 0s - loss: 2.3641 - accuracy: 0.4868 - 368ms/epoch - 19ms/step


[2.3641421794891357, 0.4868420958518982]