In [13]:
import tensorflow as tf
import tensorflow_datasets as tfds


In [14]:
(etfds_train, etfds_test), etfds_info = tfds.load("emnist", split=['train', 'test'], shuffle_files=True, as_supervised=True, with_info=True)
print(etfds_info)
print(etfds_train)

def normalize_img_ascii_lable(image, label):
    ascii_Lb = label
    if ascii_Lb >= 10:
        ascii_Lb += 8
    elif ascii_Lb >= 43:
        ascii_Lb += 6
    ascii_Lb += 15
    return tf.cast(image, tf.float32) / 255., ascii_Lb

etfds_train = etfds_train.map(normalize_img_ascii_lable, num_parallel_calls=tf.data.experimental.AUTOTUNE)
etfds_train = etfds_train.cache()
etfds_train = etfds_train.shuffle(etfds_info.splits['train'].num_examples)
etfds_train = etfds_train.batch(128)
etfds_train = etfds_train.prefetch(tf.data.experimental.AUTOTUNE)
print(etfds_train)

etfds_test = etfds_test.map(normalize_img_ascii_lable, num_parallel_calls=tf.data.experimental.AUTOTUNE)
etfds_test = etfds_test.batch(128)
etfds_test = etfds_test.cache()
etfds_test = etfds_test.prefetch(tf.data.experimental.AUTOTUNE)

tfds.core.DatasetInfo(
    name='emnist',
    full_name='emnist/byclass/3.0.0',
    description="""
    The EMNIST dataset is a set of handwritten character digits derived from the NIST Special Database 19 and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset.
    
    Note: Like the original EMNIST data, images provided here are inverted horizontally and rotated 90 anti-clockwise. You can use `tf.transpose` within `ds.map` to convert the images to a human-friendlier format.
    """,
    config_description="""
    EMNIST ByClass
    """,
    homepage='https://www.nist.gov/itl/products-and-services/emnist-dataset',
    data_path='/Users/varzero/tensorflow_datasets/emnist/byclass/3.0.0',
    file_format=tfrecord,
    download_size=535.73 MiB,
    dataset_size=349.16 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=62),
    }),
    supe

In [15]:
import pathlib

data_dir = pathlib.Path('../dataset')

trainFG = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset='training',
    seed=123,
    image_size=(28,28),
    batch_size=16,
    label_mode='int',
    color_mode="grayscale"
)

testFG = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset='validation',
    seed=123,
    image_size=(28,28),
    batch_size=16,
    label_mode='int',
    color_mode="grayscale"
)

def normalize_myimg_ascii_lable(image, label):
    return tf.cast(image, tf.float32) / 255., tf.cast(int(label), dtype=tf.int64)

trainFG = trainFG.map(normalize_myimg_ascii_lable, num_parallel_calls=tf.data.experimental.AUTOTUNE)
trainFG = trainFG.cache()
trainFG = trainFG.prefetch(tf.data.experimental.AUTOTUNE)
print(trainFG)

testFG = testFG.map(normalize_myimg_ascii_lable, num_parallel_calls=tf.data.experimental.AUTOTUNE)
testFG = testFG.cache()
testFG = testFG.prefetch(tf.data.experimental.AUTOTUNE)


Found 1520 files belonging to 95 classes.
Using 1216 files for training.
Found 1520 files belonging to 95 classes.
Using 304 files for validation.
<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>


In [16]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu',  input_shape=(28,28,1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(95, activation='softmax')
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 13, 13, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_7 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 3, 3, 64)          36928     
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 1, 1, 64)         

In [17]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [18]:
model.fit(etfds_train, epochs=3)
model.fit(trainFG, epochs=25)

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x16da5c790>

In [19]:
model.save('cnnEmnist.keras')

In [20]:
model.evaluate(etfds_test, verbose=2)
model.evaluate(testFG, verbose=2)

909/909 - 20s - loss: 20.8091 - accuracy: 0.0501 - 20s/epoch - 22ms/step
19/19 - 0s - loss: 2.0615 - accuracy: 0.6382 - 194ms/epoch - 10ms/step


[2.0615291595458984, 0.6381579041481018]