In [1]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import tensorflow as tf
import pathlib
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from emnist import extract_training_samples, extract_test_samples
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

#### IMPORTING AND COLLECTING DATA

In [2]:
x_train_l, y_train_l = extract_training_samples('letters')
x_test_l, y_test_l = extract_test_samples('letters')

x_train_d, y_train_d = extract_training_samples('digits')
x_test_d, y_test_d = extract_test_samples('digits')

In [3]:
train_data_generator = ImageDataGenerator(rescale=1/255.0, validation_split=0.2)

data_dir = pathlib.Path('tanda_baca')
train_ds = train_data_generator.flow_from_directory(
  data_dir,
  subset="training",
  seed=123,
  target_size=(28, 28),
  class_mode="categorical",
  batch_size=1,
  color_mode="grayscale")

val_ds = train_data_generator.flow_from_directory(
  data_dir,
  subset="validation",
  seed=123,
  target_size=(28, 28),
  class_mode="categorical",
  batch_size=1,
  color_mode="grayscale")

batch_index = 0
x_train_s = []
y_train_s = []
while batch_index <= train_ds.batch_index:
    data = train_ds.next()
    x_train_s.append(data[0])
    y_train_s.append(1)
    batch_index += 1
    
batch_index = 0
x_test_s = []
y_test_s = []
while batch_index <= val_ds.batch_index:
    data = val_ds.next()
    x_test_s.append(data[0])
    y_test_s.append(1)
    batch_index += 1
    
x_train_s, x_test_s = np.array(x_train_s), np.array(x_test_s)
y_train_s, y_test_s = np.array(y_train_s), np.array(y_test_s)

x_train_s = x_train_s.reshape(x_train_s.shape[0], 28, 28, 1)
x_test_s = x_test_s.reshape(x_test_s.shape[0], 28, 28, 1)

Found 488 images belonging to 4 classes.
Found 120 images belonging to 4 classes.


#### CATEGORIZING INTO LETTERS, DIGITS, AND SYMBOLS

In [4]:
y_train_l_new, y_test_l_new = np.array([0 for x in y_train_l]), np.array([0 for x in y_test_l])
y_train_d_new, y_test_d_new = np.array([1 for x in y_train_d]), np.array([1 for x in y_test_d])
y_train_s_new, y_test_s_new = np.array([2 for x in y_train_s]), np.array([2 for x in y_test_s])

#### NORMALIZING LETTERS AND DIGITS

In [6]:
x_train_d, x_test_d = x_train_d/255.0, x_test_d/255.0
x_train_l, x_test_l = x_train_l/255.0, x_test_l/255.0

In [7]:
x_train_s.shape

(32, 28, 28, 1)

#### REMOVING LAST DIMENSION

In [8]:
x_train_s =  x_train_s[:, :, :, 0]
x_test_s = x_test_s[:, :, :, 0]

In [9]:
x_train_s.shape

(32, 28, 28)

#### COMBINING DATASETS

In [10]:
comb_x_train = np.concatenate((x_train_l, x_train_d, x_train_s), axis=0)
comb_y_train = np.concatenate((y_train_l_new, y_train_d_new, y_train_s_new), axis=0)

comb_x_test = np.concatenate((x_test_l, x_test_d, x_test_s), axis=0)
comb_y_test = np.concatenate((y_test_l_new, y_test_d_new, y_test_s_new), axis=0)

#### ADDING LAST DIMENSION

In [11]:
comb_x_train = comb_x_train.reshape(comb_x_train.shape[0], 28, 28, 1)
comb_x_test = comb_x_test.reshape(comb_x_test.shape[0], 28, 28, 1)

In [12]:
comb_x_train.shape

(364832, 28, 28, 1)

In [13]:
comb_y_train.shape

(364832,)

#### ONE HOT ENCODING 

In [14]:
comb_y_train_oh = OneHotEncoder(sparse=False).fit_transform(comb_y_train.reshape(-1, 1))
comb_y_test_oh = OneHotEncoder(sparse=False).fit_transform(comb_y_test.reshape(-1, 1))

#### MODEL BUILDING & TRAINING

In [15]:
model = Sequential([
    Conv2D(16, (3, 3), input_shape=comb_x_train.shape[1:], activation="relu"),
    MaxPooling2D((2, 2)),
    Conv2D(32, (3, 3), activation="relu"),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(3, activation='softmax')])

model.compile(optimizer = "adam", loss="categorical_crossentropy", metrics=["acc"])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 16)        160       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 16)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 32)        4640      
                                                                 
 conv2d_2 (Conv2D)           (None, 9, 9, 64)          18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 4, 4, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 1024)              0

In [16]:
model.fit(comb_x_train, comb_y_train_oh, epochs=20, validation_data=(comb_x_test, comb_y_test_oh))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1a62711a7d0>

In [None]:
model.save('model\combined_new.h5')