In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os

# Print arrays in full: an infinite threshold turns off NumPy's "..." summarisation.
np.set_printoptions(threshold=np.inf)

# Root folder of the image dataset.
DATA_DIR = "ImageDataset/"

# Class names. The order is significant: list position is the class identity.
CLASSES = [
    'A', 'B', 'C', 'CH', 'TJ', 'D', 'DJ', 'E', 'F', 'G',
    'H', 'I', 'J', 'JA', 'JU', 'K', 'L', 'M', 'N', 'O',
    'P', 'R', 'S', 'SH', 'T', 'U', 'V', 'Z', 'ZH',
]

# Side length, in pixels, of the square images used throughout the notebook.
IMG_SIZE = 100

In [None]:
### LOAD IMAGES ###

# Accumulates [image, class_index] pairs. Re-created here so that re-running
# this cell starts from an empty list instead of appending duplicates.
training_data = []

def create_training_data():
    """Load every image found under DATA_DIR/<class> into ``training_data``.

    Each file is decoded as grayscale, resized to IMG_SIZE x IMG_SIZE and
    appended to the global ``training_data`` list as ``[image, class_index]``,
    where ``class_index`` is the position of the class name in CLASSES.

    Files that cannot be decoded or resized are skipped, exactly as before,
    but they are now counted and reported per directory instead of being
    dropped silently.
    """
    for class_num, c in enumerate(CLASSES):
        path = os.path.join(DATA_DIR, c)
        skipped = []

        # Sorted so the load order does not depend on the order in which the
        # filesystem happens to list the directory.
        for img in sorted(os.listdir(path)):
            try:
                img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
                # cv2.imread reports an unreadable file by returning None
                # rather than raising, so it has to be checked explicitly.
                if img_array is None:
                    skipped.append(img)
                    continue
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except cv2.error:
                skipped.append(img)
                continue
            training_data.append([new_array, class_num])

        print("DIR " + str(c) + " done.")
        if skipped:
            preview = ", ".join(skipped[:5])
            if len(skipped) > 5:
                preview += ", ..."
            print("  skipped " + str(len(skipped)) + " unreadable file(s): " + preview)

create_training_data()

In [None]:
### SHUFFLE AND DIVIDE DATA ###

import random

# A fixed seed makes the shuffle, and therefore any split taken from it,
# repeat on every Restart & Run All, provided training_data is built in the
# same order each time.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(training_data)

if not training_data:
    # Without this guard the reshape below fails with an opaque NumPy message.
    raise ValueError("training_data is empty: no images were loaded")

# Separate the [image, label] pairs into parallel lists.
X = [image for image, _ in training_data]
y = [label for _, label in training_data]

# Stack into one array of shape (n_samples, IMG_SIZE, IMG_SIZE, 1);
# the trailing axis is the single grayscale channel.
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

In [None]:
# Hold out 20% of the samples (rounded down) to test the CNN;
# the remaining 80% are used to train it.
test_number = len(X) // 5
train_number = len(X) - test_number

# The first `train_number` samples form the training set, the rest the test set.
train_images, test_images = X[:train_number], X[train_number:]
train_labels, test_labels = y[:train_number], y[train_number:]

In [None]:
### COMPARE AMOUNT OF DATA ###

def _count_labels(labels):
    """Return a list whose i-th entry is the number of times class index i occurs in ``labels``."""
    # Sized from CLASSES so the counts stay in step with the class list.
    counts = [0] * len(CLASSES)
    for label in labels:
        counts[label] += 1
    return counts

plt.rcParams['figure.figsize'] = [15, 5]
figure, axis = plt.subplots()

# Number of images per letter in the whole dataset, the train set and the test set.
dataset_label_count = _count_labels(y)
train_label_count = _count_labels(train_labels)
test_label_count = _count_labels(test_labels)

x_axis = np.arange(len(CLASSES))
width = 0.25

# Three bars per letter, side by side; each bar carries its own legend label
# so the legend cannot drift out of step with the plotting order.
axis.bar(x_axis, dataset_label_count, width, edgecolor="white", linewidth=0.7,
         color='darkolivegreen', label='DATASET')
axis.bar(x_axis + width, train_label_count, width, edgecolor="white", linewidth=0.7,
         color='deepskyblue', label='TRAIN')
axis.bar(x_axis + width * 2, test_label_count, width, edgecolor="white", linewidth=0.7,
         color='salmon', label='TEST')

axis.set_title("Images per letter: dataset vs. train vs. test")
axis.set_xlabel("Letter")
axis.set_ylabel("Number of images")

# Centre each tick under the middle bar of its group.
axis.set_xticks(x_axis + width)
axis.set_xticklabels(CLASSES)
axis.legend()

plt.show()

In [None]:
import tensorflow as tf
from tensorflow import keras

# Divide by 255 so that 8-bit pixel values end up in the range [0, 1].
# The arrays are rebuilt from X on every run, so executing this cell more than
# once no longer divides already-normalised images by 255 a second time.
# The division already yields new ndarrays, so no extra np.array() copy is needed.
train_images = X[:train_number] / 255.0
test_images = X[train_number:] / 255.0

# Labels are plain Python lists up to this point; Keras is fed arrays.
train_labels = np.array(y[:train_number])
test_labels = np.array(y[train_number:])

In [None]:
### CREATE AND TRAIN CNN ###

# One convolution + pooling stage followed by a small dense classifier.
model = keras.models.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(32, activation='relu'),
    # One output unit per class; derived from CLASSES so the layer cannot
    # fall out of step with the label set.
    keras.layers.Dense(len(CLASSES), activation='softmax'),
])

# Labels are integer class indices, hence the *sparse* categorical loss.
model.compile(optimizer='adam',
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])

# NOTE(review): the test split is also used as validation data here, so the
# evaluate() call below reports on the same samples as val_accuracy.
history = model.fit(train_images, train_labels, epochs=3,
                    validation_data=(test_images, test_labels))

# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend(loc='lower right')
plt.show()

model.evaluate(test_images, test_labels, verbose=2)

In [None]:
### SAVE CNN MODEL ###

# Persist the trained model under the path 'cnn_model'; the TFLite conversion
# step later in the notebook loads it back from 'cnn_model/'.
# NOTE(review): a path without a file extension is presumably written as a
# SavedModel directory (TF 2.x Keras behaviour); newer Keras releases expect a
# .keras/.h5 extension here. Confirm against the installed version.
model.save('cnn_model')

In [None]:
### CONVERT AND SAVE TFLITE ###

# Build a converter from the model directory saved at 'cnn_model/'.
converter = tf.lite.TFLiteConverter.from_saved_model('cnn_model/')
# The converted model is written out below in binary mode.
tflite_model = converter.convert()

# Store the converted model next to the notebook as cnn_model.tflite.
with open('cnn_model.tflite', 'wb') as f:
    f.write(tflite_model)