In [31]:
from IPython.display import Image

# importing tensorflow and keras

import tensorflow as tf
from tensorflow import keras
import PIL
import PIL.Image
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.utils import to_categorical, plot_model



In [32]:
# Load the dataset from disk
import pathlib

data_dir = pathlib.Path("images/Cyrillic")

# Count the PNG files that sit one directory level below data_dir.
image_count = len(list(data_dir.glob("*/*.png")))
print(image_count)

batch_size = 32
img_height = 32
img_width = 32

# Options common to both subsets. Passing the very same values to both calls
# keeps the training and validation loaders configured identically.
shared_loader_options = dict(
    validation_split=0.2,
    # VERY IMPORTANT: the images have transparency, so load them with the
    # alpha channel (RGBA) instead of the default colour mode.
    color_mode="rgba",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

training_dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    subset="training",
    **shared_loader_options,
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    subset="validation",
    **shared_loader_options,
)

15480
Found 15480 files belonging to 34 classes.
Using 12384 files for training.
Found 15480 files belonging to 34 classes.
Using 3096 files for validation.


In [33]:
# Keep the list of class label names exposed by the loaded dataset. It is
# indexed by the integer labels later on (see the plotting cell, which uses
# class_names[labels[i]] as the image title).
class_names = training_dataset.class_names
print(class_names)


['I', 'Ё', 'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ж', 'З', 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П', 'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я']


In [34]:
# Peek at a single batch to see the shapes of the image and label tensors.
for image_batch, labels_batch in training_dataset.take(1):
  print(image_batch.shape)
  print(labels_batch.shape)

(32, 32, 32, 4)
(32,)


In [None]:
import matplotlib.pyplot as plt

# Draw the first nine images of one training batch on a 3x3 grid, each one
# titled with the class name that its integer label points to.
plt.figure(figsize=(10, 10))
for images, labels in training_dataset.take(1):
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    pixels = images[i].numpy().astype("uint8")  # imshow is given integer pixel values
    ax.imshow(pixels)
    ax.set_title(class_names[labels[i]])
    ax.axis("off")


Configuring the dataset for performance
Let's make sure to use buffered prefetching so we can yield data from disk without having I/O become blocking.

In [36]:
AUTOTUNE = tf.data.AUTOTUNE


def configure_for_performance(ds):
  """Return `ds` with caching, shuffling and prefetching applied, in that order.

  No `.batch()` step is applied here: the datasets used in this notebook are
  created with `batch_size=batch_size`, so they are already batched.
  """
  return (
      ds.cache()
      .shuffle(buffer_size=1000)
      .prefetch(buffer_size=AUTOTUNE)
  )


# The names are rebound to the wrapped pipelines from here on.
# NOTE(review): attributes of the original dataset objects (e.g. class_names)
# are presumably not available on the wrapped datasets - read them before
# running this cell; confirm.
training_dataset = configure_for_performance(training_dataset)
validation_dataset = configure_for_performance(validation_dataset)


Training our Model


In [37]:
# One output unit per class. Derived from the class list read from the dataset
# (instead of a hard-coded 34) so the model stays in sync if class folders are
# added or removed.
num_classes = len(class_names)

# Small CNN: pixel rescaling, three Conv2D + MaxPooling2D stages, then a dense
# classifier head.
model = tf.keras.Sequential([
  # Scale pixel values by 1/255 before the convolutions.
  tf.keras.layers.Rescaling(1./255),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  # No activation here: the layer outputs raw logits, one per class.
  tf.keras.layers.Dense(num_classes)
])


In [38]:
# The last Dense layer of the model has no activation, so the loss is told to
# expect raw logits (from_logits=True). The "sparse" variant of the loss is
# used together with the integer label batches produced by the loader.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)


In [None]:
# Train for 15 epochs, evaluating on the validation split after each epoch.
# The object returned by fit() is kept so the per-epoch metrics can be
# inspected or plotted afterwards (and so the cell does not end by echoing the
# returned object's repr).
history = model.fit(
  training_dataset,
  validation_data=validation_dataset,
  epochs=15
)
