# Define the final model

First, let's import all the necessary Python libraries:

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import keras
import tensorflow as tf
import csv
import h5py
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pickle

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


We will use the following configuration:

In [None]:
# Font labels are kept as bytes because they are matched (via FONTS.index) against
# the values read from the h5 'font' attribute.
FONTS = [b'Raleway', b'Open Sans', b'Roboto', b'Ubuntu Mono', b'Michroma', b'Alex Brush', b'Russo One']
# CSV header: row index, image name, character, then one column per font.
# The font names are decoded to str; writing the raw bytes objects through
# csv.writer would produce header cells such as "b'Raleway'".
CSV_TITLES = ["", "image", "char"] + [font.decode("utf-8") for font in FONTS]
CSV_FILE = "/content/drive/MyDrive/Computer Vision Project - Lea Ben Zvi/test_results.csv"
TEST_FILE = "/content/drive/MyDrive/Computer Vision Project - Lea Ben Zvi/SynthText_test.h5"

The final model uses the already trained CNN (which classifies a single char to its font) and adds one extra step: for every word in the image, it performs majority voting to choose the font for all the letters in the word. In other words, it finds the font predicted for most of the letters in the word and assigns that font to every letter in the word.

In [None]:
class FontsInTheWildClassifier():
  """
  Classifies the font of every char in the images of an h5 file.

  Each char is cropped from the image and classified by a CNN; all chars of
  a word are then assigned the font predicted for most of the word's chars.
  Results are accumulated as csv rows and can be written to CSV_FILE.
  """

  # Side length (in pixels) of the square char crop fed to the CNN
  CHAR_IMAGE_SIZE = 18
  # Weights loaded when no explicit path is given
  DEFAULT_WEIGHTS_PATH = "/content/drive/MyDrive/Project/model_weights.ckpt"

  def __init__(self, weights_path=DEFAULT_WEIGHTS_PATH):
    """
    Builds the CNN and loads its trained weights

    weights_path: location of the trained weights passed to load_weights
    """
    size = self.CHAR_IMAGE_SIZE
    self.classes = range(len(FONTS))
    # Rows collected for the csv output and the running row index
    self.csv_rows = []
    self.csv_line_counter = 0
    self.model = keras.models.Sequential([
        keras.layers.Rescaling(1.0 / 255, input_shape=(size, size, 3)),
        keras.layers.Conv2D(32, kernel_size=3, activation="relu", strides=1, padding='same'),
        keras.layers.MaxPooling2D(pool_size=(2,2), strides=2, padding='same'),
        keras.layers.Dropout(0.2),

        keras.layers.Conv2D(64, kernel_size=3, activation="relu", padding='same', strides=1),
        keras.layers.MaxPooling2D(pool_size=(2,2), strides=2, padding='same'),
        keras.layers.Dropout(0.2),

        keras.layers.Conv2D(128, kernel_size=3, activation="relu", padding='same', strides=1),
        keras.layers.MaxPooling2D(pool_size=(2,2), strides=2, padding='same'),
        keras.layers.Dropout(0.2),

        keras.layers.Flatten(),
        keras.layers.Dense(100, activation='relu'),
        # One output unit per font
        keras.layers.Dense(len(FONTS), activation="softmax"),
    ])
    self.model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    self.model.load_weights(weights_path)


  def classify_fonts_in_images_from_h5_file(self, file_path, evaluate=False, image_names=None, csv=False):
    """
    Classifies all fonts in the images, optionally evaluates accuracy and saves the results to csv

    file_path: path of the h5 file; images are read from its 'data' group
    evaluate: when True, compares the predictions with the 'font' attribute of each image and prints the accuracy
    image_names: names of the images to classify; all images in the file when None
    csv: when True, writes the collected rows to CSV_FILE (note: this flag shadows the csv module inside this method only)
    """
    all_predictions = []
    expected_predictions = []

    # The context manager guarantees the h5 file is closed, also on errors
    with h5py.File(file_path, 'r') as db:
      data = db['data']
      if image_names is None:
        image_names = list(data.keys())

      # Predict for each word in the image the font of its chars
      for image_index, image_name in enumerate(image_names):
        print(f"image: {image_index} / {len(image_names)}")
        entry = data[image_name]

        # Group chars by words
        words = entry.attrs['txt']
        chars = entry.attrs['charBB'].transpose().astype(int)
        grouped_chars = self._group_chars_by_words(words, chars)

        image = entry[:]
        for word, word_chars in zip(words, grouped_chars):
          predictions = self.predict_fonts_in_word(image, word_chars)
          all_predictions += predictions

          # Add one one-hot encoded row per char to the csv
          # NOTE(review): grouping uses len(word) on the raw value while this
          # zip uses the decoded text - the two agree only for single-byte chars.
          for char, prediction in zip(word.decode("utf-8"), predictions):
            classifications = [0] * len(FONTS)
            classifications[prediction] = 1
            self.csv_rows.append([self.csv_line_counter, image_name, char] + classifications)
            self.csv_line_counter += 1

        # Ground truth must be read while the file is still open
        if evaluate:
          expected_predictions += [self._convert_font_to_int(font) for font in entry.attrs['font']]

    # Evaluate
    if evaluate:
      correct = sum(1 for actual, expected in zip(all_predictions, expected_predictions) if actual == expected)
      # Avoid a ZeroDivisionError when nothing was classified
      accuracy = correct / float(len(all_predictions)) if all_predictions else float("nan")
      print(f"Accuracy: {accuracy}")

    if csv:
      self._save_to_csv()

  def _group_chars_by_words(self, words, chars):
    """
    Groups the chars by words

    Consumes chars in order: the first len(words[0]) entries belong to the
    first word, the next len(words[1]) to the second, and so on.
    Returns a list with one slice of chars per word.
    """
    grouped_chars = []
    start = 0
    for word in words:
      end = start + len(word)
      grouped_chars.append(chars[start:end])
      start = end

    return grouped_chars

  def predict_fonts_in_word(self, image, chars_coordinates):
    """
    Predicts the fonts of all chars in a word

    Every char is classified by the model, and the most frequent class is
    assigned to all chars (ties go to the lowest class index).
    Returns a list with one class index per char; empty for an empty word.
    """
    if len(chars_coordinates) == 0:
      return []

    # Classify all chars of the word with a single batched model call
    crops = [self._extract_char_image(image, char) for char in chars_coordinates]
    probabilities = self.model.predict(np.concatenate(crops, axis=0))
    votes = np.argmax(probabilities, axis=1)

    # Choose most frequent classification and assign to all chars
    winner = int(np.bincount(votes).argmax())
    return [winner] * len(votes)


  def _extract_char_image(self, image, coordinates):
    """
    Crops a char from an image given its coordinates

    coordinates: the four corner points of the char
    Returns the warped (and, when possible, denoised) crop as an array with a leading batch axis.
    """
    size = self.CHAR_IMAGE_SIZE
    # The corners are reordered so that they pair up with output_pts below
    # (assumes the corners arrive in a consistent winding order - TODO confirm)
    pt_A, pt_D, pt_C, pt_B = coordinates.tolist()
    input_pts = np.float32([pt_A, pt_B, pt_C, pt_D])
    output_pts = np.float32([[0, 0], [0, size - 1], [size - 1, size - 1], [size - 1, 0]])
    M = cv2.getPerspectiveTransform(input_pts, output_pts)
    warped = cv2.warpPerspective(image, M, (size, size), flags=cv2.INTER_LINEAR)

    # Remove noise (best effort: fall back to the raw crop when denoising fails)
    try:
      no_noise = cv2.fastNlMeansDenoisingColored(warped)
    except Exception:
      no_noise = warped
      print("Unable to remove noise")
    final = keras.preprocessing.image.img_to_array(no_noise)
    return np.expand_dims(final, axis=0)

  def _convert_font_to_int(self, font):
    """
    Converts font bytes to int (its position in FONTS)
    """
    return FONTS.index(font)

  def _save_to_csv(self):
    """
    Saves the results of the h5 classifications to a csv file

    Writes the header and all collected rows to CSV_FILE, then resets the
    collected rows and the row index.
    """
    # Header cells given as bytes are decoded so they are not written as "b'...'"
    header = [title.decode("utf-8") if isinstance(title, bytes) else title for title in CSV_TITLES]

    # Write results to csv (explicit encoding so non-ASCII chars are written consistently)
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as csvfile:
      writer = csv.writer(csvfile, delimiter=',')
      writer.writerow(header)
      writer.writerows(self.csv_rows)

    # Reinit csv rows and index
    self.csv_rows = []
    self.csv_line_counter = 0

# Evaluate the final model's accuracy

In [None]:
# # Read content of the h5 file
# db = h5py.File("/content/drive/MyDrive/Project/SynthText_test.h5", 'r')
# all_image_names = list(db['data'].keys())

# Split to train, test and validation set
# _, test_set, _ = np.split(all_image_names, [int(.8 * len(all_image_names)), int(.9 * len(all_image_names))])

# Classify every image in the test file and write the results to CSV_FILE.
# evaluate=False: no accuracy is computed in this run.
classifier = FontsInTheWildClassifier()
classifier.classify_fonts_in_images_from_h5_file(file_path=TEST_FILE, evaluate=False, csv=True)

image: 0 / 1974
image: 1 / 1974
image: 2 / 1974
image: 3 / 1974
image: 4 / 1974
image: 5 / 1974
image: 6 / 1974
image: 7 / 1974
image: 8 / 1974
image: 9 / 1974
image: 10 / 1974
image: 11 / 1974
image: 12 / 1974
image: 13 / 1974
image: 14 / 1974
image: 15 / 1974
image: 16 / 1974
image: 17 / 1974
image: 18 / 1974
image: 19 / 1974
image: 20 / 1974
image: 21 / 1974
image: 22 / 1974
image: 23 / 1974
image: 24 / 1974
image: 25 / 1974
image: 26 / 1974
image: 27 / 1974
image: 28 / 1974
image: 29 / 1974
image: 30 / 1974
image: 31 / 1974
image: 32 / 1974
image: 33 / 1974
image: 34 / 1974
image: 35 / 1974
image: 36 / 1974
image: 37 / 1974
image: 38 / 1974
image: 39 / 1974
image: 40 / 1974
image: 41 / 1974
image: 42 / 1974
image: 43 / 1974
image: 44 / 1974
image: 45 / 1974
image: 46 / 1974
image: 47 / 1974
image: 48 / 1974
image: 49 / 1974
image: 50 / 1974
image: 51 / 1974
image: 52 / 1974
image: 53 / 1974
image: 54 / 1974
image: 55 / 1974
image: 56 / 1974
image: 57 / 1974
image: 58 / 1974
image: 