# Language-Recognition using ConvNets

_written by Joscha S. Rieber (Fraunhofer IAIS) in 2020_

## Environment

In [11]:
# Names of the dataset splits; `train` is also used as the sub-directory name below.
train = 'train'
test = 'test'

# Language labels handled by this notebook.
eng = 'english'
ger = 'german'

languages = [eng, ger]
categories = [train, test]

# One output unit per language. The model definition reads `num_classes`, so it has to
# be defined here for the notebook to run on a fresh kernel.
num_classes = len(languages)

# Dataset location, relative to the notebook's working directory.
dataset_root_path = '../data/'
train_path = dataset_root_path + train

# Input pipeline settings.
batch_size = 32
image_width = 500
image_height = 128

# Training hyper-parameters.
validation_split = 0.1  # fraction of the training images held out for validation
learning_rate = 0.005

# Where the trained model is written.
model_output_file = dataset_root_path + 'model.h5'

In [3]:
from glob import glob
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# XLA compiles your TensorFlow graph into a sequence of GPU kernels generated specifically for your model.
# Since these kernels are unique to your program, they can exploit model-specific information for optimization.

# import os
# os.environ['TF_XLA_FLAGS'] = '--tf_xla_cpu_global_jit'

In [4]:
# Collect every PNG that sits exactly one directory level below the training folder.
all_files = glob(f'{train_path}/*/*.png')
total_images = len(all_files)

# Split the image count into the validation share and the remaining training share.
num_validation = total_images * validation_split
num_train = total_images - num_validation

# Only whole batches count as steps, so the fractional part is truncated.
steps_per_epoch = int(num_train / batch_size)
validation_steps = int(num_validation / batch_size)

print(f'Steps per Epoch: {steps_per_epoch}')
print(f'Validation steps: {validation_steps}')

Steps per Epoch: 4500
Validation steps: 500


## Training Batch Generator Function

The following cell creates two Keras image generators on the training directory: one serves shuffled batches of training images to Keras' training algorithm, the other serves the held-out validation images.

In [5]:
# Options shared by the training and the validation stream.
# NOTE(review): Keras documents target_size as (height, width); confirm that passing
# (image_width, image_height) matches the orientation of the images on disk.
flow_options = dict(
    batch_size=batch_size,
    class_mode='categorical',
    target_size=(image_width, image_height),
    color_mode='grayscale',
)

# A single generator rescales pixel values by 1/255 and reserves the validation fraction.
image_data_generator = ImageDataGenerator(rescale=1./255, validation_split=validation_split)

# Both streams read the same directory and differ only in the subset they serve.
train_generator = image_data_generator.flow_from_directory(train_path, subset='training', **flow_options)
validation_generator = image_data_generator.flow_from_directory(train_path, subset='validation', **flow_options)

Found 144000 images belonging to 2 classes.
Found 16000 images belonging to 2 classes.


## Model definition

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, BatchNormalization, Input, Concatenate

from tensorflow.keras.optimizers import SGD

In [7]:
# Convolutional stack as (filters, kernel size) pairs, in the order the blocks are applied.
conv_blocks = [(16, 7), (32, 5), (64, 3), (128, 3), (256, 3)]

model = Sequential()

# Batch-normalise the single-channel input before the first convolution.
model.add(BatchNormalization(input_shape=(image_width, image_height, 1)))

# Every block: same-padded ReLU convolution -> batch normalisation -> max pooling.
for filters, kernel in conv_blocks:
    model.add(Conv2D(filters, (kernel, kernel), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D())

# Classifier head: flatten, one hidden dense layer, softmax over the classes.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Alternative VGG16-based architecture, kept for reference (disabled):
# from tensorflow.keras.applications import VGG16
#
# img_input = Input(shape=(128, 500, 1))
#
# img_conc = Concatenate(axis=3, name='input_concat')([img_input, img_input, img_input])
#
# model = VGG16(input_tensor=img_conc, weights=None, include_top=True, classes=2)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 500, 128, 1)       4         
_________________________________________________________________
conv2d (Conv2D)              (None, 500, 128, 16)      800       
_________________________________________________________________
batch_normalization_1 (Batch (None, 500, 128, 16)      64        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 250, 64, 16)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 250, 64, 32)       12832     
_________________________________________________________________
batch_normalization_2 (Batch (None, 250, 64, 32)       128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 125, 32, 32)       0

In [8]:
# Plain SGD with categorical cross-entropy and accuracy as the reported metric.
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and is
# rejected by newer Keras releases.
model.compile(
    optimizer=SGD(learning_rate=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Training

In [9]:
from tensorflow.keras.callbacks import EarlyStopping

# Halt training once validation accuracy has failed to improve for 6 epochs in a row,
# then roll the model back to the weights of the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=6,
    restore_best_weights=True,
)

In [10]:
# Train for at most 60 epochs; the early-stopping callback may end the run sooner.
fit_options = dict(
    validation_data=validation_generator,
    epochs=60,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[early_stopping],
)
model.fit(train_generator, **fit_options)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60


<tensorflow.python.keras.callbacks.History at 0x7f75c7851710>

In [12]:
# Write the trained model to the output path set in the configuration cell.
model.save(filepath=model_output_file)