In [1]:
import cv2 as cv
import numpy as np
import os
import h5py
from keras.utils import to_categorical
from keras.layers import Conv2D, Dense, LeakyReLU, MaxPooling2D, BatchNormalization, Flatten
from keras.optimizers import Adam
from keras.models import Sequential, load_model
from keras.losses import categorical_crossentropy

Using TensorFlow backend.


In [21]:
# Base name (without extension) of the file the trained model is saved to / loaded from.
saved_model_name = 'cnn_for_asl_grayscale'

# Paths of the training images that load_train_samples has already returned;
# it appends to this list so later calls hand out images not used before.
already_used_images = []

# Maps every class name to its integer class index. The names are listed in
# index order, so a name's position in the tuple is its class index.
labels_dict = {
    name: index
    for index, name in enumerate((
        "A", "B", "C", "D", "del",
        "E", "F", "G", "H", "I",
        "J", "K", "L", "M", "N",
        "nothing", "O", "P", "Q", "R",
        "S", "space", "T", "U", "V",
        "W", "X", "Y", "Z",
    ))
}

In [25]:
def load_image(image_path):
    """Read an image from disk as a single-channel grayscale array.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image as an array of shape (height, width, 1).

    Raises:
        IOError: If the file is missing or cannot be decoded as an image.
    """
    img = cv.imread(image_path, cv.IMREAD_GRAYSCALE)

    # cv.imread does not raise on failure, it returns None; fail loudly here
    # instead of with an opaque AttributeError on `img.shape` below.
    if img is None:
        raise IOError('Could not read image: %s' % image_path)

    # Add the trailing channel axis so the result is (height, width, 1).
    img = np.reshape(img, (img.shape[0], img.shape[1], 1))

    return img

In [4]:
def map_labels_to_number(labels):
    """Translate class names into their integer class indices.

    Args:
        labels: Iterable of class names; each one must be a key of labels_dict.

    Returns:
        A list with the class index of every label, in the same order.

    Raises:
        KeyError: If a label is not a key of labels_dict.
    """
    return [labels_dict[name] for name in labels]

**IMPORTANTE** La variable asl_directory hace referencia a la ruta relativa en la que se encuentra la carpeta con todos los ejemplos de entrenamiento es necesario editarla de acuerdo a esta ubicación para que el programa pueda cargar los ejemplos de entrenamiento \
**IMPORTANT** The variable `asl_directory` holds the relative path to the folder that contains all the training examples. Edit it to match that folder's location on your machine so that the program can load the training examples.

In [5]:
def load_train_samples():
    """Load the next batch of not-yet-used training images from disk.

    Every sub-folder of the training directory is treated as one class: the
    folder name is the label of the images inside it. At most
    `max_images_per_label` new images are taken from each folder per call.
    Images returned here are recorded in the module-level list
    `already_used_images`, so consecutive calls return different images until
    the data set is exhausted (then both returned lists are empty).

    Returns:
        A tuple (x, y) where x is a list of uint8 image arrays as produced by
        load_image and y is the list of the matching label (folder) names.
    """
    asl_directory = '../asl-alphabet/asl_alphabet_train/asl_alphabet_train'
    max_images_per_label = 500

    print('Already processed %i images' % len(already_used_images))
    print('Loading images....')
    absolute_path = os.path.abspath(asl_directory)

    # Set mirror of already_used_images: membership tests on the list are
    # linear, which makes the scan quadratic once many images were processed.
    used_paths = set(already_used_images)

    x = []
    y = []

    for label in os.listdir(absolute_path):
        label_path = os.path.join(absolute_path, label)

        # Skip stray files in the data set root (e.g. .DS_Store); only
        # folders hold the images of a class.
        if not os.path.isdir(label_path):
            continue

        added_elements_per_label = 0

        for image in os.listdir(label_path):
            if added_elements_per_label >= max_images_per_label:
                break

            image_path = os.path.join(label_path, image)
            if image_path in used_paths:
                continue

            img = np.array(load_image(image_path), dtype=np.uint8)

            x.append(img)
            y.append(label)

            added_elements_per_label += 1
            used_paths.add(image_path)
            # Keep the module-level list up to date for the following calls.
            already_used_images.append(image_path)

    print('Loaded a total of %i images' % len(x))
    return x, y

**IMPORTANTE** La variable `asl_test_directory` hace referencia a la ruta relativa en la que se encuentra la carpeta con todos los ejemplos de prueba; es necesario editarla de acuerdo a esta ubicación para que el programa pueda cargar los ejemplos de prueba \
**IMPORTANT** The variable `asl_test_directory` holds the relative path to the folder that contains all the test examples. Edit it to match that folder's location on your machine so that the program can load the test examples.

In [6]:
def load_test_samples():
    """Load every image found in the test directory together with its label.

    The label of an image is derived from its file name: the text before the
    first '.' is taken, and of that the part before the first '_'.

    Returns:
        A tuple (x, y) where x is the list of images as returned by
        load_image and y is the list of label names parsed from the file names.
    """
    asl_test_directory = '../asl-alphabet/asl_alphabet_test/asl_alphabet_test'
    test_root = os.path.abspath(asl_test_directory)

    x, y = [], []

    for file_name in os.listdir(test_root):
        x.append(load_image(os.path.join(test_root, file_name)))

        stem = file_name.split('.')[0]
        y.append(stem.split('_')[0])

    return x, y

In [7]:
def generate_model(input_shape, output_shape):
    """Build the (uncompiled) convolutional network used for classification.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        output_shape: Number of units of the final softmax layer.

    Returns:
        A Sequential model: conv -> pool -> conv -> conv -> pool -> flatten ->
        batch normalization -> dense(128) + LeakyReLU -> softmax dense.
    """
    layers = [
        # Convolutional feature extractor.
        Conv2D(16,
               kernel_size=(16, 16),
               activation='relu',
               use_bias=True,
               input_shape=input_shape),
        MaxPooling2D(pool_size=(3, 3)),
        Conv2D(32,
               kernel_size=(8, 8),
               activation='relu',
               use_bias=True),
        Conv2D(64,
               kernel_size=(8, 8),
               activation='relu',
               use_bias=True),
        MaxPooling2D(pool_size=(3, 3)),
        # Classifier head.
        Flatten(),
        BatchNormalization(),
        # Linear dense layer; its activation is the LeakyReLU layer that follows.
        Dense(128,
              activation='linear',
              use_bias=True),
        LeakyReLU(alpha=0.01),
        Dense(output_shape,
              activation='softmax',
              use_bias=True),
    ]

    return Sequential(layers)

In [8]:
def process_samples(x, y):
    """Convert raw samples into the arrays consumed by the model.

    Args:
        x: Sequence of images.
        y: Sequence of label names (keys of labels_dict).

    Returns:
        A tuple (images, one_hot): the images stacked into one uint8 array and
        the labels one-hot encoded with len(labels_dict) classes.
    """
    images = np.array(x, dtype=np.uint8)

    class_indices = map_labels_to_number(y)
    one_hot = to_categorical(class_indices, len(labels_dict))

    return images, one_hot

In [9]:
def train_model(model, saved_model_name, x_train, y_train):
    """Train the model batch by batch until no unused training images remain.

    Each round fits the model on the current batch, saves it to
    '<saved_model_name>.h5' and then loads the next batch of unused training
    images. Training stops as soon as a batch is empty.

    Args:
        model: Compiled model to train.
        saved_model_name: File name (without extension) used to save the model.
        x_train: Samples of the first batch.
        y_train: One-hot labels of the first batch.

    Returns:
        The same model object, after training.
    """
    while True:
        # An empty batch means every training image has been consumed.
        if len(x_train) <= 0:
            return model

        print('Running iteration')
        checkpoint_file = '%s.h5' % saved_model_name
        model.fit(x_train, y_train, batch_size=64, epochs=5)
        model.save(checkpoint_file)

        next_images, next_labels = load_train_samples()
        x_train, y_train = process_samples(next_images, next_labels)

In [10]:
def create_and_train_model():
    """Create a new model, compile it and train it on the training set.

    The network input shape is taken from the first loaded sample and the
    number of outputs from the width of the one-hot label array.

    Returns:
        The trained model.
    """
    raw_images, raw_labels = load_train_samples()
    x_train, y_train = process_samples(raw_images, raw_labels)

    sample_shape = x_train[0].shape
    class_count = y_train.shape[1]

    model = generate_model(sample_shape, class_count)
    model.compile(optimizer=Adam(), loss=categorical_crossentropy, metrics=['accuracy'])
    model.summary()

    return train_model(model, saved_model_name, x_train, y_train)

In [11]:
def load_and_test_model():
    """Load the model saved as '<saved_model_name>.h5' and evaluate it.

    Returns:
        Whatever test_model returns for the loaded model: the model itself and
        its evaluation results.
    """
    model_file = '%s.h5' % saved_model_name
    restored_model = load_model(model_file)
    return test_model(restored_model)

In [12]:
def compute_rectangle_coordinates(x_center, y_center, frame_height, frame_width):
    """Compute two opposite corners of a rectangle centered on a point.

    Args:
        x_center: X coordinate of the rectangle center.
        y_center: Y coordinate of the rectangle center.
        frame_height: Height of the rectangle.
        frame_width: Width of the rectangle.

    Returns:
        A tuple (first, second) of (x, y) integer pairs: the center minus half
        the size and the center plus half the size. Fractional values are
        truncated by int().
    """
    half_width = frame_width / 2
    half_height = frame_height / 2

    lower_corner = (int(x_center - half_width), int(y_center - half_height))
    upper_corner = (int(x_center + half_width), int(y_center + half_height))

    return lower_corner, upper_corner

In [13]:
def draw_rectangle_frame(image, first_coordinate, second_coordinate, color):
    """Draw a rectangle outline on an image.

    Args:
        image: Image to draw on.
        first_coordinate: (x, y) of one corner of the rectangle.
        second_coordinate: (x, y) of the opposite corner.
        color: Color of the outline, passed to cv.rectangle as is.

    Returns:
        The image returned by cv.rectangle.
    """
    line_thickness = 2
    return cv.rectangle(image, first_coordinate, second_coordinate, color, line_thickness)

In [14]:
def map_categorical_to_label(categorical_results):
    """Return the class name predicted for the first sample of a batch.

    Args:
        categorical_results: Array of per-class scores with one row per
            sample; only the first row is used.

    Returns:
        The key of labels_dict whose class index has the highest score.

    Raises:
        KeyError: If no label in labels_dict has the predicted class index.
    """
    predicted_indices = np.argmax(categorical_results, axis=-1)
    position = int(predicted_indices[0])

    # Look the label up by its stored class index rather than by its position
    # among the dict keys, so the result does not depend on the order in
    # which labels_dict happens to be written.
    index_to_label = {index: name for name, index in labels_dict.items()}
    return index_to_label[position]

In [15]:
def test_model(cnn_asl_model):
    """Evaluate a model on the test images.

    Args:
        cnn_asl_model: Model to evaluate.

    Returns:
        A tuple (model, results) with the model that was passed in and the
        value returned by its evaluate() call on the test set.
    """
    raw_images, raw_labels = load_test_samples()
    x_test, y_test = process_samples(raw_images, raw_labels)

    evaluation = cnn_asl_model.evaluate(x_test, y_test, verbose=1)

    return cnn_asl_model, evaluation

El siguiente bloque de código se ejecuta en caso de que se desee crear y entrenar un modelo nuevo con los parámetros establecidos en la función `generate_model` \
The next code block is executed when a new model should be created and trained according to the architecture established in the function `generate_model`

In [None]:
# Build a new model and train it on the training images; the trained model is
# kept in cnn_asl_model for the cells below.
cnn_asl_model = create_and_train_model()

Already processed 0 images
Loading images....
Loaded a total of 14500 images
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 185, 185, 16)      12304     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 61, 61, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 54, 54, 32)        32800     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 47, 47, 64)        131136    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 15, 15, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 14400)             0         
___________________________________________

In [None]:
# Evaluate the model held in cnn_asl_model on the test images.
cnn_asl_model,results = test_model(cnn_asl_model)

En caso de que se desee cargar un modelo ya guardado en la misma carpeta que el archivo se ejecuta el siguiente bloque de código, este bloque carga el modelo que tenga el mismo nombre que la variable `saved_model_name` con un formato .h5 \
To load a model that was already saved in the same folder as this notebook, run the next code block. It loads the model file named after the variable `saved_model_name`, with the `.h5` extension.

In [26]:
# Load the model saved as '<saved_model_name>.h5' and evaluate it on the test images.
cnn_asl_model,results = load_and_test_model()



In [27]:
# Display the evaluation results produced by the previous cell.
results

[4.527571678161621, 0.9642857313156128]

El siguiente bloque de código puede ser utilizado para probar el modelo con la cámara del computador \
The next code block can be used to test the model with the computer's camera

In [28]:

# Live demo: classify the center region of every webcam frame with
# cnn_asl_model and show the prediction. Press 'q' to quit.

# Size in pixels of the square region cropped from the center of each frame
# and fed to the model; it has to match the image size the model expects.
frame_height = 200
frame_width = 200

# Open the default camera (device index 0).
vid = cv.VideoCapture(0)

try:
    if not vid.isOpened():
        raise RuntimeError('Could not open the default camera (device 0)')

    # Center of the camera image, from the reported frame width and height.
    x_center = vid.get(cv.CAP_PROP_FRAME_WIDTH) / 2
    y_center = vid.get(cv.CAP_PROP_FRAME_HEIGHT) / 2

    first_coordinate, second_coordinate = compute_rectangle_coordinates(x_center, y_center, frame_height, frame_width)

    while True:
        # Capture the video frame by frame.
        ret, frame = vid.read()

        # read() reports failure through `ret` and then returns no frame;
        # stop instead of crashing on it.
        if not ret:
            print('Could not read a frame from the camera, stopping')
            break

        # Crop the region of interest, convert it to grayscale and add the
        # batch and channel axes: (1, height, width, 1).
        cropped_frame = frame[first_coordinate[1]: first_coordinate[1]+frame_height, first_coordinate[0]: first_coordinate[0]+frame_width]
        cropped_frame = cv.cvtColor(cropped_frame, cv.COLOR_BGR2GRAY)
        processed_cropped_frame = np.reshape(cropped_frame, (1, cropped_frame.shape[0], cropped_frame.shape[1], 1))

        # Mark the cropped region on the full frame (drawn after cropping).
        frame = draw_rectangle_frame(frame, first_coordinate, second_coordinate, (252, 19, 3))

        results = cnn_asl_model.predict(processed_cropped_frame)
        label = map_categorical_to_label(results)

        frame = cv.putText(frame, 'The predicted value is: ' + label, (0, int(y_center*2)), cv.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Display the model input and the annotated full frame.
        cv.imshow('frame', cropped_frame)
        cv.imshow('frame2', frame)

        # The 'q' key is the quitting key; any other key can be used instead.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the windows, even when an error
    # interrupts the loop, so the device is not left locked.
    vid.release()
    cv.destroyAllWindows()