In [1]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, Flatten, TimeDistributed, Input
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.applications.mobilenet import MobileNet
from keras.layers.pooling import GlobalAveragePooling2D

import os
import json
import cv2

import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
# Every clip is cropped or padded to this many frames before training.
frames = 100

# Project layout: <root>/frames/<class>/<sample>/<class>_<n>.jpg
# and <root>/dict/{encoding,decoding}.json
root_folder = '/home/JulioCesar/sign_language/cnn_lstm'
classes_folder = os.path.join(root_folder, 'frames')
dict_path = os.path.join(root_folder, 'dict')
encoding_path, decoding_path = [
    os.path.join(dict_path, file_name)
    for file_name in ('encoding.json', 'decoding.json')
]

# Each entry of the frames folder is treated as one class
# (assumes it contains only class folders -- TODO confirm).
classes_list = os.listdir(classes_folder)
classes = len(classes_list)

print('Number of classes: {}'.format(classes))
print(classes_list)

Number of classes: 50
['drogueria', 'preocupar', 'jugar', 'jardin', 'olvidar', 'ducha', 'computador', 'domingo', 'apoyar', 'ahora', 'feliz', 'recordar', 'transmilenio', 'depender', 'gustar', 'miedo', 'apartamento', 'llevar', 'banco', 'furioso', 'aspiradora', 'calle', 'escalera', 'mareo', 'besar', 'vida', 'doler', 'problema', 'salir', 'empezar', 'futbol', 'telefono', 'radiografia', 'botella', 'invitar', 'banio', 'necesitar', 'discoteca', 'confundido', 'paciencia', 'celular', 'hospital', 'querer', 'zapato', 'cuchara', 'entender', 'saber', 'camion', 'oficina', 'bailar']


In [3]:
# # Uncomment the code below to (re)create the encoding/decoding dictionaries.
# # This is only required when a new word is registered
# # or an existing word is removed.
# encoding = {}
# decoding = {}
# for ind, word in enumerate(classes_list):
#     # print('Current {}%'.format((ind/float(classes)) * 100))
#     encoding[ind] = word
#     decoding[word] = ind

# print('encoding has {} items'.format(len(encoding.keys())))
# print(encoding.keys())
# print('decoding has {} items'.format(len(decoding.keys())))
# print(decoding.keys())
# print('Test encoding: {} is {}'.format(21, encoding[21]))
# print('Test decoding: {} is {}'.format('calle', decoding['calle']))

# json_file = json.dumps(encoding)
# f = open(encoding_path, 'w')
# f.write(json_file)
# f.close()

# json_file = json.dumps(decoding)
# f = open(decoding_path, 'w')
# f.write(json_file)
# f.close()

In [4]:
# Dataset notes: 50 videos
# min: 59, max: 125 (presumably frames per video -- TODO confirm)

# The keys read back from the JSON file are converted to int so that the
# mapping can be indexed with integer class ids (e.g. encoding[21]).
with open(encoding_path, 'r') as f:
    encoding_tmp = json.load(f)
encoding = {int(key): word for key, word in encoding_tmp.items()}

# word -> class id
with open(decoding_path, 'r') as f:
    decoding = json.load(f)

print('encoding has {} items'.format(len(encoding)))
print(encoding.keys())
print('Test encoding: {} is {}'.format('21', encoding[21]))

print('decoding has {} items'.format(len(decoding)))
print(decoding.keys())
print('Test decoding: {} is {}'.format('calle', decoding['calle']))

encoding has 50 items
dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
Test encoding: 21 is calle
decoding has 50 items
dict_keys(['doler', 'problema', 'apoyar', 'bailar', 'futbol', 'feliz', 'celular', 'salir', 'gustar', 'aspiradora', 'computador', 'cuchara', 'miedo', 'ahora', 'llevar', 'querer', 'saber', 'paciencia', 'hospital', 'discoteca', 'oficina', 'recordar', 'empezar', 'furioso', 'olvidar', 'drogueria', 'zapato', 'invitar', 'banco', 'preocupar', 'calle', 'radiografia', 'ducha', 'botella', 'jugar', 'necesitar', 'banio', 'confundido', 'jardin', 'camion', 'transmilenio', 'depender', 'mareo', 'domingo', 'telefono', 'entender', 'apartamento', 'besar', 'vida', 'escalera'])
Test decoding: calle is 21


In [5]:
from keras.utils.np_utils import to_categorical

# Row i of `one_hot_labels` must be the one-hot vector of class id i, because
# the batch generator indexes it directly with the id (one_hot_labels[item]).
# Dict iteration order is not guaranteed to be ascending (and is arbitrary
# before Python 3.7), so the ids are sorted explicitly instead of relying on it.
# Assumes the ids are the contiguous range 0..n-1 -- TODO confirm if ids change.
one_hot_labels = to_categorical(sorted(encoding.keys()))
print(one_hot_labels[:3])
# print(one_hot_labels[2])
print('----------------')
print(decoding['recordar'])
print(one_hot_labels[decoding['recordar']])

[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
----------------
11
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0.]


In [6]:
from keras.applications.resnet50 import preprocess_input as preprocess_input_resnet
from keras.applications.vgg16 import preprocess_input as preprocess_input_vgg
from keras.applications.mobilenet import preprocess_input as preprocess_input_mobilenet

In [7]:
import random
import numpy as np

# Side length in pixels: frames are resized to IMG_SIZE x IMG_SIZE and the
# model input shape is built from the same value.
IMG_SIZE = 160

def get_img_from_folder(category):
    """Load one randomly chosen sample of `category` as a list of `frames` images.

    A sample is a folder ``classes_folder/<category>/<sample>`` whose frames are
    named ``<category>_<n>.jpg`` with ``n`` starting at 0. The clip is forced to
    contain exactly `frames` images:

    * more frames than needed -> the centred window of `frames` images is kept;
    * fewer frames than needed -> every real frame is kept and black images are
      added before and after it (the extra one goes at the end when the gap
      is odd).

    Args:
        category: Name of the class folder to draw the sample from.

    Returns:
        A list of `frames` images of shape (IMG_SIZE, IMG_SIZE, 3), exactly as
        returned by ``cv2.imread`` + ``cv2.resize`` (no colour conversion).

    Raises:
        FileNotFoundError: If an expected frame file cannot be read.
    """
    category_dir = os.path.join(classes_folder, category)
    sample = random.choice(os.listdir(category_dir))
    sample_dir = os.path.join(category_dir, sample)
    len_imgs = len(os.listdir(sample_dir))

    if len_imgs >= frames:
        # Crop (or exact match): keep the centred window of `frames` images.
        first_index = (len_imgs - frames) // 2
        lead_pad = 0
        available = frames
    else:
        # Pad: keep every real frame, surrounded by blank ones.
        first_index = 0
        lead_pad = (frames - len_imgs) // 2
        available = len_imgs

    imgs_array = []
    for slot in range(frames):
        offset = slot - lead_pad
        if offset < 0 or offset >= available:
            # Outside the real clip: insert a black frame.
            imgs_array.append(np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8))
            continue

        img_item = category + '_' + str(first_index + offset) + '.jpg'
        img_path = os.path.join(sample_dir, img_item)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read frame: {}'.format(img_path))
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        imgs_array.append(img)

    return imgs_array
    

def generate_train_batch(batch_size = 8):
    """Endlessly yield random training batches.

    Each batch draws `batch_size` distinct class ids from `encoding`, loads one
    random sample per class with `get_img_from_folder`, and min-max rescales
    every clip as a whole to the [0, 1] range.

    Args:
        batch_size: Number of clips per batch.

    Yields:
        A tuple ``(X_data, Y_data)`` of arrays: the stacked float32 clips and
        the matching rows of `one_hot_labels`.
    """
    while True:
        picked_ids = random.sample(list(encoding.keys()), batch_size)
        clips, labels = [], []
        for class_id in picked_ids:
            clip = get_img_from_folder(encoding[class_id])
            # Min-max normalisation over the whole clip, not per frame.
            clip = cv2.normalize(np.float32(clip), None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
            clips.append(clip)
            labels.append(one_hot_labels[class_id])

        # X_data /= 255. #preprocess_input_mobilenet(X_data)
        yield (np.array(clips), np.array(labels))

In [8]:
# Endless generator of (clips, one-hot labels) batches, 8 clips per batch.
train_gen = generate_train_batch(batch_size = 8)

In [9]:
# Sanity check: pull one batch, then show its shapes and the value range of
# frame 50 of the first clip.
for i in range(1):
    my_x, my_y = next(train_gen)
    print(my_x.shape, my_y.shape)
    print(my_x[0][50].max())
    print(my_x[0][50].min())

(8, 100, 160, 160, 3) (8, 50)
0.9612069
0.0


In [10]:
# Disabled alternative model, kept inside a string literal so that it is not
# executed: a pretrained MobileNet applied to every frame through
# TimeDistributed, followed by LSTM(128) and a Dense softmax classifier.
'''
inputs = Input(shape = (frames, IMG_SIZE, IMG_SIZE, 3))
cnn_base = MobileNet(include_top = False, weights='imagenet', input_shape = (IMG_SIZE, IMG_SIZE, 3))

cnn_out = GlobalAveragePooling2D()(cnn_base.output)
cnn = Model(inputs=cnn_base.input, outputs=cnn_out)
# cnn = Model(inputs=cnn_base.input, outputs=cnn_base.output)
encoded_frames = TimeDistributed(cnn)(inputs)
encoded_sequence = LSTM(128)(encoded_frames)

hidden_layer = Dense(256, activation="relu")(encoded_sequence)
outputs = Dense(classes, activation="softmax")(hidden_layer)
model = Model([inputs], outputs)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
'''
# # input_layer = Input(shape=(None,224,224,3))
# # td = TimeDistributed(cnn)(input_layer)
# # model = Model(input_layer, td)
# # # x = TimeDistributed(Flatten())(x)
# # model = LSTM(256)(model)

'\ninputs = Input(shape = (frames, IMG_SIZE, IMG_SIZE, 3))\ncnn_base = MobileNet(include_top = False, weights=\'imagenet\', input_shape = (IMG_SIZE, IMG_SIZE, 3))\n\ncnn_out = GlobalAveragePooling2D()(cnn_base.output)\ncnn = Model(inputs=cnn_base.input, outputs=cnn_out)\n# cnn = Model(inputs=cnn_base.input, outputs=cnn_base.output)\nencoded_frames = TimeDistributed(cnn)(inputs)\nencoded_sequence = LSTM(128)(encoded_frames)\n\nhidden_layer = Dense(256, activation="relu")(encoded_sequence)\noutputs = Dense(classes, activation="softmax")(hidden_layer)\nmodel = Model([inputs], outputs)\n\nmodel.compile(optimizer=\'adam\', loss=\'categorical_crossentropy\', metrics=[\'accuracy\'])\n'

In [11]:

# Small baseline network: a tiny per-frame CNN whose flattened features are
# fed to an LSTM, followed by a softmax over the classes.
model = Sequential([
    # Applied to each frame independently; None leaves the clip length open.
    TimeDistributed(Conv2D(2, (2,2), activation='relu'), input_shape=(None, IMG_SIZE, IMG_SIZE, 3)),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Flatten()),
    # Summarises the sequence of per-frame feature vectors.
    LSTM(50),
    Dense(classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])


In [12]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_1 (TimeDist (None, None, 159, 159, 2) 26        
_________________________________________________________________
time_distributed_2 (TimeDist (None, None, 79, 79, 2)   0         
_________________________________________________________________
time_distributed_3 (TimeDist (None, None, 12482)       0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 50)                2506600   
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
Total params: 2,509,176
Trainable params: 2,509,176
Non-trainable params: 0
_________________________________________________________________


In [13]:
import keras.callbacks as callbacks

# A checkpoint is written after every epoch (save_best_only=False); the file
# name embeds the epoch number and the training accuracy.
checkpoint_dir = os.path.join(root_folder, 'exported_models', 'V4')
# Create the target folder up front so the first save cannot fail on a
# missing directory after a full epoch of training.
os.makedirs(checkpoint_dir, exist_ok=True)

callbacks_list = [
    callbacks.ModelCheckpoint(
        filepath=os.path.join(checkpoint_dir, 'sign-model-{epoch:02d}-{acc:.2f}.h5'),
        # The model is compiled with metrics=['acc'], so the logged key is
        # 'acc' (the same key used in the file name), not 'accuracy'.
        monitor='acc',
        save_best_only=False)
]

In [14]:
# Train from the endless batch generator: 250 batches per epoch for 20 epochs,
# with the callbacks in callbacks_list (model checkpointing) attached.
history = model.fit_generator(train_gen,
                              steps_per_epoch=250,
                              epochs=20,
                              callbacks=callbacks_list)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
 13/250 [>.............................] - ETA: 14:33 - loss: 0.1128 - acc: 1.0000

KeyboardInterrupt: 