In [1]:
import os
import time
import cv2
import numpy as np
import tensorflow as tf
from ObjectDetection.SSD_utils import *
import pandas as pd
import fnmatch
import string
import joblib
from cv2.ximgproc import guidedFilter
from Word_separation import *;

from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

In [2]:
# Alphabet known to the recogniser: a-z, A-Z, then 0-9. A character's position
# in this string is its integer class label.
char_list = string.ascii_letters+string.digits

def encode_to_labels(txt):
    """Encode a word as the indices of its characters in ``char_list``.

    Characters that are not part of ``char_list`` are printed and skipped, so
    the result can be shorter than ``txt``.

    Args:
        txt: The word to encode (iterated character by character).

    Returns:
        list[int]: One index into ``char_list`` per recognised character.
    """
    dig_lst = []
    for char in txt:
        try:
            dig_lst.append(char_list.index(char))
        except ValueError:
            # Only "character not in the alphabet" is expected here; any other
            # error (wrong input type, KeyboardInterrupt, ...) must propagate
            # instead of being swallowed by a bare except.
            print(char)

    return dig_lst


In [3]:
# Recognition network: a stack of convolutions, two bidirectional LSTM layers
# and a per-timestep softmax over the alphabet. Shape notes below assume the
# Keras defaults (pool stride = pool size, 'valid' padding unless stated).

# Input: single-channel image of height 32 and width 128.
inputs = Input(shape=(32,128,1))
# Convolution layer with kernel size (3,3); 'same' padding keeps height/width.
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# Pooling layer with kernel size (2,2): halves both height and width.
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)

conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)

conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)

conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# Pooling layer with kernel size (2,1): pools along the height only, so the
# width (the axis later used as the sequence) is left unchanged.
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)

conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)

conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)

# No padding here: the (2,2) kernel should collapse the remaining height of 2
# down to 1 (and shorten the width by one) -- TODO confirm with model.summary().
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)

# Drop axis 1 (the height axis, expected to be of size 1 at this point) so the
# LSTMs receive a (batch, timesteps, features) sequence.
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
# Two stacked bidirectional LSTM layers with units=128 each, returning the
# full sequence so that every timestep gets its own prediction.
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)

# One softmax per timestep over len(char_list)+1 classes; the extra class is
# presumably the CTC blank label (predictions are decoded with K.ctc_decode).
outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)
# model to be used at test time
act_model = Model(inputs, outputs)

In [4]:
def test_text(img):
    act_model.load_weights('best_model_v2.hdf5')

    # predict outputs on validation images
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    img = cv2.resize(img, (128,32))
    img = np.expand_dims(img , axis = 2)
    img = img.reshape(1,32,128,1)
    prediction = act_model.predict(img)

    # use CTC decoder
    out = K.get_value(K.ctc_decode(prediction, input_length=np.ones(prediction.shape[0])*prediction.shape[1],
                             greedy=True)[0][0])
    res = ""
    for p in out[0]:  
            if int(p) != -1:
                res+=(char_list[int(p)])
    return res

def text_extraction(image):
    """Locate the words in an image and recognise each of them.

    Args:
        image: BGR image array.

    Returns:
        list[str]: One recognised string per detected word box, in the order
        given by ``sort_words`` (line by line, box by box). Empty when no box
        was found.
    """
    # Pristine copy taken before `image` is handed to the detection helpers.
    orig = image.copy()
    blurred = cv2.GaussianBlur(image, (5, 5), 18)
    edges = edge_detect(blurred)
    _, edges = cv2.threshold(edges, 50, 255, cv2.THRESH_BINARY)
    # Morphological closing with a 20x20 kernel on the binarised edge map.
    bw_image = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, np.ones((20,20), np.uint8))

    boxes = text_detect(bw_image, image)
    word_boxes = [box for line in sort_words(boxes) for box in line]
    if not word_boxes:
        # As before, nothing is segmented or written to disk without boxes.
        return []

    # The segmented image does not depend on the box, so build and load it once
    # instead of once per word.
    # NOTE(review): the original also computed guidedFilter(orig, orig, 8, 0.05)
    # for every box but never used the result; that dead call was dropped.
    # If the filtered image was meant to be segmented, wire it in here.
    segmentation(orig)
    img = cv2.imread('seg.jpg')

    return [test_text(img[y1:y2,x1:x2]) for x1,y1,x2,y2 in word_boxes]

In [5]:
def run_detection(image, interpreter):
    """Run the TFLite detector on one preprocessed image.

    Args:
        image: Input tensor already prepared for the interpreter's first input.
        interpreter: A ``tf.lite.Interpreter`` whose tensors have been allocated.

    Returns:
        tuple: ``(out_scores, out_boxes, out_classes)`` exactly as returned by
        ``non_max_suppression``.
    """
    # Ask the interpreter for its tensor metadata instead of relying on
    # notebook-level globals, which only exist after a particular cell branch
    # has run.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Feed the input tensor and run inference.
    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()

    # Outputs 0..2 are read as boxes, classes and scores. Output 3 was fetched
    # by the original code but never used, so it is no longer read.
    boxes = interpreter.get_tensor(output_details[0]['index'])
    classes = interpreter.get_tensor(output_details[1]['index'])
    scores = interpreter.get_tensor(output_details[2]['index'])

    # Drop the size-1 axes and shift class ids by one
    # (presumably to line up with the label file -- TODO confirm).
    boxes = np.squeeze(boxes)
    scores = np.squeeze(scores)
    classes = np.squeeze(classes + 1).astype(np.int32)
    out_scores, out_boxes, out_classes = non_max_suppression(scores, boxes, classes)

    return out_scores, out_boxes, out_classes

def image_object_detection(interpreter, colors,img):
    """Return the class names of the objects detected in ``img``.

    Args:
        interpreter: TFLite interpreter passed through to ``run_detection``.
        colors: Unused here; kept so existing callers keep working.
        img: Image handed to ``preprocess_image_for_tflite``.

    Returns:
        list[str]: ``class_names`` entries for the detected classes, in the
        reverse of the order returned by ``run_detection``.
    """
    tensor = preprocess_image_for_tflite(img, model_image_size=300)
    _scores, _boxes, detected = run_detection(tensor, interpreter)

    # Only the labels are needed: walk the detections back to front and map
    # each class id to its name via the module-level class_names.
    return [class_names[cls] for cls in reversed(list(detected))]
    

In [6]:
def segmentation(img):
    """Binarise a BGR image with Otsu's threshold and save it as ``seg.jpg``.

    Args:
        img: BGR image array.

    Returns:
        None. The result is written to ``seg.jpg`` in the working directory.

    Raises:
        IOError: If OpenCV reports that the file could not be written.
    """
    # The original also built an RGB copy via cv2.split/cv2.merge that was
    # never used; that dead work has been removed.
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    # Otsu picks the threshold automatically (the 0 is ignored). With
    # THRESH_BINARY_INV, pixels above it become 0 and the rest 255.
    _, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

    # cv2.imwrite reports failure through its return value; surface it here
    # rather than letting a later cv2.imread pick up a stale or missing file.
    if not cv2.imwrite('seg.jpg',thresh):
        raise IOError("could not write 'seg.jpg'")

In [8]:
# Decide whether the photo shows text or objects and run the matching
# pipeline; `res` ends up holding the list of strings to report.
img = cv2.imread('image13.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message.
    raise FileNotFoundError("could not read 'image13.jpg'")
orig = img.copy()

# Classifier input: the 220x220 thumbnail flattened into a single row of
# 220 * 220 * 3 = 145200 values.
img = cv2.resize(img, (220, 220)).reshape(1, 145200)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
gnb = joblib.load('tnt_class.pkl')
out = gnb.predict(img)

img = orig.copy()

# One row in, one label out; label 0 selects the text pipeline.
if out[0] == 0:
    res = text_extraction(orig)
else:
    interpreter = tf.lite.Interpreter(model_path="ObjectDetection/model_data/ssdlite_mobilenet_v2.tflite")
    interpreter.allocate_tensors()

    # Get input and output tensors (kept as module-level names for code that
    # looks them up as globals).
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Labels; image_object_detection reads class_names as a global.
    class_names = read_classes('ObjectDetection/model_data/coco_classes.txt')
    # Generate colors for drawing bounding boxes.
    colors = generate_colors(class_names)

    res = image_object_detection(interpreter, colors,orig)

    # No object found: fall back to reading text from the picture.
    if not res:
        res = text_extraction(orig)

print(res)

['person']


In [9]:
import pyttsx3

# Read the results aloud: queue every entry of `res`, then play the queue.
engine = pyttsx3.init()
for phrase in res:
    engine.say(phrase)
engine.runAndWait()