In [1]:
"""
Author : M.Zeeshan Javed
Date  : May-11-2022
Description : This script will use the trained model to predict the text from all images of a directory
and save the output text in a given folder with same image name
"""
import os
import shutil

import time
import cv2
import run_model

In [2]:
model_config_path = 'configs/CNN_RNN_CTC/MMA-UD.json'


def do_prediction(image):
    """
    Author : M.Zeeshan Javed
    Date : Mar-09-2022
    Description : This function will call the necessary scripts to do prediction
    of Urdu text strip and will return the text.
    """
    encoded, predicted_text = run_model.recognize_strip(model_config_path, image)
    return predicted_text


In [3]:
output_dir = 'predictions'
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
    print(f"Deleted existing folder: {output_dir}")
os.makedirs(output_dir)
print(f"Recreated folder: {output_dir}")

out_files = os.listdir(output_dir)

Deleted existing folder: predictions
Recreated folder: predictions


In [4]:
# images_dir = '/home/cle-dl-05/Documents/2.DataSets/TEST SET/1K per channel bottom test data/Bol News/images'
images_dir = '/home/cle-dl-05/Documents/3.PdfOCR/dataset/Testing_set_1000/images' 

image_files = os.listdir(images_dir)
print("Images to test :", len(image_files))

Images to test : 1017


In [5]:
count = 0
faulty_images = []
start_time = time.time()
# Iterating on all images one by one
for image_file in image_files:
    count += 1
    print(f"\n{count}/{len(image_files)} Working on {image_file}")
    image_path = os.path.join(images_dir, image_file)
    
    image_name = image_file.split('.jpg')[0]
    
    if image_name + '.txt' in out_files:
        print("Skipping:", image_name)
        continue
        
    # Reading image and calling function to do prediction
    # %matplotlib inline
    image = cv2.imread(image_path)
    # plt.imshow(image)
    
    tic = time.time()
    try:
        text = do_prediction(image)
    except:
        print("** Error for:", image_name)
        faulty_images.append(image_name)
        continue
    toc = time.time()
    print(f"Predicted Text : {text}")
    # print("Inferring Time :", toc - tic)
    text_file_path = os.path.join(output_dir, image_name + '.txt')
    with open(text_file_path, 'w+', encoding='utf-8') as file:
        file.write(text)
        file.close()

end_time = time.time()
print("Time taken is :", (end_time - start_time))
print("Got issue in below images:\n", faulty_images)


1/1017 Working on Al Jihad Fil Islam (Volume 02) SwaneUmri Hazrat Uma80_Line8.jpg


  hsp = math.sqrt(0.299 * (r * r) + 0.587 * (g * g) + 0.114 * (b * b))


Predicted Text : فر و ر فر ا ند جو بد و سمو ر ز و بی کے ر ئیس تھے مطیو گئچنا نچہ ا ظہد خلو ص کے لیے 

2/1017 Working on Al Jihad Fil Islam (Volume 02) SwaneUmri Hazrat Uma458_Line8.jpg
Predicted Text : ہلہ ا س شخص کا بھلا کر ے جو میر ے عیب میر ے پا س تحفے ہیں 

3/1017 Working on Al Jihad Fil Islam (Volume 02) SwaneUmri Hazrat Uma168_Line20.jpg
Predicted Text : تھچنا نچہ سا و ر د د شیر تو ا صطحخر سبد ی بد ی فتہو گئے لیکن حضر ت عمر ؓ کی خیر خلفت 

4/1017 Working on Al Jihad Fil Islam (Volume 02) SwaneUmri Hazrat Uma346_Line11.jpg
Predicted Text : جب نکللے گئے تو حضر ت عمر نے ا یف کا ر شخص کو بھیجا کہ ا نکممین و ر با غیں کی قیت ک 

5/1017 Working on Al Jihad Fil Islam (Volume 02) SwaneUmri Hazrat Uma296_Line6.jpg
Predicted Text : یعنی کل فو ج جس کی تعد ز کے قر یب تھی صفو ں میں تقسیم ہو کر حضر ت خلد ؓ کی 

6/1017 Working on Al Jihad Fil Islam (Volume 02) SwaneUmri Hazrat Uma125_Line15.jpg
Predicted Text : ا س و ا قعہ کا قد ر ا ہو ا کہ و ر و تے جتے تھے و ر جو ش کے سا تھ کہتے جتے تھے کہ ملہ 