In [1]:
import json,os, cv2
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
from util import Pipelines, VGG16Processor, ImageSorter
from tqdm import tqdm
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np

In [2]:
# Configuration for the classification-performance tests

# Paths used to read the test data and to store the results
TEST_DATA_PATH = Path("test_by_category")
CLASSIFICATION_PERFORMANCE_PATH = Path("results/")
CONFUSION_MATRICES_PATH = CLASSIFICATION_PERFORMANCE_PATH.joinpath('confusion_matrices')
RESULTS_FILE_NAME = CLASSIFICATION_PERFORMANCE_PATH.joinpath('new_results.json')

# Information about the VGG16 classification model under test
MODEL_TYPE, MODEL_NAME = 'Fine Tuned', "fine_tuned_text_and_no_text_are_diff_v2.hdf5"

# Accumulator that the test cells fill in before it is dumped to RESULTS_FILE_NAME
RESULTS = dict()

### Classification Performance With different Datasets

The following datasets all have images with and without text. They will be used to test the Not Fine Tuned and the Fine Tuned versions of the VGG16 models generated above to check how well they detect text in each of these categories:

- *ct_kidney_imgs*: computed tomography images of human kidneys 
- *digital_radiography_covid*: radiography images of human chests
- *magnetic_ressonance_brain*: magnetic resonance images of cancerous human brains
- *mammography*: human mammography images
- *ultrasound_breast*: ultrasound images of healthy and cancerous human breasts
- *our_imgs*: real-world images extracted with the application

In [None]:
# Evaluate the classifier on every category directory under TEST_DATA_PATH:
# compute accuracy / F1 / precision / recall, save one confusion-matrix image
# per category and dump all metrics to RESULTS_FILE_NAME as JSON.

sorted_imgs = []
vgg16_processor = VGG16Processor(MODEL_NAME, sorted_imgs)

# Create the output directories up front so that savefig() and open() below
# do not fail on a fresh checkout where they do not exist yet.
CONFUSION_MATRICES_PATH.mkdir(parents=True, exist_ok=True)
RESULTS_FILE_NAME.parent.mkdir(parents=True, exist_ok=True)

# Only sub-directories are treated as categories (stray files are skipped);
# sorting makes the processing order, and the key order in the JSON, stable.
categories = sorted(entry.name for entry in TEST_DATA_PATH.iterdir() if entry.is_dir())
by_ctg_results = {}

for ctg in categories:
    ctg_path = TEST_DATA_PATH / ctg
    vgg16_processor.tests_path = ctg_path

    # classify_from_dir() is indexed with the keys 'True Classes' and
    # 'Pred_classes'; both spellings are kept exactly as the helper returns them.
    ctg_results = vgg16_processor.classify_from_dir()
    true_classes = ctg_results['True Classes']
    pred_classes = ctg_results['Pred_classes']

    vgg_acc = accuracy_score(true_classes, pred_classes)
    f1 = f1_score(true_classes, pred_classes)
    precision = precision_score(true_classes, pred_classes)
    recall = recall_score(true_classes, pred_classes)

    cm = confusion_matrix(true_classes, pred_classes)
    # NOTE(review): the label order assumes the smaller class value means
    # "no text" and the larger one "with text" -- confirm against VGG16Processor.
    cm_display = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = ['No Text', 'With Text'])
    cm_display.plot()
    # Title the plot so that each saved image identifies its category.
    cm_display.ax_.set_title(str(ctg))
    # Save through the display's own figure instead of pyplot's "current
    # figure", then show and close it so figures do not pile up across iterations.
    cm_display.figure_.savefig(CONFUSION_MATRICES_PATH / (str(ctg) + '_results.png'))
    plt.show()
    plt.close(cm_display.figure_)

    ctg_results = {'Total Number of Images Tested': len(true_classes),
                   'Accuracy' : vgg_acc, 'Recall': recall,
                   'Precision' : precision, 'F1-Score': f1}

    by_ctg_results[ctg] = ctg_results

# Assigned once, after the loop, so the key exists even with no categories.
RESULTS['Results By Category'] = by_ctg_results

with open(RESULTS_FILE_NAME, 'w') as f:
    json.dump(RESULTS, f, indent = 4)

## Time Performance

The following tests compare the execution times of two pipelines run on the images in the transformed_png_simple directory:

- OCR-Only pipeline: every image in the directory is passed to pytesseract's image_to_string function
- Classification - OCR pipeline: every image is first classified as containing text or not, and only the images classified as containing text are passed to pytesseract's image_to_string function

The goal of this comparison is to determine whether the overhead added by the classification step is significantly smaller than the cost of submitting images without text to OCR, since most images do not contain text and therefore shouldn't be sent to the OCR pipeline.

In [3]:
# Configuration for the time-performance tests

# Directory holding the images submitted to the time tests
IMGS_DIR = Path('transformed_png_simple')

# Total number of images to submit to the pipelines (one per directory entry)
TOTAL_IMGS = len(os.listdir(IMGS_DIR))

# Number of times the TOTAL_IMGS images are submitted to each pipeline,
# so that their execution times can be averaged
N_TRIALS = 5

# Test results and important information about them.
# Note: RESULTS and RESULTS_FILE_NAME rebind the names already set in the
# classification configuration cell.
RESULTS = dict()

RESULTS_FILE_NAME = CLASSIFICATION_PERFORMANCE_PATH / f'{TOTAL_IMGS}_images_results.json'

In [4]:
# Time both pipelines N_TRIALS times on the same in-memory images, average the
# timings and write the summary to RESULTS_FILE_NAME as JSON.

img_sorter = ImageSorter()
sorted_img_paths = img_sorter.get_random_files_from_dir(IMGS_DIR, TOTAL_IMGS)

# cv2.imread returns None (it does not raise) when a file cannot be decoded,
# so unreadable entries are dropped before they reach the pipelines.
loaded_imgs = [cv2.imread(str(img_path)) for img_path in sorted_img_paths]
sorted_imgs = [img for img in loaded_imgs if img is not None]
n_unreadable = len(loaded_imgs) - len(sorted_imgs)
if n_unreadable:
    print(f'Skipped {n_unreadable} file(s) that could not be read as images')

print(f'Total Images submitted to the pipelines: {len(sorted_imgs)}')

pipelines = Pipelines()
vgg_ocr_times = np.empty(N_TRIALS, dtype='float64')
ocr_only_times = np.empty(N_TRIALS, dtype='float64')
# NOTE(review): vgg16_processor is not referenced again in this cell; it is
# kept in case constructing it has side effects -- confirm, then remove.
vgg16_processor = VGG16Processor(MODEL_NAME, sorted_imgs)

for i in range(N_TRIALS):
    # Each pipeline call's return value is stored as that trial's execution
    # time (presumably in seconds -- confirm in util.Pipelines).
    print("*"*20, "Pipeline: VGG + OCR", "*"*20)
    vgg_ocr_times[i] = pipelines.classification_ocr_pipeline(MODEL_NAME, sorted_imgs)
    print(f'Time of Pipeline: VGG + OCR: {vgg_ocr_times[i]}')
    print("*"*20, "Pipeline: OCR Only", "*"*20)
    ocr_only_times[i] = pipelines.ocr_only_pipeline(sorted_imgs)
    print(f'Time of Pipeline: OCR Only: {ocr_only_times[i]}')

# Plain Python floats keep the JSON output independent of NumPy scalar types.
classification_ocr_time = float(np.mean(vgg_ocr_times))
ocr_only_time = float(np.mean(ocr_only_times))
# Relative time saved by classifying first, as a fraction of the OCR-only time.
time_diff = (ocr_only_time - classification_ocr_time) / ocr_only_time

# Record the number of images actually submitted; this differs from TOTAL_IMGS
# (used in the results file name) only when some files could not be read.
RESULTS['total_number_of_imgs'] = len(sorted_imgs)
RESULTS['average_execution_times'] = {}
RESULTS['average_execution_times']['ocr_only_pipeline'] = ocr_only_time
RESULTS['average_execution_times']['classification_ocr_pipeline'] = classification_ocr_time
RESULTS['average_execution_times']['Enhancement'] = time_diff

with open(RESULTS_FILE_NAME, 'w') as f:
    json.dump(RESULTS, f, indent=4)

Total Images submitted to the pipelines: 2526
******************** Pipeline: VGG + OCR ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: VGG + OCR: 947.880334299989
******************** Pipeline: OCR Only ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: OCR Only: 1508.539592200017
******************** Pipeline: VGG + OCR ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: VGG + OCR: 643.2382193999947
******************** Pipeline: OCR Only ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: OCR Only: 1005.7863607999752
******************** Pipeline: VGG + OCR ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: VGG + OCR: 430.85406419995707
******************** Pipeline: OCR Only ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: OCR Only: 889.8955324999988
******************** Pipeline: VGG + OCR ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: VGG + OCR: 459.2686254000291
******************** Pipeline: OCR Only ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: OCR Only: 884.8380803000182
******************** Pipeline: VGG + OCR ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: VGG + OCR: 500.81660889997147
******************** Pipeline: OCR Only ********************


  median_value = (np.max(image) + np.min(image)) / 2


Time of Pipeline: OCR Only: 1378.1323963000323
