# EasyOCR custom model

## TextRecognitionDataGenerator

The training dataset does not have to be generated within XGEE.\
Instead, a dataset can be generated with the [TextRecognitionDataGenerator](https://github.com/Belval/TextRecognitionDataGenerator), using custom parameters that mirror XGEE's font style.\
This dataset can then be used to train a custom model by following the [deep-text-recognition-benchmark repository](https://github.com/clovaai/deep-text-recognition-benchmark).\
The model has to be fully convolutional to work with a large variety of text sizes and, potentially, orientations.\
Two additional files, describing the recognition network architecture and the model configuration, are needed.

In [None]:
def _fallback_with_easyocr(self):
    """Fallback OCR engine: EasyOCR.

    Runs text detection on ``self.image_wrapper.bgr_image`` in two passes
    (the upscaled image as-is and the same image rotated by 90 degrees),
    merges both result lists, converts the boxes back to the coordinates of
    the unpadded, not-upscaled image and stores them as a
    ``TextBoxCollection`` named ``'text'`` in ``self.data_storage``. A debug
    image showing the detected boxes is handed to ``ImageLogger``.
    """

    # NOTE(review): not referenced directly in this method; presumably kept so
    # a missing EasyOCR installation fails early - confirm.
    import easyocr

    logging.info('\n\nNow detecting text in the diagram.')
    target_img = self.image_wrapper.bgr_image
    # A square image can be rotated by 90 degrees without cropping.
    target_img = TextDetector.fallback_pad_image_to_square(target_img)
    easyocr_img = target_img.copy()
    scale_factor = 2
    easyocr_img = cv2.resize(easyocr_img, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
    easyocr_img = cv2.blur(easyocr_img, (5, 5))

    logging.getLogger('easyocr').setLevel(logging.CRITICAL)
    reader = TextDetector.fallback_create_ocr_reader(['en'])

    result = TextDetector.fallback_extract_text_from_image(reader, easyocr_img)
    logging.info(f'Number of words found in the original image: {len(result)}')
    short_words_count = len([detection for detection in result if len(detection[1]) < 5])
    logging.info(f'Number of words smaller than 5 letters: {short_words_count}')

    # Rotate the image by 90 degrees counterclockwise and search for text
    rotated_img = TextDetector.fallback_rotate_image(easyocr_img, 90)

    rotated_result = TextDetector.fallback_extract_text_from_image(reader, rotated_img)
    logging.info(f'Number of words found in the rotated image: {len(rotated_result)}')
    short_words_count = len([detection for detection in rotated_result if len(detection[1]) < 5])
    logging.info(f'Number of words smaller than 5 letters: {short_words_count}')

    # Adjust the coordinates of the rotated text and combine results
    result = TextDetector.fallback_adjust_and_combine_results(result, rotated_result, easyocr_img.shape, 90)
    # remove padding from the bounding boxes
    result = TextDetector.fallback_adjust_bounding_boxes(result)

    # Revert the coordinates of the found text to the coordinates in the not-upscaled image
    for detection in result:
        for point in detection[0]:
            point[0] = int(point[0] / scale_factor)
            point[1] = int(point[1] / scale_factor)

    # Filter out short texts from the results
    result = TextDetector.fallback_filter_short_texts(result)

    # NOTE: filtering of forbidden single characters
    # (fallback_filter_forbidden_chars) is currently not applied here.

    result = TextDetector.fallback_adjust_text_box_positions(result, self.image_wrapper.bgr_image.shape)

    # Convert text box tuples to instances of TextBox.
    # The box is derived from the min/max of all corners instead of assuming
    # that corner 0 is top-left and corner 2 is bottom-right: detections from
    # the rotated pass arrive with a different corner order once rotated back,
    # which would otherwise produce negative widths.
    text_boxes_typed = []
    for detection in result:
        corners = detection[0]
        xs = [point[0] for point in corners]
        ys = [point[1] for point in corners]
        left, top = min(xs), min(ys)
        text_boxes_typed.append(
            TextBox(
                x=left,
                y=top,
                width=max(xs) - left,
                height=max(ys) - top,
                text=detection[1],
                confidence=detection[2],
            )
        )

    # Store the merged boxes
    self.data_storage.store_geometric_shape_collection(TextBoxCollection('text', text_boxes_typed))

    # Log debug images showing detected text
    image_with_text_boxes = self.image_wrapper.bgr_image.copy()
    for box in text_boxes_typed:
        cv2.rectangle(image_with_text_boxes, (box.x, box.y), (box.x + box.width, box.y + box.height), (0, 255, 0), 2)
        cv2.putText(image_with_text_boxes, box.text, (box.x, box.y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    ImageLogger.log_image(image_with_text_boxes, 'DEBUG', 'image_with_text_boxes.png')

def fallback_pad_image_to_square(image):
    """Return *image* extended to a square by adding constant borders.

    An image that is already square is returned unchanged (same object).
    Otherwise the shorter dimension is padded on both sides with the value
    ``[255, 255, 255]``; when the missing amount is odd, the extra pixel goes
    to the bottom/right border.
    """
    height, width = image.shape[:2]
    if height == width:
        return image

    side = max(height, width)
    missing_rows = side - height
    missing_cols = side - width
    return cv2.copyMakeBorder(
        image,
        top=missing_rows // 2,
        bottom=(missing_rows + 1) // 2,
        left=missing_cols // 2,
        right=(missing_cols + 1) // 2,
        borderType=cv2.BORDER_CONSTANT,
        value=[255, 255, 255],
    )

def fallback_create_ocr_reader(languages, gpu=False):
    """Create an EasyOCR reader object.

    Args:
        languages: List of language codes handed to ``easyocr.Reader``,
            e.g. ``['en']``.
        gpu: Passed through to ``easyocr.Reader``. Defaults to ``False``
            (run on the CPU), which is the value that used to be hard-coded.

    Returns:
        The newly created ``easyocr.Reader``.
    """
    # Imported locally so this helper works even when ``easyocr`` is not
    # imported at module level (elsewhere it is only imported inside a method).
    import easyocr

    return easyocr.Reader(languages, gpu=gpu)

def fallback_extract_text_from_image(reader, image):
    """Run ``reader.readtext`` on *image* with ``detail=1`` and return its result."""
    detections = reader.readtext(image, detail=1)
    return detections

def fallback_adjust_bounding_boxes(results, padding_top=9, padding_bottom=6, padding_left=8, padding_right=8):
    """Adjust bounding boxes to remove padding.

    Every box is shrunk in place: corners on the left half move right by
    ``padding_left``, corners on the right half move left by
    ``padding_right``, and likewise for top/bottom.

    Which side a corner belongs to is decided against the centre of the box,
    computed before any corner is modified. This makes the result independent
    of the order in which the corners are listed (boxes that were rotated
    back from another orientation do not start with the top-left corner) and
    of the in-place updates performed while iterating.

    Args:
        results: Iterable of detections whose first element is a list of
            mutable ``[x, y]`` corner points.
        padding_top: Amount added to the y value of top corners.
        padding_bottom: Amount subtracted from the y value of bottom corners.
        padding_left: Amount added to the x value of left corners.
        padding_right: Amount subtracted from the x value of right corners.

    Returns:
        The same ``results`` object, with the corner points modified in place.
    """
    for detection in results:
        corners = detection[0]
        if len(corners) == 0:
            continue

        xs = [point[0] for point in corners]
        ys = [point[1] for point in corners]
        center_x = (min(xs) + max(xs)) / 2
        center_y = (min(ys) + max(ys)) / 2

        for point in corners:
            if point[0] < center_x:  # Left side
                point[0] += padding_left
            else:  # Right side
                point[0] -= padding_right
            if point[1] < center_y:  # Top side
                point[1] += padding_top
            else:  # Bottom side
                point[1] -= padding_bottom
    return results

def fallback_rotate_image(image, angle):
    """Rotate *image* by *angle* degrees around its centre.

    The output keeps the width and height of the input and no scaling is
    applied. The angle is passed unchanged to ``cv2.getRotationMatrix2D``.
    """
    height, width = image.shape[:2]
    rotation = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
    return cv2.warpAffine(image, rotation, (width, height))

def fallback_rotate_point(point, angle, center):
    """Rotate a point around a center by a given angle.

    Args:
        point: ``(x, y)`` coordinates of the point to rotate.
        angle: Rotation angle in degrees. With the y axis pointing down
            (image coordinates) a positive angle turns the point
            counter-clockwise on screen.
        center: ``(x, y)`` coordinates of the rotation centre.

    Returns:
        ``[x, y]`` of the rotated point, rounded to the nearest integers.
    """
    import math

    # The sign is flipped because the formula below turns clockwise on screen
    # for positive radians when the y axis points down.
    angle_rad = math.radians(-angle)
    cos_a = math.cos(angle_rad)
    sin_a = math.sin(angle_rad)

    ox, oy = center
    px, py = point
    dx = px - ox
    dy = py - oy

    qx = ox + dx * cos_a - dy * sin_a
    qy = oy + dx * sin_a + dy * cos_a
    # Round instead of truncating: floating point error can leave a result
    # such as 99.9999999 that must map to 100, not 99.
    return [int(round(qx)), int(round(qy))]

def fallback_adjust_and_combine_results(original_result, rotated_result, image_shape, angle):
    """Rotate the boxes of *rotated_result* back and append them to *original_result*.

    Every corner of every rotated detection is turned by ``-angle`` around
    the centre of an image of shape *image_shape*. The resulting
    ``[bbox, text, confidence]`` entries are appended to *original_result*,
    which is modified in place and returned.
    """
    height, width = image_shape[:2]
    pivot = (width / 2, height / 2)
    original_result.extend(
        [
            [TextDetector.fallback_rotate_point(corner, -angle, pivot) for corner in entry[0]],
            entry[1],
            entry[2],
        ]
        for entry in rotated_result
    )
    return original_result

def fallback_filter_short_texts(detections, min_length=5): # min_length>=3, otherwise false positives
    """Return the detections whose text has at least *min_length* characters.

    The text of a detection is its second element. A new list is returned;
    *detections* itself is left untouched.
    """
    kept = []
    for entry in detections:
        if len(entry[1]) < min_length:
            continue
        kept.append(entry)
    return kept

def fallback_filter_forbidden_chars(detections, forbidden_chars):
    """Drop detections whose text is a single character contained in *forbidden_chars*.

    Detections with longer (or empty) text are always kept. A new list is
    returned.
    """
    kept = []
    for entry in detections:
        text = entry[1]
        if len(text) != 1 or text not in forbidden_chars:
            kept.append(entry)
    return kept

def fallback_adjust_text_box_positions(result, image_shape):
    """Shift all box corners back by the top/left padding of the squared image.

    The padding is derived from *image_shape* (the shape of the unpadded
    image): half of the difference between the longer side and the
    width/height, rounded down. Corner points are modified in place and
    *result* is returned.
    """
    height, width = image_shape[:2]
    side = max(height, width)
    # Index 0 is the x offset (left padding), index 1 the y offset (top padding).
    offsets = ((side - width) // 2, (side - height) // 2)
    for entry in result:
        for corner in entry[0]:
            for axis, offset in enumerate(offsets):
                corner[axis] -= offset
    return result
    