In [None]:
class WordSegmenter:
    """Segment a single text-line image into individual word images.

    The original note says the line images come from a file called
    ``Lines_Segmentation``; that file is not necessary to use this class.

    Pipeline (executed in this order by :meth:`run`):

    1. ``preprocess``    - convert the image to grayscale, then to an
       inverted binary image using Otsu's threshold.
    2. ``find_words``    - use morphological dilation to connect the letters
       within each word, then collect one bounding box per connected blob.
    3. ``extract_words`` - crop each bounding box out of the line image.

    Attributes:
        line_image: The input line image as given to the constructor.
        binary_line: Binary image produced by ``preprocess`` (``None`` until
            it has run).
        word_boxes: ``(x, y, w, h)`` boxes ordered right-to-left
            (descending ``x``).
        word_images: Crops of ``line_image``, in the same order as
            ``word_boxes``.
    """

    def __init__(self, line_image):
        """Store the line image and initialise empty results.

        Args:
            line_image: Image of one text line (array as used by OpenCV).
        """
        self.line_image = line_image
        self.binary_line = None
        self.word_boxes = []
        self.word_images = []

    def preprocess(self):
        """Build the inverted binary image and store it in ``binary_line``."""
        # A 2-D array is already single-channel; cvtColor(BGR2GRAY) would
        # reject it. getattr keeps non-array inputs (e.g. None) on the
        # original code path so they fail exactly as before.
        if getattr(self.line_image, "ndim", None) == 2:
            gray = self.line_image
        else:
            gray = cv2.cvtColor(self.line_image, cv2.COLOR_BGR2GRAY)

        # Threshold value 0 is ignored: THRESH_OTSU selects it automatically.
        # THRESH_BINARY_INV inverts the result relative to the input.
        _, binary = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
        )
        self.binary_line = binary

    def find_words(self, min_area=350, kernel_size=(20, 5)):
        """Locate word bounding boxes and store them in ``word_boxes``.

        Any boxes from a previous call are replaced, so calling this method
        (or :meth:`run`) repeatedly does not accumulate duplicates.

        Args:
            min_area: A box is kept only if ``w * h`` is strictly greater
                than this value.
            kernel_size: ``(width, height)`` of the rectangular dilation
                kernel. The default is much wider than tall so that
                dilation connects letters horizontally into words.
        """
        # Allow calling find_words() directly without an explicit preprocess().
        if self.binary_line is None:
            self.preprocess()

        # Dilate to connect letters into words.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        dilated = cv2.dilate(self.binary_line, kernel, iterations=1)

        # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
        # (image, contours, hierarchy) in 3.x; [-2] is the contours in both.
        contours = cv2.findContours(
            dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )[-2]

        boxes = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            if w * h > min_area:
                boxes.append((x, y, w, h))

        # Sort RIGHT to LEFT (descending x). A reversed sort is stable, so
        # boxes sharing the same x keep their relative contour order.
        boxes.sort(key=lambda box: box[0], reverse=True)
        self.word_boxes = boxes

    def extract_words(self):
        """Crop every box in ``word_boxes`` from ``line_image``.

        The crops replace any previous content of ``word_images`` and follow
        the order of ``word_boxes``.
        """
        self.word_images = [
            self.line_image[y:y + h, x:x + w]
            for (x, y, w, h) in self.word_boxes
        ]

    def run(self):
        """Run the full pipeline: preprocess, find_words, extract_words."""
        self.preprocess()
        self.find_words()
        self.extract_words()