In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
class TLS:
    """Text-to-line segmentation for a single page image.

    Typical use is ``TLS(path).run()`` followed by reading ``line_images``
    or calling ``show_lines()``.

    Methods:
        preprocess: convert the image to grayscale, then to an inverted
            binary image.
        segment_lines: find horizontal bands of text from the horizontal
            projection of the binary image, keeping only bands that meet
            the minimum height and pixel-count thresholds.
        extract_lines: crop each segmented band out of the original image.
        save_lines: write the cropped line images to an output folder.
        show_lines: display the cropped line images with matplotlib.
        run: execute the whole pipeline in order.
    """

    def __init__(self, image_path):
        """Load the image at *image_path* and initialise empty results."""
        self.image_path = image_path
        # cv2.imread reports an unreadable file by returning None rather
        # than raising; preprocess() turns that into a clear error.
        self.original_image = cv2.imread(image_path)
        self.gray_image = None
        self.binary_image = None
        self.lines = []        # (start_row, end_row) pairs, end exclusive
        self.line_images = []  # crops of original_image, one per line

    def preprocess(self):
        """Build ``gray_image`` and ``binary_image`` from the loaded image.

        Raises:
            ValueError: if the image could not be loaded in ``__init__``.
        """
        if self.original_image is None:
            raise ValueError(
                f"Could not read image from {self.image_path!r}"
            )

        self.gray_image = cv2.cvtColor(self.original_image, cv2.COLOR_BGR2GRAY)

        # Otsu chooses the threshold automatically (the 0 is a placeholder);
        # the inverted mode makes dark pixels non-zero, so blank rows sum
        # to zero in segment_lines().
        _, self.binary_image = cv2.threshold(
            self.gray_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
        )

    def _keep_line(self, start, end, min_line_height, min_black_pixels):
        """Return True if rows ``start:end`` pass both size filters."""
        if end - start < min_line_height:
            return False
        line_region = self.binary_image[start:end, :]
        # Non-zero pixels of the inverted binary image are the "black"
        # (ink) pixels of the original.
        black_pixel_count = np.count_nonzero(line_region > 0)
        return black_pixel_count > min_black_pixels

    def segment_lines(self, min_line_height=10, min_black_pixels=100):
        """Detect text lines and store them in ``self.lines``.

        A line is a maximal run of consecutive rows whose horizontal
        projection is non-zero. Previous results are discarded, so the
        method can be called repeatedly (e.g. with different thresholds).

        Args:
            min_line_height: minimum number of rows a run must span.
            min_black_pixels: a run is kept only if it contains strictly
                more non-zero pixels than this.
        """
        horizontal_sum = np.sum(self.binary_image, axis=1)

        self.lines = []
        in_line = False
        start = 0
        for i, value in enumerate(horizontal_sum):
            if value > 0 and not in_line:
                start = i
                in_line = True
            elif value == 0 and in_line:
                in_line = False
                if self._keep_line(start, i, min_line_height, min_black_pixels):
                    self.lines.append((start, i))

        # A line that runs to the bottom edge never meets a blank row, so
        # it has to be closed explicitly once the scan is over.
        if in_line:
            end = len(horizontal_sum)
            if self._keep_line(start, end, min_line_height, min_black_pixels):
                self.lines.append((start, end))

    def extract_lines(self):
        """Crop every range in ``self.lines`` from the original image.

        Replaces any previously extracted ``line_images``.
        """
        self.line_images = [
            self.original_image[start:end, :] for start, end in self.lines
        ]

    def save_lines(self, output_folder="lines"):
        """Write each line image to ``<output_folder>/line_<idx>.png``.

        The folder is created if it does not exist.
        """
        import os

        os.makedirs(output_folder, exist_ok=True)

        for idx, line_img in enumerate(self.line_images):
            path = os.path.join(output_folder, f"line_{idx}.png")
            cv2.imwrite(path, line_img)

    def run(self, save=False, *, min_line_height=10, min_black_pixels=100,
            output_folder="lines"):
        """Run preprocess, segmentation and extraction in order.

        Args:
            save: when true, also write the line images to disk.
            min_line_height: forwarded to ``segment_lines``.
            min_black_pixels: forwarded to ``segment_lines``.
            output_folder: forwarded to ``save_lines`` when *save* is true.
        """
        self.preprocess()
        self.segment_lines(
            min_line_height=min_line_height,
            min_black_pixels=min_black_pixels,
        )
        self.extract_lines()
        if save:
            self.save_lines(output_folder)

    def show_lines(self):
        """Display every extracted line image in its own matplotlib figure."""
        for idx, line_img in enumerate(self.line_images):
            print("------------------------------------------------------------------")
            print("Line:")
            # Convert BGR to RGB because OpenCV loads images in BGR format
            rgb_img = cv2.cvtColor(line_img, cv2.COLOR_BGR2RGB)

            plt.figure(figsize=(10, 2))
            plt.imshow(rgb_img)
            plt.title(f'Line {idx}')
            plt.axis('off')
            plt.show()


