In [67]:
import numpy as np
import cv2
from PIL import Image
from model.encoder.region_detection import RegionDetection

class ColorExtractor(RegionDetection):
  '''
  Extracts color in the given image contour
  '''

  def __init__(self, image_path, image_name):
    super().__init__(image_path, image_name)
    self.image_path = image_path
    self.color_legend = self.detect_color_legend()

  def extract_legend_color(self):
    image = Image.open(self.image_path).convert('RGB')
    image_array = np.array(image)

    x, y, width, height = cv2.boundingRect(self.color_legend)
    legend_roi = image_array[y:y+height, x:x+width]
    flattened_colors = [color for row in legend_roi for color in row if not np.array_equal(color, [0, 0, 0])]

    unique_colors = []
    seen = set()

    for color in flattened_colors:
      color_tuple = tuple(color)

      if color_tuple not in seen:
        seen.add(color_tuple)
        unique_colors.append(color)

    return unique_colors
  
  def extract_grid_color(self):
    # Get the bounding box of the largest contour
    x, y, w, h = cv2.boundingRect(self.largest_contour)

    # Crop the region of interest from the original image using the bounding box
    cropped_image = self.image[y:y+h, x:x+w]

    num_regions_y = self.yaxis_len
    num_regions_x = self.xaxis_len

    # Calculate the new dimensions that are divisible by the specified number of regions
    new_height = (h // num_regions_y) * num_regions_y
    new_width = (w // num_regions_x) * num_regions_x

    # Resize the cropped image to the new dimensions while maintaining the aspect ratio
    resized_image = cv2.resize(cropped_image, (new_width, new_height))
    resized_image_rgb = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)

    # Calculate the width and height of each smaller region
    region_height = new_height // num_regions_y
    region_width = new_width // num_regions_x

    dominant_colors = []

    # Extract and display each smaller region
    for i in range(num_regions_y):
      for j in range(num_regions_x):
          
        start_y = i * region_height
        end_y = start_y + region_height
        start_x = j * region_width
        end_x = start_x + region_width
        smaller_region = resized_image_rgb[start_y:end_y, start_x:end_x]

        # Convert the smaller region to a 2D array of pixels
        pixels = smaller_region.reshape((-1, 3))
        pixels = np.float32(pixels)

        # Define criteria and apply K-means clustering
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
        k = 1
        _, labels, centers = cv2.kmeans(pixels, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

        # Get the dominant color
        dominant_color = centers[0].astype(int).tolist()
        dominant_colors.append(dominant_color)

    return dominant_colors

In [66]:

class ColorMapping(ColorExtractor):

  def __init__(self, text, image_path, image_name):
    super().__init__(image_path, image_name)
    self.title = text.format_title()
    self.yaxis_labels = text.clean_yaxis_label()
    self.xaxis_labels = text.clean_xaxis_label()
    self.legend_values = text.clean_legend_values()
    self.yaxis_len = len(self.yaxis_labels)
    self.xaxis_len = len(self.xaxis_labels)
    self.colors = self.extract_legend_color()
    self.grid_colors = self.extract_grid_color()


  def map_legend_values(self):
    title = self.legend_values[0]
    legend_dict = {"title": title}
    legend_dict.update({tuple(self.colors[i]): self.legend_values[i + 1] for i in range(len(self.colors))})
    return legend_dict

  def color_distance(self, color, legend_color):
    return np.linalg.norm(np.array(color) - np.array(legend_color))

  def find_closest_color(self, color, legend_dict):
    closest_color = None
    min_distance = float('inf')

    for legend_color in legend_dict.keys():
      if legend_color == "title":
        continue
      distance = self.color_distance(color, legend_color)
      if distance < min_distance:
        min_distance = distance
        closest_color = legend_color
    return closest_color

  def map_cell_values(self):
    legend_dict = self.map_legend_values()
    mapped_legend_values = []

    for color in self.grid_colors:

      color_tuple = tuple(color)  # Convert array to tuple for dictionary key
      closest_color = self.find_closest_color(color_tuple, legend_dict)

      if closest_color is not None:
        # mapped_legend_values.append([closest_color, legend_dict[closest_color]])
        mapped_legend_values.append(legend_dict[closest_color])
      else:
        mapped_legend_values.append((color_tuple, None))  # Handle case where no close color is found
    
    print(mapped_legend_values)
    return mapped_legend_values

In [68]:
class TextFormatter:
    
  def __init__(self, text):
    self.text = text

  def format_title(self):
    title = self.text.extract_title()
    title = title.replace('\n', ' ')
    return title
  
  def clean_yaxis_label(self):
    
    yaxis_labels = self.text.extract_yaxis_labels()
    array_yaxis = yaxis_labels.splitlines()
    array_yaxis = [label for label in array_yaxis if label]
    
    return array_yaxis

  def clean_xaxis_label(self):

    xaxis_labels = self.text.extract_xaxis_labels()
    array_xaxis = xaxis_labels.splitlines()
    array_xaxis = [label for label in array_xaxis if label]
    
    return array_xaxis

  def clean_legend_values(self):

    legend_values = self.text.extract_legend_values()
    array_legend_values = legend_values.splitlines()
    array_legend_values = [value for value in array_legend_values if value]

    return array_legend_values


In [69]:
import pytesseract

class TextExtraction(RegionDetection):
  '''
  Extracts the text in the given region of interest.
  '''

  def __init__(self, image_path, image_name):
    super().__init__(image_path, image_name)
    self.image_path = image_path
    self.img = cv2.imread(image_path)
    self.title_roi = self.detect_title_roi()
    self.yaxis_roi = self.detect_yaxis_roi()
    self.xaxis_roi = self.detect_xaxis_roi()
    self.legend_roi = self.detect_legend_roi()

    if self.img is None:
      raise FileNotFoundError(f"Image not found or unable to read: {image_path}")

  def preprocess_image(self):

    gray = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)
    return binary

  def extract_title(self):

    title_text = pytesseract.image_to_string(self.title_roi)
    return title_text

  def extract_yaxis_labels(self):

    yaxis_text = pytesseract.image_to_string(self.yaxis_roi)
    return yaxis_text

  def extract_xaxis_labels(self):
    
    cropped_image = Image.fromarray(cv2.cvtColor(self.xaxis_roi, cv2.COLOR_BGR2RGB))
    rotated_image = cropped_image.rotate(-90, expand=True)
    
    xaxis_text = pytesseract.image_to_string(rotated_image)
    return xaxis_text

  def remove_legend(self):

    contour = self.detect_color_legend()
    if contour is not None:
      cv2.drawContours(self.image, [contour], -1, (255, 255, 255), thickness=cv2.FILLED)
    else:
      print("No legend found in ", self.image_name)

  def extract_legend_values(self):

    self.remove_legend()
    preprocessed_image = self.preprocess_image()
    x, y, w, h = cv2.boundingRect(self.largest_contour)
    roi = preprocessed_image[y:y+h, x+w:]

    roi_image = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
    custom_config = r'--psm 6 -c preserve_interword_spaces=1 --oem 3'
    legend_text = pytesseract.image_to_string(roi_image, config=custom_config)

    return legend_text


In [70]:
from model.encoder.color_mapping import ColorMapping

class GridProcessor(ColorMapping):
  
  def __init__(self, text, image_path, image_name):
    super().__init__(text, image_path, image_name)
    self.title = text.format_title()
    self.yaxis_labels = text.clean_yaxis_label()
    self.xaxis_labels = text.clean_xaxis_label()
    self.legend_values = text.clean_legend_values()
    self.yaxis_len = len(self.yaxis_labels)
    self.xaxis_len = len(self.xaxis_labels)
    self.colors = self.extract_legend_color()
    self.grid_color = self.extract_grid_color()
    self.mapped_grid = self.map_cell_values()

  def create_grid_matrix(self):
    cell_matrix = []
    for j in range(self.yaxis_len):
      for i in range(self.xaxis_len):
        index = j * self.xaxis_len + i  # Calculate the index for 1D list
        mapped_value = self.mapped_grid[index]  # Access the 1D list element
        cell_matrix.append([self.yaxis_labels[j], self.xaxis_labels[i], mapped_value, self.title])
    return cell_matrix

  def flatten_list(self, nested_list):
    return [item for sublist in nested_list for item in sublist]

In [71]:
import cv2
import numpy as np
import matplotlib.pyplot as plt


class RegionDetection:
  '''
  Identifies the region of interest for the extraction of text
  '''

  def __init__(self, image_path, image_name):
    self.image_path = image_path
    self.image_name = image_name
    self.image = cv2.imread(self.image_path)
    self.contours = self.find_contours()
    self.image_np = np.array(self.image)
    self.largest_contour = self.detect_grid()

  def find_contours(self):

    gray = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    return contours

  def detect_grid(self):

    largest_contour = max(self.contours, key=cv2.contourArea)
    return largest_contour
  
  def detect_color_legend(self):
  
    # Calculate areas for all contours
    contours_with_areas = [(contour, cv2.contourArea(contour)) for contour in self.contours]
    
    # Sort contours by area in descending order
    sorted_contours = sorted(contours_with_areas, key=lambda x: x[1], reverse=True)
    
    # Return the second largest contour if it exists
    if len(sorted_contours) > 1:
      second_largest_contour = sorted_contours[1][0]
      return second_largest_contour

  # Displays the region of interest
  def draw_bounding_box(self, roi):

    # Convert the ROI to grayscale
    gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    
    # Calculate the bounding box coordinates
    x, y, w, h = cv2.boundingRect(gray_roi)
    
    # Debug: Print the coordinates
    print(f"Bounding Box Coordinates: x={x}, y={y}, w={w}, h={h}")
    
    # Draw the bounding box on the original image
    cv2.rectangle(self.image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    
    # Debug: Display the grayscale ROI
    plt.imshow(gray_roi, cmap='gray')
    plt.title("Grayscale ROI")
    plt.axis('off')
    plt.show()


  def detect_title_roi(self):
    
    if self.largest_contour is not None:
      x, y, w, h = cv2.boundingRect(self.largest_contour)
      top = max(0, y - h)
      title_roi = self.image_np[top:y, :]

      return title_roi
  
  def detect_yaxis_roi(self):

    if self.largest_contour is not None:
      x, y, w, h = cv2.boundingRect(self.largest_contour)
      left = max(0, x - w)
      yaxis_roi = self.image_np[y:y + h, left:x]

      return yaxis_roi

  def detect_xaxis_roi(self):

    if self.largest_contour is not None:
      x, y, w, h = cv2.boundingRect(self.largest_contour)
      bottom = min(self.image_np.shape[0], y + 2 * h)
      xaxis_roi = self.image_np[y + h:bottom, x:x + w]

      return xaxis_roi

  def detect_legend_roi(self):
    
    x, y, w, h = cv2.boundingRect(self.largest_contour)
    right = max(self.image.shape[1], x + 2 * w)
    legend_roi = self.image[y:y + h, x + w:right]

    return legend_roi


In [72]:
import os, sys
from model.preprocessor.data_preprocessing import DataPreprocessing, Vocabulary

In [76]:
class DataPreprocessing:
  '''
  Converts the annotation and extracted texts from images into tokens or sequence
  '''
  def __init__(self, csv_dir, img_dir):
    self.csv_dir = csv_dir
    self.img_dir = img_dir
    spacy.prefer_gpu()
    self.spacy_eng = spacy.load("en_core_web_sm")


  def read_csv(self, file_path):
    df = pd.read_csv(file_path)
    return df
  
  @property
  def preprocess_data(self):
    all_img_data, all_annotation_data = self.collect_data
    print(all_annotation_data)
    return all_img_data, all_annotation_data

  @property
  def collect_data(self): # Returns tokenized img data, and annotation data
    img_data = []
    annotation_data = []
    df = self.read_csv(self.csv_dir)

    for _, row in df.iloc[0:2].iterrows():
      image_name = row.iloc[0]
      image_path = os.path.join(self.img_dir, image_name)

      values = TextExtraction(image_path, image_name)
      clean_values = TextFormatter(values).clean_xaxis_label()
      matrix_values = GridProcessor(clean_values, image_path, image_name).create_grid_matrix()
      img_data.append(matrix_values)

    return img_data, annotation_data


In [79]:

class HeatmapEncoder:

  def __init__(self, input_dir):
    self.train_csv_path = ".\\data\\ground_truth\\train\\annotation.csv"
    self.train_img_dir = ".\\data\\images\\train"
    self.max_len = 0


  def run_main(self):
    
    data = DataPreprocessing(self.train_csv_path, self.train_img_dir).collect_data
    
    # Data Preprocessing
    # 

    # Training

    # Save every epoch or after the training?

    # Testing

if __name__ == "__main__":
  input_dir = ".\\data\\images\\train"
  HeatmapEncoder(input_dir).run_main()

AttributeError: 'list' object has no attribute 'format_title'

In [78]:
import cv2
import numpy as np
import matplotlib.pyplot as plt


class ROI:
  '''
  Identifies the region of interest for the extraction of text
  '''

  def __init__(self, image_path):
    self.image_path = image_path
    self.image = cv2.imread(self.image_path)
    self.contours = self.find_contours()
    self.image_np = np.array(self.image)
    self.largest_contour = self.detect_grid()

  def find_contours(self):

    gray = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    return contours

  def detect_grid(self):
    max_area = 0
    largest_contour = None

    for contour in self.contours:
      area = cv2.contourArea(contour)

      if area > max_area:
        max_area = area
        largest_contour = contour

    return largest_contour
  
  def detect_color_legend(self):
  
    for contour in self.contours:
      area = cv2.contourArea(contour)

      if 1000 < area < 10000:
        return contour

  # Display the cropped region of interest
  def draw_bounding_box(self, roi):

    gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    x, y, w, h = cv2.boundingRect(gray_roi)
    cv2.rectangle(self.image_np, (x, y), (x + w, y + h), (0, 255, 0), 2)
    plt.imshow(cv2.cvtColor(self.image_np, cv2.COLOR_BGR2RGB))
    plt.title("Bounding Box")
    plt.axis('off')
    plt.show()
  
  def detect_title_roi(self):
    
    if self.largest_contour is not None:
      x, y, w, h = cv2.boundingRect(self.largest_contour)
      top = max(0, y - h)
      title_roi = self.image_np[top:y, :]

      return title_roi
  
  def detect_yaxis_roi(self):

    if self.largest_contour is not None:
      x, y, w, h = cv2.boundingRect(self.largest_contour)
      left = max(0, x - w)
      yaxis_roi = self.image_np[y:y + h, left:x]

      return yaxis_roi

  def detect_xaxis_roi(self):

    if self.largest_contour is not None:
      x, y, w, h = cv2.boundingRect(self.largest_contour)
      bottom = min(self.image_np.shape[0], y + 2 * h)
      xaxis_roi = self.image_np[y + h:bottom, x:x + w]

      return xaxis_roi

  def detect_legend_roi(self):
    if self.largest_contour is not None:
      x, y, w, h = cv2.boundingRect(self.largest_contour)
      right = max(self.image_np.shape[1], x + 2 * w)
      legend_roi = self.image_np[y:y + h, x + w:right]
      self.draw_bounding_box(legend_roi)

      # Crop the image and return a copy without modifying the original
    return legend_roi