<a href="https://colab.research.google.com/github/cyberneel/DTE/blob/main/DTE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Design Text Extractor (DTE)

**What is it:** This notebook is a project that makes it easier to get the specs for machines from their designs.

In [None]:
import google

# Installing ppytorch & its dependencies
!pip install torch torchaudio torchvision torchtext torchdata
google.colab.output.clear()

In [None]:
# Installing EasyOCR (for text extracting)
!pip install easyocr
# Clear the pip install log from this cell's output.
# Relies on the name `google` having been imported by an earlier cell.
google.colab.output.clear()

In [None]:
# Dependency Imports
import cv2
import easyocr
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# Set the image path with this variable

In [None]:
# Path of the single-machine design image used by the OCR, preview and grouping cells.
IMAGE_PATH = "/MachineDesignBox.jpeg"

In [None]:
# Build the EasyOCR reader once; it is reused by the detection cell.
# Languages: English only. gpu=True asks EasyOCR to run on the GPU.
ocr = easyocr.Reader(lang_list=['en'], gpu=True)

In [None]:
# Run the basic detection on the image at IMAGE_PATH.
# Each entry of `result` is unpacked further down as (bounding box, text, probability).
result = ocr.readtext(image=IMAGE_PATH)
# Bare expression so the notebook renders the raw detections
result

In [None]:
# Preview the texts: draw every OCR detection (box + recognised text) on the image.

import easyocr
import cv2
from matplotlib import pyplot as plt

image = cv2.imread(IMAGE_PATH)
# cv2.imread reports a missing/unreadable file by returning None instead of
# raising, so fail here with a clear message rather than inside the drawing calls.
if image is None:
    raise FileNotFoundError(f"Could not read image at {IMAGE_PATH!r}")

# Draw bounding boxes
for (bbox, text, prob) in result:
    # bbox holds four corner points. Take the extremes over all of them so that
    # boxes whose corners are not axis-aligned are still fully enclosed.
    xs = [int(point[0]) for point in bbox]
    ys = [int(point[1]) for point in bbox]
    top_left = (min(xs), min(ys))
    bottom_right = (max(xs), max(ys))
    cv2.rectangle(image, top_left, bottom_right, (0, 0, 0), 2)
    # Label sits 10 px above the top-left corner of the box
    cv2.putText(image, text, (top_left[0], top_left[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)

# Convert from BGR (OpenCV channel order) to RGB (matplotlib channel order)
imageN = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Display the image with the boundary boxes
plt.figure(figsize=(10, 10))
plt.imshow(imageN)
plt.axis('off')
plt.show()


# Grouping

Here we try to group the text

In [None]:
# More dependencies: scikit-learn (source of the DBSCAN import used for grouping)
# and the headless OpenCV build
!pip install scikit-learn opencv-python-headless
# Clear the pip install log from this cell's output
google.colab.output.clear()

In [None]:
# Group OCR detections that lie close together, using DBSCAN on the box centers.
from sklearn.cluster import DBSCAN

# DBSCAN `eps`: maximum distance between two box centers (in the units of the
# OCR box coordinates) for them to count as neighbours. Tune for image scale.
CLUSTER_EPS_PX = 100

# Fresh, un-annotated copy of the image for the grouped preview
image = cv2.imread(IMAGE_PATH)
# cv2.imread returns None (no exception) when the file cannot be read
if image is None:
    raise FileNotFoundError(f"Could not read image at {IMAGE_PATH!r}")

# Extract the centers of bounding boxes
coordinates = []
for (boundBox, text, prob) in result:
    (top_left, top_right, bottom_right, bottom_left) = boundBox
    center_x = (top_left[0] + bottom_right[0]) / 2
    center_y = (top_left[1] + bottom_right[1]) / 2
    coordinates.append([center_x, center_y])

# Convert to an (n, 2) numpy array; the reshape keeps the array 2-D even when
# there are no detections at all.
coordinates = np.array(coordinates, dtype=float).reshape(-1, 2)

# Group the text by the clusters: label -> list of (bbox, text, prob)
clustered_text = {}
if len(coordinates) == 0:
    # DBSCAN.fit raises on an empty sample set; with nothing detected there is
    # nothing to group, so leave `clustered_text` empty.
    clustering = None
else:
    # min_samples=1 makes every detection a core point, so none is left as noise.
    clustering = DBSCAN(eps=CLUSTER_EPS_PX, min_samples=1).fit(coordinates)
    for label, (bbox, text, prob) in zip(clustering.labels_, result):
        clustered_text.setdefault(label, []).append((bbox, text, prob))


In [None]:
# New preview of clustered text: one rectangle per cluster, plus the cluster's
# texts joined into a single string.

# list to store the grouped text as objects
grouped_text_objects = []

# Draw on a copy so this cell never modifies the `image` loaded by another cell
# and can be re-run without side effects on it.
annotated = image.copy()

# Draw bounding boxes of groups
for cluster in clustered_text.values():
    all_x = []
    all_y = []
    texts = []
    for (bbox, text, prob) in cluster:
        # Collect all four corners of every box so the cluster rectangle also
        # encloses boxes whose corners are not axis-aligned.
        all_x.extend(int(point[0]) for point in bbox)
        all_y.extend(int(point[1]) for point in bbox)
        texts.append(text)

    # Store the combined text of the cluster (space separated, in detection order)
    grouped_text_objects.append(' '.join(texts).strip())

    # Draw box around cluster
    min_x, max_x = min(all_x), max(all_x)
    min_y, max_y = min(all_y), max(all_y)
    cv2.rectangle(annotated, (min_x, min_y), (max_x, max_y), (0, 0, 255), 2)

# Convert from BGR to RGB
imageG = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)

# Display the image
plt.figure(figsize=(10, 10))
plt.imshow(imageG)
plt.axis('off')
plt.show()

print(grouped_text_objects)

# NLP Magic

Try to use Natural Language Processing (NLP) to structure results

In [None]:
# Install the Hugging Face stack used by the text-generation cells below
!pip install transformers safetensors huggingface_hub accelerate flash_attn
# Clear the pip install log from this cell's output
google.colab.output.clear()

In [None]:
import torch
from transformers import pipeline

# Build the text-generation pipeline around the Dolly v2 3B model.
generate_text = pipeline(
    model="databricks/dolly-v2-3b",
    torch_dtype=torch.bfloat16,
    # Permit the custom code shipped in the model repository to run
    trust_remote_code=True,
    # Let the library decide where to place the model weights
    device_map="auto",
)

In [None]:
# Instruction given to the model, followed by the raw OCR text of one group
instruction = "Here is the raw text, turn this into a json with all the appropriate fields & fix any typos: "
# The third grouped text block (index 2) is the one sent to the model
prompt = instruction + grouped_text_objects[2]

res = generate_text(prompt, max_length=1000)

# The pipeline returns a list of generations; show the text of the first one
first_generation = res[0]
print(first_generation["generated_text"])

# Larger Samples

Now let's try to make the tool detect the desired specs of machines from a larger diagram with multiple machines.

# Masking

In this section we will first crop the design document to the regions we need

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Crop the design document down to the regions whose color matches the target
# box color, and show each sufficiently large region.

# Path of the design document to crop. (An earlier variant of this cell took the
# first file returned by an interactive `files.upload()` instead.)
image_path = '/IMG_2132.jpeg'
image = cv2.imread(image_path)
# cv2.imread returns None (no exception) when the file cannot be read; stop here
# with a clear message instead of failing inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image at {image_path!r}")
hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# Define the RGB color and convert to HSV
target_color = np.uint8([[[242, 243, 245]]]) # Set to the color of the boxes needed
hsv_target_color = cv2.cvtColor(target_color, cv2.COLOR_RGB2HSV)[0][0]

# Define the HSV range for the target color.
# The tolerance array uses numpy's default integer type, so the arithmetic is
# promoted away from uint8 and cannot wrap around.
hsv_tolerance = np.array([50, 20, 20]) # change these + & - for sensitivity
lower_color = hsv_target_color - hsv_tolerance
upper_color = hsv_target_color + hsv_tolerance

# Create a mask for the color
mask = cv2.inRange(hsv_image, lower_color, upper_color)

# Find contours in the mask (outermost contours only)
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Define the minimum contour area
min_contour_area = 200000  # Adjust this value as needed

# Crops that passed the area filter, kept (in BGR) so later steps can reuse them
cropped_images = []

# Iterate through the contours, filter by area, crop, and display the smaller images
for idx, contour in enumerate(contours):
    area = cv2.contourArea(contour)  # computed once, used for filter and report
    if area < min_contour_area:
        continue

    x, y, w, h = cv2.boundingRect(contour)
    cropped_image = image[y:y+h, x:x+w]
    cropped_images.append(cropped_image)

    # Convert BGR to RGB for displaying with matplotlib
    cropped_image_rgb = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)

    # Display the cropped image; the number is the contour's position in `contours`
    plt.figure()
    plt.imshow(cropped_image_rgb)
    plt.title(f'Cropped Image {idx+1}')
    plt.axis('off')
    plt.show()
    print(area)

if cropped_images:
    print("Cropped images have been displayed.")
else:
    # Nothing passed the filter: say so instead of claiming images were shown
    print("No regions matched; try lowering min_contour_area or widening the HSV range.")
