# Instructions
This file lets you test and debug the OCR script on a single image, so that parameters can be adjusted in a separate environment without changing the original ipynb file.

It also provides further visuals for the image, cropping out the regions of text it detects on the image.

To use:
1. Run the "!pip install easyocr" command in the first cell to set up the environment.
2. In files, upload the image(s) that you want to run the code on.
3. Set the IMAGE_PATH equal to the name of the file.
4. Run the OCR Script and the output will be printed below the code cell.

In [None]:
!pip install easyocr

In [None]:
# Path of the image to analyse; the __main__ block passes it to cv2.imread
# and to extract_text (where it is used in the crop figure titles).
IMAGE_PATH = ""  # Enter path to your image here

This is where the gamma, alpha, and beta values can be adjusted to test different combinations on the image.

In [None]:
# Tuning values read as module-level globals by preprocess_image().
gamma = 0.5  # exponent applied to the equalised grayscale image after scaling it to [0, 1]
alpha = 1  # handed to cv2.convertScaleAbs (presumably the contrast scale factor - confirm)
beta = 1  # handed to cv2.convertScaleAbs (presumably the brightness offset - confirm)

# OCR Testing Script

In [None]:
import cv2
import easyocr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import requests
from difflib import get_close_matches
from datetime import datetime
from google.colab.patches import cv2_imshow

def preprocess_image(image):
    """Return a contrast-adjusted grayscale version of a BGR image.

    The image is converted to grayscale, histogram-equalised, gamma-corrected
    with the module-level ``gamma`` and finally passed through
    ``cv2.convertScaleAbs`` using the module-level ``alpha`` and ``beta``.

    Args:
        image: Array accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``
            (i.e. a non-empty BGR image).

    Returns:
        The single-channel result of ``cv2.convertScaleAbs``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_equalized = cv2.equalizeHist(gray)

    # Scale to [0, 1] before applying the power curve, then back to 8-bit.
    gamma_corrected = cv2.pow(gray_equalized / 255.0, gamma)
    gamma_corrected = (gamma_corrected * 255).astype(np.uint8)

    # alpha/beta must be passed by keyword: the second positional parameter of
    # convertScaleAbs is the output array (dst), so passing them positionally
    # would shift alpha into dst and beta into alpha.
    toned_down_img = cv2.convertScaleAbs(gamma_corrected, alpha=alpha, beta=beta)

    return toned_down_img

def download_english_words():
    """Download the dwyl ``words_alpha.txt`` list and return it as a set.

    Returns:
        A set containing every whitespace-separated token of the downloaded
        file.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"

    # A timeout keeps the notebook cell from hanging forever on a stalled
    # connection.
    response = requests.get(url, timeout=30)

    # Without this check an HTTP error page would be split into "words".
    response.raise_for_status()

    return set(response.text.split())

def _crop_window(points, image_shape):
    """Return the integer crop window (x, y, w, h) of a detection, clamped to the image."""
    img_h, img_w = image_shape[:2]
    # points[0] / points[2] are used as opposite corners of the box.
    left = max(int(points[0][0]), 0)
    top = max(int(points[0][1]), 0)
    right = min(int(points[2][0]), img_w)
    bottom = min(int(points[2][1]), img_h)
    return left, top, right - left, bottom - top


def _closest_english_word(text, english_words):
    """Return the dictionary word closest to ``text``, or 'N/A' if none is close enough."""
    if text in english_words:
        # An exact hit is its own best match; skip the expensive fuzzy scan.
        return text
    # get_close_matches compares against every candidate, so this is slow on a
    # large word list, but it is what actually suggests a correction.
    matches = get_close_matches(text, english_words, n=1)
    return matches[0] if matches else 'N/A'


def extract_text(image, image_path):
    """Run EasyOCR on an image and re-run it on each preprocessed text region.

    For every text detection on the full image this function:
      * draws the detection's bounding box on an overview figure,
      * crops the region from an untouched copy of the image,
      * preprocesses the crop with ``preprocess_image`` and shows it,
      * marks Harris corners of the crop on ``image`` (in place),
      * runs EasyOCR on the preprocessed crop and prints each recognised
        text together with its closest English word.

    Args:
        image: BGR image array (as returned by ``cv2.imread``). It is
            modified in place by the corner markers.
        image_path: Label used in the title of each crop figure.

    Returns:
        None. Results are printed and shown as matplotlib figures.
    """
    reader = easyocr.Reader(['en'])
    result = reader.readtext(image)

    english_words = download_english_words()

    # Crops are taken from this copy so corner markers drawn on `image`
    # never end up inside a later crop.
    im2 = image.copy()

    # Overview figure with one rectangle per detection.
    fig, ax = plt.subplots(1)
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    for detection in result:
        # Bounding box coordinates (using text detection)
        points = detection[0]
        x, y, x1, y1 = points[0][0], points[0][1], points[2][0], points[2][1]

        rect = patches.Rectangle((x, y), x1 - x, y1 - y, linewidth=1,
                                 edgecolor='r', facecolor='none')
        ax.add_patch(rect)

        # Crop the region from the original image, staying inside its bounds.
        x, y, w, h = _crop_window(points, im2.shape)
        if w <= 0 or h <= 0:
            # An empty crop would make cv2.cvtColor raise in preprocess_image.
            continue
        cropped_region = im2[y:y + h, x:x + w]

        # Preprocessing on the cropped region
        preprocessed_cropped_region = preprocess_image(cropped_region)

        # Harris corner response; keep points above 1% of the strongest one.
        corners = cv2.cornerHarris(preprocessed_cropped_region, 2, 3, 0.04)
        corners_thresh = 0.01 * corners.max()
        corner_coords = np.argwhere(corners > corners_thresh)

        # Mark corners on `image`. NOTE: the overview axes were given a
        # converted copy above, so these markers only affect the caller's array.
        for corner in corner_coords:
            center = (int(x + corner[1]), int(y + corner[0]))
            cv2.circle(image, center, 3, (0, 0, 255), -1)

        # Display the cropped region for visual inspection
        plt.figure()
        plt.imshow(preprocessed_cropped_region, cmap='gray')
        plt.title(f"Cropped Region for {image_path}")
        plt.show()

        # Run EasyOCR on the cropped region and report what it read there.
        cropped_result = reader.readtext(preprocessed_cropped_region)

        for cropped_detection in cropped_result:
            detected_text = cropped_detection[1].lower()
            detected_text = detected_text.replace("[", "1").replace("(", "1")
            closest_word = _closest_english_word(detected_text, english_words)

            print("Extracted text:", detected_text)
            print("Closest word:", closest_word)

    plt.show()

if __name__ == '__main__':
    image = cv2.imread(IMAGE_PATH)

    # cv2.imread reports an unreadable or missing file by returning None
    # instead of raising, so fail here with a message that names the path.
    if image is None:
        raise FileNotFoundError(
            f"Could not read an image from IMAGE_PATH={IMAGE_PATH!r}; "
            "upload the file and set IMAGE_PATH to its name."
        )

    extract_text(image, IMAGE_PATH)
