<a href="https://colab.research.google.com/github/chaelinoh/deep-learning/blob/main/extract_words.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install google-cloud-vision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting google-cloud-vision
  Downloading google_cloud_vision-3.4.0-py2.py3-none-any.whl (444 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m444.2/444.2 KB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: google-cloud-vision
Successfully installed google-cloud-vision-3.4.0


In [None]:
import io
import os
from PIL import Image, ImageDraw
import re
from google.cloud import vision


In [None]:
# Point GOOGLE_APPLICATION_CREDENTIALS at a key file stored on Google Drive.
# Presumably the Vision client created later picks its credentials up from this
# variable -- confirm against the Google auth documentation.
# NOTE(review): the path is under /content/drive, but no cell in this notebook
# mounts Drive; confirm Drive is mounted before this cell runs.
os.environ['GOOGLE_APPLICATION_CREDENTIALS']="/content/drive/MyDrive/key.json"

In [None]:
def detect_text_folder(folder_path, output_dir='/content/drive/MyDrive/DT/extracted', padding=70):
    """Detect text in every image of a folder and save a padded crop per detection.

    Each image file directly inside ``folder_path`` is sent to the Vision API
    text-detection endpoint. For every returned annotation the axis-aligned
    bounding box of its polygon is grown by ``padding`` pixels on each side,
    clamped to the image, cropped out and written to ``output_dir`` as
    ``<image stem>_cropped_<counter>.jpg``. The counter runs across all images,
    so file names are unique within one call.

    Args:
        folder_path: Directory to scan (not recursive). Files whose name ends
            in .jpg, .jpeg or .png (any letter case) are processed.
        output_dir: Directory that receives the crops; created if missing.
        padding: Number of pixels added around each detected bounding box.

    Raises:
        Exception: If the Vision API response for an image carries an error
            message. Raised before any crop of that image is written.
    """
    client = vision.ImageAnnotatorClient()

    # Image.save does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    counter = 0  # shared across all files so every crop gets a unique suffix

    # Sorted so the counter-to-crop mapping is the same on every run;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'rb') as image_file:
            content = image_file.read()

        response = client.text_detection(image=vision.Image(content=content))

        # Check for an API error before using the annotations.
        if response.error.message:
            raise Exception(
                '{}\nFor more info on error messages, check: '
                'https://cloud.google.com/apis/design/errors'.format(
                    response.error.message))

        texts = response.text_annotations
        print('Texts for file {}:'.format(filename))
        if not texts:
            continue

        # Decode the image once per file (from the bytes already in memory)
        # instead of re-opening it for every annotation.
        with Image.open(io.BytesIO(content)) as img:
            # Crops are saved as JPEG, which cannot store alpha or palette
            # modes (common for PNG input); convert those to RGB first.
            page = img if img.mode in ('RGB', 'L', 'CMYK') else img.convert('RGB')
            width, height = page.size

            for text in texts:
                print('\n"{}"'.format(text.description))

                xs = [vertex.x for vertex in text.bounding_poly.vertices]
                ys = [vertex.y for vertex in text.bounding_poly.vertices]
                if not xs:
                    # No polygon to crop to.
                    continue

                # Grow the box by `padding`, but keep it inside the image:
                # Pillow fills any out-of-image area of a crop with black.
                left = max(min(xs) - padding, 0)
                top = max(min(ys) - padding, 0)
                right = min(max(xs) + padding, width)
                bottom = min(max(ys) + padding, height)
                box = (left, top, right, bottom)

                if right <= left or bottom <= top:
                    # Box lies entirely outside the image; an empty crop
                    # cannot be saved.
                    print('bounds: {} (empty, skipped)'.format(box))
                    continue

                save_path = os.path.join(
                    output_dir,
                    '{}_cropped_{}.jpg'.format(os.path.splitext(filename)[0], counter))
                page.crop(box).save(save_path)

                print('bounds: {}'.format(box))
                counter += 1  # one suffix per saved crop

# Run text detection and cropping over the images in the DT folder on Drive.
detect_text_folder('/content/drive/MyDrive/DT')

Texts for file 1.JPG:

"S
파"
bounds: (325, 220, 916, 744)

"S"
bounds: (325, 532, 509, 744)

"파"
bounds: (545, 220, 916, 572)
Texts for file 11.JPG:

"쿵"
bounds: (630, 173, 881, 456)

"쿵"
bounds: (630, 173, 881, 456)
Texts for file 12.JPG:

"OINA"
bounds: (510, 303, 746, 594)

"OINA"
bounds: (510, 303, 746, 594)
Texts for file 2.JPG:

"커헉…!"
bounds: (109, 71, 380, 255)

"커헉"
bounds: (109, 71, 326, 255)

"…"
bounds: (193, 71, 369, 255)

"!"
bounds: (224, 71, 380, 255)
Texts for file 3.JPG:

"Holl
N"
bounds: (57, 327, 767, 695)

"Holl"
bounds: (58, 327, 391, 638)

"N"
bounds: (596, 515, 768, 695)
Texts for file 4.JPG:

"빤"
bounds: (72, -61, 368, 233)

"빤"
bounds: (72, -61, 368, 233)
Texts for file 5.JPG:

"야!
페링-
[121
최은욱 집 가냐?
1986
HHHHNNHHHHI"
bounds: (75, -47, 704, 823)

"야"
bounds: (75, 92, 271, 286)

"!"
bounds: (137, 92, 285, 286)

"페링"
bounds: (289, -41, 641, 204)

"["
bounds: (309, -41, 538, 209)

"121"
bounds: (358, -46, 638, 207)

"최은욱"
bounds: (140, 187, 406, 369)

"집"
bounds:

In [None]:
import cv2
import numpy as np

def crop_characters(image_path):
    """Crop an image to the bounding box of the first contour found in it.

    The image is converted to grayscale, binarised with Otsu's threshold,
    cleaned with one erosion followed by one dilation (3x3 kernel), and the
    external contours of the result are extracted. The original colour image
    is then cropped to the bounding rectangle of the first contour.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The cropped image as an array in the channel order produced by
        ``cv2.imread``. If no contour is found, the full image is returned.

    Raises:
        OSError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque cvtColor error below.
        raise OSError('Could not read image: {}'.format(image_path))

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Otsu picks the threshold automatically; the 0 passed here is ignored.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Erode then dilate with the same kernel (a morphological opening) to
    # drop small specks before looking for contours.
    kernel = np.ones((3, 3), np.uint8)
    eroded = cv2.erode(thresh, kernel, iterations=1)
    dilated = cv2.dilate(eroded, kernel, iterations=1)

    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # Nothing to crop to (e.g. the binarised image is entirely black).
        return img

    # NOTE(review): only the first contour returned is used. If the image
    # contains several separate regions the crop covers just one of them --
    # confirm this is intended rather than the largest contour or their union.
    x, y, w, h = cv2.boundingRect(contours[0])

    return img[y:y+h, x:x+w]


In [None]:
# Tighten every crop in the `extracted` folder and write the result, under the
# same file name, into its `crop` sub-folder.
extracted_dir = '/content/drive/MyDrive/DT/extracted'
crop_dir = os.path.join(extracted_dir, 'crop')

# cv2.imwrite does not create missing directories; it only returns False.
os.makedirs(crop_dir, exist_ok=True)

for filename in os.listdir(extracted_dir):
    if filename.endswith('.jpg'):
        file_path = os.path.join(extracted_dir, filename)
        cropped_img = crop_characters(file_path)
        save_path = os.path.join(crop_dir, filename)
        # imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(save_path, cropped_img):
            raise OSError('Failed to write {}'.format(save_path))
