In [4]:
import io
import os

# Point the Google client library at the service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "ocr-project-386007-133b41eb4439.json"

# Imports the Google Cloud client library
from google.cloud import vision

# Instantiates a client
client = vision.ImageAnnotatorClient()

# The name of the image file to annotate
file_name = os.path.abspath('poster.jpg')

# Loads the image into memory
with io.open(file_name, 'rb') as image_file:
    content = image_file.read()

image = vision.Image(content=content)

# Performs label detection on the image file
response = client.label_detection(image=image)
# The API reports per-image failures inside the response instead of raising,
# so surface them explicitly rather than silently printing an empty list.
if response.error.message:
    raise RuntimeError(f"Label detection failed: {response.error.message}")
labels = response.label_annotations

print('Labels:')
for label in labels:
    print(label.description)

# Performs text detection on the image file
response = client.text_detection(image=image)
if response.error.message:
    raise RuntimeError(f"Text detection failed: {response.error.message}")
# Note: the first annotation is the full detected text block; the remaining
# annotations are the individual pieces, so the text is printed twice below.
texts = response.text_annotations

print('Texts:')
for text in texts:
    print(text.description)

Labels:
World
Font
Line
Moon
Astronomical object
Illustration
Circle
Electric blue
Art
Graphics
Texts:
**
산책 소모임
해질 무렵
밤에
w
LOOD
w
c
JJ
m
**
산책
소모임
해질
무렵
밤
에
w
LOOD
w
c
JJ
m


In [7]:
# Detect text and save a copy of the image with the bounding boxes drawn on it
import io
import os

from google.cloud import vision_v1
from PIL import Image, ImageDraw

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "ocr-project-386007-133b41eb4439.json"

client = vision_v1.ImageAnnotatorClient()
img_path = 'poster'
with open(img_path+'.jpg', "rb") as image_file:
    content = image_file.read()

image = vision_v1.types.Image(content=content)

response = client.text_detection(image=image)
# The API reports per-image failures inside the response instead of raising.
if response.error.message:
    raise RuntimeError(f"Text detection failed: {response.error.message}")
# Skip annotation 0 (the whole text block) and keep the individual pieces.
texts = response.text_annotations[1:]

# Convert to RGB so the red outline is drawn as red even when the JPEG
# decodes to a grayscale or CMYK image.
im = Image.open(io.BytesIO(content)).convert("RGB")

draw = ImageDraw.Draw(im)

for text in texts:
    vertices = [(vertex.x, vertex.y) for vertex in text.bounding_poly.vertices]
    # A polygon needs at least two points; skip annotations without geometry.
    if len(vertices) < 2:
        continue
    draw.polygon(vertices, outline='red')

im.save(f"result_{img_path}.jpg")

In [97]:
# Echo the current value of `texts` as the cell output.
# NOTE(review): `texts` is not assigned in this cell, so the result depends on
# whichever earlier cell last assigned it — confirm the intended run order.
texts

[description: "풀문"
 bounding_poly {
   vertices {
     x: 61
     y: 80
   }
   vertices {
     x: 183
     y: 81
   }
   vertices {
     x: 183
     y: 135
   }
   vertices {
     x: 61
     y: 134
   }
 },
 description: "스튜디오"
 bounding_poly {
   vertices {
     x: 188
     y: 80
   }
   vertices {
     x: 435
     y: 81
   }
   vertices {
     x: 435
     y: 136
   }
   vertices {
     x: 188
     y: 135
   }
 },
 description: "MARA"
 bounding_poly {
   vertices {
     x: 388
     y: 442
   }
   vertices {
     x: 410
     y: 442
   }
   vertices {
     x: 410
     y: 448
   }
   vertices {
     x: 388
     y: 448
   }
 }]

In [3]:
# Crop each detected bounding box out of the image and save it as its own file
import io
import os

from google.cloud import vision_v1
from PIL import Image, ImageDraw

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "ocr-project-386007-133b41eb4439.json"

client = vision_v1.ImageAnnotatorClient()
img_path = 'poster'

with open(img_path+".jpg", "rb") as image_file:
    content = image_file.read()

image = vision_v1.types.Image(content=content)

response = client.text_detection(image=image)
# The API reports per-image failures inside the response instead of raising.
if response.error.message:
    raise RuntimeError(f"Text detection failed: {response.error.message}")
# Skip annotation 0 (the whole text block) and keep the individual pieces.
texts = response.text_annotations[1:]

im = Image.open(io.BytesIO(content))

for i, text in enumerate(texts):
    xs = [vertex.x for vertex in text.bounding_poly.vertices]
    ys = [vertex.y for vertex in text.bounding_poly.vertices]
    if not xs:
        # No geometry for this annotation; nothing to crop.
        continue

    # Axis-aligned box around the polygon, clamped to the image so the crop
    # is never padded with pixels from outside the picture.
    left = max(min(xs), 0)
    upper = max(min(ys), 0)
    right = min(max(xs), im.width)
    lower = min(max(ys), im.height)

    # A zero-area box cannot be written as a JPEG, so skip it.
    if right <= left or lower <= upper:
        continue

    im_crop = im.crop((left, upper, right, lower))
    im_crop.save(f"result_{img_path}_{i}.jpg")

KeyboardInterrupt: 

In [101]:
# Crop each bounding box and binarize it so that text is black and the
# background is white (original author's note: does not work well yet)
import io
import os

from google.cloud import vision_v1
from PIL import Image

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "ocr-project-386007-133b41eb4439.json"

client = vision_v1.ImageAnnotatorClient()

with open("풀문스튜디오.jpg", "rb") as image_file:
    content = image_file.read()

image = vision_v1.types.Image(content=content)

response = client.text_detection(image=image)
# The API reports per-image failures inside the response instead of raising.
if response.error.message:
    raise RuntimeError(f"Text detection failed: {response.error.message}")
# Skip annotation 0 (the whole text block) and keep the individual pieces.
texts = response.text_annotations[1:]

im = Image.open(io.BytesIO(content))

for i, text in enumerate(texts):
    xs = [vertex.x for vertex in text.bounding_poly.vertices]
    ys = [vertex.y for vertex in text.bounding_poly.vertices]
    if not xs:
        # No geometry for this annotation; nothing to crop.
        continue

    # Axis-aligned box around the polygon, clamped to the image bounds.
    left = max(min(xs), 0)
    top = max(min(ys), 0)
    right = min(max(xs), im.width)
    bottom = min(max(ys), im.height)

    # A zero-area box cannot be written as a JPEG, so skip it.
    if right <= left or bottom <= top:
        continue

    # crop the bounding box area
    im_crop = im.crop((left, top, right, bottom))

    # get character region
    # Convert to a single-channel grayscale image first; thresholding the
    # colour crop directly would binarize every channel independently and
    # produce coloured artefacts instead of a black/white image.
    im_gray = im_crop.convert("L")
    # Pixels brighter than 128 become 0 (black), everything else 255 (white).
    # NOTE(review): this yields black-on-white only when the text is lighter
    # than its background — confirm the polarity for the target images.
    im_bin = im_gray.point(lambda x: 0 if x > 128 else 255)
    # Trim to the bounding box of the non-zero (white) pixels; when there are
    # none, getbbox() returns None and crop() keeps the whole image.
    region = im_bin.crop(im_bin.getbbox())

    # save character region
    region.save(f"result_{i}.jpg")

In [131]:
import cv2
image_path = r'studio.jpg'

# Load the image
img = cv2.imread(image_path)
# cv2.imread signals a missing/unreadable file by returning None rather than
# raising, which would otherwise surface as a cryptic cvtColor error below.
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Binarize (inverted, with the threshold chosen automatically by Otsu's method)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Detect edges
edges = cv2.Canny(thresh, 100, 200)

# Show the results; blocks until a key is pressed in one of the windows
cv2.imshow('Original Image', img)
cv2.imshow('Inverted Image', thresh)
cv2.imshow('Detected Edges', edges)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [127]:
# Load the image and show its array shape.
img = cv2.imread('cafe.jpg')
# cv2.imread returns None for a missing/unreadable file instead of raising.
if img is None:
    raise FileNotFoundError("Could not read image: cafe.jpg")
img.shape

(750, 750, 3)