In [1]:
import numpy as np
import cv2

# 文件OCR
### 前處理
* step1: 邊緣檢測
* step2: 獲取輪廓
* step3: 變換

In [2]:
def order_points(pts):
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Four (x, y) points in any order. Any array-like of shape (4, 2)
            is accepted, as is an array of shape (4, 1, 2) (the layout
            produced by OpenCV contour functions).

    Returns:
        A (4, 2) float32 array ordered
        [top-left, top-right, bottom-right, bottom-left].

    Raises:
        ValueError: If ``pts`` does not hold exactly four 2-D points.
    """
    pts = np.asarray(pts)
    if pts.shape == (4, 1, 2):
        # Drop the singleton axis used by OpenCV contours.
        pts = pts.reshape(4, 2)
    if pts.shape != (4, 2):
        raise ValueError(
            'order_points expects four (x, y) points, got shape {}'.format(pts.shape))

    rect = np.zeros((4, 2), dtype='float32')

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    diff = pts[:, 1] - pts[:, 0]
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect

def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an upright rectangle.

    Args:
        image: Source image handed to cv2.warpPerspective.
        pts: Four (x, y) corner points; they are ordered by order_points.

    Returns:
        The perspective-corrected image. Its width is the longer of the
        top/bottom edges and its height the longer of the left/right edges
        (each truncated to an integer).
    """
    def _edge_length(p, q):
        # Euclidean distance between two corner points.
        delta = p - q
        return np.sqrt((delta[0] ** 2) + (delta[1] ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Opposite edges of the detected quadrilateral may differ slightly in
    # length, so take the larger of each pair for the output size.
    out_w = max(int(_edge_length(bottom_right, bottom_left)),
                int(_edge_length(top_right, top_left)))
    out_h = max(int(_edge_length(top_right, bottom_right)),
                int(_edge_length(top_left, bottom_left)))

    # Destination corners, in the same order as ``corners``.
    target = np.array(
        [[0, 0],
         [out_w - 1, 0],
         [out_w - 1, out_h - 1],
         [0, out_h - 1]],
        dtype="float32")

    # Projective transform mapping the source corners onto the target ones.
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a given width or height, keeping the aspect ratio.

    Args:
        image: Input image; its first two shape entries are read as
            (height, width).
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag forwarded to cv2.resize.

    Returns:
        The resized image, or ``image`` itself when neither ``width`` nor
        ``height`` is given.
    """
    src_h, src_w = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    # cv2.resize takes the size as (width, height).
    return cv2.resize(image, target_size, interpolation=inter)

def cv_show(img, name='image'):
    """Show ``img`` in a window titled ``name`` and block until a key is pressed.

    All OpenCV windows are destroyed once the key press is received.
    """
    cv2.imshow(name, img)
    cv2.waitKey(0)  # 0 = wait indefinitely for a key press
    cv2.destroyAllWindows()

In [3]:
# Location of the input photo that the pipeline loads with cv2.imread.
path = 'images/receipt.jpg'

In [4]:
image = cv2.imread(path)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded; fail here rather than with an AttributeError later on.
if image is None:
    raise IOError('Could not read image: {}'.format(path))

In [5]:
# Keep the full-resolution picture for the final warp and run detection on a
# copy scaled to a height of 500 px.
orig = image.copy()
# Factor that maps coordinates on the 500 px image back onto the original.
ratio = orig.shape[0] / 500.0
image = resize(orig, height=500)

In [6]:
# Preprocessing: grayscale, then a 5x5 Gaussian blur, then Canny edge detection
gray = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)

In [7]:
print('step 1 邊緣檢測')
# Show the resized input first, then its edge map.
for stage in (image, edged):
    cv_show(stage)

step 1 邊緣檢測


In [8]:
# Contour detection.
# findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
# (image, contours, hierarchy) on 3.x, so index from the end: the contour
# list is always the second-to-last element.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep only the five largest contours by area
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

In [9]:
# Start from None so that a value left over from an earlier run is never
# reused when no suitable contour is found this time.
screenCnt = None
for c in cnts:
    # Perimeter of the contour; True marks it as closed
    peri = cv2.arcLength(c, True)
    # Approximate the contour with a simpler polygon.
    # epsilon (2% of the perimeter here) is the maximum distance allowed
    # between the original contour and its approximation; smaller = more exact.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # The first (i.e. largest) contour that reduces to 4 points is taken
    # as the document outline
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    raise RuntimeError(
        'No 4-point contour found among the largest contours; '
        'check the input image or the edge-detection thresholds')

In [10]:
print('step 2 獲取輪廓')
# Draw the detected outline in green (2 px) directly onto the resized image
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
cv_show(image, 'Outline')

step 2 獲取輪廓


In [11]:
# Perspective transform: straighten the skewed document.
# The four detected corners are mapped by a transform matrix M onto the
# corners of an upright rectangle.
# The corners were found on the resized image, so scale them by `ratio` to
# express them in the coordinates of the full-resolution original.
doc_corners = screenCnt.reshape(4, 2) * ratio
warped = four_point_transform(orig, doc_corners)

In [12]:
# Binarization.
# The grayscale image gets its own name instead of overwriting `warped`, so
# the cell can be re-run: a second BGR2GRAY conversion on an image that is
# already single-channel would raise an error.
warped_gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
# Pixels above 100 become 255 (white), the rest 0 (black)
ref = cv2.threshold(warped_gray, 100, 255, cv2.THRESH_BINARY)[1]


In [13]:
print('step 3 變換')
# cv_show(resize(orig, height=650), 'Original')  # uncomment to compare with the input
# Use cv_show like the earlier steps: it closes the window after the key
# press and does not leak the key code as cell output.
cv_show(resize(ref, height=650), 'Scanned')

step 3 變換


27

In [14]:
# Save the binarized scan to disk; the OCR step reads it back from 'scan.jpg'.
cv2.imwrite('scan.jpg', ref)

True

### OCR

In [15]:
import pytesseract

In [16]:
image = cv2.imread('scan.jpg')
# cv2.imread returns None instead of raising when the file cannot be read;
# stop here rather than passing None on to pytesseract.
if image is None:
    raise IOError("Could not read 'scan.jpg'; run the cell that writes it first")
text = pytesseract.image_to_string(image)
print(text)

we KK KR OK KK KK OK KK

WHOL
FOODS

WHOLE FOODS MARKET

399 POST RD WEST

36%
$65
365
365

BROTH

 

E
S

WESTPORT, CT 06880
(203) 227-6858

BACON
BACON
BACON
BACUN

LS
LS
LS
LS

CHIC

FLOUR ALMOND
CHKN BRST BNLSS SK
HEAVY CREAM
BALSMC REDUCT
GRND 85/15

BEEF

JUICE COF CASHEW

L

DOCS PINT ORGANIC
HNY ALMOND Bui

xeee TAX

.00

BAL

TER

NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP

4 99
4.99
4.99
39
19
99
.80

aN

et
ao — NH

6.49
5.04
8.99
14.49
9.99
101.33

TY

nr? 7 Th

mara THN eh

