Skip to content

Commit 30cd279

Browse files
committed
tesserocr更好用
1 parent 7692ce7 commit 30cd279

File tree

9 files changed

+62
-0
lines changed

9 files changed

+62
-0
lines changed

my01-OCR文字识别/Tessract-OCR/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ http://www.pyimagesearch.com/2017/07/10/using-tesseract-ocr-python/
77
* ubuntu: sudo apt-get install tesseract-ocr
88
* pip install pillow
99
* pip install pytesseract
10+
* pip install tesserocr
1011

12+
# tesserocr更好用
1113

1214
## 运行
1315
* 标准输出,不用输出到TXT文件:
File renamed without changes.
1.51 KB
Loading
1.18 KB
Loading
1.18 KB
Loading
1.27 KB
Loading
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# -*-coding:utf8-*-#

__author__ = 'play4fun'
"""
create time:16/10/21 11:44
"""

# Run OCR over a fixed list of sample images, reusing one tesserocr API
# handle, and print the recognized text plus the AllWordConfidences() result
# for each image.
from tesserocr import PyTessBaseAPI

# Absolute paths of the images to process, one per line.
images = [
    '/Volumes/GF/Project/Python/Tesserocr/tesserocr/sample1.jpeg',
    '/Volumes/GF/Project/Python/Tesserocr/tesserocr/sample2.jpeg',
    '/Volumes/GF/Project/Python/Tesserocr/tesserocr/sample3.jpeg',
]

# The api is finalized automatically because it is used as a context manager;
# without the with-statement, api.End() would have to be called explicitly
# once the handle is no longer needed.
with PyTessBaseAPI() as api:
    for image_path in images:
        api.SetImageFile(image_path)

        recognized_text = api.GetUTF8Text()
        print('text:', recognized_text)
        print('-----')

        word_confidences = api.AllWordConfidences()
        print(word_confidences)
        print('-----')
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# -*-coding:utf8-*-#

__author__ = 'play4fun'
"""
create time:16/10/21 11:47
"""

# Demonstrates the module-level tesserocr helpers: version/language queries
# and the two one-shot OCR entry points (from a PIL image and from a path).
import tesserocr
from PIL import Image

print(tesserocr.tesseract_version())  # print tesseract-ocr version
print(tesserocr.get_languages())  # prints tessdata path and list of available languages

# Open the image in a with-block so the underlying file handle is closed as
# soon as OCR is done, instead of being left open until interpreter exit.
with Image.open('sample.jpg') as image:
    print(tesserocr.image_to_text(image))  # print ocr text from image

# or let tesserocr read the file itself:
print(tesserocr.file_to_text('sample.jpg'))
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# -*-coding:utf8-*-#

__author__ = 'play4fun'
"""
create time:16/10/21 11:47
"""
# Orientation and script detection (OSD)
#
# Loads one image, runs recognition with page segmentation mode AUTO_OSD and
# prints the orientation, writing direction, textline order and deskew angle
# reported by the layout iterator.

from PIL import Image
from tesserocr import PyTessBaseAPI, PSM

# NOTE: the original author flagged this path as "No such file"; point it at
# an image that exists on your machine before running.
IMAGE_PATH = "/usr/src/tesseract/testing/eurotext.tif"

# Both the API handle and the image file are context-managed so they are
# released even if recognition fails part-way through.
with PyTessBaseAPI(psm=PSM.AUTO_OSD) as api, Image.open(IMAGE_PATH) as image:
    api.SetImage(image)
    api.Recognize()

    it = api.AnalyseLayout()
    if it is None:
        # Guard against a missing iterator (documented for errors and empty
        # pages) so the failure is explicit rather than an AttributeError.
        raise SystemExit("AnalyseLayout() returned no page iterator (error or empty page)")

    orientation, direction, order, deskew_angle = it.Orientation()
    print("Orientation: {:d}".format(orientation))
    print("WritingDirection: {:d}".format(direction))
    print("TextlineOrder: {:d}".format(order))
    print("Deskew angle: {:.4f}".format(deskew_angle))

0 commit comments

Comments
 (0)