Ref: https://medium.com/searce/tips-tricks-for-using-google-vision-api-for-text-detection-2d6d1e0c6361

In [1]:
# Upgrade pip itself, then install/upgrade the Google Cloud Vision and Storage client libraries.
# NOTE(review): no versions are pinned, so re-running this cell may install releases
# newer than the ones listed in the recorded output below.
!pip3 install --upgrade pip
!pip3 install --upgrade google-cloud-vision
!pip3 install --upgrade google-cloud-storage

Requirement already up-to-date: pip in /usr/local/envs/py3env/lib/python3.5/site-packages (19.0.3)
Requirement already up-to-date: google-cloud-vision in /usr/local/envs/py3env/lib/python3.5/site-packages (0.36.0)
Requirement already up-to-date: google-cloud-storage in /usr/local/envs/py3env/lib/python3.5/site-packages (1.14.0)


## Text detection samples

**Text Detection** performs Optical Character Recognition. It detects and extracts text within an image with support for a broad range of languages. It also features automatic language identification.

In [3]:
import io

### Detecting text in a local image

In [4]:
# Ref: https://cloud.google.com/vision/docs/ocr
# Ref: https://cloud.google.com/vision/docs/detecting-text#detecting_text_in_a_local_image

def detect_text(path):
    """Detect text in a local image file and print every annotation.

    The file at ``path`` is read as raw bytes and sent to the Cloud Vision
    ``text_detection`` call. For each returned annotation the description is
    printed, followed by the vertices of its bounding polygon.

    Args:
        path: Filesystem path of the image to analyse.

    Returns:
        None. Results are written to standard output.

    Raises:
        RuntimeError: If the API response carries an error message for the
            image (otherwise the failure would look like "no text found").
    """
    from google.cloud import vision
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    # google-cloud-vision >= 2.0 exposes Image on the package itself and no
    # longer ships `vision.types`; older releases only have `vision.types.Image`.
    image_cls = getattr(vision, 'Image', None) or vision.types.Image
    image = image_cls(content=content)

    response = client.text_detection(image=image)

    # Per-image failures are reported inside the response, not raised by the
    # client, so surface them explicitly instead of printing an empty result.
    if response.error.message:
        raise RuntimeError(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    texts = response.text_annotations
    print('Texts:')

    for text in texts:
        print('\n"{}"'.format(text.description))

        vertices = ['({},{})'.format(vertex.x, vertex.y)
                    for vertex in text.bounding_poly.vertices]

        print('bounds: {}'.format(','.join(vertices)))

In [5]:
# Run OCR on the sample image; the path is relative to the notebook's working directory.
detect_text('./sample_img_files/text_detection_image_demo.png')

Texts:

"STATEMENT
WACHOVIA NATIONAL BANK,
WINSTON, N. C.
JANUARY 29TH, 1906.
(CONDENSED FROM REPORT TO THE COMPTROLLER OF THE CURRENCY.)
RESOURCES.
LIABILITIES
150,000.00
Loans, including Overdrafts 511,789.61 | Capital.
U. S. Bonds and Premiums 52,300.00 Surplus and Undivided Profits 171,167.89
Real Estate, Furniture and
Circulation...
50,000.00
4,500.00
2,500.00
Cash and Due from Banks... 268,231 30
$839,320.91
Fixtures,...
Redemption fund with U. S.
Erostrs..408.1.12
$889,320.91
W. A. LEMLY, President.
JAS. A. GRAY, Cashier.
"
bounds: (21,26),(585,26),(585,318),(21,318)

"STATEMENT"
bounds: (237,26),(361,26),(361,38),(237,38)

"WACHOVIA"
bounds: (65,53),(235,53),(235,71),(65,71)

"NATIONAL"
bounds: (249,53),(417,53),(417,71),(249,71)

"BANK,"
bounds: (434,53),(540,53),(540,71),(434,71)

"WINSTON,"
bounds: (231,74),(318,74),(318,85),(231,85)

"N."
bounds: (328,74),(344,74),(344,84),(328,84)

"C."
bounds: (353,74),(367,74),(367,84),(353,84)

"JANUARY"
bounds: (200,91),(286,91),(286,9

### Detecting text in a remote image

For your convenience, the Vision API can perform text detection directly on an image file located in Google Cloud Storage or on the Web, without the need to send the contents of the image file in the body of your request.

In [6]:
# Ref: https://cloud.google.com/vision/docs/ocr
# Ref: https://cloud.google.com/vision/docs/detecting-text#detecting_text_in_a_remote_image

def detect_text_uri(uri):
    """Detect text in an image located in Google Cloud Storage or on the Web.

    Only the image location is sent to the Cloud Vision ``text_detection``
    call; the image bytes are not read by this function. For each returned
    annotation the description is printed, followed by the vertices of its
    bounding polygon.

    Args:
        uri: Location of the image, assigned to ``image.source.image_uri``.

    Returns:
        None. Results are written to standard output.

    Raises:
        RuntimeError: If the API response carries an error message for the
            image (otherwise the failure would look like "no text found").
    """
    from google.cloud import vision
    client = vision.ImageAnnotatorClient()

    # google-cloud-vision >= 2.0 exposes Image on the package itself and no
    # longer ships `vision.types`; older releases only have `vision.types.Image`.
    image_cls = getattr(vision, 'Image', None) or vision.types.Image
    image = image_cls()
    image.source.image_uri = uri

    response = client.text_detection(image=image)

    # Per-image failures are reported inside the response, not raised by the
    # client, so surface them explicitly instead of printing an empty result.
    if response.error.message:
        raise RuntimeError(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    texts = response.text_annotations
    print('Texts:')

    for text in texts:
        print('\n"{}"'.format(text.description))

        vertices = ['({},{})'.format(vertex.x, vertex.y)
                    for vertex in text.bounding_poly.vertices]

        print('bounds: {}'.format(','.join(vertices)))

In [7]:
# Web URL of the sample image (object `sample_img_files/text_detection_image_demo.png`
# in bucket `ocr-data-source`, per the URL path).
# NOTE(review): the long `qk=` query token suggests a temporary, session-bound download
# link — presumably it expires; confirm and consider a stable URI, and avoid committing tokens.
img_uri = 'https://00e9e64baca7c1cf5583bffe9197d002d133a5ac52c625d377-apidata.googleusercontent.com/download/storage/v1/b/ocr-data-source/o/sample_img_files%2Ftext_detection_image_demo.png?qk=AD5uMEsC__fAMxwRWUjPRKAcqaqBKRSoKxTNn4ZrnBsGHTbHyp-C41jqhUICfGwz0kZ_XUTKc7VBy2B6mnpuIZN-7vkYMdU3W3zQE-7kLQko6Vs0KnRR41uEdBbMSE0ByyBr9a6-p8Zz0c4_xHUeCXz-yTNOFttTMzNTTAUBgdlKQN0NGZS7Q-UxbnpSrDiJyTGIPOfgI89a9zWT-yUep_F0cIjMLyJ2qw3gm6cUNL47GYCrDhq2_wLQH6n-XAJXXzMv20BjTCDi_0Y1bVJoMKXLG2r7XY_gWzuIDuh8VM3IYgxQ3_nQZr8eEUm0HGHQ9XRmEw1FBroT1Vi1ThA7voxRXp3t9EoNYSALavuh7Ad6kLq949VRw3RFVi5gOKvuAneISpD1PwVV_YeibiaQvRrJxai_JWGtQSpxABrTEBL5BfDNjzsTSXAuzwbpjvYGk8UhnexZ7txgfe2857Lh9k--hjrVyuH8FUps2Cv1nSqaSo-FzjTmeU3yepMreFKRl1JfJBHokHusc5A6PIwj0SmnrvRxRtESPFijumbzf3QvqlgKvND5NgdjgC0cucMCtVOssvfsccG2bIS_hwO-jUUlYvgdqzaAtdK-Ylg_oIvb20J0ZHlUiSGVjoczs9VJ1gPiO48CdjubLhDKE-ve1D7mvwNHcgqVPC6Oj5oLuXKP04iZX9V0GLLhMxQBX-cXyOr-tHHnJVdAvjAKHi6B--RZ1boKKiDDLCOKwbqtQaqKyWpxX6Q5r3TXlW5yNt8gMRrkHTg7TRIyNohjTmpz-aVsa91MiBDNkM6PViEHFgKtTXhozPZV8VpkUc2UteOqQ2NeYCYAkQ5x'

In [8]:
# Run OCR on the remote image referenced by `img_uri`; only the URI is passed to the function.
detect_text_uri(img_uri)

Texts:

"STATEMENT
WACHOVIA NATIONAL BANK,
WINSTON, N. C.
JANUARY 29TH, 1906.
(CONDENSED FROM REPORT TO THE COMPTROLLER OF THE CURRENCY.)
RESOURCES.
LIABILITIES
150,000.00
Loans, including Overdrafts 511,789.61 | Capital.
U. S. Bonds and Premiums 52,300.00 Surplus and Undivided Profits 171,167.89
Real Estate, Furniture and
Circulation...
50,000.00
4,500.00
2,500.00
Cash and Due from Banks... 268,231 30
$839,320.91
Fixtures,...
Redemption fund with U. S.
Erostrs..408.1.12
$889,320.91
W. A. LEMLY, President.
JAS. A. GRAY, Cashier.
"
bounds: (21,26),(585,26),(585,318),(21,318)

"STATEMENT"
bounds: (237,26),(361,26),(361,38),(237,38)

"WACHOVIA"
bounds: (65,53),(235,53),(235,71),(65,71)

"NATIONAL"
bounds: (249,53),(417,53),(417,71),(249,71)

"BANK,"
bounds: (434,53),(540,53),(540,71),(434,71)

"WINSTON,"
bounds: (231,74),(318,74),(318,85),(231,85)

"N."
bounds: (328,74),(344,74),(344,84),(328,84)

"C."
bounds: (353,74),(367,74),(367,84),(353,84)

"JANUARY"
bounds: (200,91),(286,91),(286,9