# Optical Character Recognition (OCR) with GCP
 
- https://pypi.org/project/google-cloud-vision/
- https://cloud.google.com/vision/docs/handwriting#vision-document-text-detection-python


# In bash terminal:

## [0] Install google-cloud-vision
sudo python3.11 -m pip install google-cloud-vision

## [1] Install the python-dotenv library to set environment variables in bash that can then be read in the Jupyter notebook
sudo python3.11 -m pip install "python-dotenv[cli]"

## [2] Log in to Google
gcloud auth login

## [3] Set the project location and the variable in the .env file
export location=$(echo "europe-west9") 

dotenv set LOCATION $location

## [4] Enable API services
gcloud services enable vision.googleapis.com 

## [5] Set the project_id
export PROJECT_ID=$(echo "XXX")

gcloud config set project $PROJECT_ID

dotenv set PROJECT_ID $PROJECT_ID

## [6] Create the application_default_credentials.json file
Directions: https://cloud.google.com/docs/authentication/provide-credentials-adc#local-dev

gcloud auth application-default login


# In python jupyter-notebook:

## Import environment variables

In [1]:
# python-dotenv documentation: https://pypi.org/project/python-dotenv/
from dotenv import load_dotenv, find_dotenv

# Step 1: locate the .env file; step 2: load it into this process's environment.
dotenv_path = find_dotenv()
dot_env_file_exist = load_dotenv(dotenv_path)

# Show the flag returned by load_dotenv (presumably True when the .env file
# was found and loaded -- see the python-dotenv docs for the exact semantics).
print('dot_env_file_exist: ', dot_env_file_exist)

dot_env_file_exist:  True


## Credentials

In [None]:
import os
from os import environ
import numpy as np

# Read the values that the bash setup steps stored in the .env file
# (made available to os.getenv by the load_dotenv call in the previous cell).
PROJECT_ID = os.getenv("PROJECT_ID")
print('PROJECT_ID: ', PROJECT_ID)

LOCATION = os.getenv("LOCATION")
print('LOCATION: ', LOCATION)

# Point the Google client libraries at the Application Default Credentials file
# written by `gcloud auth application-default login`.
# - The path is anchored at the user's home directory instead of "../", so it no
#   longer depends on the directory the notebook kernel was started from.
# - setdefault keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already set
#   (e.g. in the shell or in the .env file) instead of silently overwriting it.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    os.path.expanduser("~/.config/gcloud/application_default_credentials.json"),
)


In [3]:
def detect_document(path, verbose=True):
    """Run Cloud Vision document text detection on one local image file.

    The file is read as bytes and sent to ``document_text_detection``. The
    returned annotation is walked page -> block -> paragraph -> word, and the
    text of every word (its symbols joined together) is collected.

    Args:
        path: Path of the image file to send for text detection.
        verbose: When True (the default, matching the previous behaviour),
            print the confidence of every block, paragraph, word and symbol.
            Pass False to get only the returned words without flooding the
            notebook output.

    Returns:
        list[str]: The detected words, in the order the response lists them.

    Raises:
        Exception: If the response carries an error message. The check is done
            before the annotation is processed, so nothing is printed or
            collected for a failed request.
    """
    # Imported locally, as in the original cell, so the function stays
    # self-contained when copied between notebooks.
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()

    with open(path, "rb") as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    response = client.document_text_detection(image=image)

    # Fail fast: inspect the error field before touching the annotation.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    out = []
    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            if verbose:
                print(f"\nBlock confidence: {block.confidence}\n")

            for paragraph in block.paragraphs:
                if verbose:
                    print("Paragraph confidence: {}".format(paragraph.confidence))

                for word in paragraph.words:
                    # A word is delivered as a sequence of symbols (characters).
                    word_text = "".join([symbol.text for symbol in word.symbols])
                    out.append(word_text)

                    if not verbose:
                        continue

                    print(
                        "Word text: {} (confidence: {})".format(
                            word_text, word.confidence
                        )
                    )
                    for symbol in word.symbols:
                        print(
                            "\tSymbol: {} (confidence: {})".format(
                                symbol.text, symbol.confidence
                            )
                        )

    return out


In [4]:
# Folder holding the screenshots to OCR; switch by (un)commenting one line.
# folder_path = "/home/oem2/Pictures/Screenshots/exam1"
# folder_path = "/home/oem2/Pictures/Screenshots/exam2"
folder_path = "/home/oem2/Pictures/Screenshots/Azure"

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and the key order of `dd`) reproducible between runs.
files = sorted(os.listdir(folder_path))

# dd maps each file name to the space-joined words detected in that image.
dd = {}
for filename in files:
    filepath = os.path.join(folder_path, filename)

    # os.listdir also lists sub-directories, which detect_document cannot open
    # as an image file -- skip anything that is not a regular file.
    if not os.path.isfile(filepath):
        continue

    out = detect_document(filepath)

    img_text = ' '.join(out)
    dd[filename] = img_text


Block confidence: 0.9271661639213562

Paragraph confidence: 0.9243646264076233
Word text: O (confidence: 0.47004052996635437)
	Symbol: O (confidence: 0.47004052996635437)
Word text: y (confidence: 0.9137423038482666)
	Symbol: y (confidence: 0.9137423038482666)
Word text: = (confidence: 0.5140128135681152)
	Symbol: = (confidence: 0.5140128135681152)
Word text: [ (confidence: 0.8388254642486572)
	Symbol: [ (confidence: 0.8388254642486572)
Word text: 1 (confidence: 0.9254067540168762)
	Symbol: 1 (confidence: 0.9254067540168762)
Word text: , (confidence: 0.9689997434616089)
	Symbol: , (confidence: 0.9689997434616089)
Word text: 0.22 (confidence: 0.9876518249511719)
	Symbol: 0 (confidence: 0.9920816421508789)
	Symbol: . (confidence: 0.9899270534515381)
	Symbol: 2 (confidence: 0.9913996458053589)
	Symbol: 2 (confidence: 0.9771989583969116)
Word text: , (confidence: 0.9784817695617676)
	Symbol: , (confidence: 0.9784817695617676)
Word text: 0.5 (confidence: 0.9798345565795898)
	Symbol: 0 (con