In [None]:
# Lab parameters shared by the commands in the cells below.
# Language name and its locale code.
export LANGUAGE="French"
export LOCALE="fr"
# IAM roles that Task 1 binds to the service account.
export BIGQUERY_ROLE="roles/bigquery.dataOwner"
export CLOUD_STORAGE_ROLE="roles/storage.admin"
# ID of the service account that Task 1 creates.
export SERVICE_ACCOUNT="new-sa"

## Task 1. Configure a service account to access the Machine Learning APIs, BigQuery, and Cloud Storage

In [None]:
# Create a new service account that provides credentials for the script.
# The account ID is taken from the SERVICE_ACCOUNT variable exported in the first cell.
gcloud iam service-accounts create $SERVICE_ACCOUNT

In [None]:
# Bind the project-level IAM roles to the service account: the BigQuery and
# Cloud Storage roles exported in the first cell (needed to process files from
# Cloud Storage and insert the result data into a BigQuery table), followed by
# roles/serviceusage.serviceUsageConsumer.
sa_member="serviceAccount:${SERVICE_ACCOUNT}@${DEVSHELL_PROJECT_ID}.iam.gserviceaccount.com"

# One add-iam-policy-binding call per role, in the same order as listed above.
for sa_role in "${BIGQUERY_ROLE}" "${CLOUD_STORAGE_ROLE}" "roles/serviceusage.serviceUsageConsumer"; do
  gcloud projects add-iam-policy-binding "${DEVSHELL_PROJECT_ID}" --member="${sa_member}" --role="${sa_role}"
done


## Task 2. Create and download a credential file for your service account

In [None]:
# When you have configured the service account permissions, download the JSON format IAM credentials file for the service account.
# The key is written to <SERVICE_ACCOUNT>-key.json, a path relative to the current directory.
gcloud iam service-accounts keys create $SERVICE_ACCOUNT-key.json --iam-account $SERVICE_ACCOUNT@$DEVSHELL_PROJECT_ID.iam.gserviceaccount.com

In [None]:
# Point GOOGLE_APPLICATION_CREDENTIALS at the key file in the current directory;
# the Python script checks this variable and the file's existence before it runs.
export GOOGLE_APPLICATION_CREDENTIALS="${PWD}/${SERVICE_ACCOUNT}-key.json"

## Task 3. Modify the Python script to extract text from image files

In [None]:
# Copy the file analyze-images-v2.py from the Cloud Storage bucket into the current directory
gsutil cp gs://qwiklabs-gcp-04-4c92716181fb/analyze-images-v2.py .

In [None]:
# Modify this Python script to extract text from the image files stored in your project bucket and then
# save the text data for each file into a text file that is written back to the same bucket

# sed -i "s/'en'/'${LOCALE}'/g" analyze-images-v2.py
# vi analyze-images-v2.py

# or [Open Editor]

In [None]:
# Dataset: image_classification_dataset
# Table name: image_text_detail
#
# For every jpg/png object in a Cloud Storage bucket this script:
#   1. extracts the text in the image with the Vision API,
#   2. writes that text back to the same bucket as <object name>.txt,
#   3. translates the text to TARGET_LANGUAGE with the Translation API unless
#      the Vision API already reports that locale,
#   4. collects (original text, locale, translated text, object name) rows and
#      inserts them into the BigQuery table named above.
#
# Usage: python3 <this script> [PROJECT_NAME] [BUCKET_NAME]
import os
import sys

# Import Google Cloud Library modules
from google.cloud import storage, bigquery, language, vision, translate_v2
from google.cloud import vision_v1

# Locale code of the language all extracted text is translated into.
TARGET_LANGUAGE = 'fr'

# Stop early, with a non-zero exit status, when the credentials file is not
# configured or does not exist.
if 'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ:
    print("The GOOGLE_APPLICATION_CREDENTIALS environment variable is not defined.\n")
    sys.exit(1)
if not os.path.exists(os.environ['GOOGLE_APPLICATION_CREDENTIALS']):
    print("The GOOGLE_APPLICATION_CREDENTIALS file does not exist.\n")
    sys.exit(1)

if len(sys.argv) < 3:
    print('You must provide parameters for the Google Cloud project ID and Storage bucket')
    print('python3 ' + sys.argv[0] + ' [PROJECT_NAME] [BUCKET_NAME]')
    sys.exit(1)

project_name = sys.argv[1]
bucket_name = sys.argv[2]

# Set up our GCS, BigQuery, and Natural Language clients
# (nl_client is created as in the lab template but is not referenced below).
storage_client = storage.Client()
bq_client = bigquery.Client(project=project_name)
nl_client = language.LanguageServiceClient()

# Set up client objects for the vision and translate_v2 API Libraries.
# They are created once here and reused for every image.
vision_client = vision.ImageAnnotatorClient()
translate_client = translate_v2.Client()

# Setup the BigQuery dataset and table objects
dataset_ref = bq_client.dataset('image_classification_dataset')
dataset = bigquery.Dataset(dataset_ref)
table_ref = dataset.table('image_text_detail')
table = bq_client.get_table(table_ref)

# Create an array to store results data to be inserted into the BigQuery table
rows_for_bq = []

# Get the bucket and a listing of the files it contains
bucket = storage_client.bucket(bucket_name)
files = bucket.list_blobs()

print('Processing image files from GCS. This will take a few minutes..')

# Process files from Cloud Storage and save the result to send to BigQuery
for file in files:
    # Only image objects are analysed; everything else (including the .txt
    # results this loop uploads) is ignored.
    if not file.name.endswith(('jpg', 'png')):
        continue

    file_content = file.download_as_string()

    # Build a Vision API image object from the raw bytes and detect its text
    image = vision_v1.types.Image(content=file_content)
    response = vision_client.text_detection(image=image)

    # Nothing to save, translate or record when no text was detected; indexing
    # text_annotations[0] on an empty result would raise IndexError.
    if not response.text_annotations:
        print('No text found in ' + file.name + ', skipping.')
        continue

    # The first annotation supplies the text (description) and its locale
    desc = response.text_annotations[0].description
    locale = response.text_annotations[0].locale

    # Save the detected text as <object name without extension>.txt in the
    # same bucket. splitext strips only the final extension, so names that
    # contain additional dots are not truncated.
    text_file_name = os.path.splitext(file.name)[0] + '.txt'
    text_blob = bucket.blob(text_file_name)
    text_blob.upload_from_string(desc, content_type='text/plain')

    # Text already in the target language is kept as is; anything else is
    # passed to the Translation API.
    if locale == TARGET_LANGUAGE:
        translated_text = desc
    else:
        translation = translate_client.translate(desc, target_language=TARGET_LANGUAGE)
        translated_text = translation['translatedText']
    print(translated_text)

    # Save the original text read from the image, the locale, the translated
    # text, and the filename
    rows_for_bq.append((desc, locale, translated_text, file.name))

print('Writing Vision API image data to BigQuery...')
# Write original text, locale and translated text to BQ.
# The insert is skipped when no image produced a row.
if rows_for_bq:
    errors = bq_client.insert_rows(table, rows_for_bq)
    # Explicit check instead of assert: assert statements are removed when
    # Python runs with -O, and this way the error details are shown.
    if errors:
        print('BigQuery insert reported errors:')
        print(errors)
        sys.exit(1)
else:
    print('No image text was found, nothing to write to BigQuery.')

In [None]:
# Run the partially completed script to check your progress to make sure you are on the right track.
# The script takes two arguments: the Google Cloud project ID and the Storage bucket name.

python3 analyze-images-v2.py $DEVSHELL_PROJECT_ID qwiklabs-gcp-04-4c92716181fb

## Task 4. Modify the Python script to translate the text using the Translation API

In [None]:
# Modify the second part of the Python script to identify any non-French text data found by the Vision API and
# use the Translation API to translate the original text into French.
# Then run the script again with the project ID and the bucket name as arguments.

python3 analyze-images-v2.py $DEVSHELL_PROJECT_ID qwiklabs-gcp-04-4c92716181fb

## Task 5. Identify the most common language used in the signs in the dataset

In [None]:
# Remove the comment character from the line at the end of the script that uploads the data to BigQuery,
# then run the script once more with the project ID and the bucket name as arguments.

python3 analyze-images-v2.py $DEVSHELL_PROJECT_ID qwiklabs-gcp-04-4c92716181fb

In [None]:
# Confirm that all necessary data has been loaded into BigQuery by running a query that counts the number of times it sees each separate language

# bq query --use_legacy_sql=false "SELECT locale,COUNT(locale) as lcount FROM image_classification_dataset.image_text_detail GROUP BY locale ORDER BY lcount DESC"

# or BQ -> +SQL
# Count the rows recorded for each locale, most frequent locale first.
SELECT
  locale,
  COUNT(locale) AS lcount
FROM
  image_classification_dataset.image_text_detail
GROUP BY
  locale
ORDER BY
  lcount DESC