In [10]:
import os
import json
import pandas as pd
from dsutils.de.files import xls_to_csv, get_data_path, get_datafile_path
from google.oauth2 import service_account

# Imports the Google Cloud Translation library
from google.cloud import translate

# Import google cloud storage:
from google.cloud import storage

In [11]:
## Input
glossary_path = get_datafile_path("google_glossary.csv")
path_to_json_key = get_datafile_path("vsi-esv-ab828a611479.json")

## Output
bucket_name = 'glossary_bucket' # store the glossaries in the "bucket name" bucket/folder/directory
glossary_name = "EFSA_EPPO_NCBI_glossary.csv" #the name the csv file will have after being stored in google cloud

## Env Variables
with open(path_to_json_key, 'r') as file:
    json_authentication_file = json.load(file)

project_id = json_authentication_file['project_id']
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = path_to_json_key

# Load credentials

In [12]:
credentials = service_account.Credentials.from_service_account_file(
    path_to_json_key
)

#scoped_credentials = credentials.with_scopes(
#    ['https://www.googleapis.com/auth/cloud-platform'])
credentials

<google.oauth2.service_account.Credentials at 0x7f7ad4db6e90>

In [13]:
project_id

'vsi-esv'

# Simple translation test

In [14]:
# snippet adapted from 
# https://cloud.google.com/translate/docs/advanced/translating-text-v3

# Initialize Translation client
def test_translate_text_1(
    text="Sample text.",  # text for the test
    project_id = project_id,            # your project ID
    source_lang='en',
    target_lang='fr'
    ):
    """Translating Text."""

    client = translate.TranslationServiceClient()

    location = "global"

    parent = f"projects/{project_id}/locations/{location}"

    # Translate text from English to French
    # Detail on supported types can be found here:
    # https://cloud.google.com/translate/docs/supported-formats
    response = client.translate_text(
        request={
            "parent": parent,
            "contents": [text], # you can also send several strings!
            "mime_type": "text/plain",  # other types: text/plain, text/html
            "source_language_code": source_lang,
            "target_language_code": target_lang,
        }
    )

    # Display the translation for each input text provided
    for translation in response.translations:
        print("Translated text: {}".format(translation.translated_text))


In [15]:
## Test
test_translate_text_1(
    text = "the weather is bad today.",
    project_id= project_id
)


Translated text: le temps est mauvais aujourd'hui.


## Create glossary

Format: equivalent term set CSV

In [16]:
# Using Equivalent term sets 
# https://cloud.google.com/translate/docs/advanced/glossary#equivalent_term_sets_csv

In [17]:
glossary_df = pd.read_csv(
    glossary_path
    )
#glossary_df

## Upload csv to google cloud storage

In [18]:
# to upload a file to google cloud storage, I need the project to give me access to a google storage bucket!

# Buckets are like folders/directories, but for Google storage
# read more about buckets:
# https://cloud.google.com/storage/docs/buckets

# how to create buckets from the command line:
# https://cloud.google.com/storage/docs/creating-buckets

# to use the credentials in a command line session, run in the terminal:
# `export GOOGLE_APPLICATION_CREDENTIALS="KEY_PATH" `
# where "KEY_PATH" is a path to a JSON with your credentials
# (from 
# https://cloud.google.com/translate/docs/setup#using_the_service_account_key_file_in_your_environment
# )


client = storage.Client()
## Upload
#legacy: bucket = client.get_bucket(bucket_name) 
bucket = client.bucket(bucket_name)
blob = bucket.blob(glossary_name) 
blob.upload_from_filename(
    glossary_path
    )

csv_gs_URI =  "gs://" + bucket_name + "/"+ glossary_name # URL of the google storage file

Forbidden: 403 POST https://storage.googleapis.com/upload/storage/v1/b/glossary_bucket/o?uploadType=multipart: {
  "error": {
    "code": 403,
    "message": "elisa-lubrini@vsi-esv.iam.gserviceaccount.com does not have storage.objects.create access to the Google Cloud Storage object.",
    "errors": [
      {
        "message": "elisa-lubrini@vsi-esv.iam.gserviceaccount.com does not have storage.objects.create access to the Google Cloud Storage object.",
        "domain": "global",
        "reason": "forbidden"
      }
    ]
  }
}
: ('Request failed with status code', 403, 'Expected one of', <HTTPStatus.OK: 200>)

## Create a test glossary

In [None]:
# adapted from
#https://cloud.google.com/translate/docs/advanced/glossary#equivalent_term_sets_glossary

In [None]:
from google.cloud import translate_v3 as translate
languages = glossary_df.columns

def test_create_glossary_1(
    project_id="YOUR_PROJECT_ID",
    input_uri="YOUR_INPUT_URI", # where is the glossary in your google cloud storage
    glossary_id="YOUR_GLOSSARY_ID", # your name for the glossary?
    timeout=180,
    languages=['en', 'fr'],
):
    """
    Create a equivalent term sets glossary. Glossary can be words or
    short phrases (usually fewer than five words).
    https://cloud.google.com/translate/docs/advanced/glossary#format-glossary
    """
    client = translate.TranslationServiceClient()

    # Supported language codes: https://cloud.google.com/translate/docs/languages

    
    location = "us-central1"  # The location of the glossary

    name = client.glossary_path(project_id, location, glossary_id)
    language_codes_set = translate.types.Glossary.LanguageCodesSet(
        language_codes=languages
    )

    gcs_source = translate.types.GcsSource(input_uri=input_uri)

    input_config = translate.types.GlossaryInputConfig(gcs_source=gcs_source)

    glossary = translate.types.Glossary(
        name=name, language_codes_set=language_codes_set, input_config=input_config
    )

    parent = f"projects/{project_id}/locations/{location}"
    # glossary is a custom dictionary Translation API uses
    # to translate the domain-specific terminology.
    operation = client.create_glossary(parent=parent, glossary=glossary)

    result = operation.result(timeout)
    print("Created: {}".format(result.name))
    print("Input Uri: {}".format(result.input_config.gcs_source.input_uri))

In [None]:
test_create_glossary_1(
    project_id=project_id,
    input_uri= csv_gs_URI, # URL of the csv in google cloud
    glossary_id="test_1" #name for the created glossary
)

NameError: name 'csv_gs_URI' is not defined

# Translate text with a glossary

In [None]:
# adapted from
# https://cloud.google.com/translate/docs/advanced/glossary#v3

In [None]:

from google.cloud import translate


def test_translate_text_with_glossary(
    text="YOUR_TEXT_TO_TRANSLATE",
    project_id="YOUR_PROJECT_ID",
    glossary_id="YOUR_GLOSSARY_ID",
    source_language='en',
    target_language='en',
):
    """Translates a given text using a glossary."""

    client = translate.TranslationServiceClient()
    location = "us-central1"
    parent = f"projects/{project_id}/locations/{location}"

    glossary = client.glossary_path(
        project_id, "us-central1", glossary_id  # The location of the glossary
    )

    glossary_config = translate.TranslateTextGlossaryConfig(glossary=glossary)

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    response = client.translate_text(
        request={
            "contents": [text],
            "target_language_code": target_language,
            "source_language_code": source_language,
            "parent": parent,
            "glossary_config": glossary_config            
        }
    )

    print("Translated text: \n")
    for translation in response.glossary_translations:
        print("\t {}".format(translation.translated_text))



: 

In [None]:
test_translate_text_with_glossary(
    text= "La chrysomèle rayée du concombre est pas très connue",
    project_id= project_id,
    glossary_id= csv_gs_URI,
    source_language='fr',
    target_language='en'
)

: 

: 