# Text Augmentation via Google Translation API

## Preparation

In [None]:
import os  
from google.cloud import  translate

In [None]:
# When calling the Translation API from a local machine, the environment variable
# GOOGLE_APPLICATION_CREDENTIALS must point at your credentials JSON file.
# Replace the placeholder path below with the real location of that file.
# NOTE(review): recent google-cloud-translate releases appear to expose this
# client class under `translate_v2` -- confirm against the installed version.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS="path/to/json_file")
client = translate.Client()

### Checking the translation

In [None]:
# Smoke test: send one short English sentence through the client and ask for
# Japanese, using the neural machine translation ('nmt') model.
trans_from, trans_to = 'en', 'ja'
text = u'Hello, world!'
translated_dict = client.translate(
    text,
    source_language=trans_from,
    target_language=trans_to,
    model='nmt',
)

In [None]:
# Show only the translated string from the response dict of the check request.
translated_dict['translatedText']

## Text Augmentor

In [None]:
import time
import sys

def _throttle(elapsed, char_len, up_limit_per_char, margin=5):
    """Pause after a request so the per-character rate stays under the quota.

    Args:
        elapsed: Seconds attributed to the request(s) covering ``char_len``.
        char_len: Number of characters counted so far and not yet "paid off".
        up_limit_per_char: Minimum number of seconds each character must take.
        margin: Extra characters added to the pause as a safety buffer.

    Returns:
        The character count to carry into the next measurement: 0 after a
        full pause, otherwise ``char_len`` unchanged.
    """
    if char_len <= 0:
        # Nothing was counted, so there is no rate to compute (and dividing
        # by zero here would wrongly mark a finished item as failed).
        return 0
    trans_time_per_char = elapsed / char_len
    if trans_time_per_char < up_limit_per_char:
        # Faster than allowed: the pause grows with both the number of
        # characters sent and how quickly they were processed.
        time.sleep((up_limit_per_char + trans_time_per_char) * (char_len + margin))
        return 0
    time.sleep(up_limit_per_char * char_len)
    return char_len


def text_augmentor(text_series, src_dst_lang, mid_lang, up_limit_per_100s=100000):
    """Augment texts by round-trip translation.

    Every text is translated ``src_dst_lang`` -> ``mid_lang`` and then back to
    ``src_dst_lang`` using the module-level ``client``. The function sleeps
    after each request so that the character quota is respected.

    Args:
        text_series: Sized iterable of texts (e.g. a list or pandas Series).
        src_dst_lang: Language code of the input texts and of the final output.
        mid_lang: Language code of the intermediate translation.
        up_limit_per_100s: Upper limit of translated characters per 100 seconds
            (Ref: https://cloud.google.com/translate/quotas).

    Returns:
        A tuple ``(translated, failed_idx_list)``. ``translated`` is a pandas
        Series named ``"translated_mid_" + mid_lang`` that holds only the
        successfully round-tripped texts, in input order with a fresh 0..k-1
        index. ``failed_idx_list`` holds the enumeration positions of the
        texts that could not be translated; use it to realign the output with
        the input.
    """
    # Imported here so the function does not depend on another cell having
    # imported pandas, and so a missing pandas fails before any API call.
    import pandas as pd

    # Float literal keeps true division even under Python 2.
    up_limit_per_char = 100.0 / up_limit_per_100s
    failed_idx_list = []
    translated_list = []
    for idx, text in enumerate(text_series):
        start_time = time.time()
        try:
            char_len = len(text)
            # 1st translation (source language -> intermediate language)
            mid_translated_text = client.translate(text,
                                                   source_language=src_dst_lang,
                                                   target_language=mid_lang,
                                                   model='nmt')['translatedText']
            first_time = time.time()
            char_len = _throttle(first_time - start_time, char_len, up_limit_per_char)

            # 2nd translation (intermediate language -> back to source language)
            char_len += len(mid_translated_text)
            translated_text = client.translate(mid_translated_text,
                                               source_language=mid_lang,
                                               target_language=src_dst_lang,
                                               model='nmt')['translatedText']
            translated_list.append(translated_text)
            # Measured from the end of the first request, so it also contains
            # the pause taken above; this only makes the throttle more cautious.
            _throttle(time.time() - first_time, char_len, up_limit_per_char)
        except Exception:
            # Best effort: remember the position of the failed text and go on.
            # KeyboardInterrupt/SystemExit are deliberately not caught so a
            # long run can still be stopped.
            failed_idx_list.append(idx)

        sys.stdout.write("\r {:d} of {:d} finished".format(idx+1, len(text_series)))
        sys.stdout.flush()

    column_name = "translated_mid_" + mid_lang
    translated_df = pd.DataFrame(translated_list, columns=[column_name])

    return translated_df[column_name], failed_idx_list