In [None]:
import requests
import pandas as pd
import time
from functools import lru_cache
import threading
pd.set_option('display.max_colwidth', None)
import os

In [None]:
def time_logger(func):
  """Decorator that prints the current local time before each call to ``func``.

  The timestamp is printed as ``HH:MM:SS``. The wrapped function is then
  called with the original positional and keyword arguments.

  Args:
    func: the callable to wrap.

  Returns:
    A wrapper that prints the timestamp, calls ``func`` and returns whatever
    ``func`` returns.
  """
  # Local import: only ``lru_cache`` is imported from functools at file level.
  from functools import wraps

  @wraps(func)  # keep func's __name__ / __doc__ on the wrapper
  def wrapper(*args, **kwargs):
    print(time.strftime("%H:%M:%S",time.localtime()))
    # Hand the result back to the caller; without the ``return`` every
    # decorated function would silently evaluate to None.
    return func(*args, **kwargs)
  return wrapper

In [None]:
def get_progress(filepath):
  """Return the number of lines in the text file at ``filepath``.

  The file is read as UTF-8. This is a best-effort count: if the file cannot
  be opened or read (for example because it is missing), 0 is returned
  instead of raising.

  Args:
    filepath: path of the file whose lines are counted.

  Returns:
    int: the line count, or 0 on any read failure.
  """
  try:
    with open(filepath, 'r', encoding='utf-8') as current_file:
      # Stream the file line by line rather than loading every line into
      # memory just to count them.
      return sum(1 for _ in current_file)
  except Exception:
    # ``Exception`` (not a bare ``except``) keeps the best-effort behaviour
    # while letting KeyboardInterrupt / SystemExit propagate.
    return 0

In [None]:
# Append the buffered items to the output file, one item per line
@time_logger
def write_list_to_file(list_to_write, out_filepath):
  """Append every item of ``list_to_write`` to ``out_filepath`` as its own line.

  Each item is converted with ``str()`` and terminated with a newline. The
  file is opened in append mode with UTF-8 encoding, so existing content is
  kept.

  Args:
    list_to_write: iterable of items to write.
    out_filepath: path of the file to append to.
  """
  with open(out_filepath, mode='a', encoding='utf-8') as out_file:
    rendered_lines = list(map(lambda item: str(item) + "\n", list_to_write))
    out_file.writelines(rendered_lines)

In [None]:
# Call the TranslatePlus API
@lru_cache(maxsize=5)
def translate_api(session, text, retry = True):
  """Translate ``text`` (source "en", target "mt") through the TranslatePlus API.

  The API key is read from the ``TRANSLATEPLUS_API_KEY`` environment variable
  so that no credential is stored in the notebook.

  Results are memoised by ``lru_cache`` on ``(session, text, retry)``, so all
  arguments must be hashable. Note that the failure placeholder is cached
  like any other result.

  Args:
    session: object exposing a requests-style
      ``post(url, json=..., headers=..., timeout=...)`` method whose return
      value has a ``.json()`` method.
    text: the sentence to translate.
    retry: when truthy, a failed request is attempted one more time after a
      one-second pause.

  Returns:
    The translated string taken from ``response['translations']['translation']``,
    or ``"COULD NOT TRANSLATE: [<text>]"`` when every attempt failed.

  Raises:
    RuntimeError: if ``TRANSLATEPLUS_API_KEY`` is not set.
  """
  api_key = os.environ.get("TRANSLATEPLUS_API_KEY")
  if not api_key:
    # Fail loudly (outside the retry/except logic) instead of sending
    # unauthenticated requests and writing placeholder lines for every sentence.
    raise RuntimeError("Set the TRANSLATEPLUS_API_KEY environment variable before calling translate_api")

  translate_url = "https://api.translateplus.io/v1/translate"
  header = {"X-API-KEY": api_key}
  json_obj = {
    "text": text,
    "source": "en",
    "target": "mt"
  }

  attempts = 2 if retry else 1
  for attempt in range(attempts):
    try:
      # The timeout keeps a stalled connection from blocking the calling thread forever.
      response = session.post(translate_url, json=json_obj, headers=header, timeout=30).json()
      return response['translations']['translation']
    except Exception:
      # ``Exception`` rather than a bare ``except`` so Ctrl-C can still stop the run.
      # Network errors, non-JSON bodies and unexpected payload shapes all land here.
      if attempt + 1 < attempts:
        time.sleep(1)
  return f"COULD NOT TRANSLATE: [{text}]"

In [None]:
@time_logger
def buffer_translate(input_list, buffer_size, out_filepath):
  """Translate every sentence in ``input_list`` and append the results to ``out_filepath``.

  Translations are collected in an in-memory buffer. Whenever the buffer
  holds at least ``buffer_size`` entries it is appended to the output file
  via ``write_list_to_file`` and a progress line is printed. Whatever is left
  in the buffer after the last sentence is written at the end.

  Args:
    input_list: sequence of sentences to translate (``len()`` is taken of it).
    buffer_size: number of translations to accumulate before each flush.
    out_filepath: path of the output file (opened in append mode by
      ``write_list_to_file``).
  """
  total_sentences = len(input_list)
  buffers_flushed = 0
  buffer_list = []

  # The context manager closes the session (and its pooled connections) when
  # the loop finishes or raises, instead of leaving it open.
  with requests.Session() as session:
    for sentence in input_list:
      buffer_list.append(translate_api(session, sentence))
      if len(buffer_list) >= buffer_size:
        write_list_to_file(buffer_list, out_filepath)
        buffer_list.clear()
        buffers_flushed += 1
        print(out_filepath + ": " + str(int((100*buffers_flushed*buffer_size)/total_sentences)) + "% complete")

  # Final flush of the partially filled buffer. This always runs: the loop
  # never breaks, so it is written as a plain statement rather than ``for ... else``.
  write_list_to_file(buffer_list, out_filepath)
  print(out_filepath + " DONE")

In [None]:
# Input file with the English sentences and the directory that receives the
# per-batch translation files. Each location can be overridden with an
# environment variable of the same name, so the notebook is not tied to one
# machine's directory layout. The defaults are the original paths.
BLEU_ENG_FILEPATH = os.environ.get('BLEU_ENG_FILEPATH', 'C:\\Users\\bijgu\\Desktop\\Test\\bleu\\common.en')
BLEU_TRANS_FILEPATH = os.environ.get('BLEU_TRANS_FILEPATH', 'C:\\Users\\bijgu\\Desktop\\Test\\bleu\\')

In [None]:
# Load the English source sentences: one list entry per line of the input
# file, with the trailing newline removed.
eng_text = []
with open(BLEU_ENG_FILEPATH, mode='r', encoding='utf-8') as eng_file:
  eng_text = [raw_line.rstrip("\n") for raw_line in eng_file]

In [None]:
# Split the sentences into four slices of 2000 entries each; the fifth batch
# takes everything from index 8000 onwards.
batch_1, batch_2, batch_3, batch_4 = (
  eng_text[start:start + 2000] for start in range(0, 8000, 2000)
)
batch_5 = eng_text[8000:]


In [None]:
# One worker thread per batch. Every worker runs buffer_translate with a
# buffer size of 200 and writes to its own batch_<n>.txt file inside
# BLEU_TRANS_FILEPATH.
t1, t2, t3, t4, t5 = [
  threading.Thread(
    target=buffer_translate,
    args=(batch, 200, os.path.join(BLEU_TRANS_FILEPATH, file_name)),
  )
  for batch, file_name in (
    (batch_1, 'batch_1.txt'),
    (batch_2, 'batch_2.txt'),
    (batch_3, 'batch_3.txt'),
    (batch_4, 'batch_4.txt'),
    (batch_5, 'batch_5.txt'),
  )
]

In [None]:
workers = (t1, t2, t3, t4, t5)

# Launch all workers first so they run concurrently ...
for worker in workers:
  worker.start()

# ... then block until every one of them has finished.
for worker in workers:
  worker.join()

21:18:08
21:18:08
21:18:08
21:18:08
21:18:08
21:19:12
C:\Users\bijgu\Desktop\Test\bleu\batch_1.txt: 10% complete
21:19:17
C:\Users\bijgu\Desktop\Test\bleu\batch_4.txt: 10% complete
21:19:18
C:\Users\bijgu\Desktop\Test\bleu\batch_2.txt: 10% complete
21:19:18
C:\Users\bijgu\Desktop\Test\bleu\batch_5.txt: 4% complete
21:19:23
C:\Users\bijgu\Desktop\Test\bleu\batch_3.txt: 10% complete
21:20:31
C:\Users\bijgu\Desktop\Test\bleu\batch_2.txt: 20% complete
21:20:35
C:\Users\bijgu\Desktop\Test\bleu\batch_4.txt: 20% complete
21:20:35
C:\Users\bijgu\Desktop\Test\bleu\batch_3.txt: 20% complete
21:20:40
C:\Users\bijgu\Desktop\Test\bleu\batch_5.txt: 9% complete
21:20:41
C:\Users\bijgu\Desktop\Test\bleu\batch_1.txt: 20% complete
21:21:44
C:\Users\bijgu\Desktop\Test\bleu\batch_2.txt: 30% complete
21:21:52
C:\Users\bijgu\Desktop\Test\bleu\batch_4.txt: 30% complete
21:21:56
C:\Users\bijgu\Desktop\Test\bleu\batch_3.txt: 30% complete
21:21:59
C:\Users\bijgu\Desktop\Test\bleu\batch_5.txt: 13% complete
21:22

In [None]:
# Marker cell: prints a confirmation message when this cell is executed.
print("Done!")

Done!


In [None]:
def merge_file_content(current_list, file):
  """Append the lines of ``file`` to ``current_list`` and return that list.

  The file is read as UTF-8 and newline characters are stripped from both
  ends of every line. ``current_list`` is extended in place; the returned
  object is the same list that was passed in.

  Args:
    current_list: list that receives the lines.
    file: path of the text file to read.

  Returns:
    ``current_list`` after the file's lines have been appended.
  """
  with open(file, mode='r', encoding='utf-8') as i_file:
    stripped_lines = [raw_line.strip("\n") for raw_line in i_file]
  current_list.extend(stripped_lines)
  return current_list

list_orig = []

# Collect the per-batch output files from the translation directory.
# NOTE: the order is lexicographic, so a "batch_10.txt" would sort before "batch_2.txt".
file_list = sorted(x for x in os.listdir(BLEU_TRANS_FILEPATH) if x.endswith('.txt'))

for txt_file in file_list:
  # os.listdir returns bare file names; join them with the directory so the
  # files are found regardless of the notebook's current working directory.
  merge_file_content(list_orig, os.path.join(BLEU_TRANS_FILEPATH, txt_file))

# Write the merged translations, one per line, into a single file in the
# current working directory.
with open('translated_api.mt','w',encoding='utf-8') as o_file:
  o_file.writelines([x+"\n" for x in list_orig])

In [None]:
# Number of merged lines held in list_orig (shown as the cell's result).
len(list_orig)

12322