In [28]:
from typing import List
import pandas as pd
import json as js
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../genuine-essence-364222-54dfa19feae0.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../genuine-essence-364222-e5a8b39d8c83.json"
"""imports for translator api"""
import six
from google.cloud import translate_v2 as translate

"""imports for BigQuery"""
from google.cloud import bigquery
from google.cloud.exceptions import NotFound

"""imports for fast api"""
from fastapi import FastAPI


In [29]:
def get_translation(language_id: str, text: str) -> str:
    """Translates text into the target language using google api"""

    translate_client = translate.Client()

    if isinstance(text, six.binary_type):
        text = text.decode("utf-8")

    result = translate_client.translate(text, target_language=language_id)
    return result["translatedText"]

In [30]:
def get_audio(language_id: str,text: str):
    """Synthesizes speech from the input string of text."""
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.SynthesisInput(text=text)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_id
    #     name="en-US-Standard-C",
    #     ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )

    # we might be able to get another kind of response by changing this
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    response = client.synthesize_speech(
        request={"input": input_text, "voice": voice, "audio_config": audio_config}
    )

    return response.audio_content

In [31]:
def get_list_of_words(input_file: str) -> List[str]:
    """Get the list of words that we will use"""
    f = open(input_file,"r")
    words = []
    for line in f.readlines():
        words.append(line.rstrip('\n'))
    return words

In [32]:
def get_list_of_languages() -> dict:
    """Get the list of languages"""
    f = open('../data/lan-dict.json')
    data = js.load(f)
    f.close()
    return data

In [33]:
def get_list_of_language_ids() -> List[str]:
    """Get the list of ids of languages"""
    ids = []
    f = open('../data/lan-dict.json')
    data = js.load(f)
    f.close()
    return data.values()


In [34]:
def get_table(type: str, input_file: str) -> pd.DataFrame:
    """call google api to make the data of translations (type is either text or audio)"""
    langs = get_list_of_languages()
    words = get_list_of_words(input_file)
    f = {}
    if type == 'text':
        for (lang,id) in langs.items():
            f[lang] = []
            for word in words:
                f[lang].append(get_translation(id,word))        
    elif type == 'audio':
        print("bye")
    else:
        raise ValueError()
    return pd.DataFrame(f)


In [35]:
def add_table(input_file: str, output_file: str):
    """make the data table into a csv file"""
    d = get_table("text", input_file)
    d.to_csv(output_file,index=False)
#    d = get_table("audio")
#    d.to_csv('../data/audio.csv',index=False)
# add_table('../data/essentials.txt','../data/essentials.csv')

In [25]:
def make_dataset():
    """make data set with BigQuery for the first time if it doesn't exist already"""

   # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to create.
    dataset_id = "genuine-essence-364222.basic_words"

    # Construct a full Dataset object to send to the API.
    dataset = bigquery.Dataset(dataset_id)

    # TODO(developer): Specify the geographic location where the dataset should reside.
    dataset.location = "EU"

    # Send the dataset to the API for creation, with an explicit timeout.
    # Raises google.api_core.exceptions.Conflict if the Dataset already
    # exists within the project.

    try:
        client.get_dataset(dataset_id)  # Make an API request.
        print("Dataset {} already exists".format(dataset_id))
    except NotFound:
        print("Dataset {} is not found".format(dataset_id))
        dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
        print("Created dataset {}.{}".format(client.project, dataset.dataset_id))
make_dataset()

Dataset genuine-essence-364222.basic_words already exists


In [56]:
from google.cloud.bigquery.schema import SchemaField

def create_words_table():
    client = bigquery.Client()

    # TODO(dev): Change table_id to the full name of the table you want to create.
    table_id = "genuine-essence-364222.basic_words.words"

    langs = get_list_of_language_ids()
    schema = []
    for l in langs:
        schema.append(bigquery.SchemaField(l,"STRING",mode="REQUIRED"))
    # schema = [
    #     bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
    #     bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    # ]
    print(schema)

    client.delete_table(table_id, not_found_ok=True)
    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )

# create_words_table()

[SchemaField('af', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('sq', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('am', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('ar', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('hy', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('as', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('ay', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('az', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('bm', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('eu', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('be', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('bn', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('bho', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('bs', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('bg', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('ca', 'STRING', 'REQUIRED', None, None, (