In [44]:
from google.cloud import translate

# Build the Translation client from an explicit service-account key file.
# `from_service_account_json` is a classmethod factory, so it is invoked on the
# class itself. Calling it on `translate.Client()` would first construct a
# throwaway client from the ambient default credentials (and fail outright
# when none are configured) before the key file is even read.
translate_client = translate.Client.from_service_account_json(
    '/home/jupyter/.config/gcloud/phonetic-srch-9869e4839c17.json')


# Language codes passed to translate() as its source_lang / dest_lang arguments.
LANG_ARABIC = 'ar'
LANG_ENGLISH = 'en'

def chunks(l, n):
    """Lazily yield consecutive slices of ``l`` that are at most ``n`` items long.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    starts = range(0, len(l), n)
    for start in starts:
        stop = start + n
        yield l[start:stop]

def translate(values, source_lang=None, dest_lang=None, chunk_size=2):
    """Translate a list of strings in small batches via ``translate_client``.

    Every input string is lower-cased before it is sent, so the ``'input'``
    field of each returned record holds the lower-cased text.

    Note: this function reuses the name ``translate``, which this notebook
    also imports as a module from ``google.cloud``; once this definition has
    run, ``translate`` refers to the function.

    Args:
        values: list of strings to translate.
        source_lang: language code of the inputs, forwarded as
            ``source_language``.
        dest_lang: language code to translate into, forwarded as
            ``target_language``.
        chunk_size: number of strings sent per client call. Defaults to 2,
            the batch size this function previously hard-coded.

    Returns:
        A flat list with the client's result records for all batches, in
        input order.
    """
    lowered = [value.lower() for value in values]
    translation = []
    # Iterate the batches lazily; there is no need to materialise them first.
    for chunk in chunks(lowered, chunk_size):
        translation.extend(translate_client.translate(
            chunk,
            source_language=source_lang,
            target_language=dest_lang,
            model='nmt'))
    return translation

In [33]:
import psycopg2
from configparser import ConfigParser

def config(filename='prepare_data.ini', section='phonetic'):
    """Read one section of an ini file and return its options as a dict.

    Args:
        filename: path of the ini file to parse.
        section: name of the section whose options are returned.

    Returns:
        dict mapping each option name in ``section`` to its string value.

    Raises:
        Exception: if ``section`` is not present after reading ``filename``.
    """
    parser = ConfigParser()
    parser.read(filename)

    # Guard clause: bail out early when the requested section is absent.
    if not parser.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return {key: value for key, value in parser.items(section)}

def db_connect():
    """Open and return a new psycopg2 connection.

    The keyword arguments for ``psycopg2.connect`` are whatever ``config()``
    returns with its default arguments.
    """
    connection_kwargs = config()

    print('Connecting to the PostgreSQL database...')
    return psycopg2.connect(**connection_kwargs)

In [34]:
import pandas as pd

# Fetch the four most frequent Arabic given names that have no English
# translation yet, and keep their Arabic text in `arb_list`.
conn = db_connect()
try:
    query = """
        select * from ARABIC_GIVEN_NAMES_G_TRANSLATED 
        where eng_translated is null
        order by count desc limit 4
        """

    df = pd.read_sql(query, con=conn)
    arb_list = list(df['arb'])
except Exception as error:
    # Report the problem, then re-raise: swallowing it would let later cells
    # run with `df` / `arb_list` undefined or left over from an earlier run.
    print(error)
    raise
finally:
    # `conn` is always bound here because db_connect() ran before the `try`.
    conn.close()
    print('Database connection closed.')

Connecting to the PostgreSQL database...
Database connection closed.


In [45]:
# Show the Arabic names selected for translation, then the full result frame.
print(arb_list)
print(df)

['السيده', 'فاطمه', 'سيده', 'مارى']
       id     arb  count eng_translated
0   83449  السيده   5492           None
1   47171   فاطمه   1914           None
2   50053    سيده   1788           None
3  107317    مارى   1692           None


In [46]:
# Translate the fetched Arabic names into English.
eng_translation = translate(arb_list, source_lang=LANG_ARABIC, dest_lang=LANG_ENGLISH)

In [None]:
# Reverse direction: translate English strings into Arabic.
# NOTE(review): `eng_list` is not assigned in any visible cell and this cell
# shows no execution count, so running it as-is would presumably raise
# NameError - confirm where `eng_list` is meant to come from.
arab_translation = translate(eng_list, source_lang=LANG_ENGLISH, dest_lang=LANG_ARABIC)

In [47]:
# Inspect the raw result records returned by translate().
print(eng_translation)

[{'translatedText': 'Mrs', 'model': 'nmt', 'input': 'السيده'}, {'translatedText': 'Fatima', 'model': 'nmt', 'input': 'فاطمه'}, {'translatedText': 'Lady', 'model': 'nmt', 'input': 'سيده'}, {'translatedText': 'Marie', 'model': 'nmt', 'input': 'مارى'}]


In [64]:
from sqlalchemy import create_engine
# Obtain connections from db_connect() (settings read from the ini file)
# instead of embedding a user name and password in a URL inside the notebook.
# With `creator`, SQLAlchemy takes every DBAPI connection from that callable;
# the URL is only used to select the PostgreSQL dialect.
# NOTE(review): assumes the ini section targets the same database as the
# hard-coded URL this replaces - confirm before running. The password that
# was previously committed here should be rotated.
engine = create_engine('postgresql://', creator=db_connect)

# Copy each translation onto the rows whose Arabic text equals the
# (upper-cased) translated input; the English text is stored upper-cased.
for entry in eng_translation:
    row_mask = df['arb'] == entry['input'].upper()
    df.loc[row_mask, 'eng_translated'] = entry['translatedText'].upper()

print(df)
# Stage the updated frame in `temp_table`, replacing the table if it exists.
df.to_sql(
    name='temp_table',
    con=engine,
    if_exists="replace",
    index=False,
)

# NOTE(review): this statement is only assigned to `query`; no visible cell
# executes it. As written it contains two FROM clauses, and the INSERT names
# two columns (ARB, COUNT) while the SELECT produces one, so it looks like an
# unfinished draft - confirm the intended statement before running it.
query = """
    INSERT INTO ARABIC_GIVEN_NAMES_G_TRANSLATED(ARB, COUNT)
    SELECT eng_translated from temp_table
    FROM GIVEN_NAMES_MASTER M
    WHERE ARB IS NOT NULL AND ARB != ''
    GROUP BY M.ARB;
    """



#df.to_csv("/home/jupyter/output-data/given_names_sample_translated.csv", sep="|", index=False)
    

       id     arb  count eng_translated
0   83449  السيده   5492            MRS
1   47171   فاطمه   1914         FATIMA
2   50053    سيده   1788           LADY
3  107317    مارى   1692          MARIE


In [11]:
import os
# Diagnostic: show which credentials file the environment points at.
# `.get` keeps this cell from raising KeyError when the variable is unset.
print(os.environ.get('GOOGLE_APPLICATION_CREDENTIALS',
                     '<GOOGLE_APPLICATION_CREDENTIALS is not set>'))

#os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/home/jupyter/.config/gcloud/phonetic-srch-9869e4839c17.json' 


/home/jupyter/.config/gcloud/application_default_credentials.json
