# database related code

In [1]:
import psycopg2
import pandas as pd
from configparser import ConfigParser

def config(filename='prepare_data.ini', section='phonetic'):
    """Load one section of an INI file as a plain dictionary.

    Args:
        filename: path of the INI file to parse.
        section: name of the section whose options are returned.

    Returns:
        A dict mapping every option name in ``section`` to its string value.

    Raises:
        Exception: if ``section`` is not present in the parsed file.
    """
    ini_parser = ConfigParser()
    ini_parser.read(filename)

    # Guard clause: without the requested section there is nothing to return.
    if not ini_parser.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return {option: value for option, value in ini_parser.items(section)}

def db_connect():
    """Open and return a new connection to the PostgreSQL database server.

    The connection keyword arguments are whatever ``config()`` returns with
    its default file name and section.
    """
    connection = psycopg2.connect(**config())
    print('Connected to the PostgreSQL database...')

    return connection

def read_dataframe():
    """Load every usable Arabic/English name pair into one DataFrame.

    The query concatenates (UNION ALL) the rows of GIVEN_NAMES_MASTER,
    FAMILY_NAMES_MASTER and GIVEN_NAMES_DAN whose ARB and ENG columns are
    both non-null and non-empty.

    Returns:
        A pandas DataFrame indexed by the ``id`` column.

    Raises:
        Any error raised while running the query. It is printed, the
        connection is closed, and the error is then re-raised. Previously the
        error was swallowed and the function failed afterwards with an
        unrelated ``AttributeError`` on ``None.shape``.
    """
    conn = db_connect()
    try:
        query = """
            SELECT * FROM GIVEN_NAMES_MASTER
            WHERE ARB IS NOT NULL AND ENG IS NOT NULL AND ARB != '' AND ENG != ''
            UNION ALL SELECT * FROM FAMILY_NAMES_MASTER
            WHERE ARB IS NOT NULL AND ENG IS NOT NULL AND ARB != '' AND ENG != ''
            UNION ALL SELECT * FROM GIVEN_NAMES_DAN
            WHERE ARB IS NOT NULL AND ENG IS NOT NULL AND ARB != '' AND ENG != ''

            """

        result = pd.read_sql(query, con=conn, index_col='id')
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # Propagate the real failure instead of continuing with no result.
        raise
    finally:
        # Always release the connection, whether or not the query succeeded.
        conn.close()
        print('Database connection closed.')

    print("data retrieved from database with size: {size}".format(size= result.shape))
    return result

def read_eng_given_names():
    """Load English given names with their summed counts.

    The query sums COUNT per ENG value of GIVEN_NAMES_MASTER (only rows whose
    ENG and ARB are both non-null and non-empty) and keeps the names whose
    sum is at least 10. The inner query orders by the sum, descending.

    Returns:
        A pandas DataFrame with the columns selected by the query (the name
        and its summed count).

    Raises:
        Any error raised while running the query. It is printed, the
        connection is closed, and the error is then re-raised. Previously the
        error was swallowed and the function failed afterwards with an
        unrelated ``AttributeError`` on ``None.shape``.
    """
    conn = db_connect()
    try:
        query = """
            SELECT * FROM (
                SELECT NAME, SUM(COUNT) AS S FROM (
                    SELECT ENG AS NAME, COUNT FROM GIVEN_NAMES_MASTER
                    WHERE ENG IS NOT NULL AND ENG != '' AND ARB IS NOT NULL AND ARB != ''
                ) AS SUB GROUP BY NAME
                ORDER BY S DESC
            ) AS S2 WHERE S >= 10;
            """
        result = pd.read_sql(query, con=conn)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # Propagate the real failure instead of continuing with no result.
        raise
    finally:
        # Always release the connection, whether or not the query succeeded.
        conn.close()
        print('Database connection closed.')

    print("data retrieved from database with size: {size}".format(size= result.shape))
    return result

def read_arb_given_names():
    """Load Arabic given names with their summed counts.

    The query sums COUNT per ARB value of GIVEN_NAMES_MASTER (only rows whose
    ENG and ARB are both non-null and non-empty) and keeps the names whose
    sum is at least 10. The inner query orders by the sum, descending.

    Returns:
        A pandas DataFrame with the columns selected by the query (the name
        and its summed count).

    Raises:
        Any error raised while running the query. It is printed, the
        connection is closed, and the error is then re-raised. Previously the
        error was swallowed and the function failed afterwards with an
        unrelated ``AttributeError`` on ``None.shape``.
    """
    conn = db_connect()
    try:
        query = """
            SELECT * FROM (
                SELECT NAME, SUM(COUNT) as s FROM (
                    SELECT ARB AS NAME, COUNT FROM GIVEN_NAMES_MASTER
                    WHERE ENG IS NOT NULL AND ENG != '' AND ARB IS NOT NULL AND ARB != ''
                ) AS SUB GROUP BY NAME
                ORDER BY S DESC
            ) AS S2 WHERE S >= 10;
            """
        result = pd.read_sql(query, con=conn)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # Propagate the real failure instead of continuing with no result.
        raise
    finally:
        # Always release the connection, whether or not the query succeeded.
        conn.close()
        print('Database connection closed.')

    print("data retrieved from database with size: {size}".format(size= result.shape))
    return result

def read_eng_family_names():
    """Load English family names with their summed counts.

    The query sums COUNT per ENG value of FAMILY_NAMES_MASTER (only rows whose
    ENG and ARB are both non-null and non-empty) and keeps the names whose
    sum is at least 10. The inner query orders by the sum, descending.

    Returns:
        A pandas DataFrame with the columns selected by the query (the name
        and its summed count).

    Raises:
        Any error raised while running the query. It is printed, the
        connection is closed, and the error is then re-raised. Previously the
        error was swallowed and the function failed afterwards with an
        unrelated ``AttributeError`` on ``None.shape``.
    """
    conn = db_connect()
    try:
        query = """
            SELECT * FROM (
                SELECT NAME, SUM(COUNT) AS S FROM (
                    SELECT ENG AS NAME, COUNT FROM FAMILY_NAMES_MASTER
                    WHERE ENG IS NOT NULL AND ENG != '' AND ARB IS NOT NULL AND ARB != ''
                ) AS SUB GROUP BY NAME
                ORDER BY S DESC
            ) AS S2 WHERE S >= 10;
            """
        result = pd.read_sql(query, con=conn)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # Propagate the real failure instead of continuing with no result.
        raise
    finally:
        # Always release the connection, whether or not the query succeeded.
        conn.close()
        print('Database connection closed.')

    print("data retrieved from database with size: {size}".format(size= result.shape))
    return result

def read_arb_family_names():
    """Load Arabic family names with their summed counts.

    The query sums COUNT per ARB value of FAMILY_NAMES_MASTER (only rows whose
    ENG and ARB are both non-null and non-empty) and keeps the names whose
    sum is at least 10. The inner query orders by the sum, descending.

    Returns:
        A pandas DataFrame with the columns selected by the query (the name
        and its summed count).

    Raises:
        Any error raised while running the query. It is printed, the
        connection is closed, and the error is then re-raised. Previously the
        error was swallowed and the function failed afterwards with an
        unrelated ``AttributeError`` on ``None.shape``.
    """
    conn = db_connect()
    try:
        query = """
            SELECT * FROM (
                SELECT NAME, SUM(COUNT) AS S FROM (
                    SELECT ARB AS NAME, COUNT FROM FAMILY_NAMES_MASTER
                    WHERE ENG IS NOT NULL AND ENG != '' AND ARB IS NOT NULL AND ARB != ''
                ) AS S1 GROUP BY NAME
                ORDER BY S DESC
            ) AS S2 WHERE S >= 10;
            """
        result = pd.read_sql(query, con=conn)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # Propagate the real failure instead of continuing with no result.
        raise
    finally:
        # Always release the connection, whether or not the query succeeded.
        conn.close()
        print('Database connection closed.')

    print("data retrieved from database with size: {size}".format(size= result.shape))
    return result

# graph related functions

In [2]:
import networkx as nx

def generate_names_graph():
    """Rebuild the global names graph ``G`` from ``df`` and pickle it.

    If the global ``df`` is empty it is first loaded with
    ``read_dataframe()``. Each row adds its ``count`` to the weight of the
    undirected edge between the row's ``arb`` and ``eng`` values. The finished
    graph is written with ``nx.write_gpickle`` to a fixed path.
    """
    global df, G

    if df.empty:
        df = read_dataframe()

    G = nx.Graph()

    for _, record in df.iterrows():
        arb, eng, cnt = record['arb'], record['eng'], record['count']
        if not G.has_edge(arb, eng):
            # First time this pair is seen: create the edge with its count.
            G.add_edge(arb, eng, weight=cnt)
        else:
            # Pair already present: accumulate the count on the existing edge.
            G[arb][eng]['weight'] += cnt

    pickle_path = "/home/jupyter/notebooks/PoC/data-preparation/pickle/names_graph.gpickle"
    nx.write_gpickle(G, pickle_path)
    
def read_pickled_names_graph():
    """Load the pickled names graph from its fixed path into the global ``G``."""
    global G
    pickle_path = "/home/jupyter/notebooks/PoC/data-preparation/pickle/names_graph.gpickle"
    G = nx.read_gpickle(pickle_path)

# resetting dataframe, execute only if you need to get all data from db again

In [3]:
import pandas as pd
# Module-level caches. Each starts as its own empty DataFrame; the loader
# functions in this notebook test `.empty` to decide whether to query the
# database again.
df, eng_gnames, eng_fnames, arb_gnames, arb_fnames = (
    pd.DataFrame() for _ in range(5)
)

In [4]:
# Shared working state for the variation search: accumulated English and
# Arabic spellings, the list of noise tokens to exclude, and the seed name.
english_total_result, arabic_total_result, top_noise_data = [], [], []
origin_name = ''

# get variations functions

In [5]:
import numpy as np

def get_english_variantions(validate = False):
    """Extend the global English result list from the current Arabic results.

    For every name in ``arabic_total_result`` the English variants are looked
    up, filtered by ``get_top_frequency_names`` and appended to
    ``english_total_result``, which is then de-duplicated through a set (so
    its order is arbitrary unless ``validate`` sorts it).

    Args:
        validate: when true, every English name that fails
            ``validate_english_name_by_arabic_variations`` against the Arabic
            results is printed and removed, and the list is sorted.
    """
    global arabic_total_result, english_total_result

    # Iterate over a snapshot of the Arabic names.
    for arabic_name in list(arabic_total_result):
        candidates = get_english_variants(arabic_name)
        english_total_result.extend(get_top_frequency_names(candidates))
    english_total_result = list(set(english_total_result))

    if not validate:
        return

    # Walk a copy so that names can be removed from the real list safely.
    for english_name in english_total_result[:]:
        if validate_english_name_by_arabic_variations(english_name, arabic_total_result):
            continue
        print("removing name: {name}".format(name=english_name))
        english_total_result.remove(english_name)
    english_total_result.sort()
        
                
def get_arabic_variantions(validate = False):
    """Extend the global Arabic result list from the current English results.

    For every name in ``english_total_result`` the Arabic variants are looked
    up, filtered by ``get_top_frequency_names`` and appended to
    ``arabic_total_result``, which is then de-duplicated through a set (so
    its order is arbitrary unless ``validate`` sorts it).

    Args:
        validate: when true, every Arabic name that fails
            ``validate_arabic_name_by_english_variations`` against the English
            results is printed and removed, and the list is sorted.
    """
    global arabic_total_result, english_total_result

    # Iterate over a snapshot of the English names.
    for english_name in list(english_total_result):
        candidates = get_arabic_variants(english_name)
        arabic_total_result.extend(get_top_frequency_names(candidates))
    arabic_total_result = list(set(arabic_total_result))

    if not validate:
        return

    # Walk a copy so that names can be removed from the real list safely.
    for arabic_name in arabic_total_result[:]:
        if validate_arabic_name_by_english_variations(arabic_name, english_total_result):
            continue
        print("removing name: {name}".format(name=arabic_name))
        arabic_total_result.remove(arabic_name)

    arabic_total_result.sort()
             
"""
def get_english_variants(arabic_name):
    global df
    
    if df.empty:
        df = read_dataframe()   
    
    result = {}
    a = df[df['arb'] == arabic_name]
    b = a[['eng', 'count']].groupby('eng').sum()
    result = b.to_dict()['count']
    return result

def get_arabic_variants(english_name): 
    global df
    
    if df.empty:
        df = read_dataframe()
    
    result = {}
    a = df[df['eng'] == english_name]
    b = a[['arb', 'count']].groupby('arb').sum()
    result = b.to_dict()['count']
    
    return result
"""

def get_english_variants(name):
    """Return the neighbours of ``name`` in the global graph ``G`` with weights.

    Args:
        name: the node to look up (an Arabic spelling, going by the function
            name; the lookup itself does not check the script).

    Returns:
        A dict mapping each neighbour of ``name`` to the ``weight`` of the
        connecting edge, or an empty dict when ``name`` is not a node of ``G``.
    """
    # A string that is not a node would be treated by nx.edges() as an
    # iterable of nodes (its individual characters), returning edges of
    # unrelated single-character nodes. Look up the exact node only.
    if name not in G:
        return {}

    return {neighbour: attrs['weight'] for neighbour, attrs in G[name].items()}

def get_arabic_variants(name):
    """Return the neighbours of ``name`` in the global graph ``G`` with weights.

    Args:
        name: the node to look up (an English spelling, going by the function
            name; the lookup itself does not check the script).

    Returns:
        A dict mapping each neighbour of ``name`` to the ``weight`` of the
        connecting edge, or an empty dict when ``name`` is not a node of ``G``.
    """
    # A string that is not a node would be treated by nx.edges() as an
    # iterable of nodes (its individual characters), returning edges of
    # unrelated single-character nodes. Look up the exact node only.
    if name not in G:
        return {}

    return {neighbour: attrs['weight'] for neighbour, attrs in G[name].items()}



def validate_arabic_name_by_english_variations(arabic_name, valid_english_variations):
    """Decide whether an Arabic name is supported by the accepted English names.

    The weights of the name's English variants are split into those whose
    variant is in ``valid_english_variations`` and those whose variant is not.

    Returns:
        False when the total weight or the valid weight is below 3; otherwise
        True when the valid weight exceeds the invalid weight or exceeds 100,
        and False in every other case.
    """
    english_variations = get_english_variants(arabic_name)

    # Too little evidence overall to judge the name.
    if sum(english_variations.values()) < 3:
        return False

    valid_weight = 0
    invalid_weight = 0
    for variant, weight in english_variations.items():
        if variant not in valid_english_variations:
            invalid_weight += weight
        else:
            valid_weight += weight

    if valid_weight < 3:
        return False

    if valid_weight > invalid_weight or valid_weight > 100:
        return True
    return False
    
def validate_english_name_by_arabic_variations(english_name, valid_arabic_variations):
    """Decide whether an English name is supported by the accepted Arabic names.

    The weights of the name's Arabic variants are split into those whose
    variant is in ``valid_arabic_variations`` and those whose variant is not.

    Returns:
        False when the total weight or the valid weight is below 3; otherwise
        True when the valid weight exceeds the invalid weight or exceeds 100,
        and False in every other case.
    """
    arabic_variations = get_arabic_variants(english_name)

    # Too little evidence overall to judge the name.
    if sum(arabic_variations.values()) < 3:
        return False

    valid_weight = 0
    invalid_weight = 0
    for variant, weight in arabic_variations.items():
        if variant not in valid_arabic_variations:
            invalid_weight += weight
        else:
            valid_weight += weight

    if valid_weight < 3:
        return False

    if valid_weight > invalid_weight or valid_weight > 100:
        return True
    return False

def get_top_frequency_names(list):
    """Pick the dominant names out of a ``{name: count}`` mapping.

    A name is "matched" when it is longer than two characters, is not in the
    global ``top_noise_data`` list, and either its share of the total count
    (in percent) exceeds the threshold or its count is at least 100. Names
    that contain a matched name and are less than twice its length are added
    as composites, unless that would add more than three names.

    Args:
        list: mapping of name to count. The parameter name shadows the
            builtin; it is kept so existing callers keep working.

    Returns:
        A list of selected names in the mapping's iteration order. An empty
        list is returned for an empty mapping or a non-positive total count.
        These cases previously raised ``ValueError`` or ``ZeroDivisionError``.
    """
    # Alias right away so the shadowed builtin name is not used below.
    frequencies = list
    if not frequencies:
        return []

    total = sum(frequencies.values())
    if total <= 0:
        # No share can be computed without a positive total.
        return []

    lower_accepted_frequency = 100
    # The percentage threshold shrinks as the sample grows: 10 - total^(1/4),
    # which reaches 1 at total = 6561 (9^4) and is held at 1 beyond that.
    if total > 6561:
        threshold = 1
    else:
        threshold = 10 - total**(1./4.)

    matched_list = [key for key, val in frequencies.items()
                    if len(key) > 2 and
                    key not in top_noise_data and
                    (val / total * 100 > threshold or val >= lower_accepted_frequency)]

    matched_list_with_composite = [key for key in frequencies
                                   if any(match in key and len(key) < len(match) * 2 for match in matched_list)]

    # Too many composites suggests noise; fall back to the strict matches.
    if len(matched_list_with_composite) - len(matched_list) > 3:
        return matched_list

    return matched_list_with_composite

def get_random_names(names, number=20):
    """Draw random names from a frame, biased towards its first rows.

    Rows are split by position into three buckets: the first 1%, the rest of
    the first half, and the second half. About 50%, 30% and 20% of the sample
    (each rounded up) are drawn, with replacement, from the respective bucket.

    Args:
        names: DataFrame with a ``name`` column. Rows are addressed by
            position, so any index is accepted.
        number: how many names to return.

    Returns:
        A list of ``number`` values from the ``name`` column, or an empty
        list when ``names`` has no rows.
    """
    total_rows = len(names)
    if total_rows == 0:
        return []

    # Bucket boundaries are half-open [low, high) ranges that share their
    # edges, so every row is reachable. The top bucket always holds at least
    # one row so that frames shorter than 100 rows work.
    top_end = max(1, total_rows // 100)
    mid_end = max(top_end, total_rows // 2)
    buckets = [
        (0, top_end, int(np.ceil(number * 0.5))),
        (top_end, mid_end, int(np.ceil(number * 0.3))),
        (mid_end, total_rows, int(np.ceil(number * 0.2))),
    ]

    positions = []
    for low, high, size in buckets:
        if low >= high:
            # Empty bucket (very small frame): sample from all rows instead.
            low, high = 0, total_rows
        positions.extend(np.random.randint(low=low, high=high, size=size))

    # Rounding up may yield more positions than requested; keep the first ones.
    name_column = names['name']
    return [name_column.iloc[pos] for pos in positions[:number]]
    
def rnd_english_given_names(number=20):
    """Return ``number`` random English given names.

    The global ``eng_gnames`` frame is loaded with ``read_eng_given_names()``
    the first time it is found empty and reused afterwards.
    """
    global eng_gnames

    cache_is_empty = eng_gnames.empty
    if cache_is_empty:
        eng_gnames = read_eng_given_names()

    sample = get_random_names(eng_gnames, number)
    return sample

def rnd_arabic_given_names(number=20):
    """Return ``number`` random Arabic given names.

    The global ``arb_gnames`` frame is loaded with ``read_arb_given_names()``
    the first time it is found empty and reused afterwards.
    """
    global arb_gnames

    cache_is_empty = arb_gnames.empty
    if cache_is_empty:
        arb_gnames = read_arb_given_names()

    sample = get_random_names(arb_gnames, number)
    return sample

def rnd_english_family_names(number=20):
    """Return ``number`` random English family names.

    The global ``eng_fnames`` frame is loaded with ``read_eng_family_names()``
    the first time it is found empty and reused afterwards.
    """
    global eng_fnames

    cache_is_empty = eng_fnames.empty
    if cache_is_empty:
        eng_fnames = read_eng_family_names()

    sample = get_random_names(eng_fnames, number)
    return sample

def rnd_arabic_family_names(number=20):
    """Return ``number`` random Arabic family names.

    The global ``arb_fnames`` frame is loaded with ``read_arb_family_names()``
    the first time it is found empty and reused afterwards.
    """
    global arb_fnames

    cache_is_empty = arb_fnames.empty
    if cache_is_empty:
        arb_fnames = read_arb_family_names()

    sample = get_random_names(arb_fnames, number)
    return sample

# utilities

In [6]:
def reset_global_variables(name):
    """Start a fresh search: clear both result lists and record the seed name.

    Also (re)installs the list of noise tokens that
    ``get_top_frequency_names`` excludes.

    Args:
        name: the seed name stored in the global ``origin_name``.
    """
    global english_total_result, arabic_total_result, top_noise_data, origin_name
    origin_name = name
    top_noise_data = ['MRS', 'MRS.', 'MRSS', 'MR', 'MR.', 'MISS']
    # Two distinct lists: the result sets must not alias each other.
    english_total_result, arabic_total_result = [], []

# load initial arabic variations

In [8]:
# Load the pickled graph, reset the search state with 'MOHAMED' as the seed,
# and run the first (unvalidated) English -> Arabic expansion.
read_pickled_names_graph()
reset_global_variables('MOHAMED')
# The seed itself is the only English name known at this point.
english_total_result.append(origin_name)
get_arabic_variantions()
print("top arabic results: {dct}".format(dct=arabic_total_result))

top arabic results: ['موهامد', 'محمد']


# get initial english variations list

In [9]:
# First (unvalidated) Arabic -> English expansion from the current Arabic results.
get_english_variantions()
print("top english results: {dct}".format(dct=english_total_result))

top english results: ['MOHAMID', 'MOUKHAMED', 'MHEMMED', 'MEHAMED', 'MUHAMMET', 'MUHEMED', 'MOCHAMAD', 'MIHAMAD', 'MOOHAMID', 'MOKHAMED', 'MUKHAMMAT', 'MOHAMAT', 'MOHAMEED', 'MUHAMIT', 'MUHAMMAD', 'MUKHAMMET', 'MOUHAMMED', 'MUCHAMMED', 'MWAHAMD', 'MOOHAMED', 'MOHAMMUD', 'MOUHAMAD', 'MEHAMMED', "M'HAMMED", 'MUHMMED', 'MOHAMMED', 'MUHAMEED', 'MOUHEMAD', 'MUHAMAT', 'MUHAMMAT', 'MOCHAMED', 'MOKHAMMED', 'MUHEMMAD', 'MOHAMMAT', 'MOCHAMMED', 'MHEMAD', 'MUHAMMID', 'MOHAMD', 'MOUHAMAT', "M'HEMMED", 'MHEMED', 'MUHAMMEED', 'MUKHAMMAD', 'MOHAMMID', 'MOHEMAT', 'MUHMMAD', 'MOOHAMMAD', 'MOCHEMAD', 'MOCHAMET', 'MOKHAMAD', 'MUHAMEET', 'MAHAMAD', 'MHAMMAD', "M'HEMED", "M'HAMAD", 'MUHAMMUD', 'MUHAMID', 'MOUHEMED', 'MUKHAMAT', 'MUKHAMMED', 'MUCHAMAD', 'MIHAMMAD', 'MOUHAMMAD', 'MUHD', 'MUKHAMED', 'MOUHAMED', 'MOHAMMEED', 'MOKHAMMAD', 'MOHAMMD', 'MOHEMMAD', 'MOHMED', 'MHAMED', 'MOHAMEET', 'MOOHAMAD', 'MUHEMMED', 'MUHAMET', 'MUHAMED', 'MOHAMED', 'MOHAMAD', 'MUKHAMET', 'MUHAMAD', 'MOHAMUD', 'MOWHAMMAD', 'MOOH

# get more variations and validate results

In [10]:
# Second English -> Arabic expansion, this time validating (and sorting) the
# Arabic results against the current English list.
get_arabic_variantions(True)
print("top arabic results: {dct}".format(dct=arabic_total_result))

removing name: محمدنيل
removing name: بن محمد
removing name: محمدعلى
removing name: امحمد
removing name: محمدنور
removing name: محمد 
top arabic results: ['ماهاماد', 'محماد', 'محمد', 'محمّد', 'مخمد', 'مهمت', 'موتشاماد', 'موحد', 'موهامات', 'موهاماد', 'موهامت', 'موهامد', 'موهامود', 'موهاميد', 'موهمد']


In [11]:
# Second Arabic -> English expansion, validating (and sorting) the English
# results against the current Arabic list.
get_english_variantions(True)
print("top english results: {dct}".format(dct=english_total_result))

top english results: ["M'HAMAD", "M'HAMED", "M'HAMMAD", "M'HAMMED", "M'HEMAD", "M'HEMED", "M'HEMMED", 'MAHAMAD', 'MAHAMMAD', 'MEHAMED', 'MEHAMMED', 'MEHMED', 'MEHMET', 'MHAMAD', 'MHAMED', 'MHAMMAD', 'MHAMMED', 'MHEMAD', 'MHEMED', 'MHEMMED', 'MIHAMAD', 'MIHAMMAD', 'MOCHAMAD', 'MOCHAMAT', 'MOCHAMED', 'MOCHAMET', 'MOCHAMMAD', 'MOCHAMMED', 'MOCHEMAD', 'MOHAMAD', 'MOHAMADE', 'MOHAMAT', 'MOHAMD', 'MOHAMED', 'MOHAMEED', 'MOHAMEET', 'MOHAMET', 'MOHAMID', 'MOHAMMAD', 'MOHAMMAT', 'MOHAMMD', 'MOHAMMED', 'MOHAMMEED', 'MOHAMMET', 'MOHAMMID', 'MOHAMMIT', 'MOHAMMOD', 'MOHAMMUD', 'MOHAMOOD', 'MOHAMOUD', 'MOHAMUD', 'MOHD', 'MOHEMAD', 'MOHEMAT', 'MOHEMED', 'MOHEMET', 'MOHEMMAD', 'MOHEMMED', 'MOHMD', 'MOHMED', 'MOHMMED', 'MOKHAMAD', 'MOKHAMED', 'MOKHAMMAD', 'MOKHAMMED', 'MOOHAMAD', 'MOOHAMED', 'MOOHAMET', 'MOOHAMID', 'MOOHAMMAD', 'MOOHAMMED', 'MOOHED', 'MOUHAMAD', 'MOUHAMAT', 'MOUHAMATT', 'MOUHAMED', 'MOUHAMET', 'MOUHAMMAD', 'MOUHAMMED', 'MOUHEMAD', 'MOUHEMED', 'MOUKHAMED', 'MOWHAMMAD', 'MUCHAMAD', 'MUCH

In [12]:
# Full four-step search for the seed 'MAHIR': two unvalidated expansions
# followed by two validated ones, printing the lists after each step.
reset_global_variables('MAHIR')
# The seed itself is the only English name known at this point.
english_total_result.append(origin_name)
get_arabic_variantions()
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions()
print("top english results: {dct}".format(dct=english_total_result))
# Validated passes: names failing validation are printed and removed.
get_arabic_variantions(True)
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions(True)
print("top english results: {dct}".format(dct=english_total_result))

top arabic results: ['مهير', 'مهر', 'ماهر', 'ماهير']
top english results: ['MAHIIR', 'MAAIR', 'MEHR', 'MOUHR', 'MAHEAR', 'MAHIER', 'MEIR', 'MAHRI', 'MAHIRE', 'MAHR', 'MAHHER', 'MER', 'MAIR', 'MAHAR', 'MAYR', 'MEAIR', 'MEHEIR', 'MAHAIR', 'MYHR', 'MUHR', 'MAHER', 'MEEIR', 'MAHIR', 'MIHIR', 'MAIIR', 'MEHEAR', 'MAR', 'MAHERE', 'MAAIER', 'MAHEIR', 'MAEAR', 'MAEIR', 'MHER', 'MAAR', 'MEHIER', 'MEHIR', 'MAAER', 'MEHER', 'MEYR', 'MEHEER', 'MEIIR', 'MAEER', 'MAAHIR', 'MIHR', 'MEER', 'MAER', 'MAAHER', 'MAHAMARAKKALAGE', 'MEEER', 'MEEAR', 'MOOHR', 'MAIER', 'MEHAR', 'MEEHR', 'MOHEER', 'MOHR', 'MEHAIR', 'MAHYR', 'MAHEER', 'MEIER']
removing name: مهرى
removing name: ماهر ى
top arabic results: ['ماهر', 'ماهرة', 'ماهرى', 'ماهير', 'مايار', 'ماير', 'محار', 'مهر', 'مهير', 'مهيرة', 'مير']
removing name: MIRR
top english results: ['MAAAIRA', 'MAAER', 'MAAHER', 'MAAHIR', 'MAAHIRA', 'MAAHIRAH', 'MAAIER', 'MAAIR', 'MAAIRA', 'MAAR', 'MAAYAR', 'MAAYER', 'MAAYRA', 'MAEAR', 'MAEARA', 'MAEER', 'MAEERA', 'MAEERE', '

In [13]:
# Full four-step search for the seed 'HAMZA': two unvalidated expansions
# followed by two validated ones, printing the lists after each step.
reset_global_variables('HAMZA')
# The seed itself is the only English name known at this point.
english_total_result.append(origin_name)
get_arabic_variantions()
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions()
print("top english results: {dct}".format(dct=english_total_result))
# Validated passes: names failing validation are printed and removed.
get_arabic_variantions(True)
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions(True)
print("top english results: {dct}".format(dct=english_total_result))

top arabic results: ['حمزة', 'حمزه']
top english results: ['HUMZA', 'HAMZA', 'HAMZAH', 'KHAMZA']
top arabic results: ['حمزة', 'حمزه', 'هامزه']
top english results: ['HAMZA', 'HAMZAH', 'HAMZEH', 'HUMZA', 'KHAMZA']


In [None]:
# Full four-step search for the seed 'MARY': two unvalidated expansions
# followed by two validated ones, printing the lists after each step.
reset_global_variables('MARY')
# The seed itself is the only English name known at this point.
english_total_result.append(origin_name)
get_arabic_variantions()
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions()
print("top english results: {dct}".format(dct=english_total_result))
# Validated passes: names failing validation are printed and removed.
get_arabic_variantions(True)
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions(True)
print("top english results: {dct}".format(dct=english_total_result))

In [None]:
# DataFrame-only exploration of 'MICHAEL': build boolean row masks that
# alternate between the 'eng' and 'arb' columns for two rounds.
# NOTE(review): this needs `df` to hold the name rows; it starts out as an
# empty DataFrame and is only filled by read_dataframe() - confirm it was
# loaded before running this cell.
first_iteration_english = (df['eng'] == 'MICHAEL')
# Rows whose Arabic spelling occurs in any of the rows selected so far.
first_iteration_arabic = (df['arb'].isin(df[first_iteration_english]['arb']))
second_iteration_english = (df['eng'].isin(df[first_iteration_arabic]['eng']))
second_iteration_arabic = (df['arb'].isin(df[second_iteration_english]['arb']))
# Display every row selected by at least one of the four masks.
df[first_iteration_english | first_iteration_arabic | second_iteration_english | second_iteration_arabic]

#df[df['eng'] == 'MICHAEL']
#df[df['eng'] == 'MICHAEL' | df['arb'] in df[df['eng'] == 'MICHAEL']['arb']]


In [None]:
# Full four-step search for the seed 'MICHAEL': two unvalidated expansions
# followed by two validated ones, printing the lists after each step.
reset_global_variables('MICHAEL')
# The seed itself is the only English name known at this point.
english_total_result.append(origin_name)
get_arabic_variantions()
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions()
print("top english results: {dct}".format(dct=english_total_result))
# Validated passes: names failing validation are printed and removed.
get_arabic_variantions(True)
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions(True)
print("top english results: {dct}".format(dct=english_total_result))

In [None]:
%%time
# Full four-step search for the seed 'OMAR': two unvalidated expansions
# followed by two validated ones, printing the lists after each step.
reset_global_variables('OMAR')
# The seed itself is the only English name known at this point.
english_total_result.append(origin_name)
get_arabic_variantions()
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions()
print("top english results: {dct}".format(dct=english_total_result))
# Validated passes: names failing validation are printed and removed.
get_arabic_variantions(True)
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions(True)
print("top english results: {dct}".format(dct=english_total_result))

In [None]:
%%time
# Full four-step search for the seed 'AMMAR': two unvalidated expansions
# followed by two validated ones, printing the lists after each step.
reset_global_variables('AMMAR')
# The seed itself is the only English name known at this point.
english_total_result.append(origin_name)
get_arabic_variantions()
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions()
print("top english results: {dct}".format(dct=english_total_result))
# Validated passes: names failing validation are printed and removed.
get_arabic_variantions(True)
print("top arabic results: {dct}".format(dct=arabic_total_result))
get_english_variantions(True)
print("top english results: {dct}".format(dct=english_total_result))

In [None]:
%%time
# Reload the pickled graph, then draw 99 random English given names
# (this queries the database the first time the name cache is empty).
read_pickled_names_graph()
names = rnd_english_given_names(99)
print(names)

In [None]:
%%time
names = ['MOHAMED', 'ANTON', 'SHAMA', 'OBAID', 'SMIRA', 'SAFA', 'AHMED', 'RASHEED', 'FIRST NAME', 'SAYED', 'JEAN', 'THAMER', 'ZIAD', 'NADER', 'SHAHD', 'AFRAH', 'AZZA', 'ALAA', 'SHEHAB', 'WAGDA', 'GHAREEB', 'POST GRAND NAME', 'DALAL', 'FARES', 'ADNAN', 'YASIN', 'SAFAA', 'IHAB', 'BASMA', 'RASHID', 'HODA', 'HELAL', 'LINA', 'LAURA', 'QASEM', 'DALAL', 'EMAD', 'HAMAD', 'YASIN', 'OSAMA', 'WESAM', 'AFAF', 'IVAN', 'EMMANUEL', 'MICHAEL', 'SEBASTIAN', 'JOSEPH', 'HANEEN', 'SOHAIL', 'ADAM', 'AMMAR', 'KERYN', 'MIIKA', 'TRIPTI', 'PALANIAPPAN', 'JANNE', 'CHATHURA', 'BHUWAN', 'CARLY', 'JAYANTI', 'JADHAV', 'LENIN', 'JOSY', 'HAKIMULLAH', 'BURTON', 'ILONA', 'AAFIYA', 'SUBAIDHA', 'ANGELIQUE', 'SHOHID', 'ALBERTINA', 'MALHAR', 'POOJA', 'MEG', 'GOMATHI', 'SHAKEER', 'DARPAN', 'BHURA', 'ANNELIZE', 'DEVENDRAN', 'JINDER', 'METILDA', 'BIRATI', 'IMPERIAL', 'OTTONIEL', 'LASANTHI', 'YANDIR', 'SAHIT', 'SANTILLAN', 'WASRIAH', 'MUSLIHAH', 'MAHINDERPERSAD', 'CAR NI', 'NAZRI BIN', 'THULANI', 'ALIMAH', 'ABDULLAYEVA', 'NOIME', 'AKHMETOV', 'GENARD']
# Run the four-step search for every seed in `names`, printing only the
# validated Arabic and English results of each.
for name in names:
    reset_global_variables(name)
    # The seed itself is the only English name known at this point.
    english_total_result.append(origin_name)
    # Unvalidated passes widen the candidate sets in both directions.
    get_arabic_variantions()
    get_english_variantions()
    # Validated passes: names failing validation are printed and removed.
    get_arabic_variantions(True)
    print("top arabic results: {dct}".format(dct=arabic_total_result))
    get_english_variantions(True)
    print("top english results: {dct}".format(dct=english_total_result))

In [None]:
%%time
names = ['MOHAMED', 'ANTON', 'SHAMA', 'OBAID', 'SMIRA', 'SAFA', 'AHMED', 'RASHEED', 'FIRST NAME', 'SAYED', 'JEAN', 'THAMER', 'ZIAD', 'NADER', 'SHAHD', 'AFRAH', 'AZZA', 'ALAA', 'SHEHAB', 'WAGDA', 'GHAREEB', 'POST GRAND NAME', 'DALAL', 'FARES', 'ADNAN', 'YASIN', 'SAFAA', 'IHAB', 'BASMA', 'RASHID', 'HODA', 'HELAL', 'LINA', 'LAURA', 'QASEM', 'DALAL', 'EMAD', 'HAMAD', 'YASIN', 'OSAMA', 'WESAM', 'AFAF', 'IVAN', 'EMMANUEL', 'MICHAEL', 'SEBASTIAN', 'JOSEPH', 'HANEEN', 'SOHAIL', 'ADAM', 'AMMAR', 'KERYN', 'MIIKA', 'TRIPTI', 'PALANIAPPAN', 'JANNE', 'CHATHURA', 'BHUWAN', 'CARLY', 'JAYANTI', 'JADHAV', 'LENIN', 'JOSY', 'HAKIMULLAH', 'BURTON', 'ILONA', 'AAFIYA', 'SUBAIDHA', 'ANGELIQUE', 'SHOHID', 'ALBERTINA', 'MALHAR', 'POOJA', 'MEG', 'GOMATHI', 'SHAKEER', 'DARPAN', 'BHURA', 'ANNELIZE', 'DEVENDRAN', 'JINDER', 'METILDA', 'BIRATI', 'IMPERIAL', 'OTTONIEL', 'LASANTHI', 'YANDIR', 'SAHIT', 'SANTILLAN', 'WASRIAH', 'MUSLIHAH', 'MAHINDERPERSAD', 'CAR NI', 'NAZRI BIN', 'THULANI', 'ALIMAH', 'ABDULLAYEVA', 'NOIME', 'AKHMETOV', 'GENARD']
# Run the four-step search for every seed in `names`, printing only the
# validated Arabic and English results of each.
# NOTE(review): this cell repeats the statements of the previous cell - confirm
# whether both are needed.
for name in names:
    reset_global_variables(name)
    # The seed itself is the only English name known at this point.
    english_total_result.append(origin_name)
    # Unvalidated passes widen the candidate sets in both directions.
    get_arabic_variantions()
    get_english_variantions()
    # Validated passes: names failing validation are printed and removed.
    get_arabic_variantions(True)
    print("top arabic results: {dct}".format(dct=arabic_total_result))
    get_english_variantions(True)
    print("top english results: {dct}".format(dct=english_total_result))