In [105]:
import pinyin
import argparse


def isEnglish(s):
    try:
        s.encode('ascii')
    except UnicodeEncodeError:
        return False
    else:
        return True

def toPinyin(s, rearrange=False):
    s = s.strip()
#     if isEnglish(s):
#         return s

    chars = []
    if rearrange:
        firstname=s[1:]
        lastname=s[0]
        chars.append(pinyin.get(firstname, format="strip", delimiter="-").strip("-"))
        chars.append(pinyin.get(lastname, format="strip", delimiter="-"))
    else:
        chars+= pinyin.get(s, format="strip", delimiter=" ").split()
    return " ".join(map(lambda x: x.capitalize(), chars))



In [106]:
import difflib

#Initialize EnglishNames list
file = open('EnglishFirstNames.txt', 'r') 
EnglishNames = []
for line in file: 
    EnglishNames.append(line[:-1]) #-1 to get rid of the \n character

    
#Initialize ChineseNames list
file = open('ChineseFirstNames.txt', 'r') 
ChineseNames = []
for (i,line) in enumerate(file): 
    ChineseNames.append(line[:-1]) #-1 to get rid of the \n character
   
   
#Convert all ChineseNames to PinyinNames list
PinyinNames = []
PinyinChineseDict = {}
for name in ChineseNames:
    pinyinName = toPinyin(name, False)
    PinyinNames.append(pinyinName)
    PinyinChineseDict[pinyinName] = name
    

print(len(EnglishNames))
print(len(ChineseNames))
print(len(PinyinNames))



4945
9799
9799


In [107]:
def getChineseNames(EnglishName, num):
    print("\n-----------Method1: difflib------------")
    recPinyinNames = difflib.get_close_matches(EnglishName, PinyinNames,n=num, cutoff=0.0)
    recChineseNames = []
    for name in recPinyinNames:
        recChineseNames.append(PinyinChineseDict[name])
        
    print("Hi %s, here are your %d recommendations for a Chinese name: " % (EnglishName, num)) 
    print(recPinyinNames)
    print(recChineseNames)

# print(difflib.get_close_matches('xiaoming', ChineseNames,n=5, cutoff=0.0))
# print(difflib.get_close_matches('xiaoming', PinyinNames,n=5, cutoff=0.0))





In [108]:
import Levenshtein
import queue as Q


def getChineseNames2(EnglishName, num):
    print("\n-----------Method2: Levenshtein------------")
    q = Q.PriorityQueue()
    for pyname in PinyinNames:
        similarity = Levenshtein.ratio(EnglishName, pyname)
        q.put((-similarity,pyname))#python priority queue uses min heap, we use -similarity
        
    recPinyinNames=[]
    for i in range(num):
        ttuple = q.get()
        print(ttuple)
        recPinyinNames.append(ttuple[1])#get returns tuple if (priority,str)
        
    recChineseNames = [] 
    for name in recPinyinNames:
        recChineseNames.append(PinyinChineseDict[name])
    print("Hi %s, here are your %d recommendations for a Chinese name: " % (EnglishName, num)) 
    print(recPinyinNames)
    print(recChineseNames)
    
    
    
Levenshtein.ratio('hello', 'hi')
Levenshtein.ratio('hello', 'helloworld')

0.6666666666666666

In [109]:
getChineseNames('Jordan Allen', 8)
getChineseNames2('Jordan Allen', 8)



-----------Method1: difflib------------
Hi Jordan Allen, here are your 8 recommendations for a Chinese name: 
['Jiang Wen', 'Pan An', 'Jiang Feng', 'Jin Wen', 'Yang Ao Sen', 'Yang Wen', 'Yang Sen', 'Yan Zhen']
['蒋文', '潘安', '姜峰', '金稳', '杨奥森', '杨雯', '杨森', '闫桢']

-----------Method2: Levenshtein------------
(-0.5714285714285714, 'Jiang Wen')
(-0.5555555555555556, 'Pan An')
(-0.5454545454545454, 'Jiang Feng')
(-0.5263157894736842, 'Jin Wen')
(-0.5217391304347826, 'Yang Ao Sen')
(-0.5, 'Fang Wen')
(-0.5, 'Guo Jian Wen')
(-0.5, 'Jia Chen')
Hi Jordan Allen, here are your 8 recommendations for a Chinese name: 
['Jiang Wen', 'Pan An', 'Jiang Feng', 'Jin Wen', 'Yang Ao Sen', 'Fang Wen', 'Guo Jian Wen', 'Jia Chen']
['蒋文', '潘安', '姜峰', '金稳', '杨奥森', '方闻', '郭建雯', '贾陈']


In [110]:
getChineseNames('Jordan', 8)
getChineseNames2('Jordan', 8)


-----------Method1: difflib------------
Hi Jordan, here are your 8 recommendations for a Chinese name: 
['Mo Fan', 'Tao Er Kang', 'Yu Juan', 'Yao Nan', 'Yao Lan', 'Xu Juan', 'Wu Jian', 'Tao Ran']
['莫凡', '陶而康', '于娟', '姚楠', '姚兰', '徐娟', '吴剑', '陶然']

-----------Method2: Levenshtein------------
(-0.5, 'Mo Fan')
(-0.47058823529411764, 'Tao Er Kang')
(-0.46153846153846156, 'Cao Han')
(-0.46153846153846156, 'Du Juan')
(-0.46153846153846156, 'Gao Han')
(-0.46153846153846156, 'Gao Han')
(-0.46153846153846156, 'Gao Han')
(-0.46153846153846156, 'Guo Fan')
Hi Jordan, here are your 8 recommendations for a Chinese name: 
['Mo Fan', 'Tao Er Kang', 'Cao Han', 'Du Juan', 'Gao Han', 'Gao Han', 'Gao Han', 'Guo Fan']
['莫凡', '陶而康', '曹涵', '杜娟', '高寒', '高寒', '高寒', '郭凡']


In [111]:
userName = input('\nWelcome to our Chinese Name Recommender System. \nPlease enter your name:')

userAge = input('\nHello %s! Please enter your age:' % (userName))

print('\nNow please enter the gender you want for the names:')
print('Enter "M" for Male and "F" for Female:')
userGender = input()

num = input('\nAlmost there! How many Chinese names do you want our system to recommend?')
num = int(num)

getChineseNames("Jordan", num)
getChineseNames2(userName, num)


Welcome to our Chinese Name Recommender System. 
Please enter your name:Marco

Hello Marco! Please enter your age:18

Now please enter the gender you want for the names:
Enter "M" for Male and "F" for Female:
m

Almost there! How many Chinese names do you want our system to recommend?9

-----------Method1: difflib------------
Hi Jordan, here are your 9 recommendations for a Chinese name: 
['Mo Fan', 'Tao Er Kang', 'Yu Juan', 'Yao Nan', 'Yao Lan', 'Xu Juan', 'Wu Jian', 'Tao Ran', 'Tao Lan']
['莫凡', '陶而康', '于娟', '姚楠', '姚兰', '徐娟', '吴剑', '陶然', '涛兰']

-----------Method2: Levenshtein------------
(-0.5454545454545454, 'Ma Hao')
(-0.5454545454545454, 'Ma Yao')
(-0.5454545454545454, 'Mao Lu')
(-0.5454545454545454, 'Mao Yi')
(-0.5454545454545454, 'Mao Yu')
(-0.5, 'Ma Long')
(-0.5, 'Ma Xiao')
(-0.5, 'Ma Xiao')
(-0.5, 'Mao Han')
Hi Marco, here are your 9 recommendations for a Chinese name: 
['Ma Hao', 'Ma Yao', 'Mao Lu', 'Mao Yi', 'Mao Yu', 'Ma Long', 'Ma Xiao', 'Ma Xiao', 'Mao Han']
['马浩', '马瑶', 

In [112]:
# import colorama
# from colorama import Fore, Back, Style

# colorama.init()

# text = "The quick brown fox jumps over the lazy dog"

# print(Fore.RED + text)
# print(Back.CYAN + text + Style.RESET_ALL)
# print(text)

The quick brown fox jumps over the lazy dog
The quick brown fox jumps over the lazy dog
The quick brown fox jumps over the lazy dog


In [113]:
print('Enter "M" for Male and "F" for Female:')
userGender = input()
if(userGender == "M") or (userGender == "Male"):
    userGender = "M"
else:
    userGender = "F"

Enter "M" for Male and "F" for Female:
af


array([1])

array([1, 1, 1])

array([0.8245    , 0.51245455, 0.85      ])