In [3]:
import nltk
from nltk.corpus import cmudict
import re
from pronouncing import phones_for_word

# Download the cmudict corpus through NLTK's downloader.
nltk.download('cmudict')

# Load the cmudict data; the functions below look words up in it by
# their lowercase form.
cmu_dict = cmudict.dict()

def get_pronunciation(word):
    """Look up a single word in the module-level ``cmu_dict``.

    The lookup is case-insensitive: the word is lowercased before it is
    used as a key.

    Args:
        word: The word to look up.

    Returns:
        The ``cmu_dict`` entry for the word (callers in this notebook take
        element ``[0]`` of it and join that with spaces), or ``None`` when
        the word is not in the dictionary.
    """
    return cmu_dict.get(word.lower())

def annotate_with_phonetic(text):
    """Annotate each word of ``text`` with its first listed pronunciation.

    Words are extracted from ``text`` (punctuation is dropped) and looked up
    with ``get_pronunciation``. A word that has an entry is followed by the
    phonemes of its first pronunciation; a word without an entry is emitted
    on its own.

    Args:
        text: The text to annotate.

    Returns:
        A single string of the form ``"word PH1 PH2 word PH1 ..."``, with
        items separated by one space and no trailing space. An input with
        no words yields ``""``.
    """
    # Keep apostrophes that sit inside a word so contractions such as
    # "don't" stay one token; a plain \w+ pattern would split them into
    # "don" and "t" and the contraction could never be found as a whole.
    words = re.findall(r"\w+(?:'\w+)*", text)

    pieces = []
    for word in words:
        pronunciation = get_pronunciation(word)
        if pronunciation:
            # Only the first listed pronunciation is used.
            pieces.append(f"{word} {' '.join(pronunciation[0])}")
        else:
            pieces.append(word)
    return " ".join(pieces)

# Sample sentence to annotate.
text = "Hello, how are you doing today?"

# Annotate the sample and print the result under a heading.
annotated_text = annotate_with_phonetic(text)
print("Annotated Text:", annotated_text, sep="\n")


[nltk_data] Downloading package cmudict to
[nltk_data]     /Users/chenjian/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


Annotated Text:
Hello HH AH0 L OW1 how HH AW1 are AA1 R you Y UW1 doing D UW1 IH0 NG today T AH0 D EY1 


In [5]:
import re
import nltk
from nltk.corpus import cmudict

# Download the cmudict corpus through NLTK's downloader.
nltk.download('cmudict')

# Load the cmudict data; the functions below look words up in it by
# their lowercase form.
cmu_dict = cmudict.dict()

def get_pronunciation(word):
    """Return the ``cmu_dict`` entry for ``word``, ignoring case.

    Args:
        word: The word to look up; it is lowercased before the lookup.

    Returns:
        The entry stored in the module-level ``cmu_dict`` under the
        lowercased word, or ``None`` when there is no such entry.
    """
    key = word.lower()
    return cmu_dict.get(key)

def annotate_with_phonetic(text):
    """Split ``text`` into words and build a parallel phoneme string.

    Words are extracted from ``text`` (punctuation is dropped) and looked up
    with ``get_pronunciation``. For a word that has an entry, the phonemes
    of its first pronunciation are added to the phoneme string; for a word
    without an entry, a run of spaces as long as the word is added instead.

    Args:
        text: The text to annotate.

    Returns:
        A ``(annotated_text, pronunciations)`` tuple of strings: the words
        joined by single spaces, and the phoneme string with leading and
        trailing whitespace stripped.
    """
    # Keep apostrophes that sit inside a word so contractions such as
    # "don't" stay one token; a plain \w+ pattern would split them into
    # "don" and "t" and the contraction could never be found as a whole.
    words = re.findall(r"\w+(?:'\w+)*", text)

    phonetic_pieces = []
    for word in words:
        pronunciation = get_pronunciation(word)
        if pronunciation:
            # Only the first listed pronunciation is used.
            phonetic_pieces.append(' '.join(pronunciation[0]))
        else:
            # Blank placeholder as wide as the unknown word.
            phonetic_pieces.append(" " * len(word))

    # strip() drops placeholders that end up at either end of the string.
    return " ".join(words), " ".join(phonetic_pieces).strip()

# Sample sentence to annotate.
text = "Hello, how are you doing today?"

# Annotate the sample, then print the words and their phonemes, each under
# its own heading.
annotated_text, phonetic_text = annotate_with_phonetic(text)
print("Annotated Text:", annotated_text, "Phonetic Text:", phonetic_text, sep="\n")


[nltk_data] Downloading package cmudict to
[nltk_data]     /Users/chenjian/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


Annotated Text:
Hello how are you doing today
Phonetic Text:
HH AH0 L OW1 HH AW1 AA1 R Y UW1 D UW1 IH0 NG T AH0 D EY1


In [6]:
import nltk
from nltk.corpus import cmudict

# Download the CMU pronouncing dictionary.
nltk.download('cmudict')

# nltk.word_tokenize (used by get_pronunciation below) needs the Punkt
# tokenizer data, which this notebook never fetched; without it a fresh
# environment raises LookupError. Older NLTK releases load 'punkt', newer
# ones load 'punkt_tab', so both are requested.
nltk.download('punkt')
nltk.download('punkt_tab')

# Initialise the CMU pronouncing dictionary used for lookups below.
cmu_dict = cmudict.dict()

def get_pronunciation(sentence):
    """Return one pronunciation string per token of ``sentence``.

    The sentence is lowercased and tokenized with ``nltk.word_tokenize``.
    Each token found in the module-level ``cmu_dict`` is rendered as the
    phonemes of its first pronunciation joined by spaces; every other
    token is rendered as ``"UNK"``.

    Args:
        sentence: The sentence to transcribe.

    Returns:
        A list of strings, one per token, in token order.
    """
    tokens = nltk.word_tokenize(sentence.lower())
    return [
        ' '.join(cmu_dict[token][0]) if token in cmu_dict else "UNK"
        for token in tokens
    ]

def format_with_pronunciation(sentence, pronunciation):
    """Lay out a sentence with its pronunciation on the line below it.

    Args:
        sentence: The original sentence text.
        pronunciation: An iterable of pronunciation strings; they are
            joined with single spaces.

    Returns:
        ``sentence``, a newline, then the joined pronunciation strings.
    """
    phonetic_line = ' '.join(pronunciation)
    return f"{sentence}\n{phonetic_line}"

def annotate_and_format_sentence(sentence):
    """Transcribe ``sentence`` and format it together with the result.

    Args:
        sentence: The sentence to annotate.

    Returns:
        Whatever ``format_with_pronunciation`` returns for the sentence and
        the output of ``get_pronunciation(sentence)``.
    """
    return format_with_pronunciation(sentence, get_pronunciation(sentence))

# Example sentence
sentence = "How are you doing today?"

# Annotate the sentence and print it with its pronunciation on the line below
annotated_sentence = annotate_and_format_sentence(sentence)
print(annotated_sentence)


[nltk_data] Downloading package cmudict to
[nltk_data]     /Users/chenjian/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


How are you doing today?
HH AW1 AA1 R Y UW1 D UW1 IH0 NG T AH0 D EY1 UNK
