In [1]:
!pip install pronouncing epitran gtts gradio IPython


import os
import string
import tempfile

import epitran
import gradio as gr
import pronouncing
from gtts import gTTS

Collecting pronouncing
  Downloading pronouncing-0.2.0.tar.gz (17 kB)
  Preparing metadata (setup.py) ... done
Collecting epitran
  Downloading epitran-1.26.0-py2.py3-none-any.whl.metadata (34 kB)
Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting gradio
  Downloading gradio-5.21.0-py3-none-any.whl.metadata (16 kB)
Collecting cmudict>=0.4.0 (from pronouncing)
  Downloading cmudict-1.0.32-py3-none-any.whl.metadata (3.6 kB)
Collecting panphon>=0.20 (from epitran)
  Downloading panphon-0.21.2-py2.py3-none-any.whl.metadata (15 kB)
Collecting jamo (from epitran)
  Downloading jamo-0.4.1-py3-none-any.whl.metadata (2.3 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gr

In [2]:
def transcribe_to_ipa(text, language):
    """Transcribe ``text`` into an IPA string for the given language code.

    English ('en-US' or 'eng-Latn') is handled word by word: each
    whitespace-separated token is lower-cased, looked up with
    ``pronouncing.phones_for_word`` and the first pronunciation is converted
    with ``arpabet_to_ipa``. Every other language code is handed to Epitran.

    Args:
        text: The text to transcribe.
        language: 'en-US' / 'eng-Latn' for English, otherwise a code that is
            passed unchanged to ``epitran.Epitran``.

    Returns:
        The transcription as a single string. English tokens with no
        pronunciation are kept as ``[token]``. If Epitran raises, an
        ``"Error: ..."`` message is returned instead of propagating the
        exception.
    """
    if language in ('en-US', 'eng-Latn'):  # Treat en-US as eng-Latn
        transcription = []
        for token in text.lower().split():
            # Try the token exactly as typed first so entries that contain
            # punctuation of their own keep resolving as before.
            phones = pronouncing.phones_for_word(token)
            if not phones:
                # "hello," or "world!" would otherwise miss the lookup only
                # because of the punctuation glued to the word.
                stripped = token.strip(string.punctuation)
                if stripped and stripped != token:
                    phones = pronouncing.phones_for_word(stripped)
            if phones:
                transcription.append(arpabet_to_ipa(phones[0]))
            else:
                # Keep the original token visible so the user can see what
                # could not be transcribed.
                transcription.append(f"[{token}]")
        return " ".join(transcription)

    try:
        epi = epitran.Epitran(language)
        return epi.transliterate(text)
    except Exception as e:
        return f"Error: Language '{language}' not supported by Epitran ({str(e)})"

In [3]:
def arpabet_to_ipa(arpabet):
    """Convert a space-separated ARPAbet pronunciation into an IPA string.

    A trailing digit on a phoneme is read as its stress level: ``1`` emits a
    primary stress mark and ``2`` a secondary stress mark immediately before
    the vowel; any other digit emits no mark. ``AH`` is stress-sensitive:
    with stress 1 or 2 it becomes /ʌ/ (as in "but"), otherwise /ə/ (as in
    the first vowel of "about").

    Args:
        arpabet: Phonemes separated by whitespace, e.g. ``"HH AH0 L OW1"``.

    Returns:
        The concatenated IPA symbols. Phonemes missing from the mapping are
        passed through lower-cased. An empty input yields ``""``.
    """
    mapping = {
        'AA': 'ɑ', 'AE': 'æ', 'AH': 'ə', 'AO': 'ɔ', 'AW': 'aʊ',
        'AY': 'aɪ', 'B': 'b', 'CH': 'tʃ', 'D': 'd', 'DH': 'ð',
        'EH': 'ɛ', 'ER': 'ɜr', 'EY': 'eɪ', 'F': 'f', 'G': 'ɡ',
        'HH': 'h', 'IH': 'ɪ', 'IY': 'i', 'JH': 'dʒ', 'K': 'k',
        'L': 'l', 'M': 'm', 'N': 'n', 'NG': 'ŋ', 'OW': 'oʊ',
        'OY': 'ɔɪ', 'P': 'p', 'R': 'r', 'S': 's', 'SH': 'ʃ',
        'T': 't', 'TH': 'θ', 'UH': 'ʊ', 'UW': 'u', 'V': 'v',
        'W': 'w', 'Y': 'j', 'Z': 'z', 'ZH': 'ʒ'
    }
    stress_marks = {'1': 'ˈ', '2': 'ˌ'}

    ipa = ""
    for phoneme in arpabet.split():
        if phoneme[-1].isdigit():
            sound, stress = phoneme[:-1], phoneme[-1]
        else:
            sound, stress = phoneme, ""
        ipa += stress_marks.get(stress, "")
        if sound == 'AH' and stress in stress_marks:
            # Stressed AH is the "strut" vowel; only unstressed AH is schwa.
            ipa += 'ʌ'
        else:
            ipa += mapping.get(sound, sound.lower())
    return ipa

In [4]:
def explain_phonemes(ipa):
    """Return a line-by-line articulatory description of an IPA string.

    The string is scanned left to right. At each position a two-character
    symbol (affricate or diphthong) is preferred over a single character.
    Characters with no description (spaces, unknown symbols) are skipped.
    Text inside ``[...]`` is skipped as a whole, because that is how
    ``transcribe_to_ipa`` marks words it could not transcribe; their
    letters are ordinary spelling, not IPA.

    Args:
        ipa: The IPA transcription to explain.

    Returns:
        One ``"/symbol/: description"`` line per recognised symbol, joined
        with newlines; ``""`` when nothing is recognised.
    """
    descriptions = {
        # Stress marks
        'ˈ': 'primary stress', 'ˌ': 'secondary stress',
        # Stops
        'p': 'voiceless bilabial stop', 'b': 'voiced bilabial stop',
        't': 'voiceless alveolar stop', 'd': 'voiced alveolar stop',
        'k': 'voiceless velar stop', 'ɡ': 'voiced velar stop',
        # Affricates
        'tʃ': 'voiceless postalveolar affricate', 'dʒ': 'voiced postalveolar affricate',
        # Fricatives
        'f': 'voiceless labiodental fricative', 'v': 'voiced labiodental fricative',
        'θ': 'voiceless dental fricative', 'ð': 'voiced dental fricative',
        's': 'voiceless alveolar fricative', 'z': 'voiced alveolar fricative',
        'ʃ': 'voiceless postalveolar fricative', 'ʒ': 'voiced postalveolar fricative',
        'h': 'voiceless glottal fricative',
        # Nasals
        'm': 'bilabial nasal', 'n': 'alveolar nasal', 'ŋ': 'velar nasal',
        # Approximants
        'l': 'alveolar lateral approximant', 'r': 'alveolar approximant',
        'w': 'voiced labial-velar approximant', 'j': 'voiced palatal approximant',
        # Monophthongs
        'i': 'close front vowel', 'ɪ': 'near-close front vowel',
        'ɛ': 'open-mid front vowel', 'æ': 'near-open front vowel',
        'ə': 'mid-central vowel (schwa)', 'ɜ': 'open-mid central vowel',
        'ʌ': 'open-mid back vowel', 'ɑ': 'open back vowel',
        'ɔ': 'open-mid back rounded vowel', 'ʊ': 'near-close back rounded vowel',
        'u': 'close back rounded vowel',
        # Diphthongs
        'aʊ': 'diphthong (open to near-close back)',
        'aɪ': 'diphthong (open to near-close front)',
        'eɪ': 'diphthong (close-mid front to near-close front)',
        'oʊ': 'diphthong (mid-back to high-back)',
        'ɔɪ': 'diphthong (open-mid back to near-close front)',
    }

    breakdown = []
    i = 0
    length = len(ipa)
    while i < length:
        if ipa[i] == '[':
            closing = ipa.find(']', i + 1)
            if closing != -1:
                # Jump over the whole untranscribed placeholder.
                i = closing + 1
                continue
        pair = ipa[i:i + 2]
        if len(pair) == 2 and pair in descriptions:
            symbol = pair
        elif ipa[i] in descriptions:
            symbol = ipa[i]
        else:
            i += 1
            continue
        breakdown.append(f"/{symbol}/: {descriptions[symbol]}")
        i += len(symbol)
    return "\n".join(breakdown)

In [5]:
def process_input(text, language):
    """Produce the transcription report and pronunciation audio for the UI.

    Args:
        text: The text entered by the user.
        language: A language code from the dropdown; 'en-US' is treated as
            'eng-Latn'.

    Returns:
        A ``(report, audio_path)`` tuple. ``report`` contains the IPA
        transcription and phoneme breakdown, plus a note or warning when no
        audio could be produced. ``audio_path`` is the path of a freshly
        created MP3 file, or ``None`` when the language has no gTTS mapping,
        audio generation failed, or ``text`` is empty.

    Each call writes its audio to its own temporary file, so simultaneous
    requests cannot overwrite one another. The file is left in the system
    temp directory for the caller to consume.
    """
    # Nothing to transcribe or speak; answer directly instead of letting the
    # speech synthesis step fail on empty input.
    if not text or not text.strip():
        return "Please enter some text to transcribe.", None

    if language == 'en-US':
        language = 'eng-Latn'
    ipa_result = transcribe_to_ipa(text, language)
    phoneme_explanation = explain_phonemes(ipa_result)
    summary = f"IPA Transcription: /{ipa_result}/\n\nPhoneme Breakdown:\n{phoneme_explanation}"

    gtts_lang_map = {
        'eng-Latn': 'en', 'en-US': 'en', 'spa-Latn': 'es', 'fra-Latn': 'fr', 'deu-Latn': 'de',
        'ita-Latn': 'it', 'rus-Cyrl': 'ru', 'cmn-Hans': 'zh-cn', 'cmn-Hant': 'zh-tw',
        'por-Latn': 'pt', 'jpn-Hrgn': 'ja', 'jpn-Ktkn': 'ja', 'kor-Hang': 'ko',
    }
    gtts_lang = gtts_lang_map.get(language)
    if not gtts_lang:
        return f"{summary}\n\nNote: Audio not available for '{language}'", None

    # Reserve a unique file name; close the descriptor so gTTS can reopen
    # the path itself.
    fd, audio_file = tempfile.mkstemp(prefix="pronunciation_", suffix=".mp3")
    os.close(fd)
    try:
        tts = gTTS(text=text, lang=gtts_lang, slow=False)
        tts.save(audio_file)
    except Exception as e:
        # Do not leave an empty or partial file behind on failure.
        try:
            os.remove(audio_file)
        except OSError:
            pass
        return f"{summary}\n\nWarning: Audio generation failed ({str(e)})", None
    return summary, audio_file

In [6]:
# Language codes offered in the dropdown. 'en-US' is an alias that
# process_input rewrites to 'eng-Latn'.
languages = ['en-US', 'eng-Latn', 'spa-Latn', 'fra-Latn', 'deu-Latn', 'ita-Latn', 'rus-Cyrl', 'cmn-Hans']

# Two-column layout: inputs (text, language, button) on the left,
# results (report text, audio player) on the right.
with gr.Blocks(title="Phonetics Tool") as demo:
    gr.Markdown("# Phonetics Transcription Tool")
    gr.Markdown("Enter text and select a language to get IPA transcription and audio (if available)")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Enter your text")
            language_dropdown = gr.Dropdown(choices=languages, value='en-US', label="Select Language")
            submit_btn = gr.Button("Process")
        with gr.Column():
            output_text = gr.Textbox(label="Transcription and Breakdown", lines=10)
            output_audio = gr.Audio(label="Pronunciation")

    # process_input returns a (report, audio path or None) pair, matching the
    # order of the two output components.
    submit_btn.click(fn=process_input, inputs=[text_input, language_dropdown], outputs=[output_text, output_audio])

# Start the app server; run this cell last, after all functions are defined.
demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://863c92262281116e72.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


