<a href="https://colab.research.google.com/github/englissi/englissi/blob/main/%EC%9D%BC%EC%83%81_%EB%8C%80%ED%99%94%EB%AC%B8_%EB%B0%9C%EC%9D%8C%EC%97%B0%EC%8A%B5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
%%capture

!pip install gradio gtts
!pip install gradio gtts speechrecognition
!pip install gradio gtts speechrecognition difflib



In [13]:
import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from difflib import SequenceMatcher
import tempfile
import random

def generate_daily_conversation():
    """Return the built-in practice dialogues, keyed by scenario name.

    Each value is an ordered list of lines; every line starts with an
    ``A: `` or ``B: `` speaker label. A new dict is built on each call.
    """
    # NOTE(review): the lines stored under "ordering food" read as weekend
    # small talk rather than a food order -- confirm whether the key or the
    # dialogue should change.
    ordering_food_lines = [
        "A: Hi, how are you today?",
        "B: I'm good, thanks! How about you?",
        "A: I'm doing well. Did you do anything fun over the weekend?",
        "B: Yes, I went hiking. It was really refreshing. What about you?",
        "A: I watched a couple of movies and relaxed at home.",
    ]
    hotel_booking_lines = [
        "A: Hello, I'd like to book a room for two nights.",
        "B: Certainly, would you prefer a single or double bed?",
        "A: A double bed, please.",
        "B: No problem. May I have your name, please?",
    ]
    directions_lines = [
        "A: Excuse me, can you tell me how to get to the nearest subway station?",
        "B: Sure, go straight ahead and take the second left. The station is just around the corner.",
        "A: Thank you very much!",
        "B: You're welcome!",
    ]

    # Insertion order matters: the UI lists scenarios in this order.
    scenario_table = {}
    scenario_table["ordering food"] = ordering_food_lines
    scenario_table["booking a hotel"] = hotel_booking_lines
    scenario_table["asking for directions"] = directions_lines
    return scenario_table

def text_to_speech(text, lang='en', gender='female'):
    """Synthesize ``text`` with gTTS and return the path of the saved MP3.

    Args:
        text: The sentence to speak.
        lang: Language code handed to gTTS (defaults to ``'en'``).
        gender: ``'male'`` selects the ``co.uk`` gTTS top-level domain; any
            other value selects ``com``.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        deleted by this function.

    Raises:
        Whatever gTTS raises while building or saving the audio; the
        temporary file is removed before the error propagates.
    """
    import os

    # NOTE(review): the tld only picks which Google Translate host is queried;
    # presumably that changes the accent rather than the voice gender -- confirm.
    tld = 'co.uk' if gender == 'male' else 'com'
    tts = gTTS(text=text, lang=lang, tld=tld)

    # tempfile.mktemp() only invents a name, so another process could claim it
    # before gTTS writes. Reserve the file atomically instead, then close the
    # handle so gTTS can reopen the path on every platform.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as reserved:
        temp_file_path = reserved.name

    try:
        tts.save(temp_file_path)
    except Exception:
        # Do not leave an empty placeholder behind when synthesis fails.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
        raise
    return temp_file_path

def recognize_speech_from_microphone(audio_path):
    """Transcribe a recorded audio file with Google's speech recognition.

    Despite the name, this reads the recording stored at ``audio_path``
    (through ``sr.AudioFile``) rather than capturing from a live microphone.

    Args:
        audio_path: Path of the audio file to transcribe; may be ``None`` or
            empty when nothing was recorded.

    Returns:
        The recognized text on success, otherwise a human-readable message
        describing the failure. Exceptions raised while reading or
        recognizing the audio are caught and turned into such a message.
    """
    # Without a recording the library fails with an internal assertion
    # message; report the real cause to the learner instead.
    if not audio_path:
        return "No audio was provided; please record your pronunciation first"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        # The file is closed by now; only the recognition request remains.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        # Best effort by design: any other failure is shown as text in the UI.
        return str(e)

def calculate_similarity(original_text, recognized_text):
    """Return a case-insensitive similarity percentage between two strings.

    Both inputs are lower-cased and compared with ``difflib.SequenceMatcher``;
    its ratio is scaled by 100.
    """
    reference = original_text.lower()
    attempt = recognized_text.lower()
    matcher = SequenceMatcher(None, reference, attempt)
    return matcher.ratio() * 100

def process_conversation(scenario):
    """Look up the dialogue for ``scenario``.

    Returns a ``(lines, text)`` pair: the list of dialogue lines and the same
    lines joined with newlines. An unknown scenario yields ``([], "")``.
    """
    lines = generate_daily_conversation().get(scenario, [])
    return lines, "\n".join(lines)

def _strip_speaker_label(sentence):
    """Remove a leading ``A: `` / ``B: `` speaker label from a dialogue line."""
    for label in ('A: ', 'B: '):
        if sentence.startswith(label):
            return sentence[len(label):]
    return sentence


def process_sentence(sentence, audio):
    """Transcribe the learner's recording and score it against ``sentence``.

    Args:
        sentence: The dialogue line being practised; it may still carry its
            ``A: `` / ``B: `` speaker label.
        audio: Path of the recorded audio file, or ``None`` when nothing was
            recorded.

    Returns:
        ``(recognized_text, similarity)``. ``similarity`` is the percentage
        returned by ``calculate_similarity``, or ``None`` when there is no
        recording to score.
    """
    if not audio:
        # Nothing to transcribe: show a hint instead of scoring an error message.
        return "No audio was provided; please record your pronunciation first", None

    recognized_text = recognize_speech_from_microphone(audio)
    # The speaker label is never spoken (it is stripped before synthesis too),
    # so it must not count against the learner's score.
    spoken_sentence = _strip_speaker_label(sentence)
    similarity = calculate_similarity(spoken_sentence, recognized_text)
    return recognized_text, similarity

def update_sentence(index, conversation):
    """Select line ``index`` of ``conversation`` and synthesize its audio.

    Args:
        index: Position of the wanted line. Anything ``int()`` accepts is
            allowed (the UI passes the value of a number field); ``None`` or
            a non-numeric value is treated as out of range.
        conversation: List of dialogue lines, each optionally prefixed with
            an ``A: `` or ``B: `` speaker label.

    Returns:
        ``(sentence, audio_path, None, None)`` for a valid index; the two
        trailing ``None`` values clear the recognized-text and similarity
        outputs. For an invalid index, ``("", None, None, None)``.
    """
    # None (not "") is used for the audio slot so no empty file path is
    # handed to the audio component.
    nothing_selected = ("", None, None, None)

    try:
        index = int(index)
    except (TypeError, ValueError, OverflowError):
        # A cleared number field arrives as None; NaN/inf cannot be converted.
        return nothing_selected

    # Reject negatives explicitly: Python would otherwise wrap them around to
    # the end of the list and silently show the wrong sentence.
    if not conversation or not 0 <= index < len(conversation):
        return nothing_selected

    sentence = conversation[index]
    speaker, separator, spoken = sentence.partition(': ')
    if separator and speaker in ('A', 'B'):
        # Speaker A is voiced as 'male', speaker B as 'female'.
        gender = 'male' if speaker == 'A' else 'female'
    else:
        # Unlabelled line: speak it unchanged with the default voice.
        spoken, gender = sentence, 'female'

    audio_path = text_to_speech(spoken, gender=gender)
    return sentence, audio_path, None, None

def _native_audio_for(sentence):
    """Synthesize one dialogue line without its speaker label.

    Lines containing ``A:`` are voiced with ``gender='male'``, all others with
    ``gender='female'``. Returns the audio file path from ``text_to_speech``.
    """
    spoken = sentence.replace('A: ', '').replace('B: ', '')
    return text_to_speech(spoken, gender='male' if 'A:' in sentence else 'female')


# Build the practice UI: choose a scenario, step through its sentences, listen
# to the synthesized reference audio, record an attempt and get it scored.
with gr.Blocks() as demo:
    scenarios = list(generate_daily_conversation().keys())
    # Preselect the first scenario so the dropdown matches the dialogue that
    # is displayed when the page loads.
    scenario_choice = gr.Dropdown(choices=scenarios, value=scenarios[0], label="Choose a scenario")

    conversation, dialogue = process_conversation(scenarios[0])
    # Holds the list of lines of the currently selected scenario.
    dialogue_state = gr.State(value=conversation)
    dialogue_box = gr.Textbox(label="Generated Dialogue", value=dialogue, interactive=False)

    with gr.Row():
        sentence_index = gr.Number(label="Sentence Index", value=0, interactive=True)
        sentence_text = gr.Textbox(label="Sentence", value=conversation[0], interactive=False)
        # The reference audio for the first sentence is synthesized while the
        # UI is being built.
        native_audio = gr.Audio(label="Native Pronunciation", type="filepath", value=_native_audio_for(conversation[0]))

    with gr.Row():
        audio_input = gr.Audio(label="Your Pronunciation", type="filepath")
        evaluate_button = gr.Button("Evaluate Pronunciation")

    recognized_text = gr.Textbox(label="Recognized Text")
    similarity_score = gr.Number(label="Similarity (%)")

    def on_scenario_change(scenario):
        """Load ``scenario`` and reset the view to its first sentence.

        Returns values for, in order: dialogue state, dialogue text, sentence
        index, sentence text, reference audio, recognized text and similarity.
        """
        new_conversation, new_dialogue = process_conversation(scenario)
        if not new_conversation:
            # Cleared or unknown selection: blank the view instead of raising
            # IndexError on new_conversation[0].
            return [], "", 0, "", None, None, None
        first_sentence = new_conversation[0]
        # The index is reset to 0 so the number field agrees with the sentence
        # that is displayed after the switch.
        return new_conversation, new_dialogue, 0, first_sentence, _native_audio_for(first_sentence), None, None

    scenario_choice.change(on_scenario_change, inputs=[scenario_choice], outputs=[dialogue_state, dialogue_box, sentence_index, sentence_text, native_audio, recognized_text, similarity_score])

    sentence_index.change(update_sentence, inputs=[sentence_index, dialogue_state], outputs=[sentence_text, native_audio, recognized_text, similarity_score])
    evaluate_button.click(process_sentence, inputs=[sentence_text, audio_input], outputs=[recognized_text, similarity_score])

demo.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://c808567388fcb52858.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


