In [None]:
# Use IBM Watson Speech to Text, Language Translator and Text to Speech 
# APIs for English and Mandarin Chinese
from watson_developer_cloud import SpeechToTextV1
from watson_developer_cloud import LanguageTranslatorV3
from watson_developer_cloud import TextToSpeechV1
import mykeys  # contains my API keys for accessing Watson services
import wave
import pydub
import pydub.playback
import pyaudio

def run_en_to_zh():
    """Run one English-to-Mandarin round: record, transcribe, translate, speak.

    Records the user's speech to 'english.wav', transcribes it, translates
    the transcript to Simplified Chinese, synthesizes the translation into
    'chinese.wav' and plays that file. Stops early (after printing a
    message) when the transcription step yields nothing.
    """
    recorded_file = 'english.wav'
    spoken_file = 'chinese.wav'

    # Capture the user's English speech
    input('Press Enter, then speak in English')
    record_audio(recorded_file)

    # English audio -> English text
    english_text = en_speech_to_text(
        file_name=recorded_file, model_id='en-US_BroadbandModel')
    if not english_text:
        # Nothing was transcribed, so there is nothing to translate
        print('Program exited.')
        return
    print('Received English:', english_text)

    # English text -> Simplified Chinese text
    chinese_text = translate(text_to_translate=english_text, model='en-zh')
    print('Chinese text:', chinese_text)

    # Chinese text -> Mandarin audio file, then play it back
    text_to_speech(
        text=chinese_text, voice='zh-CN_LiNaVoice', file_name=spoken_file)
    play_audio(file_name=spoken_file)

def run_zh_to_en():
    """Run one Mandarin-to-English round: record, transcribe, translate, speak.

    Records the user's speech to 'chineseresponse.wav', transcribes it,
    translates the transcript to English, synthesizes the translation into
    'englishresponse.wav' and plays that file. Stops early (after printing
    a message) when the transcription step yields nothing.
    """
    recorded_file = 'chineseresponse.wav'
    spoken_file = 'englishresponse.wav'

    # Capture the user's Mandarin speech
    input('Press Enter, then speak in Mandarin')
    record_audio(recorded_file)

    # Mandarin audio -> Chinese text
    chinese_text = zh_speech_to_text(
        file_name=recorded_file, model_id='zh-CN_BroadbandModel')
    if not chinese_text:
        # Nothing was transcribed, so there is nothing to translate
        print('Program exited.')
        return
    print('Received Mandarin:', chinese_text)

    # Chinese text -> English text
    english_text = translate(text_to_translate=chinese_text, model='zh-en')
    print('English text:', english_text)

    # English text -> English audio file, then play it back
    text_to_speech(
        text=english_text, voice='en-US_AllisonVoice', file_name=spoken_file)
    play_audio(file_name=spoken_file)

def en_speech_to_text(file_name, model_id):
    """Use Watson Speech to Text to convert English audio file to English text.

    Args:
        file_name: Path of the WAV file to transcribe.
        model_id: Watson Speech to Text model name passed to recognize().

    Returns:
        The transcript as a string, built from the first alternative of
        every result in the response. Returns None when the response holds
        no results; in that case a message is printed and run_en_to_zh()
        is invoked again before returning, so the caller should simply stop.
    """
    # Create client
    stt = SpeechToTextV1(iam_apikey=mykeys.speech_to_text_key)
    # Send the audio file for recognition
    with open(file_name, 'rb') as audio_file:
        result = stt.recognize(audio=audio_file,
            content_type='audio/wav', model=model_id).get_result()
    # Treat a response without a 'results' key like an empty result list
    results_list = result.get('results') or []
    # Prompt user to try again if nothing was recognized
    if not results_list:
        print('No audio detected! Try again:')
        run_en_to_zh()
        return None
    # The response may contain several results (e.g. when the speaker
    # pauses); use all of them rather than only the first so no part of
    # the utterance is dropped. Transcripts are concatenated unchanged so
    # a single-result response yields exactly the same string as before.
    # NOTE(review): this relies on each transcript carrying its own
    # trailing space, as Watson transcripts normally do - confirm.
    return ''.join(
        final_result['alternatives'][0]['transcript']
        for final_result in results_list)

def zh_speech_to_text(file_name, model_id):
    """Use Watson Speech to Text to convert Mandarin audio file to Chinese text.

    Args:
        file_name: Path of the WAV file to transcribe.
        model_id: Watson Speech to Text model name passed to recognize().

    Returns:
        The transcript as a string, built from the first alternative of
        every result in the response. Returns None when the response holds
        no results; in that case a message is printed and run_zh_to_en()
        is invoked again before returning, so the caller should simply stop.
    """
    # Create client
    stt = SpeechToTextV1(iam_apikey=mykeys.speech_to_text_key)
    # Send the audio file for recognition
    with open(file_name, 'rb') as audio_file:
        result = stt.recognize(audio=audio_file,
            content_type='audio/wav', model=model_id).get_result()
    # Treat a response without a 'results' key like an empty result list
    results_list = result.get('results') or []
    # Prompt user to try again if nothing was recognized
    if not results_list:
        print('No audio detected! Try again:')
        run_zh_to_en()
        return None
    # The response may contain several results (e.g. when the speaker
    # pauses); use all of them rather than only the first so no part of
    # the utterance is dropped. Transcripts are concatenated unchanged so
    # a single-result response yields exactly the same string as before.
    return ''.join(
        final_result['alternatives'][0]['transcript']
        for final_result in results_list)

def translate(text_to_translate, model):
    """Translate text with Watson Language Translator.

    The model argument selects the direction, e.g. 'en-zh' for English to
    Chinese or 'zh-en' for Chinese to English. Returns the translated
    string taken from the first entry of the response's 'translations'.
    """
    # Build the service client
    translator = LanguageTranslatorV3(
        version='2018-05-31', iam_apikey=mykeys.translate_key)
    # Request the translation and unwrap the response payload
    response = translator.translate(text=text_to_translate, model_id=model)
    payload = response.get_result()
    # Only the first translation entry is used
    return payload['translations'][0]['translation']

def text_to_speech(text, voice, file_name):
    """Use Watson Text to Speech to convert text to specified voice
       and save to a WAV file.

    Args:
        text: The text to synthesize.
        voice: Name of the Watson voice to use (e.g. 'en-US_AllisonVoice').
        file_name: Path of the WAV file to write; overwritten if it exists.
    """
    # Create client
    tts = TextToSpeechV1(iam_apikey=mykeys.text_to_speech_key)
    # Fetch the audio BEFORE opening the output file: opening with 'wb'
    # truncates it, so a failed request would otherwise leave an empty
    # WAV behind (or destroy a previously synthesized one).
    audio_bytes = tts.synthesize(
        text, accept='audio/wav', voice=voice).get_result().content
    # Write audio into new file
    with open(file_name, 'wb') as audio_file:
        audio_file.write(audio_bytes)

def record_audio(file_name):
    """Record 5 seconds of audio to a WAV file.

    Opens a 2-channel, 16-bit, 44100 Hz PyAudio input stream, reads it in
    1024-frame chunks for 5 seconds and writes the captured frames to
    file_name. The stream and the PyAudio instance are released even if
    opening or reading the stream raises.

    Args:
        file_name: Path of the WAV file to write.
    """
    FRAME_RATE = 44100  # frames per second
    CHUNK = 1024  # frames read per call
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    SECONDS = 5
    recorder = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still live
        sample_width = recorder.get_sample_size(FORMAT)
        # Configure and open audio stream for recording
        audio_stream = recorder.open(format=FORMAT, channels=CHANNELS,
            rate=FRAME_RATE, input=True, frames_per_buffer=CHUNK)
        try:
            print('Recording 5 seconds of audio')
            # Number of whole chunks that fit in the recording duration
            chunk_count = int(FRAME_RATE * SECONDS / CHUNK)
            audio_frames = [
                audio_stream.read(CHUNK) for _ in range(chunk_count)]
            print('Recording complete')
        finally:
            # Stop recording
            audio_stream.stop_stream()
            audio_stream.close()
    finally:
        # Release resources used by PyAudio
        recorder.terminate()
    # Save audio_frames to a WAV file
    with wave.open(file_name, 'wb') as output_file:
        output_file.setnchannels(CHANNELS)
        output_file.setsampwidth(sample_width)
        output_file.setframerate(FRAME_RATE)
        output_file.writeframes(b''.join(audio_frames))

def play_audio(file_name):
    """Load the WAV file at file_name with pydub and play it."""
    pydub.playback.play(pydub.AudioSegment.from_wav(file_name))

In [None]:
# Run one English-to-Chinese round: prompts, records audio, then plays the translation
run_en_to_zh()

In [None]:
# Run one Chinese-to-English round: prompts, records audio, then plays the translation
run_zh_to_en()