# Making sense of the world through speech

In [None]:
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import IPython
import os
import azure.cognitiveservices.speech as speechsdk

In [None]:
# Sample text that the text-to-speech cells read aloud.
# Adjacent string literals are joined by the parser, so the words are separated
# by single spaces only. A backslash continuation inside one literal would
# instead embed the next line's leading indentation in the text.
sentence = (
    "Accessibility empowers everyone. "
    "Accessibility and inclusion are essential to delivering our mission "
    "to empower every person and every organisation on the planet to achieve more"
)

In [None]:
# Azure Speech resource credentials.
# The SPEECH_KEY and SPEECH_REGION environment variables take precedence when
# set, so a real key does not have to be pasted into the notebook. Without
# them the placeholder key and the "westeurope" region are used.
speech_key = os.environ.get("SPEECH_KEY", "<INSERT KEY>")
service_region = os.environ.get("SPEECH_REGION", "westeurope")

# 





## Neural text-to-speech

Microsoft neural text-to-speech uses deep neural networks to make the voices of computers nearly indistinguishable from recordings of people. With the human-like natural prosody and clear articulation of words, neural text-to-speech has significantly reduced listening fatigue when you interact with AI systems.

[Language support](https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support#neural-voices)   
[What is neural text-to-speech?](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech)

In [None]:
# Configure speech synthesis for British English.
speech_config = SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_synthesis_language = "en-GB"
# Only one voice assignment is kept: a second assignment directly after this
# one ("en-GB-HazelRUS") used to overwrite the neural voice, leaving this line
# dead. NOTE(review): "en-GB-HazelRUS" looks like a non-neural voice; confirm
# against the language-support page before switching to it.
speech_config.speech_synthesis_voice_name = "en-GB-LibbyNeural"

In [None]:
# Name the output file after the configured voice, e.g. "<voice name>.wav".
audio_file = f'{speech_config.speech_synthesis_voice_name}.wav'

# Send the synthesized audio to that file.
audio_config = AudioOutputConfig(filename=audio_file)

synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

# Inspect the outcome instead of discarding it: with an invalid key or region
# the request is cancelled and the file is not usable for playback.
synthesis_result = synthesizer.speak_text(sentence)
if synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print(f'Audio written to "{audio_file}"')
else:
    print(f'Speech synthesis did not complete: {synthesis_result.reason}')
    cancellation = synthesis_result.cancellation_details
    if cancellation is not None:
        print(f'Cancellation reason: {cancellation.reason}')
        if cancellation.error_details:
            print(f'Error details: {cancellation.error_details}')

In [None]:
# Embed a player for the synthesized file; autoplay starts playback on display.
IPython.display.Audio(audio_file, autoplay=True)

# 





## Speech translation (translating speech to text)

One of the core features of the Speech service is the ability to recognize human speech and translate it to other languages.

[Get started with speech translation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-speech-translation)


In [None]:
# Recognize a single utterance and translate it into Dutch and Hindi.
# No audio configuration is passed to the recognizer, so the SDK's default
# input is used (presumably the default microphone; confirm for your setup).
translation_config = speechsdk.translation.SpeechTranslationConfig(
    subscription=speech_key, region=service_region)

translation_config.speech_recognition_language = "en-US"

# Targets use bare language codes, consistent with "hi" here and with the
# codes used in the multi-language example later in this notebook.
translation_config.add_target_language("nl")
translation_config.add_target_language("hi")

recognizer = speechsdk.translation.TranslationRecognizer(
    translation_config=translation_config)

print('Say something...')
result = recognizer.recognize_once()

if result.reason == speechsdk.ResultReason.TranslatedSpeech:
    print(f'Recognized: "{result.text}"')

    print("= Translations =")
    for trans, translated_text in result.translations.items():
        print(f'\n{trans}: {translated_text}')
elif result.reason == speechsdk.ResultReason.Canceled:
    # Surface service-side failures (bad key, wrong region, network errors)
    # rather than printing an empty recognition.
    cancellation = result.cancellation_details
    print(f'Translation canceled: {cancellation.reason}')
    if cancellation.reason == speechsdk.CancellationReason.Error:
        print(f'Error details: {cancellation.error_details}')
else:
    print(f'No translation available: {result.reason}')

# 





## Translating speech to multiple target languages and synthesizing the translations

In [None]:
# Locale of the spoken input that will be recognized.
from_language = "en-US"
# Language codes the recognized speech is translated into.
to_languages = [
    "nl",
    "en",
    "hi",
    "pt",
    "lt",
    "zh-Hans",
]

def translate_speech_to_text():
    """Recognize one utterance, translate it, and synthesize the translations.

    Reads the module-level ``speech_key``, ``service_region``,
    ``from_language`` and ``to_languages``. The recognition result is handed
    to ``synthesize_translations``; nothing is returned.
    """
    config = speechsdk.translation.SpeechTranslationConfig(
        subscription=speech_key,
        region=service_region,
    )
    config.speech_recognition_language = from_language

    # Register every requested translation target on the same configuration.
    for target in to_languages:
        config.add_target_language(target)

    translator = speechsdk.translation.TranslationRecognizer(translation_config=config)

    print('Say something...')
    synthesize_translations(result=translator.recognize_once())

def synthesize_translations(result):
    """Print every translation in ``result`` and synthesize it to a WAV file.

    Args:
        result: Object exposing ``text`` (the recognized utterance) and
            ``translations`` (a mapping of target language code to translated
            text), such as the value passed by ``translate_speech_to_text``.

    For each language that has an entry in the voice map below, the
    translation is synthesized to ``<language>-translation.wav``. Languages
    without a mapped voice are reported and skipped rather than assigning
    ``None`` as the voice name. A synthesis that does not complete is
    reported as well. Nothing is returned.
    """
    language_to_voice_map = {
        "nl": "nl-NL-FennaNeural",
        "en": "en-US-AriaNeural",
        "hi": "hi-IN-SwaraNeural",
        "pt": "pt-BR-FranciscaNeural",
        "lt": "lt-LT-OnaNeural",
        "zh-Hans": "zh-CN-XiaoxiaoNeural",
    }
    print(f'Recognized: "{result.text}"')

    for language, translation in result.translations.items():
        print(f'Translated into "{language}": {translation}')

        # Resolve the voice before touching the SDK so an unmapped language
        # never reaches the synthesizer.
        voice_name = language_to_voice_map.get(language)
        if voice_name is None:
            print(f'No voice configured for "{language}"; skipping synthesis.')
            continue

        speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
        speech_config.speech_synthesis_voice_name = voice_name

        audio_config = speechsdk.audio.AudioOutputConfig(filename=f'{language}-translation.wav')
        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
        synthesis_result = speech_synthesizer.speak_text_async(translation).get()
        if synthesis_result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
            print(f'Synthesis for "{language}" did not complete: {synthesis_result.reason}')

# Run the demo: listen for one utterance, translate it into every language in
# to_languages, and write one "<language>-translation.wav" file per translation.
translate_speech_to_text()

In [None]:
# Play the synthesized English translation.
IPython.display.Audio("en-translation.wav", autoplay=True)

In [None]:
# Play the synthesized Hindi translation.
IPython.display.Audio("hi-translation.wav", autoplay=True)

In [None]:
# Play the synthesized Dutch translation.
IPython.display.Audio("nl-translation.wav", autoplay=True)