# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [3]:
! pip3 install --upgrade google-cloud-aiplatform  --quiet
! pip3 install --upgrade google-cloud-texttospeech  --quiet



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Notebook-level parameters; the trailing "@param" markers look like Colab
# form-field annotations.
# NOTE(review): neither value is referenced by the cells shown here -- confirm
# they are needed elsewhere in the notebook.
PROJECT_ID = "sandbox"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

In this section, you will get the list of all supported languages.

from typing import Sequence

import google.cloud.texttospeech as tts


def unique_languages_from_voices(voices: Sequence[tts.Voice]):
    """Collect the distinct language codes advertised by a group of voices.

    Args:
        voices: Voice entries; each one exposes an iterable ``language_codes``.

    Returns:
        A set holding every language code that appears on at least one voice.
        The set is empty when ``voices`` is empty.
    """
    return {code for entry in voices for code in entry.language_codes}


def list_languages():
    """Print every language code offered by the Text-to-Speech voices.

    Fetches the voice list from the API, reduces it to the distinct language
    codes, and prints them sorted, five right-aligned codes per row, under a
    60-character header line that shows the total count.
    """
    client = tts.TextToSpeechClient()
    response = client.list_voices()
    languages = sorted(unique_languages_from_voices(response.voices))

    print(f" Languages: {len(languages)} ".center(60, "-"))
    columns = 5
    for position, language in enumerate(languages, start=1):
        # End the row after every fifth code; otherwise stay on the same line.
        print(f"{language:>10}", end="\n" if position % columns == 0 else "")
    # A final partial row was printed with end="", so terminate it explicitly;
    # otherwise whatever is printed next would continue on that same line.
    if len(languages) % columns:
        print()
        

In [9]:
# Call the function:

list_languages()

# You should get the following (or a larger) list:
#
# ---------------------- Languages: 58 -----------------------
#      af-ZA     am-ET     ar-XA     bg-BG     bn-IN
#      ca-ES    cmn-CN    cmn-TW     cs-CZ     da-DK
#      de-DE     el-GR     en-AU     en-GB     en-IN
#      en-US     es-ES     es-US     eu-ES     fi-FI
#      ... (remaining rows omitted from this sample)


---------------------- Languages: 60 -----------------------
     af-ZA     am-ET     ar-XA     bg-BG     bn-IN
     ca-ES    cmn-CN    cmn-TW     cs-CZ     da-DK
     de-DE     el-GR     en-AU     en-GB     en-IN
     en-US     es-ES     es-US     et-EE     eu-ES
     fi-FI    fil-PH     fr-CA     fr-FR     gl-ES
     gu-IN     he-IL     hi-IN     hu-HU     id-ID
     is-IS     it-IT     ja-JP     kn-IN     ko-KR
     lt-LT     lv-LV     ml-IN     mr-IN     ms-MY
     nb-NO     nl-BE     nl-NL     pa-IN     pl-PL
     pt-BR     pt-PT     ro-RO     ru-RU     sk-SK
     sr-RS     sv-SE     ta-IN     te-IN     th-TH
     tr-TR     uk-UA     ur-IN     vi-VN    yue-HK


In [10]:
#In this section, you will get the list of voices available in different languages.

import google.cloud.texttospeech as tts


def list_voices(language_code=None):
    """Print a table of the voices returned by the API, ordered by voice name.

    Args:
        language_code: Optional language filter handed straight to the
            ``list_voices`` API call; the default ``None`` sends no filter.

    Each row shows the voice's language codes, its name, its SSML gender name
    and its natural sample rate in Hz, below a 60-character header line that
    carries the number of voices.
    """
    response = tts.TextToSpeechClient().list_voices(language_code=language_code)
    voices = list(response.voices)
    voices.sort(key=lambda entry: entry.name)

    header = f" Voices: {len(voices)} "
    print(header.center(60, "-"))
    for voice in voices:
        # Convert the stored gender value to its enum member to get a readable name.
        gender = tts.SsmlVoiceGender(voice.ssml_gender).name
        rate = voice.natural_sample_rate_hertz
        name = voice.name
        languages = ", ".join(voice.language_codes)
        print(f"{languages:<8} | {name:<24} | {gender:<8} | {rate:,} Hz")
        

In addition to a selection of multiple voices in different genders and qualities, multiple accents are available: Australian, British, Indian, and American English.

Take a moment to list the voices available for your preferred languages and variants (or even all of them):

In [11]:
# list_voices(language_code) lists the voices available for a given language.

# Now, get the list of available German voices:


list_voices("de")


------------------------ Voices: 32 ------------------------
de-DE    | de-DE-Chirp-HD-D         | MALE     | 24,000 Hz
de-DE    | de-DE-Chirp-HD-F         | FEMALE   | 24,000 Hz
de-DE    | de-DE-Chirp-HD-O         | FEMALE   | 24,000 Hz
de-DE    | de-DE-Chirp3-HD-Aoede    | FEMALE   | 24,000 Hz
de-DE    | de-DE-Chirp3-HD-Charon   | MALE     | 24,000 Hz
de-DE    | de-DE-Chirp3-HD-Fenrir   | MALE     | 24,000 Hz
de-DE    | de-DE-Chirp3-HD-Kore     | FEMALE   | 24,000 Hz
de-DE    | de-DE-Chirp3-HD-Leda     | FEMALE   | 24,000 Hz
de-DE    | de-DE-Chirp3-HD-Orus     | MALE     | 24,000 Hz
de-DE    | de-DE-Chirp3-HD-Puck     | MALE     | 24,000 Hz
de-DE    | de-DE-Chirp3-HD-Zephyr   | FEMALE   | 24,000 Hz
de-DE    | de-DE-Neural2-G          | FEMALE   | 24,000 Hz
de-DE    | de-DE-Neural2-H          | MALE     | 24,000 Hz
de-DE    | de-DE-Polyglot-1         | MALE     | 24,000 Hz
de-DE    | de-DE-Standard-A         | FEMALE   | 24,000 Hz
de-DE    | de-DE-Standard-B         | MALE     | 24,000 Hz
... (remaining rows truncated)

In [12]:
# In addition to a selection of multiple voices in different genders and
# qualities, multiple accents are available: Australian, British, Indian, and
# American English.

# Take a moment to list the voices available for your preferred languages and
# variants (or even all of them). Here: French, then Portuguese, then a call
# with no language filter.

list_voices("fr")
list_voices("pt")
list_voices()

------------------------ Voices: 53 ------------------------
fr-CA    | fr-CA-Chirp-HD-D         | MALE     | 24,000 Hz
fr-CA    | fr-CA-Chirp-HD-F         | FEMALE   | 24,000 Hz
fr-CA    | fr-CA-Chirp-HD-O         | FEMALE   | 24,000 Hz
fr-CA    | fr-CA-Chirp3-HD-Aoede    | FEMALE   | 24,000 Hz
fr-CA    | fr-CA-Chirp3-HD-Charon   | MALE     | 24,000 Hz
fr-CA    | fr-CA-Chirp3-HD-Fenrir   | MALE     | 24,000 Hz
fr-CA    | fr-CA-Chirp3-HD-Kore     | FEMALE   | 24,000 Hz
fr-CA    | fr-CA-Chirp3-HD-Leda     | FEMALE   | 24,000 Hz
fr-CA    | fr-CA-Chirp3-HD-Orus     | MALE     | 24,000 Hz
fr-CA    | fr-CA-Chirp3-HD-Puck     | MALE     | 24,000 Hz
fr-CA    | fr-CA-Chirp3-HD-Zephyr   | FEMALE   | 24,000 Hz
fr-CA    | fr-CA-Neural2-A          | FEMALE   | 24,000 Hz
fr-CA    | fr-CA-Neural2-B          | MALE     | 24,000 Hz
fr-CA    | fr-CA-Neural2-C          | FEMALE   | 24,000 Hz
fr-CA    | fr-CA-Neural2-D          | MALE     | 24,000 Hz
fr-CA    | fr-CA-Standard-A         | FEMALE   | 24,000 Hz
... (remaining rows truncated)

You can use the Text-to-Speech API to convert a string into audio data. You can configure the output of speech synthesis in a variety of ways, including selecting a unique voice or modulating the output in pitch, volume, speaking rate, and sample rate.

In [15]:
import google.cloud.texttospeech as tts


def text_to_wav(voice_name: str, text: str, voice_filename: str) -> None:
    """Synthesize ``text`` with the given voice and save the audio to disk.

    Args:
        voice_name: Full voice name such as "en-AU-Neural2-A". The language
            code sent with the request is built from its first two
            dash-separated parts ("en-AU" in that example).
        text: Plain text to convert to speech.
        voice_filename: Output path without extension; ".wav" is appended.

    The request asks for LINEAR16 audio encoding, and the returned bytes are
    written to the file unchanged.
    """
    language_code = "-".join(voice_name.split("-")[:2])
    text_input = tts.SynthesisInput(text=text)
    voice_params = tts.VoiceSelectionParams(
        language_code=language_code, name=voice_name
    )
    audio_config = tts.AudioConfig(audio_encoding=tts.AudioEncoding.LINEAR16)

    client = tts.TextToSpeechClient()
    response = client.synthesize_speech(
        input=text_input,
        voice=voice_params,
        audio_config=audio_config,
    )

    filename = f"{voice_filename}.wav"
    with open(filename, "wb") as out:
        out.write(response.audio_content)
    # Report the name actually written (including ".wav"); the previous message
    # printed the extension-less argument, which is not the file on disk.
    print(f'Generated speech saved to "{filename}"')
        

In [16]:
# Synthesize a sample question with the "en-AU-Neural2-A" voice; text_to_wav
# appends ".wav", so the audio is written to "Section1_a.wav".
text_to_wav("en-AU-Neural2-A", "What is the temperature in Sydney?", "Section1_a")

Generated speech saved to "Section1_a"
