In [1]:
"""Synthesizes speech from the input string of text or ssml.
Make sure to be working in a virtual environment.

Note: ssml must be well-formed according to:
    https://www.w3.org/TR/speech-synthesis/
"""
from google.cloud import texttospeech

# Create the Text-to-Speech API client.
client = texttospeech.TextToSpeechClient()

# Plain-text payload to be synthesized.
synthesis_input = texttospeech.SynthesisInput(
    text="こんにちは、滝澤です。",
)

# Voice selection: Japanese ("ja-JP"), the named voice "ja-JP-Neural2-C",
# and SSML gender MALE.
# Language codes used in this notebook: English = "en-US", Japanese = "ja-JP".
# Gender options: NEUTRAL / MALE / FEMALE.
voice = texttospeech.VoiceSelectionParams(
    ssml_gender=texttospeech.SsmlVoiceGender.MALE,
    name="ja-JP-Neural2-C",
    language_code="ja-JP",
)

# Ask for the audio to be returned MP3-encoded.
audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)

# Send the synthesis request built from the text input, the voice selection
# and the audio configuration above.
response = client.synthesize_speech(
    audio_config=audio_config,
    voice=voice,
    input=synthesis_input,
)

# response.audio_content holds the binary audio data.
# Uncomment the lines below to write it to an MP3 file.
# filename = "output.mp3"
# with open(filename, "wb") as out:
#     # Write the response to the output file.
#     out.write(response.audio_content)
#     print(f'音声データは{filename}ファイルに書き出しました')

In [3]:
from IPython.display import Audio

# Build an inline audio player from the synthesized bytes. This must remain
# the last expression of the cell so the notebook renders it as output.
Audio(data=response.audio_content)

In [None]:
from google.cloud import texttospeech

In [20]:
# Imported here so the final Audio(...) call does not depend on an earlier
# cell having been executed in this kernel session.
from IPython.display import Audio

# --- Selections (defaults) -------------------------------------------------
# These are set first because the voice table below is chosen from `lang`.
lang = '日本語'          # key into lang_code
g_type = 'ニュートラル'   # key into gender_type
v_type = 'ニュートラル'   # key into voice_type
talk_text = "こんにちは、滝澤です。"

# Display label -> language code sent to the API.
lang_code = {
    '英語': 'en-US',
    '日本語': 'ja-JP',
}

# Display label -> SSML gender enum value.
gender_type = {
    # Disabled entry for the "unspecified" gender:
    #'デフォルト':texttospeech.SsmlVoiceGender.SSML_VOICE_GENDER_UNSPECIFIED,
    '男性': texttospeech.SsmlVoiceGender.MALE,
    '女性': texttospeech.SsmlVoiceGender.FEMALE,
    'ニュートラル': texttospeech.SsmlVoiceGender.NEUTRAL,
}

# Voice names per language, keyed by the same labels as `lang_code`.
# An empty name leaves the voice unspecified; per the VoiceSelectionParams
# documentation the service then picks a voice from the other parameters.
voice_names = {
    '日本語': {
        '男性': "ja-JP-Neural2-C",
        '女性': "ja-JP-Neural2-B",
        'ニュートラル': "",
    },
    '英語': {
        '男性': "en-US-Neural2-J",
        '女性': "en-US-Neural2-F",
        'ニュートラル': "",
    },
}

# Pick the voice table for the selected language. The original compared the
# whole `lang_code` dict against '日本語' (always False) and did so before
# `lang` was assigned; indexing by `lang` keeps the voice name consistent
# with the language code used in the request.
voice_type = voice_names[lang]

# --- Request ---------------------------------------------------------------
client = texttospeech.TextToSpeechClient()

synthesis_input = texttospeech.SynthesisInput(text=talk_text)

# NOTE(review): g_type and v_type are chosen independently; confirm the API
# accepts a gender that does not match the named voice before mixing them.
voice = texttospeech.VoiceSelectionParams(
    language_code=lang_code[lang],
    name=voice_type[v_type],
    ssml_gender=gender_type[g_type],
)

# Return the audio MP3-encoded.
audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.MP3
)

response = client.synthesize_speech(
    input=synthesis_input,
    voice=voice,
    audio_config=audio_config,
)

# Last expression of the cell: renders an inline player for the result.
Audio(response.audio_content)