In [None]:
!pip install google-cloud-texttospeech

In [None]:
import os

# Synthesis parameters. Field reference:
# https://cloud.google.com/text-to-speech/docs/reference/rpc/google.cloud.texttospeech.v1beta1
params = {"audio_encoding": "LINEAR16",
          "voice_name": 'en-US-Wavenet-I',
          "speaking_rate": 1.0,
          "speaking_pitch": 0}

# Choose the file suffix from the requested encoding so the file name matches
# the bytes written to disk (LINEAR16 output used to be saved as ".mp3").
# Per the AudioEncoding reference, LINEAR16/MULAW/ALAW responses include a WAV
# header. Encodings not listed here keep the historical ".mp3" suffix.
audio_suffix_by_encoding = {"LINEAR16": ".wav",
                            "MULAW": ".wav",
                            "ALAW": ".wav",
                            "MP3": ".mp3",
                            "MP3_64_KBPS": ".mp3",
                            "OGG_OPUS": ".ogg"}
audio_suffix = audio_suffix_by_encoding.get(params['audio_encoding'], ".mp3")

path_to_save_audio = "/content/" + params['voice_name'] + audio_suffix
print(path_to_save_audio)

# Example content for the text file:
# Google Cloud Text-to-Speech enables developers to synthesize natural-sounding speech with 100+ voices, available in multiple languages and variants.
# It applies DeepMind's groundbreaking research in WaveNet and Google's powerful neural networks to deliver the highest fidelity possible.
# As an easy-to-use API, you can create lifelike interactions with your users, across many applications and devices.
path_to_text = '/content/script.txt'

# Backend selector: 'webAPI' (REST call with an API key) or 'pyAPI' (Python client library).
generate_flag = 'webAPI'

# Credentials for pyAPI: path to a service-account JSON key file.
# Tutorial for generating the JSON key: https://www.youtube.com/watch?v=gb0bytUGDnQ
path_to_credentials = "PATH_TO_YOUR_KEY.json"
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path_to_credentials

# Credentials for webAPI: an API key, appended to the request URL.
# Tutorial for generating the API key: https://www.youtube.com/watch?v=y_vQvAqgqgA
key = 'YOUR_TTS_API_KEY'
post_url = 'https://texttospeech.googleapis.com/v1beta1/text:synthesize?fields=audioContent&key=' + key

In [None]:
def load_text(path_text):
    """Read a text file and return its content as one string.

    Newline characters are removed from every line and the lines are
    concatenated without any separator, so an empty file yields "".

    Args:
        path_text: Path of the text file to read.

    Returns:
        The file content with all line breaks removed.
    """
    # Read from the path that was passed in; the previous version ignored this
    # argument and always opened the notebook-level `path_to_text`.
    with open(path_text, 'r') as text_file:
        return "".join(single_line.strip("\n") for single_line in text_file)

def save_audio(path_audio,audio_content):
    """Write binary audio data to a file and report the destination.

    Args:
        path_audio: Destination file path; opened in binary write mode.
        audio_content: Bytes to write to the file.
    """
    with open(path_audio, "wb") as audio_file:
        audio_file.write(audio_content)
        confirmation = 'Audio content written to file:' + path_audio
        print(confirmation)

In [None]:
from google.cloud import texttospeech_v1beta1 as tts

def text_to_speech_google_py(voice_name,text,audio_encoding,speaking_rate,speaking_pitch):
    """Synthesize speech through the google-cloud-texttospeech client library.

    Args:
        voice_name: Voice identifier such as 'en-US-Wavenet-I'. The language
            code is taken from its first two dash-separated parts ('en-US').
        text: Text to synthesize.
        audio_encoding: Encoding value passed to ``tts.AudioConfig``.
        speaking_rate: Speaking rate passed to ``tts.AudioConfig``.
        speaking_pitch: Pitch passed to ``tts.AudioConfig``.

    Returns:
        The ``audio_content`` field of the synthesis response.
    """
    language_code = '-'.join(voice_name.split('-')[:2])
    text_input = tts.SynthesisInput(text=text)
    voice_params = tts.VoiceSelectionParams(
        language_code=language_code,
        name=voice_name)
    audio_config = tts.AudioConfig(
        audio_encoding=audio_encoding, speaking_rate=speaking_rate, pitch=speaking_pitch)

    # Build the client once; the earlier version constructed it twice and
    # discarded the first instance.
    client = tts.TextToSpeechClient()
    response = client.synthesize_speech(
        input=text_input,
        voice=voice_params,
        audio_config=audio_config)

    return response.audio_content

In [None]:
import requests, json
from base64 import b64decode

def generate_request_body(params, text):
    """Serialize the synthesis request as a JSON string.

    Args:
        params: Mapping with the keys 'audio_encoding', 'speaking_pitch',
            'speaking_rate' and 'voice_name'.
        text: Text placed in the request's "input" section.

    Returns:
        A JSON string with "audioConfig", "input" and "voice" sections.
    """
    audio_section = {
        "audioEncoding": params['audio_encoding'],
        "pitch": params['speaking_pitch'],
        "speakingRate": params['speaking_rate'],
    }
    input_section = {"text": text}

    # The language code is the first two dash-separated parts of the voice name.
    name_parts = params['voice_name'].split('-')
    voice_section = {
        "languageCode": '-'.join(name_parts[:2]),
        "name": params['voice_name'],
    }

    payload = {
        "audioConfig": audio_section,
        "input": input_section,
        "voice": voice_section,
    }
    return json.dumps(payload)


def text_to_speech_google_web(params, text):
    """Synthesize speech by POSTing a JSON request to ``post_url``.

    Args:
        params: Synthesis parameters, forwarded to ``generate_request_body``.
        text: Text to synthesize.

    Returns:
        The decoded bytes of the response's base64 'audioContent' field.

    Raises:
        KeyError: If the JSON response has no 'audioContent' field. The
            message includes the response body so the reason is visible.
    """
    data = generate_request_body(params, text)
    response_payload = requests.post(post_url,data=data).json()
    if 'audioContent' not in response_payload:
        # Keep the exception type (KeyError) but surface what the service
        # actually replied instead of only the missing key name.
        raise KeyError(
            "'audioContent' missing from Text-to-Speech response: {!r}".format(response_payload))
    return b64decode(response_payload['audioContent'])

In [None]:
import sys

# Load the text, synthesize it with the backend chosen by generate_flag,
# then write the resulting audio to disk.
text = load_text(path_to_text)

if generate_flag == 'webAPI':
    audio_content = text_to_speech_google_web(params,text)
elif generate_flag == 'pyAPI':
    audio_content = text_to_speech_google_py(params['voice_name'],text,params['audio_encoding'],params['speaking_rate'],params['speaking_pitch'])
else:
    # Name the accepted values exactly, and pass the message to sys.exit so the
    # run stops with a failure status rather than a success status.
    sys.exit("Please check the generate_flag, which should be 'webAPI' or 'pyAPI', but your method is {}.".format(generate_flag))

save_audio(path_to_save_audio, audio_content)