# Google GCP Vertex-AI Text to Speech implementation
Install the following packages before beginning

In [None]:
#!pip install google-cloud-texttospeech
#!pip install google-cloud-aiplatform

# Test your google authenticators. 
If you don't have any listed then you will need to sign up on GCP </br>
Also make sure you have allowed Vertex AI in your project

In [1]:
#!gcloud auth list

It should print something like this:
```
    Credentialed Accounts
ACTIVE  ACCOUNT
*     YOUREMAILtDOMAINil.com

To set the active account, run:
    $ gcloud config set account `ACCOUNT`
```

In [2]:
#!gcloud auth application-default print-access-token

# GCP Vertex-AI engine setup
you can choose your voices from `client.list_voices()`

In [9]:
from IPython.display import Audio

In [7]:
"""Synthesizes speech from the input string of text."""
from google.cloud import texttospeech

client = texttospeech.TextToSpeechClient()

input_text = texttospeech.SynthesisInput(text="Principal Component Analysis relies on Eigen Vectors and Values as well as Projections and Dimensional Reduction. Using PCA we can construct a co-variance matrix")

# Note: the voice can also be specified by name.
# Names of voices can be retrieved with client.list_voices().
voice = texttospeech.VoiceSelectionParams(
    language_code="en-US",
    name="en-US-Studio-O",
)

audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.LINEAR16,
    speaking_rate=1
)

response = client.synthesize_speech(
    request={"input": input_text, "voice": voice, "audio_config": audio_config}
)

# The response's audio_content is binary.
with open("output.mp3", "wb") as out:
    out.write(response.audio_content)
    print('Audio content written to file "output.mp3"')

Audio content written to file "output.mp3"


In [10]:
filename = 'output.mp3'
display(Audio(filename, autoplay=True))

In [12]:
synthesis_input = texttospeech.SynthesisInput(text="Hello, World!")
voice = texttospeech.VoiceSelectionParams(
    language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
)
audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.MP3
)
response = client.synthesize_speech(
    input=synthesis_input, voice=voice, audio_config=audio_config
)

In [14]:
# The response's audio_content is binary.
with open("output2.mp3", "wb") as out:
    out.write(response.audio_content)
    print('Audio content written to file "output2.mp3"')

Audio content written to file "2output.mp3"


In [15]:
filename = 'output2.mp3'
display(Audio(filename, autoplay=True))

# Build a function to do all our stuff for us

In [4]:
def useGCP_TTS(myText, voice, filename):
    input_text = texttospeech.SynthesisInput(text=myText)
    
    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name=voice,
    )
    
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
        speaking_rate=1
    )
    
    response = client.synthesize_speech(
        request={"input": input_text, "voice": voice, "audio_config": audio_config}
    )
    
    # The response's audio_content is binary.
    with open(filename, "wb") as out:
        out.write(response.audio_content)
        print('Audio content written to file "output.mp3"')
    return filename

In [23]:
filename = 'output2.mp3'
voice = 'en-US-Studio-O'
#testing our model using an audio file
myText = """Help us change lives
At our company we are working hard to help improve patient lives.
Join us at our start up to help make a difference in someone else life by utilizing your software engineering and machine learning expertise.
"""

myText2 = """Position Overview

The Lead Machine Learning Engineer is largely self-driven, working in collaboration with others on one or more projects which are extremely complex in scope.

"""

useGCP_TTS(myText, voice, filename)

Audio content written to file "output.mp3"


'output2.mp3'

In [24]:
display(Audio(filename, autoplay=True))

In [None]:
useGCP_TTS(myText, voice, filename)

In [None]:
display(Audio(filename, autoplay=True))

### Print list of voices you can choose from

In [3]:
#client.list_voices()