In [1]:
from google.cloud import texttospeech
# Spoken English name of every decimal digit, positioned at the digit's own index
words = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
digits = list(range(10))

# Pair each digit with its name: [(0, 'zero'), (1, 'one'), ...]
digit_list = [(digit, words[digit]) for digit in digits]


In [2]:
import random

def generate_random_sequences(min_length, max_length, quantity, seed=None):
    """Draw `quantity` distinct random sequences of decimal digits.

    For every draw a length is picked uniformly from
    [min_length, max_length] and that many digits (0-9) are sampled with
    replacement. Duplicates are discarded and drawing continues until
    `quantity` different sequences have been collected.

    Args:
        min_length: Smallest number of digits in a sequence (inclusive, >= 0).
        max_length: Largest number of digits in a sequence (inclusive).
        quantity: How many distinct sequences to return.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so identical arguments always yield the identical list.
            When None, the shared module-level ``random`` generator is used.

    Returns:
        A list of ``(file_name, input_sentence)`` tuples in the order they
        were first drawn. ``file_name`` is the digits concatenated
        (e.g. ``"042"``) and ``input_sentence`` is the same digits separated
        by single spaces (e.g. ``"0 4 2"``).

    Raises:
        ValueError: If the length range is invalid, or if `quantity` is larger
            than the number of distinct sequences that exist for that range.
    """
    if min_length < 0 or min_length > max_length:
        raise ValueError(
            f"invalid length range: min_length={min_length}, max_length={max_length}"
        )

    # There are exactly 10**k different digit strings of length k. Asking for
    # more unique sequences than exist would make the loop below spin forever,
    # so reject the request up front instead.
    capacity = sum(10 ** k for k in range(min_length, max_length + 1))
    if quantity > capacity:
        raise ValueError(
            f"cannot generate {quantity} unique sequences: only {capacity} exist "
            f"for lengths {min_length}..{max_length}"
        )

    rng = random if seed is None else random.Random(seed)
    digits = "0123456789"

    # A dict keyed by file_name de-duplicates like a set but keeps insertion
    # order, so the returned order does not depend on string hashing.
    sequences = {}
    while len(sequences) < quantity:
        # Randomly select the length of the sequence, then draw its digits
        length = rng.randint(min_length, max_length)
        file_name = ''.join(rng.choices(digits, k=length))

        # The spoken form is the same digits with a space between each one
        sequences[file_name] = ' '.join(file_name)

    return list(sequences.items())

# Demo run: ask for 50 unique sequences, each between 2 and 4 digits long
min_length, max_length, quantity = 2, 4, 50
random_sequences = generate_random_sequences(
    min_length=min_length,
    max_length=max_length,
    quantity=quantity,
)

# Show every generated pair, one per line
for file_name, input_sentence in random_sequences:
    print(f"File Name: {file_name}, Input Sentence: {input_sentence}")


File Name: 0879, Input Sentence: 0 8 7 9
File Name: 8237, Input Sentence: 8 2 3 7
File Name: 3046, Input Sentence: 3 0 4 6
File Name: 696, Input Sentence: 6 9 6
File Name: 29, Input Sentence: 2 9
File Name: 17, Input Sentence: 1 7
File Name: 289, Input Sentence: 2 8 9
File Name: 1652, Input Sentence: 1 6 5 2
File Name: 787, Input Sentence: 7 8 7
File Name: 7788, Input Sentence: 7 7 8 8
File Name: 949, Input Sentence: 9 4 9
File Name: 6032, Input Sentence: 6 0 3 2
File Name: 163, Input Sentence: 1 6 3
File Name: 3846, Input Sentence: 3 8 4 6
File Name: 3934, Input Sentence: 3 9 3 4
File Name: 8165, Input Sentence: 8 1 6 5
File Name: 104, Input Sentence: 1 0 4
File Name: 42, Input Sentence: 4 2
File Name: 9088, Input Sentence: 9 0 8 8
File Name: 06, Input Sentence: 0 6
File Name: 02, Input Sentence: 0 2
File Name: 037, Input Sentence: 0 3 7
File Name: 7425, Input Sentence: 7 4 2 5
File Name: 7915, Input Sentence: 7 9 1 5
File Name: 91, Input Sentence: 9 1
File Name: 53, Input Sentence: 5 3

In [3]:
import os

# Path to the downloaded service account JSON key; replace it with your own.
# It is exported through the environment for the current process only.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS='keys/digitsynth-16af178552d6.json')


In [9]:
def generate_speech(text, output_filename, client=None):
    """Synthesize `text` with Google Cloud Text-to-Speech and save the audio.

    The request uses the fixed voice "en-US-Wavenet-B" with LINEAR16 encoding,
    the "small-bluetooth-speaker-class-device" effects profile, a pitch of
    -2.4 and a speaking rate of 1.15. The returned bytes are written verbatim
    to `output_filename`, and a confirmation line is printed.

    Args:
        text: The text to be spoken.
        output_filename: Destination path of the audio file. Missing parent
            directories are created.
        client: Optional ``texttospeech.TextToSpeechClient`` to reuse across
            calls. When None, a new client is created for this call.

    Returns:
        None.
    """
    # open() does not create folders; make sure the destination exists before
    # spending an API request whose result could not be saved.
    target_dir = os.path.dirname(output_filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Set up the input text to be synthesized
    synthesis_input = texttospeech.SynthesisInput(text=text)

    # Define the custom voice request
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",  # The language code
        name="en-US-Wavenet-B"  # Specific voice name
    )

    # Define the custom audio configuration
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,  # WAV file format
        effects_profile_id=["small-bluetooth-speaker-class-device"],  # Effects profile
        pitch=-2.4,  # Custom pitch setting
        speaking_rate=1.15  # Custom speaking rate
    )

    # Build a client only when the caller did not hand one in, so a batch of
    # calls can share a single client instead of constructing one per file.
    if client is None:
        client = texttospeech.TextToSpeechClient()

    # Perform the text-to-speech request with the custom config
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )

    # Save the output as a WAV file
    with open(output_filename, "wb") as out:
        out.write(response.audio_content)
    print(f'Audio content written to "{output_filename}"')

# Example usage: Generate sequences and convert them to speech
min_length = 1
max_length = 2
quantity = 20
output_directory = "digits_two/"  # Directory where the .wav files will be saved

# Writing a file does not create its folder, so on a fresh checkout the first
# write would fail with FileNotFoundError. Create the folder up front.
os.makedirs(output_directory, exist_ok=True)

# Generate random sequences
random_sequences = generate_random_sequences(min_length, max_length, quantity)

# Loop through the generated sequences and create the audio files
for file_name, input_sentence in random_sequences:
    # Define the output file path
    output_filename = os.path.join(output_directory, f"{file_name}.wav")

    # Generate and save the speech
    generate_speech(input_sentence, output_filename)

Audio content written to "digits_two/15.wav"
Audio content written to "digits_two/63.wav"
Audio content written to "digits_two/14.wav"
Audio content written to "digits_two/8.wav"
Audio content written to "digits_two/78.wav"
Audio content written to "digits_two/4.wav"
Audio content written to "digits_two/3.wav"
Audio content written to "digits_two/2.wav"
Audio content written to "digits_two/00.wav"
Audio content written to "digits_two/30.wav"
Audio content written to "digits_two/9.wav"
Audio content written to "digits_two/97.wav"
Audio content written to "digits_two/23.wav"
Audio content written to "digits_two/47.wav"
Audio content written to "digits_two/49.wav"
Audio content written to "digits_two/62.wav"
Audio content written to "digits_two/28.wav"
Audio content written to "digits_two/0.wav"
Audio content written to "digits_two/41.wav"
Audio content written to "digits_two/76.wav"


In [8]:
from IPython.display import Audio


# Play one of the .wav files produced by the synthesis cell. File names are
# random on every run, so the path is taken from the generated sequences
# instead of a hard-coded name that may not exist.
sample_name = random_sequences[0][0]
Audio(os.path.join(output_directory, f"{sample_name}.wav"))