# OpenAI TTS

In [1]:
import gradio as gr
import os
import tempfile
import numpy as np
import soundfile as sf
from dotenv import load_dotenv
from openai import OpenAI
import random

load_dotenv()
client = OpenAI()

In [3]:
def generate_audio_binary(openai_client: OpenAI, text: str) -> bytes:
    """
    Generate audio from text using OpenAI's TTS API and return it as binary data.

    Args:
        openai_client (OpenAI): Initialized OpenAI client
        text (str): Text to convert to speech

    Returns:
        bytes: Binary audio data in MP3 format

    Raises:
        OpenAIError: If the API request fails
    """
    voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
    selected_voice = random.choice(voices)

    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=True) as temp_file:
        # Create speech using the OpenAI API
        with openai_client.audio.speech.with_streaming_response.create(
            model="tts-1", voice=selected_voice, input=text, response_format="mp3"
        ) as response:
            response.stream_to_file(temp_file.name)
        print(f"temp_file.name: {temp_file.name}")

        temp_file.seek(0)  # Reset file pointer to the beginning
        audio_bytes = temp_file.read()  # Read the binary content

    print(f"Size of binary data: {len(audio_bytes)} bytes")
    return audio_bytes
generate_audio_binary(client, "hejsa")

temp_file.name: /tmp/tmphx02z_x_.mp3
Size of binary data: 10560 bytes


b'\xff\xf3\xe4\xc4\x00e\xe4:\x00\x01Z\xc8\x00?\x1a\x8e\x93\x03\x04\x1c\xd5\xc3:4\xce\x8csv\xc4\xd5\xab6k\xceD\x03t\xc8\xd4\x1e2\xc4\x8cp\xe3\x1c@\xca\x163\x06\x8c\xd1CBd\xcc\x972\xa4Lx\xd3\x12\x1c\xc2\x830`\xcc\x18S\x0e\x14\xce(\xca \xc4\x08\x04\x01f\x0bHZB\xc8\x16`\x049m\xd3\x0e\x0fe\t\x88\xc1\x11\xdc\xcfT\xec\xb0\xea\x90\xdcH\xca\x10\x0c\x01i\x00\x80\x18C\x19C\x1ah\x9a(\x99\xe7\x99\xa4\x98\xe1\x81\x83/\x19g\xccQM#\x8c\xc2\x00\xc2\'C8a\x89x\x84\x84P@\x1a\x01\xd0\x0e\x80t\x03\xa0\xfa+\xa2\xbaa\xa6"\x12\x0b(\x00\x00\xb3\x05\xe0SF\xb8\xee9n[\x96\xc3\xd6\x1d1\xd2\x1d1\xd4\xddb*EH\xa9\x15"\xa4T\x8a\x91b&:c\xaauN\xa9\xd5:\xa7T\xeb\x1dR*EH\xa9\x15"\xa4X\x8b\xb1\x88.\xf5\xde\xbb\xd7z\xef]\xeb\xbdw\xb15H\xa9\x15"\xa4X\x8b\xb1v.\xc6 \xb1\xd7z\xef]\xeb\xbdw\xb16v\xce\xd8\x82\xec]\x8cA\x9c3\x86p\xce\x1a\xe3\x13gl\xed\x9d\xb3\xb6\xbe\xe5\xb3\xb6\xbe\xc4\x19\xc38g\x0c\xe1\x9c3\x86p\xd7\x19\xdb;gl\xed\x9d\xb3\xb6v\xce\xda\xfb\x10g\x0c\xe1\x9c9\x0eC\x90\xe49\x0c\xed\x9d\xb3\xb6v\xce\xd9\xdb_r\xdd\xf7

In [8]:
import tempfile
import random
import os


def generate_audio_binary(openai_client: OpenAI, text: str) -> bytes:
    """
    Generate audio from text using OpenAI's TTS API and return it as binary data.

    Args:
        openai_client (OpenAI): Initialized OpenAI client
        text (str): Text to convert to speech

    Returns:
        bytes: Binary audio data in MP3 format

    Raises:
        OpenAIError: If the API request fails
    """
    voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
    selected_voice = random.choice(voices)

    # Create a temporary file
    temp_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    temp_file_path = temp_file.name
    try:
        # Create speech using the OpenAI API
        with openai_client.audio.speech.with_streaming_response.create(
            model="tts-1", voice=selected_voice, input=text, response_format="mp3"
        ) as response:
            response.stream_to_file(temp_file_path)
        print(f"Temporary file created at: {temp_file_path}")

        # Read the binary content from the temporary file
        with open(temp_file_path, "rb") as file:
            audio_bytes = file.read()

        print(f"Size of binary data: {len(audio_bytes)} bytes")
        return audio_bytes

    except Exception as e:
        print(f"An error occurred: {e}")
        raise

    finally:
        # Attempt to delete the temporary file
        try:
            os.remove(temp_file_path)
            print(f"Temporary file at {temp_file_path} has been deleted.")
        except FileNotFoundError:
            print(
                f"Temporary file at {temp_file_path} not found. It may have been deleted already."
            )
        except OSError as e:
            print(f"Error deleting temporary file at {temp_file_path}: {e}")


generate_audio_binary(client, "hejsa")

Temporary file created at: /tmp/tmpdjbvdbky.mp3
Size of binary data: 9120 bytes
Temporary file at /tmp/tmpdjbvdbky.mp3 has been deleted.


b'\xff\xf3\xe4\xc4\x00g\xcc9\xe4\x01[\xd0\x00\x8d\xdb\xd4A\xac(\x01w\x00\x00F\x08\x04a\xa1\xa6\x1e"b"f&&b"&\x1e"b\xa3\xe6^\x8el,\xa754s\xd3\x87)\x0cn\xccF\x94\\D2a\xa4\xa6fzh&\xe7*\x01\xbexm\x98\x1a\x84\x86\x80\xe1\x9c4f\x8e\x19\xc3\xc6\x80\xd1\xa9Ji\xd4\x9a\x95&\xa59\xa1:eF\x98P&\x1c\xa9\xadjv)\x1cE\x81\xc5L\xba\xd3v\xfc\xdf\xb75f\xc3\t\xa4a\x8d\x1ed\xc8\x98\xd1 \xa0m\x9dJ\x10q\x80\x98q\xe6\x991\xa6Dg\x0b\x19!\xc6(A\x88\x10b\x04\x18\xc2\x06`\xb1\xabJh\xcf\x99q\xe6\x1c\tq\xd0}\x15\xd5\xdcy!\xd5\xdciL\x0b\xb8\\\x82\xf0$B\xe8e\xee[\x96\xe5\xb3\xb6v\xbb\xd7{\x13b\x0c1v*EH\xa9\x17c\x10k\x8c\xed\x9d\xae\xf5\xde\xbb\xd7{;k\xee\xfb\xf8\xfe;\x0eC\x90\xe45\x86\x18\xb1\x12-"\xd3\x1d1\xd4\x1dS\xaau\xde\xd7\xdc\x87!\xac3\x86p\xce\x1c\x87rYO\x9d<\xae\x1bv\xdc\xb7-\xdfw\x1f\xc7\xf1\xfc\x7f\x1f\xc7\xf1\xfc\x7f\x1d\xf7\xfd\xff\x7f\xe3q\xb8\xdcn7\x18\x8c?\x8f\xe3\xf8\xfe;\rav&:c\xa6:\xa7X\xecM\xcb\x87\xe5\xf60\xc2\x92\x92\x92Q\x0c?\x8e[\x96\xe5\xb9n[\x96\xe5\xb3\xb6v\xb1\x17b\xec]\x8b\xb1\x883\x86\xb9\

# Google

In [1]:
import urllib.parse

def construct_tts_link(text, lang_code):
    base_url = "https://translate.google.com/translate_tts"
    params = {
        "ie": "UTF-8",
        "tl": lang_code,
        "client": "tw-ob",
        "q": text
    }
    
    # Encode the query parameters
    query_string = urllib.parse.urlencode(params)
    
    # Construct the full URL
    tts_url = f"{base_url}?{query_string}"
    
    return tts_url

# Example usage for Danish (language code 'da'):
text = "God morgen, hvordan har du det?"
lang_code = "da"

tts_link = construct_tts_link(text, lang_code)
print(tts_link)


https://translate.google.com/translate_tts?ie=UTF-8&tl=da&client=tw-ob&q=God+morgen%2C+hvordan+har+du+det%3F


In [2]:
import requests

def save_tts_to_file(text, lang_code, filename):
    # Construct the TTS link
    tts_url = construct_tts_link(text, lang_code)
    
    # Make a request to get the audio content
    response = requests.get(tts_url)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Write the audio content to a file
        with open(filename, 'wb') as file:
            file.write(response.content)
        print(f"MP3 saved to {filename}")
    else:
        print(f"Failed to retrieve audio: {response.status_code}")

# Example usage for Danish
text = "God morgen, hvordan har du det?"
lang_code = "da"
filename = "greeting.mp3"

save_tts_to_file(text, lang_code, filename)


MP3 saved to greeting.mp3
