# OpenAI TTS

In [1]:
import gradio as gr
import os
import tempfile
import numpy as np
import soundfile as sf
from dotenv import load_dotenv
from openai import OpenAI
import random

# Load variables from a .env file into the environment before the client is built
# (presumably where the OpenAI API key lives; no key is passed explicitly here).
load_dotenv()
# Module-level OpenAI client; get_audio uses it for the speech request.
client = OpenAI()

In [6]:
def add_silence(input_file, output_file, silence_duration_sec=0.5):
    """Write a copy of an audio file with leading silence prepended.

    Args:
        input_file: Path of the audio file to read with soundfile.
        output_file: Path the padded audio is written to.
        silence_duration_sec: Length of the silent lead-in, in seconds.
            The sample count is ``int(silence_duration_sec * samplerate)``.
    """
    samples, rate = sf.read(input_file)
    pad_length = int(silence_duration_sec * rate)

    # The pad must have the same layout as the input so it can be joined along
    # axis 0: multi-dimensional data keeps its second dimension, 1-D data gets
    # a flat pad.
    if samples.ndim > 1:
        padding = np.zeros((pad_length, samples.shape[1]))
    else:
        padding = np.zeros(pad_length)

    padded = np.concatenate((padding, samples), axis=0)
    sf.write(output_file, padded, rate)


In [2]:
def remove_audio_start(input_file, output_file, duration_sec):
    """Write a copy of an audio file with its first ``duration_sec`` seconds removed.

    Args:
        input_file: Path of the audio file to read with soundfile.
        output_file: Path the trimmed audio is written to.
        duration_sec: Non-negative number of seconds to cut from the start.
            The sample count is ``int(duration_sec * samplerate)``.

    Raises:
        ValueError: If ``duration_sec`` is negative.
    """
    if duration_sec < 0:
        # A negative offset would turn the slice below into data[-k:], which
        # silently keeps only the tail of the clip instead of trimming its start.
        raise ValueError(f"duration_sec must be non-negative, got {duration_sec}")

    # Read the audio file
    data, samplerate = sf.read(input_file)

    # Calculate the number of samples to remove
    samples_to_remove = int(duration_sec * samplerate)

    # Drop the leading samples; slicing past the end simply yields empty audio
    new_data = data[samples_to_remove:]

    # Write the modified audio to the output file
    sf.write(output_file, new_data, samplerate)



In [9]:
# Cut the first 1.07 seconds off an existing clip and save the result next to
# it under a "_shorter" name.
# NOTE(review): both paths are absolute and machine-specific, so this cell only
# runs where that directory and input file exist.
input_file = "/mnt/d/OneDrive/Projects/anki-words-builder/notebooks/audio_at bo bor.mp3"
output_file = "/mnt/d/OneDrive/Projects/anki-words-builder/notebooks/audio_at bo bor_shorter.mp3"
duration_sec = 1.07
remove_audio_start(input_file, output_file, duration_sec)

In [17]:
def get_audio(text, lang="da"):
    """Synthesize ``text`` with OpenAI TTS and save it as ``audio_<text>.mp3``.

    A voice is picked at random for each call. The request input is the text
    wrapped in a Danish instruction ("Læs dette på dansk: '<text>'").
    NOTE(review): that instruction is part of the synthesized input, so it is
    presumably spoken as well — confirm.

    Args:
        text: The text to synthesize. It is also used to build the output file
            name; path separators in it are replaced with underscores.
        lang: Currently unused; the prompt is always the Danish one.

    Returns:
        The path of the written MP3 file, relative to the working directory.
    """
    voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
    selected_voice = random.choice(voices)
    prompt = f"Læs dette på dansk: '{text}'"

    # Only reserve a unique path here and close the handle immediately: the
    # download below reopens the file by name, which fails on platforms that
    # do not allow a second open while the first handle is still held.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
        temp_audio_path = temp_audio_file.name

    # A "/" or "\" in the text would otherwise be treated as a directory
    # separator and break (or redirect) the output path.
    safe_name = text.replace("/", "_").replace("\\", "_")
    output_file_path = f"audio_{safe_name}.mp3"

    try:
        with client.audio.speech.with_streaming_response.create(
            model="tts-1",
            voice=selected_voice,
            input=prompt,
        ) as response:
            response.stream_to_file(temp_audio_path)

        # silence_duration_sec=0 adds no padding; this step copies the
        # download to its final name via add_silence.
        add_silence(temp_audio_path, output_file_path, silence_duration_sec=0)
    finally:
        # delete=False means nobody else cleans this up, so remove it even
        # when the request or the conversion raised.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)

    return output_file_path

In [18]:
# Generate audio for a Danish sentence and for a single Danish word.
# Each call writes audio_<text>.mp3 and returns that path; only the last
# call's return value is shown as the cell output.
get_audio("Hej, jeg hedder Rasmus")
get_audio("kæreste")

'audio_kæreste.mp3'

# Google Translate TTS

In [1]:
import urllib.parse

def construct_tts_link(text, lang_code):
    """Build a Google Translate text-to-speech URL.

    Args:
        text: The text to be spoken; it is URL-encoded into the ``q`` parameter.
        lang_code: Language code placed in the ``tl`` parameter (e.g. ``"da"``).

    Returns:
        The full ``translate_tts`` URL as a string.
    """
    endpoint = "https://translate.google.com/translate_tts"

    # An ordered sequence of pairs keeps the parameters in a fixed order:
    # ie, tl, client, q.
    query_pairs = [
        ("ie", "UTF-8"),
        ("tl", lang_code),
        ("client", "tw-ob"),
        ("q", text),
    ]

    return endpoint + "?" + urllib.parse.urlencode(query_pairs)

# Example usage for Danish (language code 'da'): build the TTS link for a
# greeting and print it.
text = "God morgen, hvordan har du det?"
lang_code = "da"

tts_link = construct_tts_link(text, lang_code)
print(tts_link)


https://translate.google.com/translate_tts?ie=UTF-8&tl=da&client=tw-ob&q=God+morgen%2C+hvordan+har+du+det%3F


In [2]:
import requests

def save_tts_to_file(text, lang_code, filename, timeout=10):
    """Download Google Translate TTS audio for ``text`` and save it to ``filename``.

    Args:
        text: The text to be spoken.
        lang_code: Language code passed to ``construct_tts_link`` (e.g. ``"da"``).
        filename: Path of the file the response body is written to.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        True if the server answered 200 and the file was written, False if it
        answered with any other status (nothing is written in that case).

    Raises:
        requests.RequestException: Network errors and timeouts from
            ``requests.get`` are not caught here.
    """
    # Construct the TTS link
    tts_url = construct_tts_link(text, lang_code)

    # Without an explicit timeout requests waits indefinitely on a server that
    # accepts the connection but never answers.
    response = requests.get(tts_url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve audio: {response.status_code}")
        return False

    # Write the audio content to a file
    with open(filename, 'wb') as file:
        file.write(response.content)
    print(f"MP3 saved to {filename}")
    return True

# Example usage for Danish: fetch the spoken greeting and store it as
# greeting.mp3 (a relative path, so it is resolved against the working directory).
text = "God morgen, hvordan har du det?"
lang_code = "da"
filename = "greeting.mp3"

save_tts_to_file(text, lang_code, filename)


MP3 saved to greeting.mp3
