In [None]:

# Synthesis settings used by the later cells. Key names are camelCase,
# presumably mirroring the JSON request body of the endpoint in `url`.
config = {
    # Output audio format and prosody controls.
    "audioConfig": {
        "audioEncoding": "LINEAR16",
        "pitch": 0,
        "speakingRate": 1,
    },
    # Sample sentence (Swedish) used when building the standalone request objects.
    "input": {
        "text": "Senaste gången Riksbanken sänkte räntan var i juni i år då den sänktes med 0,25 procentenheter till 2 procent. Därefter, i augusti, valde man att lämna den oförändrad. Då sa Riksbanken att man såg en viss sannolikhet för ytterligare en sänkning i år.",
    },
    # Voice selection: language code plus the specific voice name.
    "voice": {
        "languageCode": "sv-SE",
        "name": "sv-SE-Chirp3-HD-Achernar",
    },
}

# Text-to-Speech synthesize endpoint URL; not referenced by the cells visible here.
url = "https://texttospeech.googleapis.com/v1beta1/text:synthesize"

In [None]:
import pandas as pd

# Load the sentence CSV and discard its "sentiment" column in one chained
# expression; every other column is kept exactly as read from the file.
df_sentences = (
    pd.read_csv("../dataset/sentences.csv")
    .drop(columns=["sentiment"])
)

# Last expression of the cell: show the leading rows as a quick preview.
df_sentences.head()

In [None]:
# Report the dataset's size and column names, then preview the leading rows.
print(f"Dataset contains {len(df_sentences)} sentences")
print(f"Columns: {list(df_sentences.columns)}")
print("\nFirst few sentences:")
# Each preview line shows the row's index label and at most the first 100
# characters of its text; "..." is appended regardless of the text's length.
for label, sentence in df_sentences["text"].head().items():
    print(f"{label}: {sentence[:100]}...")

In [None]:
import os
from google.cloud import texttospeech

# Authentication is not configured in code. Before running this cell, either
#   * run `gcloud auth application-default login` (Application Default Credentials), or
#   * set the GOOGLE_APPLICATION_CREDENTIALS environment variable.

# Text-to-Speech client reused by the synthesis helper defined further down.
client = texttospeech.TextToSpeechClient()

# Translate the camelCase entries of `config` into the client library's
# request objects.
audio_config = texttospeech.AudioConfig(
    speaking_rate=config["audioConfig"]["speakingRate"],
    pitch=config["audioConfig"]["pitch"],
    # The enum member is looked up by its name, e.g. "LINEAR16".
    audio_encoding=texttospeech.AudioEncoding[config["audioConfig"]["audioEncoding"]],
)
voice = texttospeech.VoiceSelectionParams(
    name=config["voice"]["name"],
    language_code=config["voice"]["languageCode"],
)
synthesis_input = texttospeech.SynthesisInput(text=config["input"]["text"])



# Disabled one-off export of a single synthesis result. It reads `response`,
# which nothing in this cell assigns, so it cannot be re-enabled as-is.
# with open("../dataset/audios/audio_0.mp3", "wb") as out:
#     out.write(response.audio_content)
#     print("Audio content written to file '../dataset/audios/audio_0.mp3'")

# response

In [None]:
# Import additional libraries needed for processing
import pyarrow.parquet as pq
import pyarrow as pa
from tqdm import tqdm
import time

def generate_audio_for_text(text):
    """Synthesize speech for one sentence and return the raw audio bytes.

    Uses the notebook-level ``client``, ``voice`` and ``audio_config`` objects
    for the request.

    Args:
        text: Sentence to synthesize. Expected to be a non-empty ``str``.

    Returns:
        The ``audio_content`` of the synthesis response, or ``None`` when
        ``text`` is not a non-empty string or the request raises. Problems are
        reported with ``print`` instead of being raised, so a batch run over
        many sentences keeps going.
    """
    # Reject unusable input before touching the API. Without this guard a
    # non-string value (e.g. the float NaN pandas uses for a missing CSV value,
    # or None) would make the error handler below fail on `text[:50]` and
    # abort the whole batch with a TypeError.
    if not isinstance(text, str) or not text:
        print(f"Skipping audio generation: expected non-empty text, got {text!r}")
        return None

    try:
        synthesis_input = texttospeech.SynthesisInput(text=text)
        response = client.synthesize_speech(
            input=synthesis_input,
            voice=voice,
            audio_config=audio_config
        )
        return response.audio_content
    except Exception as e:
        # Deliberate best-effort: log a short preview of the text and carry on.
        print(f"Error generating audio for text: {text[:50]}... Error: {str(e)}")
        return None

print("Audio generation function created successfully!")

In [None]:
# Synthesize audio for the sentences and attach the result as a new column.
#
# The run below covers the full dataset. For a quick trial, swap the active
# `df_sample` assignment for the commented-out one, which keeps only the
# first `test_size` rows.
test_size = 10
# df_sample = df_sentences.head(test_size).copy()
df_sample = df_sentences.copy()

print(f"Processing {len(df_sample)} sentences...")

# One entry per row, in row order: audio bytes on success, None otherwise.
audio_data = []

for text in tqdm(df_sample['text'], total=len(df_sample), desc="Generating audio"):
    # Any falsy result (None on failure, or empty bytes) is recorded as None.
    audio_data.append(generate_audio_for_text(text) or None)

    # Brief pause between requests to stay clear of API rate limits.
    time.sleep(0.1)

# The list has one entry per row of df_sample, so it can be assigned directly.
df_sample['audio'] = audio_data

print(f"Successfully processed {len(df_sample)} sentences!")
print(f"Audio column added with {len(audio_data) - audio_data.count(None)} successful audio generations")