In [32]:
import requests
import pandas as pd

In [33]:

def download_tsv_from_google_sheet(sheet_url, timeout=30):
    """Download one tab of a Google Sheet as TSV and load it into a DataFrame.

    The browser "edit" link is rewritten into the sheet's TSV export link.
    Two link shapes are recognised:

    * ``.../edit#gid=<N>`` (rewritten by plain substitution), and
    * ``.../edit?...gid=<N>...`` / ``.../edit?...#gid=<N>`` where the tab id
      appears in the query string and/or the fragment.

    Any other URL is requested unchanged.

    Args:
        sheet_url: Link to the sheet tab, as copied from the browser.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot block the notebook kernel indefinitely.

    Returns:
        A ``pandas.DataFrame`` parsed from the tab-separated response, or
        ``None`` when the server answers with a status other than 200.

    Raises:
        Whatever ``requests.get`` raises for connection problems or timeouts;
        those exceptions are not caught here.
    """
    import re
    from io import StringIO

    legacy_marker = '/edit#gid='
    if legacy_marker in sheet_url:
        # Original link shape: keep the exact historical substitution.
        tsv_url = sheet_url.replace(legacy_marker, '/export?format=tsv&gid=')
    else:
        base, edit_part, tail = sheet_url.partition('/edit')
        gids = re.findall(r'[#?&]gid=(\d+)', tail)
        if edit_part and gids:
            # When the id appears in both query and fragment, the fragment
            # comes last in the URL; prefer it.
            tsv_url = f"{base}/export?format=tsv&gid={gids[-1]}"
        else:
            # Unrecognised shape: request the URL as given.
            tsv_url = sheet_url

    response = requests.get(tsv_url, timeout=timeout)
    # Force UTF-8 decoding of the body before reading response.text.
    response.encoding = 'utf-8'

    if response.status_code != 200:
        print("Failed to download the TSV file.")
        return None

    # response.text is already a decoded str, so read_csv needs no encoding.
    return pd.read_csv(StringIO(response.text), sep='\t')
    

In [34]:
# Google Sheet tab that holds the prompt list to synthesize.
terms_list_sheet = "https://docs.google.com/spreadsheets/d/1Dk-uXWbrXIg59xGd8MnHRljUjrHPW4xKfmunrqMuBRw/edit#gid=2138349120"

df_prompts = download_tsv_from_google_sheet(terms_list_sheet)

# The download helper returns None on a non-200 response; stop here with a
# clear message instead of failing later with an AttributeError on None.
if df_prompts is None:
    raise RuntimeError(f"Could not load the prompts sheet from {terms_list_sheet}")


In [35]:
# Show the loaded prompts table as this cell's output.
df_prompts

Unnamed: 0,prompt_set_id,index,prompt_id,prompt
0,psych_child,1,psych_child-1,Lęk
1,psych_child,2,psych_child-2,Depresja
2,psych_child,3,psych_child-3,Zaburzenia lękowe
3,psych_child,4,psych_child-4,ADHD (zaburzenie deficytu uwagi i hiperaktywno...
4,psych_child,5,psych_child-5,Autyzm
...,...,...,...,...
95,psych_child,96,psych_child-96,Rozwój zawodowy
96,psych_child,97,psych_child-97,Zasoby wspierające
97,psych_child,98,psych_child-98,Referencje
98,psych_child,99,psych_child-99,Skierowanie


In [36]:
import azure.cognitiveservices.speech as speechsdk
import os



   
def generate_synth_speech(voice_name, prompt, output_file="output.wav"):
    """Synthesize ``prompt`` with an Azure voice and write the audio to a file.

    The Speech resource key and region are read from the ``SPEECH_KEY`` and
    ``SPEECH_REGION`` environment variables.

    Supported voices:
    https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt#prebuilt-neural-voices

    Args:
        voice_name: Azure voice identifier, e.g. ``"pl-PL-ZofiaNeural"``.
        prompt: Text to be spoken.
        output_file: Path of the audio file to write.

    Returns:
        ``True`` when the result reason is ``SynthesizingAudioCompleted``,
        ``False`` otherwise (for example when synthesis was canceled).
    """
    speech_config = speechsdk.SpeechConfig(
        subscription=os.environ.get('SPEECH_KEY'),
        region=os.environ.get('SPEECH_REGION'),
    )
    speech_config.speech_synthesis_voice_name = voice_name

    audio_config = speechsdk.audio.AudioOutputConfig(filename=output_file)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    speech_synthesis_result = speech_synthesizer.speak_text_async(prompt).get()
    reason = speech_synthesis_result.reason
    print("Speech synthesis status: ", reason)

    succeeded = reason == speechsdk.ResultReason.SynthesizingAudioCompleted
    if succeeded:
        # Report success only when the service actually completed synthesis.
        print("Speech synthesized for text [{}]".format(prompt))
        print(f'Audio content written to file "{output_file}"')
    elif reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speech_synthesis_result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            if cancellation_details.error_details:
                print("Error details: {}".format(cancellation_details.error_details))
            # Show the hint for every error cancellation, with or without details.
            print("Did you set the speech resource key and region values?")

    # Drop the synthesizer reference explicitly before returning.
    del speech_synthesizer
    return succeeded


In [38]:
# Root directory for generated audio; one sub-directory is created per voice.
out_dir_root = "synth_data"
# Maps the speaker id used in directory and file names to the Azure voice name.
azure_voices_pl = {"0001":"pl-PL-AgnieszkaNeural", "0002":"pl-PL-MarekNeural", "0003":"pl-PL-ZofiaNeural"}

for voice_id, voice_name in azure_voices_pl.items():
    out_dir = os.path.join(out_dir_root, voice_id)
    os.makedirs(out_dir, exist_ok=True)
    print(f"Generating speech for voice: {voice_name}")

    # Walk only the three columns that are needed, one scalar per row.
    # prompt_set_id is part of the file name in case several prompt sets
    # share a single input file.
    for prompt_set, prompt_index, prompt in zip(
        df_prompts['prompt_set_id'], df_prompts['index'], df_prompts['prompt']
    ):
        # Pad the per-set prompt index to 5 digits for sortable file names.
        audio_file_id = str(prompt_index).zfill(5)

        output_fn = f"{prompt_set}-{voice_id}-{audio_file_id}.wav"
        out_fp = os.path.join(out_dir, output_fn)
        print("Generating speech for prompt: ", prompt)
        print("Saving results to: ", out_fp)
        generate_synth_speech(voice_name, prompt, out_fp)


Generating speech for prompt:  Lęk
Saving results to:  synth_data/0001/0     psych_child
1     psych_child
2     psych_child
3     psych_child
4     psych_child
         ...     
95    psych_child
96    psych_child
97    psych_child
98    psych_child
99    psych_child
Name: prompt_set_id, Length: 100, dtype: object-0001-0       1
1       2
2       3
3       4
4       5
     ... 
95     96
96     97
97     98
98     99
99    100
Name: index, Length: 100, dtype: int64.wav


RuntimeError: Exception with error code: 
[CALL STACK BEGIN]

/home/michal/.pyenv/versions/3.10.11/envs/bigos-hf/lib/python3.10/site-packages/azure/cognitiveservices/speech/libMicrosoft.CognitiveServices.Speech.core.so(+0x1994a0) [0x7f2f94d994a0]
/home/michal/.pyenv/versions/3.10.11/envs/bigos-hf/lib/python3.10/site-packages/azure/cognitiveservices/speech/libMicrosoft.CognitiveServices.Speech.core.so(+0x1bc58d) [0x7f2f94dbc58d]
/home/michal/.pyenv/versions/3.10.11/envs/bigos-hf/lib/python3.10/site-packages/azure/cognitiveservices/speech/libMicrosoft.CognitiveServices.Speech.core.so(+0x1fd4be) [0x7f2f94dfd4be]
/home/michal/.pyenv/versions/3.10.11/envs/bigos-hf/lib/python3.10/site-packages/azure/cognitiveservices/speech/libMicrosoft.CognitiveServices.Speech.core.so(synthesizer_create_speech_synthesizer_from_config+0xf3) [0x7f2f94ca1bd8]
/lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7f2fe2d36e2e]
/lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7f2fe2d33493]
/home/michal/.pyenv/versions/3.10.11/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x12ea5) [0x7f2fe2fddea5]
/home/michal/.pyenv/versions/3.10.11/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xc940) [0x7f2fe2fd7940]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(_PyObject_Call+0x5c) [0x7f2fe3631c3c]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(_PyEval_EvalFrameDefault+0x289b) [0x7f2fe35d87eb]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(+0x1b9b94) [0x7f2fe3724b94]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(PyVectorcall_Call+0xc8) [0x7f2fe3631b18]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(_PyEval_EvalFrameDefault+0x289b) [0x7f2fe35d87eb]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(+0x1b9b94) [0x7f2fe3724b94]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(_PyObject_FastCallDictTstate+0x10e) [0x7f2fe36320ae]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(_PyObject_Call_Prepend+0x100) [0x7f2fe3632340]
/home/michal/.pyenv/versions/3.10.11/lib/libpython3.10.so.1.0(+0x13506e) [0x7f2fe36a006e]
[CALL STACK END]

Exception with an error code: 0x8 (SPXERR_FILE_OPEN_FAILED)

In [None]:
# List the files in the current working directory.
!ls

'pl-PL-AgnieszkaNeural (Female)}_Lęk.wav'   pl-PL-ZofiaNeural_Lęk.wav
 pl-PL-AgnieszkaNeural_Lęk.wav		    tts-playground.ipynb
 pl-PL-MarekNeural_Lęk.wav
