In [1]:
import soundfile as sf
import IPython
from transformers.tools import OpenAiAgent
from dotenv import load_dotenv
import os

  from .autonotebook import tqdm as notebook_tqdm


In [21]:
def play_audio(audio, filename="speech_converted.wav", samplerate=16000):
    """Write ``audio`` to ``filename`` and return an inline player for that file.

    Args:
        audio: Object exposing ``.numpy()``; presumably the waveform tensor
            returned by the agent's text-to-speech tool (TODO confirm).
        filename: Path of the audio file to write.
        samplerate: Sample rate in Hz handed to ``sf.write``. Defaults to
            16000, the value that used to be hard-coded here (presumably the
            output rate of the text-to-speech tool -- TODO confirm).

    Returns:
        An ``IPython.display.Audio`` object created from ``filename``.
    """
    # NOTE(review): the previous TODO proposed reading the rate from
    # ``audio.shape[1]``. A shape describes how many samples there are, not how
    # fast they should be played, so the rate is an explicit parameter instead.
    sf.write(filename, audio.numpy(), samplerate=samplerate)
    return IPython.display.Audio(filename)

In [3]:
# Load variables from a .env file into the process environment.
# The cell used to call load_dotenv() twice (once plain, once verbose); a
# single verbose call is enough, since the second call only repeated the first
# with increased verbosity.
load_dotenv(verbose=True)

# os.getenv returns None when OPENAI_KEY is not set, so the value may be None.
OPENAI_KEY = os.getenv("OPENAI_KEY")

In [4]:
# Colab form field naming the chosen backend. It is not read by any of the
# visible cells; only the OpenAI agent is constructed below.
agent_name = "OpenAI (API Key)" #@param ["StarCoder (HF Token)", "OpenAssistant (HF Token)", "OpenAI (API Key)"]

# Build the agent with the key read from the environment in the cell above.
agent = OpenAiAgent(
    api_key=OPENAI_KEY,
    model="text-davinci-003",
)
print("OpenAI is initialized 💪")

OpenAI is initialized 💪


In [5]:
# Slovenian source sentence used as input for the whole round trip.
text = "84 vaščanov majhne švicarske vasice Brienz je imelo le 48 ur časa, da spakirajo svoje stvari in zapustijo svoje domove. Geologi so jih namreč opozorili, da je skalni podor z gore nad njimi neizbežen."

# The sentence is embedded, double-quoted, directly in the prompt.
# NOTE(review): later cells pass their input through the `text=` keyword
# instead; consider doing the same here so quotes inside the text cannot
# interfere with the prompt.
eng = agent.run(f'Translate text from Slovenian to English: "{text}"')

==Explanation from the agent==
I will use the following  tool: `translator` to translate the text from Slovenian to English.


==Code generated by the agent==
translated_text = translator(text="84 vaščanov majhne švicarske vasice Brienz je imelo le 48 ur časa, da spakirajo svoje stvari in zapustijo svoje domove. Geologi so jih namreč opozorili, da je skalni podor z gore nad njimi neizbežen.", src_lang="Slovenian", tgt_lang="English")
print(f"The translated text is {translated_text}.")


==Result==




The translated text is 84 residents of the small Swiss village of Brienz had only 48 hours to pack their bags and leave their homes, since geologists warned them that a rocky ridge overhead was inevitable..


In [7]:
# Display the English translation returned by the agent.
eng

'84 residents of the small Swiss village of Brienz had only 48 hours to pack their bags and leave their homes, since geologists warned them that a rocky ridge overhead was inevitable.'

In [10]:
# Ask the agent to speak the English translation; the text is passed as the
# `text` variable rather than being pasted into the prompt.
audio_tensor = agent.run(
    "Read the following text out loud",
    text=eng,
)

==Explanation from the agent==
I will use the following  tool: `text_reader` to read the text out loud.


==Code generated by the agent==
audio_text = text_reader(text)


==Result==


Found cached dataset cmu-arctic-xvectors (/home/igor/.cache/huggingface/datasets/Matthijs___cmu-arctic-xvectors/default/0.0.1/a62fea1f9415e240301ea0042ffad2a3aadf4d1caa7f9a8d9512d631723e781f)


In [12]:
# Save the English speech to disk and show an inline player for it.
play_audio(
    audio_tensor,
    filename="speech_converted.wav",
)

In [23]:
# Transcribe the synthesized English speech back to text. The recorded output
# shows a warning about a missing `sampling_rate` argument during this call.
txt = agent.run(
    "convert speech to text",
    audio=audio_tensor,
)

It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.


==Explanation from the agent==
I will use the following  tool: `transcriber` to convert speech to text.


==Code generated by the agent==
text = transcriber(audio)


==Result==




In [24]:
# Display the transcription so it can be compared with the English translation.
txt

' Residents of the small Swiss village of Brands had only hours to pack their bags and leave their homes since geologists warned them that a rocky ridge overhead was inevitable.'

In [25]:
# Translate the transcription back to Slovenian to close the round trip.
slo = agent.run(
    "Translate text from English to Slovenian",
    text=txt,
)

==Explanation from the agent==
I will use the following  tool: `translator` to translate the text from English to Slovenian.


==Code generated by the agent==
translated_text = translator(text=text, src_lang="English", tgt_lang="Slovenian")


==Result==




In [26]:
# Display the Slovenian back-translation returned by the agent.
slo

'Ljudje v majhnem švicarskem vasi Brands so imeli le nekaj ur časa, da se spakirajo in zapustijo svoje domove, saj so jih geologi opozorili, da je neizogibno, da bo nad njimi nastal kamnen greben.'

In [27]:
# Ask the agent to speak the Slovenian back-translation.
# NOTE(review): the recorded output shows the tool loading the
# `cmu-arctic-xvectors` dataset, presumably English speaker embeddings, so the
# Slovenian pronunciation may be poor -- confirm by listening.
slo_audio_tensor = agent.run(
    "Read the following text out loud",
    text=slo,
)

==Explanation from the agent==
I will use the following  tool: `text_reader` to read the text out loud.


==Code generated by the agent==
audio_text = text_reader(text)


==Result==


Found cached dataset cmu-arctic-xvectors (/home/igor/.cache/huggingface/datasets/Matthijs___cmu-arctic-xvectors/default/0.0.1/a62fea1f9415e240301ea0042ffad2a3aadf4d1caa7f9a8d9512d631723e781f)


In [29]:
# Save the Slovenian speech to its own file and show an inline player for it.
play_audio(
    slo_audio_tensor,
    filename="slo_speech_converted.wav",
)