# ElevenLabs Demo
The original source code can be found in the official ElevenLabs documentation: https://elevenlabs.io/docs/api-reference/introduction

In [1]:
! pip install elevenlabs


Collecting elevenlabs
  Obtaining dependency information for elevenlabs from https://files.pythonhosted.org/packages/4f/f0/f711ffe2fcc4fb8405889ca81a478c54ddaa290ca39d74823c8042eacf3f/elevenlabs-1.56.0-py3-none-any.whl.metadata
  Downloading elevenlabs-1.56.0-py3-none-any.whl.metadata (7.3 kB)
Collecting httpx>=0.21.2 (from elevenlabs)
  Obtaining dependency information for httpx>=0.21.2 from https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl.metadata
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting pydantic>=1.9.2 (from elevenlabs)
  Obtaining dependency information for pydantic>=1.9.2 from https://files.pythonhosted.org/packages/bf/c2/0f3baea344d0b15e35cb3e04ad5b953fa05106b76efbf4c782a3f47f22f5/pydantic-2.11.2-py3-none-any.whl.metadata
  Downloading pydantic-2.11.2-py3-none-any.whl.metadata (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[

In [2]:
from elevenlabs.client import ElevenLabs
from elevenlabs import stream
from config import API_KEY

# Shared API client used by the demo cells below. The key is read from the
# local `config` module instead of being written into the notebook.
client = ElevenLabs(api_key=API_KEY)

In [None]:
# Request streamed speech. The result is iterated chunk by chunk, and an
# iterator can only be read once: the two options below are ALTERNATIVES.
# Running both on the same `audio_stream` leaves the second one with nothing
# to read, so pick one with the flag.
audio_stream = client.text_to_speech.convert_as_stream(
    text="This is a test",
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    model_id="eleven_multilingual_v2",
)

PLAY_LOCALLY = True  # set to False to handle the raw audio bytes yourself

if PLAY_LOCALLY:
    # option 1: play the streamed audio locally
    # NOTE(review): the SDK's `stream` helper appears to need a local `mpv`
    # install — confirm before relying on it.
    stream(audio_stream)
else:
    # option 2: process the audio bytes manually
    received_bytes = 0
    for chunk in audio_stream:
        if isinstance(chunk, bytes):
            # Count bytes instead of printing them: raw audio floods the output.
            received_bytes += len(chunk)
    print(f"received {received_bytes} bytes of audio")


# Text to speech Demo
* Returns the complete audio file in a single response
* Requires the entire generation to finish before returning anything
* Better for pre-generating audio content that will be used later
* Suitable for non-real-time applications

In [None]:
# `convert` hands the audio back as an iterator of byte chunks. A bare call
# throws that iterator away without reading it, so no audio is kept. Join the
# chunks so the complete clip is available to later cells.
speech_audio = b"".join(
    client.text_to_speech.convert(
        voice_id="JBFqnCBsd6RMkjVDRZzb",
        output_format="mp3_44100_128",
        text="The first move is what sets everything in motion.",
        model_id="eleven_multilingual_v2",
    )
)

# Show the size of the generated MP3 (in bytes) rather than the raw data.
len(speech_audio)

# Create Speech with Timing Demo
Unlike the text-to-speech demo above, this endpoint also returns timing information describing when each character of the text is spoken.

In [None]:
# Text-to-speech through the timestamp-returning endpoint. The call is the
# cell's last expression, so its response is what the notebook displays.
client.text_to_speech.convert_with_timestamps(
    text="This is a test for the API of ElevenLabs.",
    voice_id="21m00Tcm4TlvDq8ikWAM",
)

# Stream Speech Demo
Converts text into speech using a voice of your choice and returns audio as an audio stream.

* Returns audio chunks as they're generated in real-time
* Begins playback before the entire audio is complete
* Uses HTTP chunked transfer encoding or server-sent events (SSE)
* Ideal for conversational or interactive applications

In [None]:
# Open the audio stream. Creating it is not enough: the chunks only arrive
# while the stream is iterated, so a bare call shows nothing useful.
speech_stream = client.text_to_speech.convert_as_stream(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    output_format="mp3_44100_128",
    text="The first move is what sets everything in motion.",
    model_id="eleven_multilingual_v2",
)

# Drain the stream, recording each chunk's size instead of dumping raw bytes.
chunk_sizes = [len(chunk) for chunk in speech_stream if isinstance(chunk, bytes)]
print(f"received {len(chunk_sizes)} chunks ({sum(chunk_sizes)} bytes)")

# Stream Speech with Timing Demo
Converts text into speech using a voice of your choice and returns a stream of JSON objects, each containing audio as a base64-encoded string together with information on when each character was spoken.

In [None]:
# Open the timestamped stream. `response` alone is only a handle on it.
response = client.text_to_speech.stream_with_timestamps(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    output_format="mp3_44100_128",
    text="The first move is what sets everything in motion.",
    model_id="eleven_multilingual_v2",
)

# Drain the stream so the chunks are actually received and kept for later use.
# NOTE(review): presumably each item carries base64 audio plus character
# timing, per the description above — confirm the exact fields against the
# installed SDK version before reading attributes.
timed_chunks = list(response)

# Show how many chunks arrived rather than their (large) contents.
len(timed_chunks)


# Create Transcript Demo
Provide an audio or video file and receive a text transcript of its speech.

In [None]:
# Media file to transcribe.
# TODO: point this at a real audio/video file before running the cell.
TRANSCRIPT_SOURCE_PATH = "sample.mp3"

# The request needs real media and a real model id: the literal "model_id" was
# a placeholder, and no file was being sent at all. The `with` block closes
# the file once the request has completed.
with open(TRANSCRIPT_SOURCE_PATH, "rb") as media_file:
    transcript = client.speech_to_text.convert(
        model_id="scribe_v1",
        file=media_file,
    )

# Display the transcription response as the cell's output.
transcript

# Voice Changer Demo
Transform audio from one voice to another. Maintain full control over emotion, timing and delivery.

In [None]:
# Recording whose voice should be changed.
# TODO: point this at a real audio file before running the cell.
VOICE_CHANGER_SOURCE_PATH = "sample.mp3"

# The voice changer needs source audio to transform; without `audio=` there is
# nothing to convert. The chunks are joined INSIDE the `with` block so the
# source file is still open while the result is being read.
with open(VOICE_CHANGER_SOURCE_PATH, "rb") as source_audio:
    converted_audio = b"".join(
        client.speech_to_speech.convert(
            voice_id="JBFqnCBsd6RMkjVDRZzb",
            audio=source_audio,
            output_format="mp3_44100_128",
            model_id="eleven_multilingual_sts_v2",
        )
    )

# Show the size of the converted MP3 (in bytes) rather than the raw data.
len(converted_audio)

# Voice Changer Stream Demo
Stream audio from one voice to another. Maintain full control over emotion, timing and delivery.

In [None]:
# Recording whose voice should be changed.
# TODO: point this at a real audio file before running the cell.
VOICE_STREAM_SOURCE_PATH = "sample.mp3"

# The streaming voice changer also needs source audio, and its output only
# arrives while the stream is iterated. Drain it INSIDE the `with` block so
# the source file is still open while chunks are being received.
with open(VOICE_STREAM_SOURCE_PATH, "rb") as source_audio:
    converted_stream = client.speech_to_speech.convert_as_stream(
        voice_id="JBFqnCBsd6RMkjVDRZzb",
        audio=source_audio,
        output_format="mp3_44100_128",
        model_id="eleven_multilingual_sts_v2",
    )
    converted_chunk_sizes = [
        len(chunk) for chunk in converted_stream if isinstance(chunk, bytes)
    ]

# Summarise what was received instead of dumping raw audio bytes.
print(
    f"received {len(converted_chunk_sizes)} chunks "
    f"({sum(converted_chunk_sizes)} bytes)"
)