In [2]:
import onnxruntime as ort
from kokoro_onnx import Kokoro
from onnxruntime.capi._pybind_state import set_default_logger_severity

# Only show ERROR or FATAL (3 or 4).
# NOTE: severity 3 also silences WARNING-level runtime logs, so a provider
# fallback would go unnoticed — the active providers are verified explicitly
# once the session has been created (see below).
set_default_logger_severity(3)

# ─── Configuration ───────────────────────────────────────────────
MODEL_PATH = "kokoro-v1.0.onnx"    # ONNX model file, relative to the working directory
VOICES_PATH = "voices-v1.0.bin"    # voice-style bundle passed to Kokoro
CUDA_DEVICE_ID = 1                 # index of the GPU the session should run on

# ─── Session Options for Performance ─────────────────────────────
sess_opts = ort.SessionOptions()
# Sequential execution: ONNX Runtime's parallel executor is not supported
# together with the CUDA execution provider, so requesting ORT_PARALLEL only
# triggers a (hidden) fallback to sequential mode.
sess_opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
sess_opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
# "arena_extend_strategy" is a CUDA provider option, not a session config key,
# so it is configured only in the provider options below.

# ─── CUDA Provider with Optimized Settings ───────────────────────
cuda_provider = (
    "CUDAExecutionProvider",
    {
        "device_id": CUDA_DEVICE_ID,
        "arena_extend_strategy": "kNextPowerOfTwo",
        "cudnn_conv_algo_search": "DEFAULT",
        "do_copy_in_default_stream": True,
    },
)

# ─── Initialize ONNX Runtime Session ─────────────────────────────
# The CPU provider is listed explicitly so the fallback order is visible here.
sess = ort.InferenceSession(
    MODEL_PATH,
    sess_options=sess_opts,
    providers=[cuda_provider, "CPUExecutionProvider"],
)

# ─── Initialize Kokoro with This Session ─────────────────────────
kokoro = Kokoro.from_session(
    session=sess,
    voices_path=VOICES_PATH,
)

# Report what is actually running instead of assuming the GPU was picked up.
active_providers = sess.get_providers()
if "CUDAExecutionProvider" in active_providers:
    print(f"✅ Kokoro is now optimized on GPU device {CUDA_DEVICE_ID} with enhanced memory handling.")
else:
    print(
        f"⚠️ CUDAExecutionProvider is not active (active providers: {active_providers}); "
        "Kokoro will run without GPU acceleration."
    )


✅ Kokoro is now optimized on GPU device 1 with enhanced memory handling.


In [None]:
import soundfile as sf

# Sample passage to speak; the adjacent literals below are joined into one string.
narration = (
    "Hey.. Yes, I came here five years ago, when I was just sixteen. At the time, I was.. "
    "I was still in the tenth grade, and I clearly remember doing my homework in the backseat "
    "of the car as we drove to our new home. Everything felt unfamiliar and uncertain, but I "
    "tried to stay focused on school. I didn’t know what to expect, and it took a while to get "
    "used to the language, the people, and the new routines. I had to figure things out quickly—"
    "how to fit in, how to keep up with assignments, how to not fall behind. There were moments "
    "I felt out of place, but I kept.. I kept going. Looking back, it was, was one of the hardest "
    "transitions I’ve experienced, but it also taught me how to adapt, how to be more independent, "
    "and.. and how to push through when things felt overwhelming."
)

# Run text-to-speech with one named voice at normal speed, US English.
audio, sr = kokoro.create(
    narration,
    voice="af_heart",
    speed=1.0,
    lang="en-us",
)

# Persist the waveform, then report the file that was written.
output_path = "hello_world.wav"
sf.write(output_path, audio, sr)
print(f"Saved: {output_path}")

Saved: hello_world.wav


: 

In [12]:
import numpy as np
import soundfile as sf

# Passage to synthesize; the adjacent literals below are joined into one string.
text = (
    "Hey.. Yes, I came here five years ago, when I was just sixteen. At the time, I was.. "
    "I was still in the tenth grade, and I clearly remember doing my homework in the backseat "
    "of the car as we drove to our new home. Everything felt unfamiliar and uncertain, but I "
    "tried to stay focused on school. I didn’t know what to expect, and it took a while to get "
    "used to the language, the people, and the new routines. I had to figure things out quickly—"
    "how to fit in, how to keep up with assignments, how to not fall behind. There were moments "
    "I felt out of place, but I kept.. I kept going. Looking back, it was, was one of the hardest "
    "transitions I’ve experienced, but it also taught me how to adapt, how to be more independent, "
    "and.. and how to push through when things felt overwhelming."
)

# --- voice blend: two named voices and their mixing weights ---
v1, w1 = "af_sarah", 0.60   # 60 %
v2, w2 = "am_adam",  0.40   # 40 %

# Look up each voice's style and combine them as a weighted sum.
primary_style = kokoro.get_voice_style(v1)
secondary_style = kokoro.get_voice_style(v2)
style = primary_style * w1 + secondary_style * w2

# Synthesize with the blended style itself rather than a voice name.
audio, sr = kokoro.create(text, voice=style, speed=1.0, lang="en-us")

# Write the result to disk and confirm.
blended_path = "hello_blended.wav"
sf.write(blended_path, audio, sr)
print(f"✅  Saved: {blended_path}")


✅  Saved: hello_blended.wav


In [None]:
import asyncio
import soundfile as sf
import numpy as np

async def main(text=None, voice="af_heart", speed=1.0, lang="en-us",
               out_path="hello_streamed.wav"):
    """Stream speech for ``text`` from the notebook's ``kokoro`` instance and save it.

    Audio chunks yielded by ``kokoro.create_stream`` are collected, concatenated
    with ``np.concatenate`` and written to ``out_path`` via ``sf.write``.

    Args:
        text: Passage to synthesize. ``None`` (the default) uses the built-in
            sample passage.
        voice: Voice passed through to ``kokoro.create_stream``.
        speed: Speed passed through to ``kokoro.create_stream``.
        lang: Language code passed through to ``kokoro.create_stream``.
        out_path: Destination audio file.

    Raises:
        ValueError: If the stream yields no audio chunks, so there is nothing
            to write.
    """
    if text is None:
        text = (
            "Hey.. Yes, I came here five years ago, when I was just sixteen. At the time, I was.. "
            "I was still in the tenth grade, and I clearly remember doing my homework in the backseat "
            "of the car as we drove to our new home. Everything felt unfamiliar and uncertain, but I "
            "tried to stay focused on school. I didn’t know what to expect, and it took a while to get "
            "used to the language, the people, and the new routines. I had to figure things out quickly—"
            "how to fit in, how to keep up with assignments, how to not fall behind. There were moments "
            "I felt out of place, but I kept.. I kept going. Looking back, it was, was one of the hardest "
            "transitions I’ve experienced, but it also taught me how to adapt, how to be more independent, "
            "and.. and how to push through when things felt overwhelming."
        )

    # Collect streamed chunks; remember the sample rate explicitly rather than
    # relying on the loop variable still being bound after the loop.
    chunks = []
    sample_rate = None
    async for audio_chunk, chunk_sr in kokoro.create_stream(text, voice=voice, speed=speed, lang=lang):
        chunks.append(audio_chunk)
        sample_rate = chunk_sr

    # np.concatenate([]) fails with an unrelated-looking message; fail clearly instead.
    if not chunks:
        raise ValueError("kokoro.create_stream produced no audio chunks; nothing to save.")

    # Concatenate and save to file
    final_audio = np.concatenate(chunks)
    sf.write(out_path, final_audio, sample_rate)
    print(f"✅ Saved: {out_path}")

# Run the async streaming synthesis.
# NOTE(review): top-level `await` is used instead of asyncio.run(); this presumably
# relies on the notebook kernel already running an event loop (IPython autoawait)
# and would not be valid in a plain Python script — confirm before reusing elsewhere.
await main()

