<a href="https://colab.research.google.com/github/flrtemis/happymothersday/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
# 1️⃣ Install kokoro
!pip install -q kokoro>=0.9.4 soundfile pydub
# 2️⃣ Install espeak, used for English OOD fallback and some non-English languages
!apt-get -qq -y install espeak-ng ffmpeg > /dev/null 2>&1  # ffmpeg needed for pydub

# 3️⃣ Initialize a pipeline
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
import torch
from pydub import AudioSegment  # <== Added for merging

# Language codes accepted by KPipeline (lang_code must match the voice used):
#   🇺🇸 'a'  American English      en
#   🇬🇧 'b'  British English       en-GB
#   🇪🇸 'e'  Spanish               es
#   🇫🇷 'f'  French                fr-fr
#   🇮🇳 'h'  Hindi                 hi
#   🇮🇹 'i'  Italian               it
#   🇯🇵 'j'  Japanese              (requires: pip install misaki[ja])
#   🇧🇷 'p'  Brazilian Portuguese  pt-br
#   🇨🇳 'z'  Mandarin Chinese      (requires: pip install misaki[zh])
pipeline = KPipeline(lang_code="b")  # 'b' = British English, per the table above

# Demo dialogue (for demonstration purposes only, unseen during training).
# One utterance per entry; the entries are joined with newlines so the
# result is a single newline-separated string with no leading or trailing
# whitespace.
text = "\n".join((
    "Okay. I think everything's setup right. I hope. Uh. Bell? Can you hear me?",
    "Yep. I can hear you, Emma. (). (). George?",
    "Yep, loud and clear! Lewis. You’re on standby if anything fucks up. Right?",
    "Fuck off, George. Emma. Why's George always doubt my abilities?",
    "Lewis, honey. Breathe. George. . Calm down, Lewis knows what he's doing.",
))

# 4️⃣ Generate, display, and save one audio file per line of dialogue.
SAMPLE_RATE = 24000  # used for both notebook playback and the saved WAV files
voices = ['bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis']
# One utterance per line of text. Blank lines are dropped up front so they
# neither take a slot in the voice rotation nor reach the pipeline.
lines = [entry for entry in text.split('\n') if entry.strip()]
output_files = []

for i, line in enumerate(lines):
    voice = voices[i % len(voices)]  # rotate voices
    generator = pipeline(
        line, voice=voice,
        speed=1, split_pattern=r'$^'  # no internal splitting
    )
    # Consume the whole generator: taking only next(generator) would silently
    # drop any results after the first, and would raise StopIteration when
    # the pipeline yields nothing for a line.
    results = list(generator)
    if not results:
        print(f'{i}: no audio generated for this line, skipping')
        continue

    print(i)          # i => index
    pieces = []
    for gs, ps, piece in results:
        print(gs)     # gs => graphemes/text
        print(ps)     # ps => phonemes
        pieces.append(piece)

    # A single result is passed through untouched; several results are
    # joined end to end so the saved file holds the complete line.
    if len(pieces) == 1:
        audio = pieces[0]
    else:
        audio = torch.cat([torch.as_tensor(piece) for piece in pieces])
    display(Audio(data=audio, rate=SAMPLE_RATE, autoplay=i == 0))

    filename = f'{i}_{voice}.wav'
    sf.write(filename, audio, SAMPLE_RATE)  # save each line/voice combo
    output_files.append(filename)

# 🔊 Concatenate the per-line WAV files, in generation order, into one track.
# Starting the fold from an empty segment means each clip is appended with
# the `+` operator, one after another.
merged = sum(
    (AudioSegment.from_wav(path) for path in output_files),
    AudioSegment.empty(),
)

# Write the combined track to disk, report it, then play it back inline.
merged.export("final_output.wav", format="wav")
print("✅ Merged audio saved as: final_output.wav")
display(Audio("final_output.wav", autoplay=True))