In [18]:
import os
import requests
import sounddevice as sd
from piper.voice import PiperVoice
import re
import queue
import time
import threading

class Text_to_Speech:
    """Streaming text-to-speech built on a Piper voice.

    A block of text is split into sentences. One thread synthesizes each
    sentence with the loaded Piper voice while a second thread plays the
    finished audio through ``sounddevice``; the two communicate through
    bounded queues, so synthesis of the next sentence can run while the
    current one is playing.
    """

    # Hindi "priyamvada" voice. Another Hindi voice, "pratham", lives in the same
    # repository under .../hi/hi_IN/pratham/medium/hi_IN-pratham-medium.onnx(.json).
    MODEL_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/hi/hi_IN/priyamvada/medium/hi_IN-priyamvada-medium.onnx?download=true"
    CONFIG_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/hi/hi_IN/priyamvada/medium/hi_IN-priyamvada-medium.onnx.json?download=true"

    # Seconds to wait on the network before a download is aborted instead of
    # hanging forever.
    DOWNLOAD_TIMEOUT = 30

    def __init__(self):
        """Download the model files if they are missing, then load the voice."""
        self.PIPER_MODEL = "./piper_model/hi_IN-priyamvada-medium.onnx"
        self.PIPER_CONFIG = "./piper_model/hi_IN-priyamvada-medium.json"
        # Playback rate handed to sounddevice.
        # NOTE(review): presumably matches the voice's configured rate -- confirm.
        self.SAMPLE_RATE = 22050
        # Seconds of audio overlap. Kept for compatibility; the current player
        # plays chunks strictly back-to-back and does not read this value.
        self.OVERLAP = 0.15

        # Check if model files exist, if not download them
        if not os.path.exists(self.PIPER_MODEL) or not os.path.exists(self.PIPER_CONFIG):
            print("‚ö†Ô∏è  Model files not found. Downloading...")
            self.download_model()

        self.model = PiperVoice.load(model_path=self.PIPER_MODEL, config_path=self.PIPER_CONFIG)

    def _download_file(self, url, dest_path):
        """Stream ``url`` into ``dest_path`` without ever leaving a partial file there.

        The payload is written to ``dest_path + ".part"`` and moved into place
        with ``os.replace`` only after the whole body has been received, so an
        interrupted download can never be mistaken for a valid model on the
        next run.

        Raises:
            requests.RequestException: on connection problems, timeouts or a
                non-2xx HTTP status.
        """
        target_dir = os.path.dirname(dest_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        tmp_path = dest_path + ".part"
        try:
            with requests.get(url, stream=True, timeout=self.DOWNLOAD_TIMEOUT) as r:
                r.raise_for_status()
                with open(tmp_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(tmp_path, dest_path)
        finally:
            # Only still present when the download failed part-way through.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def download_model(self):
        """Download the ONNX model and its JSON config to the configured paths.

        Both files are always fetched, overwriting existing copies.

        Returns:
            The string ``"Done"``.
        """
        model_path = self.PIPER_MODEL
        voice_path = self.PIPER_CONFIG

        print("üîΩ Downloading piper ONNX Quantized (60MB) model...")
        self._download_file(self.MODEL_URL, model_path)
        print(f"‚úÖ Model saved to {model_path} ({os.path.getsize(model_path)//1_000_000} MB)\n")

        print("üîΩ Downloading voice: piper config ...")
        self._download_file(self.CONFIG_URL, voice_path)
        print(f"‚úÖ Voice saved to {voice_path} ({os.path.getsize(voice_path)//1_000_000} MB)")
        return "Done"

    @staticmethod
    def _split_sentences(text):
        """Split ``text`` into stripped, non-empty sentences.

        A sentence ends at ``.``, ``!``, ``?`` or the Devanagari danda /
        double danda (U+0964 / U+0965) when followed by whitespace. The
        terminator stays attached to its sentence.
        """
        pieces = re.split(r'(?<=[.!?\u0964\u0965])\s+', text.strip())
        return [piece.strip() for piece in pieces if piece.strip()]

    def synthesizer_worker(self,q_text: queue.Queue, q_audio: queue.Queue, logs: list):
        """Pull sentences from ``q_text``, synthesize them and queue the audio.

        Runs until ``None`` is read from ``q_text``; the ``None`` is then
        forwarded to ``q_audio`` so the player stops as well. A sentence whose
        synthesis raises is logged and skipped rather than killing the worker;
        otherwise the sentinel would never be forwarded and the rest of the
        pipeline would block forever on the bounded queues.

        Args:
            q_text: queue of sentence strings, terminated by ``None``.
            q_audio: queue receiving one list of float samples per sentence.
            logs: list that receives one dict per processed sentence.
        """
        while True:
            sentence = q_text.get()
            if sentence is None:
                print("[Synthesizer] Got None, stopping...")
                q_audio.put(None)
                break

            print(f"[Synthesizer] Processing: '{sentence}'")
            synth_start = time.time()
            audio_data = []
            try:
                for chunk in self.model.synthesize(sentence):
                    audio_data.extend(chunk.audio_float_array)
            except Exception as exc:
                print(f"[Synthesizer] Failed on '{sentence}': {exc!r}")
                logs.append({
                    "type": "synthesis_error",
                    "text": sentence,
                    "error": repr(exc),
                })
                continue
            synth_end = time.time()

            synth_time = synth_end - synth_start
            print(f"[Synthesizer] Generated {len(audio_data)} samples in {synth_time:.3f}s")

            logs.append({
                "type": "synthesis",
                "text": sentence,
                "duration_sec": synth_time,
                "samples": len(audio_data),
            })

            # Nothing to play for a sentence that produced no samples.
            if audio_data:
                q_audio.put(audio_data)

    def player_worker(self,q_audio: queue.Queue, logs: list):
        """Pull audio chunks from ``q_audio`` and play them one after another.

        Runs until ``None`` is read. Each chunk is played to completion before
        the next one is fetched. A playback failure is logged and the worker
        keeps draining the queue, so the synthesizer can never block on a
        full ``q_audio``.

        Args:
            q_audio: queue of sample sequences, terminated by ``None``.
            logs: list that receives one dict per chunk.
        """
        print("üéß Player thread started...")

        while True:
            audio_chunk = q_audio.get()
            if audio_chunk is None:
                print("üõë Player thread stopping.")
                break
            play_start = time.time()
            try:
                sd.play(audio_chunk, samplerate=self.SAMPLE_RATE)

                # Wait for audio to complete playback
                sd.wait()
            except Exception as exc:
                print(f"[Player] Playback failed: {exc!r}")
                logs.append({
                    "type": "playback_error",
                    "error": repr(exc),
                    "samples": len(audio_chunk),
                })
                continue
            play_end = time.time()

            logs.append({
                "type": "playback",
                "duration_sec": play_end - play_start,
                "samples": len(audio_chunk),
            })

    def text_to_speech_stream(self,text: str):
        """Speak ``text`` sentence by sentence.

        The text is split into sentences, which are fed to a synthesizer
        thread; a player thread plays each sentence's audio as soon as it is
        ready. The call blocks until everything has been played.

        Args:
            text: the text to speak.

        Returns:
            The list of log dicts appended by the two workers (synthesis and
            playback timings, plus any per-sentence errors).
        """
        print("üß© Starting Text ‚Üí Speech pipeline...\n")

        q_text = queue.Queue(maxsize=5)
        q_audio = queue.Queue(maxsize=5)
        logs = []

        # Daemon threads: an interrupted run must not keep the process alive.
        synth_thread = threading.Thread(
            target=self.synthesizer_worker, args=(q_text, q_audio, logs), daemon=True
        )
        play_thread = threading.Thread(
            target=self.player_worker, args=(q_audio, logs), daemon=True
        )

        synth_thread.start()
        play_thread.start()

        start_time = time.time()

        try:
            for sentence in self._split_sentences(text):
                q_text.put(sentence)
        finally:
            # Always send the stop signal, even if feeding was interrupted,
            # so both workers shut down.
            q_text.put(None)

        synth_thread.join()
        play_thread.join()

        end_time = time.time()
        print("\n‚úÖ Pipeline complete.")
        print(f"‚è±Ô∏è Total runtime: {end_time - start_time:.2f}s\n")
        return logs

    

In [19]:
# Build the TTS engine. The constructor downloads the Piper model and config
# into ./piper_model/ when either file is missing, then loads the voice.
text_class = Text_to_Speech()

‚ö†Ô∏è  Model files not found. Downloading...
üîΩ Downloading piper ONNX Quantized (60MB) model...
‚úÖ Model saved to ./piper_model/hi_IN-priyamvada-medium.onnx (63 MB)

üîΩ Downloading voice: piper config ...
‚úÖ Voice saved to ./piper_model/hi_IN-priyamvada-medium.json (0 MB)


In [24]:
# Demo: speak one sample sentence through the streaming pipeline.
# The call blocks until playback has finished.
text = "Hey Rutwik, can you tell us about yourself"
print("‚úÖ Text-to-Speech initialized successfully!")
text_class.text_to_speech_stream(text)
print("Done")

‚úÖ Text-to-Speech initialized successfully!
üß© Starting Text ‚Üí Speech pipeline...

üéß Player thread started...
[Synthesizer] Processing: 'Hey Rutwik, can you tell us about yourself'
[Synthesizer] Generated 68352 samples in 0.173s
[Synthesizer] Got None, stopping...
üõë Player thread stopping.

‚úÖ Pipeline complete.
‚è±Ô∏è Total runtime: 3.44s

Done
