# Faster Pipeline

In [None]:
!pip install numpy==1.26.4
!pip install ollama==0.1.7
!pip install openai_whisper==20231117
!pip install scipy==1.12.0
!pip install sounddevice==0.4.6
!pip install torch==2.2.0
!pip install torchaudio==2.2.0
!pip install TTS==0.22.0


In [5]:
import asyncio
import os
import numpy as np
import sounddevice as sd
from scipy.io.wavfile import write
import whisper
import torch
from TTS.api import TTS
import ollama

# Pre-load models once at module level so later calls reuse the in-memory copies.
# The device is resolved first and shared by both models, so the notebook falls
# back to the CPU when CUDA is unavailable instead of failing while loading Whisper.
device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_model = whisper.load_model("small", device=device)
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)  # Adjust model as needed

# Function to record audio asynchronously
async def record_audio_async(duration=5, fs=44100):
    """Record stereo audio and return it as peak-normalised 16-bit PCM.

    Args:
        duration: Recording length in seconds.
        fs: Sample rate in Hz.

    Returns:
        A ``(samples, fs)`` tuple where ``samples`` is an ``int16`` array with
        the same shape as the buffer returned by ``sd.rec`` and ``fs`` is the
        sample rate that was passed in.
    """
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=2, dtype='float64')
    await asyncio.sleep(duration)  # Async sleep for non-blocking wait

    # The sleep timer is not tied to the audio stream, so the buffer may not be
    # completely filled yet. Wait for the recording to finish in a worker thread
    # so the event loop is not blocked by the (normally very short) wait.
    await asyncio.get_running_loop().run_in_executor(None, sd.wait)

    # Peak-normalise to the int16 range. An empty or all-zero (silent) buffer has
    # no usable peak: dividing by it would raise or produce NaN samples, so return
    # digital silence of the same shape instead.
    peak = np.max(np.abs(recording)) if recording.size else 0.0
    if peak == 0:
        return np.zeros(recording.shape, dtype=np.int16), fs

    return np.int16(recording / peak * 32767), fs

# Function to transcribe audio to text asynchronously
async def transcribe_audio_async(audio_file_path):
    """Transcribe an audio file to English text with the pre-loaded Whisper model.

    Args:
        audio_file_path: Path of the audio file handed to ``whisper_model.transcribe``.

    Returns:
        The value stored under the ``"text"`` key of the transcription result.
    """
    # The transcribe call is synchronous; running it in the default executor
    # keeps the event loop responsive while the model works.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None,
        lambda: whisper_model.transcribe(audio_file_path, language="en"),
    )
    return result["text"]

# Function to chat with Ollama asynchronously
async def chat_with_ollama_async(transcribed_text, model='mixtral:8x7b-instruct-v0.1-q4_0'):
    """Send the transcribed text to an Ollama model as a single user message.

    Args:
        transcribed_text: Text used as the content of the user message.
        model: Name of the Ollama model to query. Defaults to the model this
            notebook was written against.

    Returns:
        The ``['message']['content']`` field of the chat response.
    """
    # This function should be adapted to how you interact with your Ollama model.
    messages = [{'role': 'user', 'content': transcribed_text}]

    # ollama.chat is a synchronous call; run it in the default executor so the
    # event loop is not blocked while waiting for the reply.
    loop = asyncio.get_running_loop()
    ollama_response = await loop.run_in_executor(
        None,
        lambda: ollama.chat(model=model, messages=messages),
    )
    return ollama_response['message']['content']

# Main async function to handle the app's logic
async def main():
    """Run one voice round-trip: record, save, transcribe, then ask Ollama."""
    # Make sure the folder that receives the WAV file exists
    target_dir = "../data/input/audio/speech_to_transcribe"
    os.makedirs(target_dir, exist_ok=True)
    wav_path = os.path.join(target_dir, "my_voice_recording.wav")

    # Step 1: capture five seconds of audio and persist it to disk
    samples, sample_rate = await record_audio_async(duration=5)
    write(wav_path, sample_rate, samples)
    print(f"Recording saved to {wav_path}")

    # Step 2: turn the saved recording into text
    spoken_text = await transcribe_audio_async(wav_path)
    print("Transcribed text:", spoken_text)

    # Step 3: forward the transcription to Ollama and show its answer
    reply = await chat_with_ollama_async(spoken_text)
    print("Ollama response:", reply)

    # Further processing like TTS can be added here, following similar async patterns

import asyncio

# Entry point: start the pipeline defined above.

def _report_pipeline_failure(finished_task):
    """Print the exception of a finished background task, if it raised one."""
    if finished_task.cancelled():
        return
    error = finished_task.exception()
    if error is not None:
        print(f"Pipeline failed: {error!r}")


if __name__ == "__main__":
    # asyncio.get_running_loop() raises RuntimeError when no loop is running,
    # which distinguishes a notebook kernel (loop already running) from a plain
    # script without relying on the deprecated implicit-loop behaviour of
    # asyncio.get_event_loop().
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop is not None:
        # A loop is already running: schedule the coroutine on it. The task runs
        # in the background, so its output may appear after this cell returns.
        # The done-callback surfaces exceptions that would otherwise go unseen.
        task = loop.create_task(main())
        task.add_done_callback(_report_pipeline_failure)
    else:
        # No running loop: run the coroutine to completion on a fresh loop.
        asyncio.run(main())



 > tts_models/multilingual/multi-dataset/xtts_v2 is already downloaded.
 > Using model: xtts


Recording saved to ../data/input/audio/speech_to_transcribe/my_voice_recording.wav
Transcribed text:  Thank you for watching!
