# Testing Jhana AI Pipeline

# Test Pipeline

In [None]:
!pip install numpy==1.26.4
!pip install ollama==0.1.7
!pip install openai_whisper==20231117
!pip install scipy==1.12.0
!pip install sounddevice==0.4.6
!pip install torch==2.2.0
!pip install torchaudio==2.2.0
!pip install TTS==0.22.0


In [None]:
import os
import numpy as np
import sounddevice as sd
from scipy.io.wavfile import write
import whisper
from IPython.display import Audio
import ollama
import torch
from TTS.api import TTS

# Function to record audio
def record_audio(duration=5, fs=44100):
    print("Recording for {} seconds...".format(duration))
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=2, dtype='float64')
    sd.wait()  # Wait until recording is finished
    recording = np.int16(recording / np.max(np.abs(recording)) * 32767)  # Convert to int16
    return recording, fs

# Record audio
output_directory = "../data/input/audio/speech_to_transcribe"
os.makedirs(output_directory, exist_ok=True)
audio, fs = record_audio(duration=5)
audio_file_path = os.path.join(output_directory, "my_voice_recording.wav")
write(audio_file_path, fs, audio)
print(f"Recording saved to {audio_file_path}")

# Convert speech to text
model = whisper.load_model("small")
result = model.transcribe(audio_file_path, language="en")
transcribed_text = result["text"]
print("Transcribed text:", transcribed_text)

# Chat with Ollama
ollama_response = ollama.chat(model='mixtral:8x7b-instruct-v0.1-q4_0', messages=[{'role': 'user', 'content': transcribed_text}])
ollama_text = ollama_response['message']['content']
print("Ollama response:", ollama_text)

# Save Ollama's response as text
output_text_directory = "../data/output/text/"
os.makedirs(output_text_directory, exist_ok=True)
text_file_path = os.path.join(output_text_directory, "ollama_response.txt")
with open(text_file_path, "w") as text_file:
    text_file.write(ollama_text)
print(f"Ollama's response saved to {text_file_path}")

# Convert Ollama's response to speech
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(device)  # Adjust model as needed
output_audio_directory = "../data/output/audio/"
os.makedirs(output_audio_directory, exist_ok=True)
output_file_path = os.path.join(output_audio_directory, "ollama_response.wav")
tts.tts_to_file(text=ollama_text, file_path=output_file_path)
print(f"Text-to-speech audio saved to {output_file_path}")

# Play the generated speech
if os.path.exists(output_file_path):
    display(Audio(output_file_path))
else:
    print("Audio file not found.")


# Test Pipeline with Voice Cloning

In [None]:
import os
import numpy as np
import sounddevice as sd
from scipy.io.wavfile import write
import whisper
from IPython.display import Audio
import ollama
import torch
from TTS.api import TTS

torch.cuda.empty_cache()

# Function to record audio
def record_audio(duration=5, fs=44100):
    print("Recording for {} seconds...".format(duration))
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=2, dtype='float64')
    sd.wait()  # Wait until recording is finished
    recording = np.int16(recording / np.max(np.abs(recording)) * 32767)  # Convert to int16
    return recording, fs

# Record audio
output_directory = "../data/input/audio/speech_to_transcribe"
os.makedirs(output_directory, exist_ok=True)
audio, fs = record_audio(duration=5)
audio_file_path = os.path.join(output_directory, "my_voice_recording.wav")
write(audio_file_path, fs, audio)
print(f"Recording saved to {audio_file_path}")

# Convert speech to text
model = whisper.load_model("small")
result = model.transcribe(audio_file_path, language="en")
transcribed_text = result["text"]
print("Transcribed text:", transcribed_text)

# Chat with Ollama
ollama_response = ollama.chat(model='mixtral:8x7b-instruct-v0.1-q4_0', messages=[{'role': 'user', 'content': transcribed_text}])
ollama_text = ollama_response['message']['content']
print("Ollama response:", ollama_text)

# Save Ollama's response as text
output_text_directory = "../data/output/text/"
os.makedirs(output_text_directory, exist_ok=True)
text_file_path = os.path.join(output_text_directory, "ollama_response.txt")
with open(text_file_path, "w") as text_file:
    text_file.write(ollama_text)
print(f"Ollama's response saved to {text_file_path}")

# Convert Ollama's response to speech
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)  # Adjust model as needed
output_audio_directory = "../data/output/audio/"
os.makedirs(output_audio_directory, exist_ok=True)
output_file_path = os.path.join(output_audio_directory, "ollama_response.wav")
tts.tts_to_file(text=ollama_text, file_path=output_file_path, language="en", speaker_wav="../data/input/audio/voices_to_clone/audio_cf_10_seconds.wav")
print(f"Text-to-speech audio saved to {output_file_path}")

# Play the generated speech
if os.path.exists(output_file_path):
    display(Audio(output_file_path))
else:
    print("Audio file not found.")


# Pipeline with Mixtral

mixtral:8x7b-instruct-v0.1-q4_0

In [2]:
import os
import numpy as np
import sounddevice as sd
from scipy.io.wavfile import write
import whisper
from IPython.display import Audio
import ollama
import torch
from TTS.api import TTS

torch.cuda.empty_cache()

# Function to record audio
def record_audio(duration=5, fs=44100):
    print("Recording for {} seconds...".format(duration))
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=2, dtype='float64')
    sd.wait()  # Wait until recording is finished
    recording = np.int16(recording / np.max(np.abs(recording)) * 32767)  # Convert to int16
    return recording, fs

# Record audio
output_directory = "../data/input/audio/speech_to_transcribe"
os.makedirs(output_directory, exist_ok=True)
audio, fs = record_audio(duration=5)
audio_file_path = os.path.join(output_directory, "my_voice_recording.wav")
write(audio_file_path, fs, audio)
print(f"Recording saved to {audio_file_path}")

torch.cuda.empty_cache()

# Convert speech to text
model = whisper.load_model("small")
result = model.transcribe(audio_file_path, language="en")
transcribed_text = result["text"]
print("Transcribed text:", transcribed_text)

torch.cuda.empty_cache()

# Chat with Ollama
ollama_response = ollama.chat(model='mixtral:8x7b-instruct-v0.1-q4_0', messages=[{'role': 'user', 'content': transcribed_text}])
ollama_text = ollama_response['message']['content']
print("Ollama response:", ollama_text)

torch.cuda.empty_cache()

# Save Ollama's response as text
output_text_directory = "../data/output/text/"
os.makedirs(output_text_directory, exist_ok=True)
text_file_path = os.path.join(output_text_directory, "ollama_response.txt")
with open(text_file_path, "w") as text_file:
    text_file.write(ollama_text)
print(f"Ollama's response saved to {text_file_path}")

# Convert Ollama's response to speech
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)  # Adjust model as needed
output_audio_directory = "../data/output/audio/"
os.makedirs(output_audio_directory, exist_ok=True)
output_file_path = os.path.join(output_audio_directory, "ollama_response.wav")
tts.tts_to_file(text=ollama_text, file_path=output_file_path, language="en", speaker_wav="../data/input/audio/voices_to_clone/audio_cf_10_seconds.wav")
print(f"Text-to-speech audio saved to {output_file_path}")

# Play the generated speech
if os.path.exists(output_file_path):
    display(Audio(output_file_path))
else:
    print("Audio file not found.")


Recording for 5 seconds...
Recording saved to ../data/input/audio/speech_to_transcribe/my_voice_recording.wav
Transcribed text:  Guide me in a breath awareness.
Ollama response:  Sure, I'd be happy to guide you through a breath awareness exercise. Here are the steps:

1. Find a comfortable and quiet place to sit or lie down.
2. Close your eyes or soften your gaze.
3. Begin by bringing your attention to your natural breathing pattern. Notice the sensation of air entering and leaving your nostrils or mouth.
4. Don't try to control your breath, just observe it as it is. Allow your breath to flow in and out at its own pace.
5. As you focus on your breath, you may notice that your mind starts to wander. This is normal. When you realize that your mind has wandered, gently bring your attention back to your breath.
6. Try to stay present with each inhalation and exhalation. Notice the rise and fall of your chest or abdomen as you breathe.
7. Continue to focus on your breath for several minutes

  torch.utils._pytree._register_pytree_node(
2024-03-25 17:30:03.721982: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-25 17:30:03.722007: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-25 17:30:03.722812: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-25 17:30:03.727602: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  torch.

 > Using model: xtts


OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB. GPU 0 has a total capacity of 7.79 GiB of which 15.44 MiB is free. Process 4466 has 5.91 GiB memory in use. Including non-PyTorch memory, this process has 1.81 GiB memory in use. Of the allocated memory 1.61 GiB is allocated by PyTorch, and 70.00 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [1]:
import os

# import numpy as np
# import sounddevice as sd
# from scipy.io.wavfile import write
# import whisper
# from IPython.display import Audio
# import ollama
# import torch
# from TTS.api import TTS

# Function to record and process audio
def record_and_process_audio(duration=5, fs=44100):
    import sounddevice as sd
    import numpy as np

    print(f"Recording for {duration} seconds...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=2, dtype='float64')
    sd.wait()  # Wait until recording is finished
    recording = np.int16(recording / np.max(np.abs(recording)) * 32767)  # Convert to int16
    del sd, np  # Free up memory by deleting imported modules
    return recording, fs

# Record and process audio
output_directory = "../data/input/audio/speech_to_transcribe"
os.makedirs(output_directory, exist_ok=True)
audio, fs = record_and_process_audio(duration=5)
audio_file_path = os.path.join(output_directory, "my_voice_recording.wav")

# Write audio file
from scipy.io.wavfile import write
write(audio_file_path, fs, audio)
print(f"Recording saved to {audio_file_path}")

# Convert speech to text
def convert_speech_to_text(audio_path):
    import whisper

    model = whisper.load_model("small")
    result = model.transcribe(audio_path, language="en")
    del whisper  # Free up memory
    return result["text"]

transcribed_text = convert_speech_to_text(audio_file_path)
print("Transcribed text:", transcribed_text)

# Chat with Ollama
def get_ollama_response(text):
    import ollama

    response = ollama.chat(model='mixtral:8x7b-instruct-v0.1-q4_0', messages=[{'role': 'user', 'content': text}])
    del ollama  # Free up memory
    return response['message']['content']

ollama_text = get_ollama_response(transcribed_text)
print("Ollama response:", ollama_text)

# Save Ollama's response as text
output_text_directory = "../data/output/text/"
os.makedirs(output_text_directory, exist_ok=True)
text_file_path = os.path.join(output_text_directory, "ollama_response.txt")
with open(text_file_path, "w") as text_file:
    text_file.write(ollama_text)
print(f"Ollama's response saved to {text_file_path}")

# Convert Ollama's response to speech
def convert_text_to_speech(text):
    import torch
    from TTS.api import TTS

    device = "cuda" if torch.cuda.is_available() else "cpu"
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)  # Adjust model as needed
    output_audio_directory = "../data/output/audio/"
    os.makedirs(output_audio_directory, exist_ok=True)
    output_file_path = os.path.join(output_audio_directory, "ollama_response.wav")
    tts.tts_to_file(text=text, file_path=output_file_path, language="en", speaker_wav="../data/input/audio/voices_to_clone/audio_cf_10_seconds.wav")
    del torch, TTS  # Free up memory
    return output_file_path

output_file_path = convert_text_to_speech(ollama_text)
print(f"Text-to-speech audio saved to {output_file_path}")

# Play the generated speech
from IPython.display import Audio
if os.path.exists(output_file_path):
    display(Audio(output_file_path))
else:
    print("Audio file not found.")


Recording for 5 seconds...
Recording saved to ../data/input/audio/speech_to_transcribe/my_voice_recording.wav
Transcribed text:  Guide me in a breath awareness.
Ollama response:  Sure, I'd be happy to guide you through a breath awareness exercise. Here are the steps:

1. Find a comfortable and quiet place to sit or lie down.
2. Close your eyes or maintain a soft gaze.
3. Bring your attention to your natural breathing pattern. Don't try to change it, just observe it.
4. Notice the sensation of your breath as it enters and leaves your nostrils. You might feel a coolness on the inhale and warmth on the exhale.
5. Focus your attention on the physical sensations of your breath, rather than any thoughts or emotions that may arise.
6. If your mind wanders, gently bring your attention back to your breath without judgment.
7. Continue to observe your breath for several minutes, allowing yourself to become fully present in the moment.
8. When you're ready, slowly open your eyes and take a moment

  torch.utils._pytree._register_pytree_node(
2024-03-25 17:44:22.489781: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-25 17:44:22.489808: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-25 17:44:22.490637: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-25 17:44:22.495829: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  torch.

 > Using model: xtts
 > Text splitted to sentences.
["Sure, I'd be happy to guide you through a breath awareness exercise.", 'Here are the steps:', '1. Find a comfortable and quiet place to sit or lie down.', '2. Close your eyes or maintain a soft gaze.', '3. Bring your attention to your natural breathing pattern.', "Don't try to change it, just observe it.", '4. Notice the sensation of your breath as it enters and leaves your nostrils.', 'You might feel a coolness on the inhale and warmth on the exhale.', '5. Focus your attention on the physical sensations of your breath, rather than any thoughts or emotions that may arise.', '6. If your mind wanders, gently bring your attention back to your breath without judgment.', '7. Continue to observe your breath for several minutes, allowing yourself to become fully present in the moment.', "8. When you're ready, slowly open your eyes and take a moment to reflect on your experience.", 'Remember that the goal of this exercise is not to achieve 