# Prepare Development Environment

Run the cell below to create the `gen_voice` conda environment and install the project's dependencies.

In [None]:
!conda create -c pytorch -c nvidia -n gen_voice pytorch torchvision torchaudio pytorch-cuda=11.8 -y
!conda activate gen_voice
!conda install conda-forge::ffmpeg
!pip install -r requirements.txt
!playwright install

If ffmpeg doesn't work correctly on Linux, the following command may help.

In [None]:
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg

# Import Libraries

In [2]:
from financial_chatbot import ChatBot
import speech_recognition as sr
import gradio as gr

%load_ext gradio

  from .autonotebook import tqdm as notebook_tqdm


# Pull Data for LLM Context

In [3]:
# Run the extract_web_data.py script in a separate Python process.
# NOTE(review): presumably this refreshes the web data the chatbot uses as LLM context -
# confirm what it writes (and where) in extract_web_data.py.
!python extract_web_data.py

# Check for Microphones

In [4]:
# List the available audio devices by name.
# NOTE(review): the position of a name in this list looks like the index to pass as
# `mic_id` when constructing ChatBot below - confirm against financial_chatbot.ChatBot.
sr.Microphone.list_microphone_names()

['Microsoft Sound Mapper - Input',
 'Microphone (HD Pro Webcam C920)',
 'Microphone (2- G935 Gaming Head',
 'Microsoft Sound Mapper - Output',
 'Speakers (Realtek(R) Audio)',
 'Speakers (2- G935 Gaming Headse',
 'ASUS VE278 (NVIDIA High Definit',
 'ASUS VE278 (NVIDIA High Definit',
 'VS278 (NVIDIA High Definition A',
 'Realtek Digital Output (Realtek',
 'Primary Sound Capture Driver',
 'Microphone (HD Pro Webcam C920)',
 'Microphone (2- G935 Gaming Headset)',
 'Primary Sound Driver',
 'Speakers (Realtek(R) Audio)',
 'Speakers (2- G935 Gaming Headset)',
 'ASUS VE278 (NVIDIA High Definition Audio)',
 'ASUS VE278 (NVIDIA High Definition Audio)',
 'VS278 (NVIDIA High Definition Audio)',
 'Realtek Digital Output (Realtek(R) Audio)',
 'Speakers (2- G935 Gaming Headset)',
 'Speakers (Realtek(R) Audio)',
 'ASUS VE278 (NVIDIA High Definition Audio)',
 'ASUS VE278 (NVIDIA High Definition Audio)',
 'VS278 (NVIDIA High Definition Audio)',
 'Realtek Digital Output (Realtek(R) Audio)',
 'Microphone 

## Full Audio
If your system has speakers and a microphone, you can run in full audio mode using the communication loop below.

In [None]:
# Initialize the chatbot specifying the index of the mic if one exists and whether or not speakers will be used.
# For best results a headset with a mic is recommended.
chatbot = ChatBot(mic_id=1, enable_speakers=True)

# Conversation so far as [prompt, response] pairs; handed back to the chatbot on every turn.
history = []
while True:
    guess = chatbot.recognize_speech_from_mic()
    if not guess["success"]:
        # Report the recognition failure and listen again.
        # Single quotes inside the f-string keep this line valid on Python versions before 3.12.
        chatbot.communicate(f"Error: {guess['error']}")
        continue

    prompt = guess['transcription']
    if not prompt:
        # Nothing usable was transcribed (None or empty string); the substring test
        # below would fail on None, so skip this turn and listen again.
        print('No speech was transcribed, listening again...\n')
        continue

    print(f'Prompt: {prompt} \n')

    # Saying "goodbye" ends the session. Compare case-insensitively because the
    # recognizer may capitalize the word.
    if 'goodbye' in prompt.lower():
        chatbot.communicate('See you next time')
        break

    response = chatbot.respond(prompt, history)
    history.append([prompt, response])
    print(f'{response}\n\n')

## Speakers Optional (no mic)
The following Gradio UI lets you chat with the LLM by typing, so no microphone is needed.

In [None]:
%%blocks

# Create a Gradio chat interface and wire it to our chatbot.
# chatbot.respond is handed to ChatInterface as the function that answers each submitted message.
chatbot = ChatBot(enable_speakers=True, threaded=True)
with gr.Blocks() as demo:
    gr.ChatInterface(chatbot.respond)

In [10]:
from transformers import pipeline
import torch
import numpy as np

# Run the speech model on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Speech-to-text pipeline used by transcribe().
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    device=device,
)

# Chatbot that receives the transcribed text.
chatbot = ChatBot(enable_speakers=True, threaded=True)


def transcribe(audio):
    """Transcribe a recording and pass the resulting text to the chatbot.

    Args:
        audio: A ``(sample_rate, samples)`` pair, where ``samples`` is a NumPy
            array, or ``None`` when there is no recording. Arrays with more than
            one dimension are averaged over axis 1 to produce a mono signal.

    Returns:
        The transcribed text, or an empty string when there is no audio, the
        clip is empty, or every sample is zero.
    """
    # No recording supplied (e.g. the form was submitted without one).
    if audio is None:
        return ""

    # Named `sample_rate` rather than `sr` so the speech_recognition alias
    # imported at the top of the notebook is not shadowed inside this function.
    sample_rate, samples = audio

    # Convert to mono if stereo
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    samples = samples.astype(np.float32)

    # Peak-normalize. An empty clip has no maximum and an all-zero clip would
    # divide by zero (filling the signal with NaNs); neither contains anything
    # to transcribe, so return early.
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak == 0:
        return ""
    samples /= peak

    text = transcriber({"sampling_rate": sample_rate, "raw": samples})["text"]

    # NOTE(review): elsewhere in this notebook respond() is called with
    # (prompt, history) - confirm that the history argument is optional.
    chatbot.respond(text)

    return text



In [12]:
%%blocks



with gr.Blocks() as demo:
    gr.Interface(
        transcribe,
        gr.Audio(sources="microphone"),
        "text",
    )

