## Install Libraries

In [5]:
!pip install nltk transformers gtts
!pip install SpeechRecognition
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install pyaudio
!pip install pydub
!pip install pygame
!pip install openai

path = "/content/drive/MyDrive/CSI5180/Project"

Collecting gtts
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Installing collected packages: gtts
Successfully installed gtts-2.5.1
Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.1-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.1
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libasound2-dev is already the newest version (1.2.6.1-1ubuntu1).
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
Suggested packages:
  portaudio19-doc
The following NEW packages will be installed:
  libportaudio2 libportaudiocpp0 portaudio19-dev
0 upgraded, 3 newly installed, 0 to remove and 38 not upgraded.
Need to get 188 kB of archives.
After this operation, 927 kB of additional disk space will be used.
Get:1 http://archive.ub

## Record Audio

In [6]:
# all imports
from io import BytesIO
from base64 import b64decode
from google.colab import output
from IPython.display import Javascript

RECORD = """
const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
  const reader = new FileReader()
  reader.onloadend = e => resolve(e.srcElement.result)
  reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
  stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  recorder = new MediaRecorder(stream)
  chunks = []
  recorder.ondataavailable = e => chunks.push(e.data)
  recorder.start()
  await sleep(time)
  recorder.onstop = async ()=>{
    blob = new Blob(chunks)
    text = await b2text(blob)
    resolve(text)
  }
  recorder.stop()
})
"""

def record(sec=3):
  print("Speak Now...")
  display(Javascript(RECORD))
  sec += 1
  s = output.eval_js('record(%d)' % (sec*1000))
  print("Done Recording !")
  b = b64decode(s.split(',')[1])
  return b #byte stream

# audio = record(3)
# import IPython.display as ipd

# ipd.display(ipd.Audio(audio))

## Speech to Text

In [34]:
from pydub import AudioSegment
import speech_recognition as sr
import tempfile

def save_to_file(byte_stream, file_format='mp4'):
    # Save the byte stream to a temporary file
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.' + file_format)
    temp_file.write(byte_stream)
    temp_file.close()
    return temp_file.name

def transcribe_audio(audio_bytes, file_format='mp4'):
    # Save the audio bytes to a file
    audio_file_path = save_to_file(audio_bytes, file_format=file_format)

    # Use pydub to load the audio file
    audio_segment = AudioSegment.from_file(audio_file_path)

    # Export the audio to a new WAV file
    wav_file_path = audio_file_path + '.wav'
    audio_segment.export(wav_file_path, format='wav')

    # Initialize the recognizer
    recognizer = sr.Recognizer()

    # Open the WAV file for transcription
    with sr.AudioFile(wav_file_path) as source:
        audio_data = recognizer.record(source)
        try:
            return recognizer.recognize_google(audio_data)
            # print("Transcription: " + text)
            # response = process_intent(text)
            # print("Response: "+response)
        except sr.UnknownValueError:
            print("Still listenning...")
        except sr.RequestError as e:
            print(f"Could not request results from Google Web Speech API")

# Use the record function to capture audio
audio_bytes = record(5)

# Transcribe the audio
transcribe_audio(audio_bytes)



Speak Now...


<IPython.core.display.Javascript object>

Done Recording !
Still listenning...


## Text to Speech

In [8]:
from gtts import gTTS
from IPython.display import Audio, display

def text_to_speech(text):
    # Convert the text to speech
    tts = gTTS(text=text, lang='en')
    # Save the speech to a temporary file
    tts.save('temp_audio.mp3')

    # Play the audio file in the notebook
    display(Audio('temp_audio.mp3', autoplay=True))

## OpenAI API

In [32]:
import openai



def ask_gpt(user_request):

    openai.api_key = 'ADD API Key'

    # Replace 'your-api-key' with your actual OpenAI API key
    OPENAI_API_KEY='ADD API Key'


    model_choice = "gpt-3.5-turbo-16k"  #@param ['gpt-3.5-turbo-16k', 'gpt-3.5-turbo', 'gpt-4']
    insert_prompt = user_request  #@param {type: "string"}
    try:
        # Make an API call to OpenAI
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[
                {"role": "system", "content": "You are an empathetic therapist skilled in conversational support. Your goal is to provide compassionate and understanding responses to your patient's concerns. Listen attentively, and respond with empathy. You may ask clarifying questions to better understand your patient's feelings or situation. If a topic arises that is beyond your scope (not related to emotions or therapy), kindly acknowledge it and suggest seeking additional support. Keep your responses concise and focused on fostering a supportive dialogue."},
                {"role": "user", "content": insert_prompt}
            ]
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        return f"An error occurred: {e}"

# print(ask_gpt("Hello"))

# Run the Virtual Assistant

In [16]:
def run():
  try:
    # Use the record function to capture audio
    audio_bytes = record(4)

    # Transcribe the audio
    transcript = transcribe_audio(audio_bytes)
    # print("USER: "+transcript)

    # Complete the intent detection and generate answer
    response = ask_gpt(transcript)
    print("VA: "+response)

    # Convert text to speech
    text_to_speech(response)
  except:
    # Convert text to speech
    text_to_speech("Sorry I ran into an issue, try again later.")

run()

Speak Now...


<IPython.core.display.Javascript object>

Done Recording !
VA: It sounds like there may have been some confusion or misunderstanding. Can you share more about the situation so I can better understand your feelings?


In [35]:
import speech_recognition as sr
from gtts import gTTS
import IPython.display as ipd
import openai
import tempfile

# # Initialize the recognizer
# r = sr.Recognizer()

# # OpenAI API setup
# openai.api_key = 'your-api-key'

# def listen():
#     # Use the Microphone as source for input.
#     with sr.Microphone() as source:
#         print("Listening...")
#         # Adjust for ambient noise and record
#         r.adjust_for_ambient_noise(source)
#         audio = r.listen(source, timeout=5, phrase_time_limit=5)
#     return audio

# def transcribe(audio):
#     try:
#         # Use Google Web Speech API to transcribe the audio
#         text = r.recognize_google(audio)
#         print(f"Transcribed Text: {text}")
#         return text
#     except sr.UnknownValueError:
#         return "Could not understand audio"
#     except sr.RequestError as e:
#         return f"Could not request results; {e}"

# def ask_gpt(question):
#     response = openai.ChatCompletion.create(
#         model="gpt-3.5-turbo",
#         messages=[
#             {"role": "user", "content": question}
#         ]
#     )
#     return response.choices[0].message.content.strip()

def speak(text):
    tts = gTTS(text=text, lang='en')
    fp = tempfile.NamedTemporaryFile(delete=False)
    tts.save(fp.name)
    ipd.display(ipd.Audio(fp.name, autoplay=True))

def main_loop():
    while True:
        audio = record()
        query = transcribe_audio(audio)
        if query:
            print("User: ",query)
            response = ask_gpt(query)
            print("GPT Response:", response)
            speak(response)

# Run the main loop
main_loop()


Speak Now...


<IPython.core.display.Javascript object>

Done Recording !
User:  hello
GPT Response: Hello! How are you feeling today? Is there something specific you'd like to talk about or do you just need someone to listen?


Speak Now...


<IPython.core.display.Javascript object>

Done Recording !
Still listenning...
Speak Now...


<IPython.core.display.Javascript object>

Done Recording !
Still listenning...
Speak Now...


<IPython.core.display.Javascript object>

Done Recording !
Still listenning...
Speak Now...


<IPython.core.display.Javascript object>

KeyboardInterrupt: 