# Importing All Libraries

In [10]:
import speech_recognition as sr
import pyttsx3
import soundfile as sf
import openai
import os
import sys
import psycopg2
import googletrans
from datetime import datetime
from googletrans import Translator

In [25]:
print(googletrans.LANGUAGES)

{'af': 'afrikaans', 'sq': 'albanian', 'am': 'amharic', 'ar': 'arabic', 'hy': 'armenian', 'az': 'azerbaijani', 'eu': 'basque', 'be': 'belarusian', 'bn': 'bengali', 'bs': 'bosnian', 'bg': 'bulgarian', 'ca': 'catalan', 'ceb': 'cebuano', 'ny': 'chichewa', 'zh-cn': 'chinese (simplified)', 'zh-tw': 'chinese (traditional)', 'co': 'corsican', 'hr': 'croatian', 'cs': 'czech', 'da': 'danish', 'nl': 'dutch', 'en': 'english', 'eo': 'esperanto', 'et': 'estonian', 'tl': 'filipino', 'fi': 'finnish', 'fr': 'french', 'fy': 'frisian', 'gl': 'galician', 'ka': 'georgian', 'de': 'german', 'el': 'greek', 'gu': 'gujarati', 'ht': 'haitian creole', 'ha': 'hausa', 'haw': 'hawaiian', 'iw': 'hebrew', 'he': 'hebrew', 'hi': 'hindi', 'hmn': 'hmong', 'hu': 'hungarian', 'is': 'icelandic', 'ig': 'igbo', 'id': 'indonesian', 'ga': 'irish', 'it': 'italian', 'ja': 'japanese', 'jw': 'javanese', 'kn': 'kannada', 'kk': 'kazakh', 'km': 'khmer', 'ko': 'korean', 'ku': 'kurdish (kurmanji)', 'ky': 'kyrgyz', 'lo': 'lao', 'la': 'lat

# Initializing All Variables

In [2]:
# Reading API Key
with open("./apikey.txt") as file:
    openai.api_key = file.read()

# Define path to audio output file
output_audio_file = "./output.mp3"

# Initializing text-to-speech object
engine = pyttsx3.init()
voices = engine.getProperty('voices')
for voice in voices:
    print("Voice: %s" % voice.name)
    print(" - ID: %s" % voice.id)
    print(" - Languages: %s" % voice.languages)
    print(" - Gender: %s" % voice.gender)
    print(" - Age: %s" % voice.age)
    print("\n")
    
engine.setProperty("voice", voices[2].id)

# Define the persona of Ernest
messages = [
    {"role": "system", "content": "You are Ernest, a friendly and empathetic virtual avatar from Standard Chartered Bank that can help recommend one of the bank savings account or current account best suited to customers' needs through meaningful conversations. There are only 3 products Ernest can recommend: XtraSaver, MyWay and E-Saver."},
    {"role": "user", "content": "Hi Ernest\n\n===\n\n"},
    {"role": "assistant", "content": " 您好，需要我的协助吗?\n"},
    {"role": "user", "content": "What are the types of accounts you have?\n\n===\n\n"},
    {"role": "assistant", "content": " We currently have two types of savings accounts, one called MyWay and the other called E-saver. The former is more for those above 55 years old with retirement planning in mind. The latter gives high interest on fresh funds. For current account, we have XtraSaver, an interest bearing account with cashback on card purchases.\n"}
]

# Get today's date
conversation_id = int(datetime.now().strftime("%Y%m%d%H%M%S"))

Voice: Microsoft David Desktop - English (United States)
 - ID: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0
 - Languages: []
 - Gender: None
 - Age: None


Voice: Microsoft Zira Desktop - English (United States)
 - ID: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0
 - Languages: []
 - Gender: None
 - Age: None


Voice: Microsoft Huihui Desktop - Chinese (Simplified)
 - ID: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_ZH-CN_HUIHUI_11.0
 - Languages: []
 - Gender: None
 - Age: None




# Define functions 

In [3]:
def speak(text):
    engine.say(text)
    engine.runAndWait()
    
def generate_response(transcript):
    
    transcript += '\n\n===\n\n'
    # Append to the conversation
    messages.append({"role": "user", "content": transcript})
    conversation_context = ' '.join([f"{msg['role']}: {msg['content']}" for msg in messages])
    
    # Generate an instance of fine-tuned chatGPT
    response = openai.Completion.create(engine='curie:ft-personal-2023-06-06-08-45-36',
                                        prompt=conversation_context,
                                        temperature=0,
                                        max_tokens=100,
                                        n=1,
                                        stop=["\n"],
                                        timeout=None).choices[0].text.strip()
    if 'assistant:' in response:
        response = response.strip('assistant:')
        response = response.strip(' assistant:')
    if 'user:' in response:
        response = response.strip('user:')
        response = response.strip(' user:')
    
    messages.append({"role": "assistant", "content": response})
    return response

# Connecting to Database

In [4]:
def post_to_database(conversation_id, role_id, response):
    try:
        connection = psycopg2.connect(user='postgres', 
                                      password='root',
                                      host='localhost',
                                      port=5432,
                                      database='scverse')

        cursor = connection.cursor()
        query = "insert into conversations (conversation_id, role_id, response) values ({}, '{}', '{}')".format(conversation_id, role_id, response)
        cursor.execute(query)
        connection.commit()
        cursor.close()
        connection.close()
    except:
        print("Error posting results to database. Please check your connection.")

# Audio-Response Generation

In [32]:
"""
This part of the code is triggered when the user clicks on the button "speak to Ernest"
"""

def trigger_response():
    try:
        with sr.Microphone() as mic:
            recognizer = sr.Recognizer()
            recognizer.adjust_for_ambient_noise(mic, duration = 1)

            # Line 13 will be replaced by D-ID audio once it is ready to make the voice quality more consistent
            speak("您好，需要我的协助吗?")
            audio = recognizer.listen(mic, phrase_time_limit=None, timeout=None)
            with open(output_audio_file, "wb") as f:
                f.write(audio.get_wav_data(convert_rate=44100, convert_width=2))
            audio_file = open(output_audio_file, "rb")
            transcript = openai.Audio.transcribe("whisper-1", audio_file)["text"]
            post_to_database(conversation_id, 'user', transcript)
            print("You:\n", transcript,"\n")
            response = generate_response(transcript)
            translator = Translator()
            translated_text = translator.translate(response, dest='zh-cn')
            post_to_database(conversation_id, 'assistant', translated_text.text)
            print("Ernest:\n", translated_text.text, "\n")

            # Line 24 will be replaced by D-ID once it is ready
            speak(translated_text.text)
    except sr.UnknownValueError:
        sys.exit(1)

# Call for Ernest (TBD)

In [34]:
while True:
    try:
        with sr.Microphone() as mic:
            recognizer = sr.Recognizer()
            recognizer.adjust_for_ambient_noise(mic, duration = 0.3)
            audio = recognizer.listen(mic, phrase_time_limit=3)
            call = recognizer.recognize_google(audio).lower()
            print("You:\n", call)

            if 'ernest' in call or 'honest' in call or 'hyannis' in call or 'earnest' in call:
                trigger_response()
    except sr.UnknownValueError:
        print("No audio detected. Continue detecting in background...")
        continue

You:
 earnest
Error posting results to database. Please check your connection.
You:
 Can you recommend me an account? 



AttributeError: 'NoneType' object has no attribute 'group'