In [1]:
import openai
import os
import json

from google.cloud import speech
from google.cloud import texttospeech_v1
from unicodedata import name

from urllib import response
from pydub import AudioSegment
from pydub.playback import play

from IPython.display import Audio, display, clear_output
import ipywidgets as widgets
from io import BytesIO

import time
import threading


from langchain.text_splitter import NLTKTextSplitter
import pinecone
import json

from termcolor import colored

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'PATH TO YOUR CREDENTIALS'
openai.api_key = "YOUR API KEY"
pinecone.init(api_key='YOUR API KEY', environment='gcp-starter') 


  from tqdm.autonotebook import tqdm


In [2]:
done = True
conversationDone = False
recording = False

In [3]:
import sounddevice as sd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
import wavio
from pydub import AudioSegment

samplerate = 44100  # Hertz
channels = 1
dtype = 'float32'
blocksize = 1024  # Number of samples per block



In [4]:
#Returns the actual text information
def query_pinecone(query_vector, vector_dict):
    pc_index = pinecone.Index('host-gpt-index') 
    response = pc_index.query(queries=query_vector,top_k=3)
    text_results = []
    for instance in response:
        text_results.append(vector_dict[instance['id']])
                            
    return text_results

def get_vector_dict():
    with open("vector_dict.json", "r") as file:
        vector_dict = json.load(file)
        return vector_dict
    
def get_embedding(text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    embedding = openai.Embedding.create(input = [text], model=model)['data'][0]['embedding']
    #print(len(embedding))
    return embedding

def convert_to_audio(text):

    client = texttospeech_v1.TextToSpeechClient()
    synthesis_input = texttospeech_v1.SynthesisInput(ssml=text)
    text = '<speak>' + text + '</speak>'
    voice1 = texttospeech_v1.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-News-M",
        ssml_gender=texttospeech_v1.SsmlVoiceGender.FEMALE,
    )

    audio_config = texttospeech_v1.AudioConfig (
        audio_encoding=texttospeech_v1.AudioEncoding.MP3
    )

    response1 = client.synthesize_speech (
        input=synthesis_input,
        voice=voice1,
        audio_config=audio_config)

    with open('audio.mp3', 'wb',) as output:
        output.write(response1.audio_content)
    audio = AudioSegment.from_mp3('audio.mp3')
    
    a = play(audio)
    
def convert_to_text(file_path):
    
    client = speech.SpeechClient()

    with open(file_path, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code="en-US",
    )

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)
    
    result_str = ""
    for result in response.results:
        result_str += result.alternatives[0].transcript
    return result_str

In [5]:

class HostGPTAgent():
    #The agent needs to continue until the objective is satisfied
    def __init__(self, objective, use_case, personality, vector_dict):
        self.objective = objective
        self.use_case = use_case
        self.memory = ""
        self.questions_list = []
        self.personality = personality
        self.vector_dict = vector_dict
        self.buffer = []
        self.stream = sd.InputStream(samplerate=samplerate, channels=channels, dtype=dtype, blocksize=blocksize, callback=self.callback)
        self.history = ""
        
    def make_conversation(self, use_case, objective, memory, user_input, questions_list, answers):
        #print(f"User said {user_input}")
        completion = openai.ChatCompletion.create(
            model='gpt-4-0613',
            messages=[
                {'role':'system', "content":f"Personality: {self.personality}. Instructions: If you have questions to answer, only answer those questions using the relevant context. If you can't find the correct answer, say that you do not know the answer. The customer may be wrong, so check with the information you are given. If there are no questions, continue asking for information about the objectives one at a time."},
                {'role':"user", "content":f"Objective: {str(objective)}. Answers to user questions: {answers}\n Conversation: {str(memory)} \n User:{user_input}, AI:"}
            ],
        )
        response = completion["choices"][0]["message"]["content"]
        memory = str(memory) + str(response)
        return memory, response
            
    def run(self):
        status = False
        global done
        global conversationDone
        while True:
            if done == True:
                done = False
                start_button = widgets.Button(description="Start Recording")
                stop_button = widgets.Button(description="Stop Recording")
                start_button.on_click(self.start_recording)
                stop_button.on_click(self.stop_recording)
                display(widgets.HBox([start_button, stop_button]))
            
            if conversationDone:
                convert_to_audio("Thank you for ordering with us, your order will be ready in 20-30 minutes")
                print(colored("AI: Thank you for ordering with us, your order will be ready in 20-30 minutes", 'red'))
                break
            
        print("AI hangs up the phone!")
        
    def reset(self, objective):
        self.memory = ""
        self.questions_list = []
        self.buffer = []
        self.objective = objective
        
    def continue_run(self):
        user_input = convert_to_text('audio.wav')
        self.memory += f"\n User: {user_input}"
        self.history += colored(f"User: {user_input}", 'blue')
        print(colored(f"User: {user_input}", 'blue'))
        
        questions_list = self.parse_question(user_input)
        self.history += f"\n PARSED QUESTIONS: {questions_list}"
        
        answers = self.query_database(questions_list)
        self.history += f"\n CONTEXT FOR QUESTION: {answers}"
        
        status, self.objective = self.check_progress(self.objective, str(self.memory))
        self.history += f"\n REMAINING OBJECTIVES: {self.objective}"
        
        if status: #Check after the user responds, not when the AI does
            global conversationDone
            conversationDone = True
            return

        self.memory, response = self.make_conversation(self.use_case, self.objective, str(self.memory), user_input, self.questions_list, answers)
        self.memory += f"AI: {response}"
        self.history += colored(f"\n AI: {response}", 'red')
        print(colored(f"AI: {response}", "red"))
        
        convert_to_audio(response)

        

        return
        
    #Checks whether it has completed the objective
    def check_progress(self, objective, memory):
        completion = openai.ChatCompletion.create(
            model='gpt-4-0613',
            messages=[{'role':'system', "content":"You must call both functions in your response."},{'role':"user", "content":"Carefully compare the conversation to the required objectives. Conversation:" + memory + ", Objective:" + str(objective)}],
            functions=[{
                "name":"check_for_completion",
                "description":f"A function that needs to know if the objectives have been met",
                "parameters": {
                    "type":"object",
                    "properties": {
                        "status": {
                            "type":"boolean",
                            "description":"Enter true if there is sufficient information for each part of the objective, and false otherwise"
                        },
                        "remaining_tasks": {
                            "type":"string",
                            "description":"A string that writes out the uncompleted objectives. Please use the same text as given in Objective:. Separate each remaining objective with a semicolon ';'. Ex: 1.Take out the trash;2.Clean the kitchen"
                        },
                        "unit":{"type":"string"}
                    }
                }

            }],
                function_call='auto'
            )
        reply = completion.choices[0].message
        reply_dict = json.loads(reply["function_call"]["arguments"])
        status = reply_dict["status"]
        
        tasks_list = []
        if status == False:
            remaining_tasks = reply_dict["remaining_tasks"]
            tasks_list = remaining_tasks.split(';')
        return status, tasks_list
    
    #Given a caller's questions, get the answers
    def query_database(self, questions):
        answers = []
        pc_index = pinecone.Index('host-gpt-index') 
        for question in questions:
            if question != "":
                query_vector = get_embedding(question)
                
                #Then query this embedding with pinecone
                response = pc_index.query(vector=query_vector,top_k=5)
                print(response)
                for a in response['matches']:
                    answers.append(self.vector_dict[a['id']])
                    
        answers = self.parse_answers(questions, answers)         
            
        return answers
    
    def parse_answers(self, questions, answers):
        completion = openai.ChatCompletion.create(
            model='gpt-4-0613',
            messages=[{'role':"user", "content":f"Questions: {questions}. Information to answer those questions: {answers}"}, {'role':'system', "content":"You must make the function call. Write your answer(s) in a short, summarized manner. The context may contain useless information, so if it doesn't directly answer your question say you don't know"}],
            functions=[{
                "name":"answer_question",
                "description":f"Writes down the answers to the questions",
                "parameters": {
                    "type":"object",
                    "properties": {
                        "answers": {
                            "type":"string",
                            "description":f" If there are multiple answers, separate them by semicolons. Ex: 'The answer to (question) is No, that's not on our menu"
                        },
                        "unit":{"type":"string"}
                    }
                }

            }],
                function_call={'name': 'answer_question'}
            )
        reply = completion.choices[0].message
        answers_dict = json.loads(reply["function_call"]["arguments"])
        answers_value = answers_dict["answers"]
        answers_list = answers_value.split(';')
        print(answers_list)
        
        return answers_list
    
    #Given a caller's message, isolate any questions they have
    def parse_question(self, user_response):
        completion = openai.ChatCompletion.create(
            model='gpt-4-0613',
            messages=[{'role':"user", "content":user_response}, {'role':'system', "content":f"You must make the function call. You must ask a question for each subject of the conversation that is related to {self.use_case} and requires validation of availability or accuracy. If the user doesn't say anything related to  {self.use_case} that requires validation enter ''."}],
            functions=[{
                "name":"answer_question",
                "description":f"Useful to answer specific questions related to {self.use_case}",
                "parameters": {
                    "type":"object",
                    "properties": {
                        "questions": {
                            "type":"string",
                            "description":f"Write your question(s) in a complete sentence. If there are multiple, separate them by semicolons. Ex: 'Do we have gluten free options?;How late are you open?'"
                        },
                        "unit":{"type":"string"}
                    }
                }

            }],
                function_call={'name': 'answer_question'}
            )
        #Process the function call into a list
        reply = completion.choices[0].message
        questions_dict = json.loads(reply["function_call"]["arguments"])
        questions_value = questions_dict["questions"]
        questions_list = questions_value.split(';')
        
        return questions_list
    
    def start_recording(self, b):
        self.stream.start()
        global recording
        global started
        started = True
        recording = True
        print("Recording started. Press 'Stop' to stop recording.")
        
    def callback(self, indata, frames, time, status):
        if recording:
            self.buffer.append(indata.copy())
        
    def stop_recording(self, b):
        global recording
        recording = False
        print("Recording stopped.")
        self.stream.stop()
        # Check if the buffer is empty
        if len(self.buffer) == 0:
            print("No audio data recorded.")
            return
        
        # Concatenate the blocks of audio data and save
        audio_data = np.concatenate(self.buffer, axis=0)
        wavio.write("audio.wav", audio_data, samplerate, sampwidth=2)
        
        # Clear the buffer for the next recording
        self.buffer = []
        self.continue_run()
        done = True


In [6]:
objective = """
1. Get the customer's entire order. 
2. Get the customer's name.
3. Get the customer's phone number. 
4.Repeat the customer's order and make sure it is complete and correct
"""
use_case = "Flathead Lake Brewing Co."
vector_dict = get_vector_dict()
personality = f"You are an employee at {use_case} who takes customer orders and to-go orders and answers their questions. You speak like a human would, are smart, respectful, and experienced in customer service. You don't make up anything that you don't know and don't respond to anything unrelated to {use_case}. "
agent = HostGPTAgent(objective=objective,use_case=use_case,personality=personality, vector_dict=vector_dict)

In [7]:
threading.Thread(target=agent.run).start()


HBox(children=(Button(description='Start Recording', style=ButtonStyle()), Button(description='Stop Recording'…

Recording started. Press 'Stop' to stop recording.
Recording stopped.
[34mUser: how much would it cost to order 3 BBQ bacon Burgers a grilled chicken on the side and a full garden with salmon[0m
{'matches': [{'id': 'vec20', 'score': 0.921652555, 'values': []},
             {'id': 'vec21', 'score': 0.891476929, 'values': []},
             {'id': 'vec23', 'score': 0.889230728, 'values': []},
             {'id': 'vec49', 'score': 0.878006518, 'values': []},
             {'id': 'vec19', 'score': 0.866039515, 'values': []}],
 'namespace': ''}
{'matches': [{'id': 'appvec12', 'score': 0.900811136, 'values': []},
             {'id': 'vec56', 'score': 0.889998317, 'values': []},
             {'id': 'appvec14', 'score': 0.864158154, 'values': []},
             {'id': 'appvec13', 'score': 0.86284554, 'values': []},
             {'id': 'appvec6', 'score': 0.85964936, 'values': []}],
 'namespace': ''}
{'matches': [{'id': 'vec14', 'score': 0.851736128, 'values': []},
             {'id': 'vec15', '

Input #0, wav, from '/var/folders/pv/z7lkp1pj4g7gy41fmks2clg80000gn/T/tmpj53y5f6v.wav':
  Duration: 00:00:21.91, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
  21.80 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




In [11]:
print(agent.history)
#agent.reset(objective)

[34mUser: how much would it cost to order 3 BBQ bacon Burgers a grilled chicken on the side and a full garden with salmon[0m
 PARSED QUESTIONS: ['How much does a BBQ bacon Burger cost?', ' How much does a grilled chicken on the side cost?', ' How much does a full garden with salmon cost?']
 CONTEXT FOR QUESTION: ['The BBQ Bacon Burger costs $17', ' A side of Grilled Chicken costs $6', ' The cost of a Full Garden Salad is $11 but the information provided does not include the cost of adding Salmon']
 REMAINING OBJECTIVES: ["2. Get the customer's name", " 3. Get the customer's phone number", " 4.Repeat the customer's order and make sure it is complete and correct"][31m
 AI: The cost of your order would be as follows: three BBQ Bacon Burgers at $17 each would total $51. A side of Grilled Chicken is $6. A full Garden Salad is $11. However, I don't have the cost information for adding Salmon to the salad. So the total without the cost of Salmon is $68. Would you like to proceed with this 

In [9]:
#Functions to load the info into pinecone

# def chunk_data(folder_path='data'):
#     docs = []
#     for file_name in os.listdir(folder_path):
#         if file_name== 'menu_addons.txt':
#             file_path = os.path.join(folder_path, file_name)
#             with open(file_path, 'r') as file:
#                 data = file.read()
#                 text= data.split('\n')
#                 for t in text:
#                     if t != '':
#                         docs.append(t)
#     return docs

# def pinecone_upsert(chunked_text):
#     pc_index = pinecone.Index('host-gpt-index')

#     vector_list = []
#     ids_list = []

#     for index, chunk in enumerate(chunked_text):
#         vector = get_embedding(chunk)
#         vector_list.append(vector)

#         vector_id=f"appvec{index}"
#         ids_list.append(vector_id)

#     pc_index.upsert(vectors=zip(ids_list, vector_list))

#     # Read the existing data from the JSON file
#     with open("vector_dict.json", "r") as file:
#         data = json.load(file)

#     # Add the new data to the JSON dictionary
#     for vector_id, text in zip(ids_list, chunked_text):
#         data[vector_id] = text

#     # Write the updated data back to the JSON file
#     with open("vector_dict.json", "w") as file:
#         json.dump(data, file)

#     return dict(zip(ids_list, vector_list))
    

In [10]:
#pc_index = pinecone.Index('host-gpt-index')
#delete_response = pc_index.delete(ids=['vec-0'])