In [None]:
import os
import subprocess
import uuid
from enum import Enum
import openai
import speech_recognition as sr

In [None]:
"""

Short-term memory is the record of the current chat.
Long-term memory could be indexed using a one-sentence summary of each entire chat
(short enough that many summaries can be parsed without running out of tokens)
and saved to an external file.

"""

# The three speaker roles used as the 'role' field of every chat message.
Role = Enum('Role', ['system', 'user', 'assistant'])


class ChatBot():
    """Interactive console chatbot backed by an OpenAI chat model.

    The conversation so far (the "short-term memory") is stored as a list of
    ``{'role': <role name>, 'content': <text>}`` dicts and is sent in full to
    the model on every turn.  An optional system prompt is collected when
    `run` starts and is re-inserted whenever the memory is cleared.
    """

    def __init__(self):
        self._model = "gpt-4"
        self._short_term_memory = []
        self._system_prompt = None

        self._id = uuid.uuid4().hex

    def run(self) -> None:
        """Ask for a system prompt, then enter the interactive action loop."""
        self._initialize()
        self._program_loop()

    def chat(self) -> None:
        """Read one typed user message and print the model's reply.

        Empty input is ignored (as `speak` does for an empty transcript), so
        no blank message is memorized or sent to the model.
        """
        prompt_ = self._capture_user_text_input()
        if not prompt_:
            return
        self._respond(prompt_)

    def forget_short_term_memory(self):
        """Clear the conversation, keeping only the system prompt (if any)."""
        self._short_term_memory = []
        if self._system_prompt is not None:
            self._memorize_short_term(self._system_prompt, Role.system)

    def show_short_term_memory(self) -> None:
        """Print the raw list of memorized messages."""
        print(self._short_term_memory)

    def speak(self):
        """Record one spoken user message, echo its transcript, print the reply.

        Does nothing when no transcript could be obtained.
        """
        prompt_ = self._capture_user_speech_input()
        if not prompt_:
            return
        # Echo the transcript so the user can see what was understood.
        self._output(prompt_, Role.user)
        self._respond(prompt_)

    def _respond(self, prompt_: str) -> None:
        """Memorize `prompt_`, query the model, then memorize and print the reply.

        If querying the model or converting its response fails, the user
        message is removed from memory again before the exception propagates,
        so a retry does not send a duplicated, unanswered user turn.
        """
        self._memorize_short_term(prompt_, Role.user)
        try:
            raw_response_ = self._prompt_llm()
            response_ = self._llm_output_to_text(raw_response_)
        except Exception:
            # Roll back the message appended just above, then re-raise.
            self._short_term_memory.pop()
            raise
        self._memorize_short_term(response_, Role.assistant)
        self._output(response_, Role.assistant)

    def _initialize(self):
        """Prompt for an optional system prompt; an empty answer means none."""
        sys_prompt_ = input("System prompt?\n")
        if sys_prompt_:
            self._system_prompt = sys_prompt_
            self._memorize_short_term(sys_prompt_, Role.system)

    def _program_loop(self) -> None:
        """Repeatedly ask for an action letter and run it until 'Q' is entered."""
        actions_ = {
            "C": self.chat,
            "S": self.speak,
            "F": self.forget_short_term_memory,
            "M": self.show_short_term_memory,
        }
        while True:
            input_ = input("Action? (C=chat, S=speak, F=forget, M=show memory, Q=quit)\n")
            if input_ == "Q":
                break
            action_ = actions_.get(input_)
            if action_ is None:
                print(f'Invalid input: {input_}')
            else:
                action_()

    def _memorize_short_term(self, memory_, role_) -> None:
        """Append `memory_` to the conversation under the name of `role_`."""
        self._short_term_memory.append({'role': role_.name, 'content': memory_})

    def _prompt_llm(self):
        """Send the whole short-term memory to the model and return the raw response."""
        return openai.ChatCompletion.create(
            model=self._model,
            messages=self._short_term_memory
        )

    @staticmethod
    def _capture_user_text_input():
        """Read one line of user text from standard input."""
        return input('User: ')

    @staticmethod
    def _capture_user_speech_input() -> str:
        """Record from the microphone and transcribe it with the Whisper API.

        Returns:
            The transcript, or "" when OPENAI_API_KEY is not set or the
            Whisper request fails (a message is printed in both cases).
        """
        # Check for the key before recording so no audio is captured in vain.
        api_key_ = os.environ.get('OPENAI_API_KEY')
        if not api_key_:
            print("OPENAI_API_KEY is not set; cannot transcribe speech.")
            return ""
        r = sr.Recognizer()
        with sr.Microphone() as source:
            print("Listening...", flush=True)
            r.adjust_for_ambient_noise(source, duration=0.2)
            audio = r.listen(source)
        # recognize speech using Whisper API
        try:
            return r.recognize_whisper_api(audio, api_key=api_key_)
        except sr.RequestError as e:
            print("Could not request results from Whisper API.")
            print(e)
            return ""

    @staticmethod
    def _output(output_: str, role_: Role) -> None:
        """Print `output_` prefixed with the name of the speaking role."""
        print(f'{role_.name}: {output_}', flush=True)

    @staticmethod
    def _llm_output_to_text(output_) -> str:
        """Extract the first choice's message text from a dict-like response.

        Raises:
            NotImplementedError: if `output_` is not a dict.
        """
        if not isinstance(output_, dict):
            # NOTE(review): the message mentions `str` although the check is for dict.
            raise NotImplementedError("Only `str` is supported so far. Deal with voice later.")
        return output_["choices"][0]["message"]["content"]

In [None]:
# Create a plain chatbot and start its interactive session
# (asks for a system prompt, then loops over the action menu).
c = ChatBot()
c.run()

In [None]:
# Adds an "E=execute" option to the actions: it runs the LLM's latest output as a shell script.
class Agent(ChatBot):
    """ChatBot that can additionally run the assistant's last reply as a shell command."""

    def _program_loop(self) -> None:
        """Repeatedly ask for an action letter and run it until 'Q' is entered."""
        actions_ = {
            "C": self.chat,
            "S": self.speak,
            "E": self.execute,
            "F": self.forget_short_term_memory,
            "M": self.show_short_term_memory,
        }
        while True:
            # The menu now lists E=execute, which the loop already handled.
            input_ = input("Action? (C=chat, S=speak, E=execute, F=forget, M=show memory, Q=quit)\n")
            if input_ == "Q":
                break
            action_ = actions_.get(input_)
            if action_ is None:
                print(f'Invalid input: {input_}')
            else:
                action_()

    def execute(self):
        """Run the most recent memory as a shell command if the assistant wrote it.

        Prints a message and does nothing when the memory is empty or when the
        last entry does not have the `assistant` role.
        """
        if not self._short_term_memory:
            # Guard against IndexError when nothing has been said yet.
            print("Nothing to execute: short-term memory is empty.")
            return
        last_memory_ = self._short_term_memory[-1]
        if last_memory_['role'] != Role.assistant.name:
            print(f"Last memory must be of `assistant`, but is of role=`{last_memory_['role']}`")
            return
        # SECURITY: this passes unreviewed model output to the system shell.
        # shell=True is intentional (the reply is treated as a script), but the
        # command can do anything the current user can; review before using "E".
        subprocess.run(last_memory_['content'], shell=True)
        

In [None]:
# Create an agent (a chatbot with the extra "E" execute action) and start
# its interactive session.
a = Agent()
a.run()

In [None]:
class RecursiveAgent(Agent):
    """Placeholder subclass of `Agent`; it adds no behaviour of its own yet."""

In [None]:
class ParallelRecursiveAgent(RecursiveAgent):
    """Placeholder subclass of `RecursiveAgent`; it adds no behaviour of its own yet."""