In [1]:
import autogen
from autogen import config_list_from_json
from autogen.retrieve_utils import TEXT_FORMATS
from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
import chromadb
import os
import autogen

# Unfiltered configuration list read from OAI_CONFIG_LIST.json.
config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST.json")

# Only the entries whose "model" is gpt-3.5-turbo.
cheap_config_list = config_list_from_json(
    env_or_file="OAI_CONFIG_LIST.json",
    filter_dict={"model": {"gpt-3.5-turbo"}},
)

# Only the entries whose "model" is gpt-4-1106-preview.
costly_config_list = config_list_from_json(
    env_or_file="OAI_CONFIG_LIST.json",
    filter_dict={"model": {"gpt-4-1106-preview"}},
)

# LLM settings shared by the `assistant` and `critic` agents below.
LLM_CONFIG = dict(
    cache_seed=42,  # change the cache_seed for different trials
    temperature=0,
    # Either cheap_config_list or costly_config_list can be used here.
    config_list=costly_config_list,
    timeout=120,  # Default was 120
    # tools=tools_list,  # TESTING: function calling and automated admin
)

# Retrieval settings handed to RetrieveUserProxyAgent through `retrieve_config`.
RETRIEVE_CONFIG = dict(
    # Possible values are "code", "qa" and "default". The system prompt differs
    # per task; "default" supports both code and qa.
    task="qa",
    # Documents are read from the "data" folder under the current working directory.
    docs_path=[os.path.abspath("data")],
    # custom_text_types=["mdx"],  # Default: autogen.retrieve_utils.TEXT_FORMATS = ['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']
    chunk_token_size=2000,
    # Reuse the model name of the first entry in the active LLM config list.
    model=LLM_CONFIG["config_list"][0]["model"],
    client=chromadb.PersistentClient(path="/tmp/chromadb"),
    embedding_model="all-mpnet-base-v2",
    # Set to False if you don't want to reuse an existing collection,
    # but you'll need to remove the collection manually.
    get_or_create=True,
)

# Code-execution settings: target folder for generated code and the Docker switch.
CODE_EXECUTION_CONFIG = dict(
    # code=None,
    # Codes will be saved in this folder, if "save the code to disk." is used in the prompt.
    work_dir="generated_code",
    # filename="test1.py",
    use_docker=False,
)

In [2]:
# Show which file formats are accepted for `docs_path`, i.e. which files
# can be stored in a vector database instance.
from autogen.retrieve_utils import TEXT_FORMATS

print("Accepted file formats for `docs_path`:", TEXT_FORMATS, sep="\n")

Accepted file formats for `docs_path`:
['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']


In [3]:
# 1. Create a RetrieveAssistantAgent instance named "assistant".
assistant = RetrieveAssistantAgent(
    llm_config=LLM_CONFIG,
    system_message="You are a helpful assistant.",
    name="assistant",
)

# 2. Create the RetrieveUserProxyAgent instance named "ragproxyagent".
# By default human_input_mode is "ALWAYS" (the agent asks for human input at
# every step); it is set to "NEVER" here.
# The retrieval options come from RETRIEVE_CONFIG:
#   - `docs_path`: path to the docs directory. It can also be the path to a
#     single file, or the url to a single file. The default, None, works only
#     if the collection is already created.
#   - `task`: the kind of task being worked on; RETRIEVE_CONFIG sets it to "qa".
#   - `chunk_token_size`: chunk token size for the retrieve chat. The default
#     is `max_tokens * 0.6`; here it is 2000.
#   - `custom_text_types`: list of file types to process, defaulting to
#     `autogen.retrieve_utils.TEXT_FORMATS`. It only applies to files under the
#     directories in `docs_path`; explicitly included files and urls are
#     chunked regardless of their types. The key is commented out in
#     RETRIEVE_CONFIG, so the default list is in effect.
ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    retrieve_config=RETRIEVE_CONFIG,
    human_input_mode="NEVER",
    max_consecutive_auto_reply=0,
    code_execution_config=False,  # False: do not execute code
)


# Plain (non-retrieval) proxy agent, used as the baseline against the RAG proxy.
# Alternatives tried during testing, kept for reference:
#   system_message="An automated admin. Once the executor executes a code without any errors, terminate the chat"  # TESTING: automated admin
#   default_auto_reply='Are you sure this is the best prompt for another LLM to generate code? Pls rewrite it.'
user_proxy = autogen.UserProxyAgent(
    name="user_proxy_agent",
    system_message="Automated agent",
    code_execution_config=False,
    human_input_mode="NEVER",
    max_consecutive_auto_reply=0,
)

# Assistant agent named "critic"; it is the recipient of the question-expansion chats.
# The system message keeps its original leading space and trailing newline/indent.
critic = autogen.AssistantAgent(
    llm_config=LLM_CONFIG,
    name="critic",
    system_message=''' You are a helpful assistant.
    ''',
)

In [16]:
# Short user request that the critic is asked to expand.
content = "may i get a time extension pls"

# Prompt layout: the instruction, three newlines, then the request in single quotes.
message = (
    "expand the following question to add more relevant question. think of the 5 most relevant questions, and select 3 of the best ones and put them into 1 question. only return the final question."
    "\n\n\n"
    f"'{content}'"
)

message

"expand the following question to add more relevant question. think of the 5 most relevant questions, and select 3 of the best ones and put them into 1 question. only return the final question.\n\n\n'may i get a time extension pls'"

## Comparing critic with RAG proxy agent vs normal proxy agent

<b>Conclusion: the RAG proxy agent is needed for the critic to produce a better expanded question, but at the cost of more tokens used and slower responses.</b>

Benefits: More accurate expansion of question due to more context.

Cost: 
1) More tokens used, so costs more. 
2) Responses are also slower: about 4s without RAG vs 8s with RAG.

In [17]:
# Baseline run: send the expansion prompt to the critic through the plain proxy.
user_proxy.initiate_chat(
    critic,
    message=message,
)

user_proxy_agent (to critic):

expand the following question to add more relevant question. think of the 5 most relevant questions, and select 3 of the best ones and put them into 1 question. only return the final question.


'may i get a time extension pls'

--------------------------------------------------------------------------------
critic (to user_proxy_agent):

Could you please provide me with a time extension for my current project, and if so, what is the process for requesting it, and are there any specific conditions or limitations I should be aware of?

--------------------------------------------------------------------------------


In [18]:
# RAG run: send the same expansion prompt to the critic through the retrieval proxy.
ragproxyagent.initiate_chat(
    critic,
    problem=message,
)

doc_ids:  [['doc_12', 'doc_21', 'doc_3', 'doc_30', 'doc_11', 'doc_29', 'doc_2', 'doc_20', 'doc_5', 'doc_32', 'doc_23', 'doc_14', 'doc_31', 'doc_4', 'doc_13', 'doc_22', 'doc_38', 'doc_37', 'doc_33', 'doc_15']]
Adding doc_id doc_12 to context.
Adding doc_id doc_21 to context.
Adding doc_id doc_3 to context.
ragproxyagent (to critic):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.
If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
You must give as short an answer as possible.

User's question is: expand the following question to add more relevant question. think of the 5 most relevant questions, and select 3 of the best ones and put them into 1 question. only return the final question.


'may i get a time extension pls'

Context is:  
Thank you for your submission of the <Survey name> . 
Would it be convenient for me to call you at <xxx am / pm

In [34]:
# Take the content of the last message exchanged with the critic in each chat:
# `expanded_message` from the RAG proxy chat, `expanded_message1` from the plain proxy chat.
expanded_message = ragproxyagent.last_message(critic)["content"]
expanded_message1 = user_proxy.last_message(critic)["content"]

# One line per message, RAG result first.
print(expanded_message, expanded_message1, sep="\n")

Could you please confirm if an extension for the survey submission is possible, and if so, how long can it be, and what is the process to request it?
Could you please provide me with a time extension for my current project, and if so, what is the process for requesting it, and are there any specific conditions or limitations I should be aware of?


In [38]:
# Final prompt: a fixed instruction line, then the RAG-expanded question in
# single quotes, each followed by a newline.
problem = (
    "Always answer all questions. Always say if you are not sure of some parts of the question.\n"
    f"'{expanded_message}'\n"
)

problem

"Always answer all questions. Always say if you are not sure of some parts of the question.\n'Could you please confirm if an extension for the survey submission is possible, and if so, how long can it be, and what is the process to request it?'\n"

In [39]:
# TODO: check whether the assistant must be reset before each new conversation
# (it is said to always reset; not yet verified).
# assistant.reset()

# Ask the retrieval assistant the expanded question through the RAG proxy.
ragproxyagent.initiate_chat(
    assistant,
    problem=problem,
)

doc_ids:  [['doc_3', 'doc_12', 'doc_30', 'doc_21', 'doc_22', 'doc_4', 'doc_13', 'doc_31', 'doc_15', 'doc_33', 'doc_6', 'doc_24', 'doc_11', 'doc_29', 'doc_2', 'doc_20', 'doc_23', 'doc_32', 'doc_5', 'doc_14']]
Adding doc_id doc_3 to context.
Adding doc_id doc_12 to context.
Adding doc_id doc_30 to context.
ragproxyagent (to assistant):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.
If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
You must give as short an answer as possible.

User's question is: Always answer all questions. Always say if you are not sure of some parts of the question.
'Could you please confirm if an extension for the survey submission is possible, and if so, how long can it be, and what is the process to request it?'


Context is:  
Thank you for your submission of the <Survey name> . 
Would it be convenient for me to call y