In [1]:
# # Clear Chat history
# with open("data/chat_history.txt", "w") as file:
#     file.write("Conversation History:\n")

In [2]:
import autogen
from autogen import config_list_from_json
from autogen.retrieve_utils import TEXT_FORMATS
from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
import chromadb
import os
import autogen

config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST.json")
cheap_config_list = autogen.config_list_from_json(
    env_or_file="OAI_CONFIG_LIST.json",  
    filter_dict={
        "model": {
            "gpt-3.5-turbo",
        }
    }
)

costly_config_list = autogen.config_list_from_json(
    env_or_file="OAI_CONFIG_LIST.json", 
    filter_dict={
        "model": {
            "gpt-4-1106-preview",
        }
    }
)

LLM_CONFIG = {
    "cache_seed": 42,  # change the cache_seed for different trials
    "temperature": 0,
    "config_list": costly_config_list, # Can be amended to either cheap_config_list or costly_config_list
    "timeout": 120, # Default was 120
    # "tools": tools_list, # TESTING: function calling and automated admin 
}

RETRIEVE_CONFIG={
        "task": "qa", # Possible values are "code", "qa" and "default". System prompt will be different for different tasks. The default value is `default`, which supports both code and qa.
        "docs_path": [
            os.path.join(os.path.abspath(''), "data"),
        ],
        # "custom_text_types": ["mdx"], # Default: autogen.retrieve_utils.TEXT_FORMATS = ['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']
        "chunk_token_size": 2000,
        "model": LLM_CONFIG["config_list"][0]["model"],
        "client": chromadb.PersistentClient(path="/tmp/chromadb"),
        "embedding_model": "all-mpnet-base-v2",
        "get_or_create": True,  # set to False if you don't want to reuse an existing collection, but you'll need to remove the collection manually
        "must_break_at_empty_line": False # Not too sure what the difference is between true and false here might need to experiment
    }

CODE_EXECUTION_CONFIG={
        # "code": None,
        "work_dir": "generated_code", # Codes will be saved in this folder, if "save the code to disk." is used in the prompt
        # "filename": "test1.py",
        "use_docker": False
    }

In [3]:
# Accepted file formats for that can be stored in 
# a vector database instance
from autogen.retrieve_utils import TEXT_FORMATS

print("Accepted file formats for `docs_path`:")
print(TEXT_FORMATS)

Accepted file formats for `docs_path`:
['json', 'rtf', 'org', 'tsv', 'rst', 'log', 'html', 'xml', 'msg', 'epub', 'pptx', 'doc', 'pdf', 'docx', 'txt', 'jsonl', 'yml', 'xlsx', 'htm', 'ppt', 'yaml', 'csv', 'odt', 'md']


In [4]:
# 1. create an RetrieveAssistantAgent instance named "assistant"
assistant = RetrieveAssistantAgent(
    name="assistant",
    system_message="You are a helpful and cheerful assistant. Your job is to answer queries about the Maritime Census.",
    llm_config=LLM_CONFIG,
)

# 2. create the RetrieveUserProxyAgent instance named "ragproxyagent"
# By default, the human_input_mode is "ALWAYS", which means the agent will ask for human input at every step. We set it to "NEVER" here.
# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,
# it is set to None, which works only if the collection is already created.
# `task` indicates the kind of task we're working on. In this example, it's a `code` task.
# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.
# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.
# This only applies to files under the directories in `docs_path`. Explictly included files and urls will be chunked regardless of their types.
# In this example, we set it to ["mdx"] to only process markdown files. Since no mdx files are included in the `websit/docs`,
# no files there will be processed. However, the explicitly included urls will still be processed.
ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=0,
    retrieve_config=RETRIEVE_CONFIG,
    code_execution_config=False, # set to False if you don't want to execute the code
)


# user_proxy = autogen.UserProxyAgent(
#    name="user_proxy_agent",
#    system_message="Automated agent",
# #    system_message="An automated admin. Once the executor executes a code without any errors, terminate the chat", # TESTING: automated admin
#    human_input_mode="NEVER",
#    max_consecutive_auto_reply=0,
# #    default_auto_reply='Are you sure this is the best prompt for another LLM to generate code? Pls rewrite it.',
#    code_execution_config=False,
# )   

critic = autogen.AssistantAgent(
    name="critic",
    system_message=''' You are a helpful assistant.
    ''',
    llm_config=LLM_CONFIG,
)

In [5]:
test_questions = ["May I get a time extension please?",
                  "Can you give me more information about Section H of the Maritime Census?",
                  "Can I update my contact details?",
                  "Why does MPA conduct the Maritime Census?",
                  "Is my company's information kept confidential?"]

content = test_questions[4]
# with open('data/chat_history.txt', 'r') as file:
#     chat_history = file.read()
message = f"""expand the following question to add more relevant questions. think of the 5 most relevant supplementary questions, select the top 1 question and add it to the original question. Only return the final question.
\n
Question: What is my age?
# Question: '{content}'
"""
# CONVERSATION HISTORY: '{chat_history}'

message

"expand the following question to add more relevant questions. think of the 5 most relevant supplementary questions, select the top 1 question and add it to the original question. Only return the final question.\n\n\nQuestion: What is my age?\n# Question: 'Is my company's information kept confidential?'\n"

<b><u>Answers to test questions</b></u>

<u>May I get a time extension please?</u>

You can request an extension for the Maritime Census survey submission by referring to go.gov.sg/mpasurvey-help. If granted, extensions are typically up to 2-3 weeks, but this may be shortened as the survey closing date approaches.

<u>Can you give me more information about Section H of the Maritime Census?</u>

Section H of the Maritime Census is not specifically detailed in the provided context. To access the Maritime Industry Survey Portal and complete the survey, you can request an extension or assistance by referring to go.gov.sg/mpasurvey-help or by contacting the Statistics and Data Systems Department of MPA at annualsurvey@mpa.gov.sg.

<u>Can I update my contact details?</u>

Yes, you can update your contact details. To ensure they are correctly changed in our system, please send us an email with your request.

<u>Why does MPA conduct the Maritime Census?</u>

The MPA conducts the Maritime Census to collect timely statistics on the maritime industry's activities, technology, sustainability, and manpower developments, which support the development of the maritime community by assisting in policy making and the development of beneficial programs.

<u>Is my company's information kept confidential?</u>

Your company's information is kept confidential in accordance with the Statistics Act 1973.

## Comparing critic with RAG proxy agent vs normal proxy agent

<b>Conclusion: RAG proxy agent is needed for critic to give a better expanded question. But at the cost of more tokens used and slower respons</b>

Benefits: More accurate expansion of question due to more context.

Cost: 
1) More tokens used, so costs more. 
2) Also slower response, 4s vs 8s. 

<b><u>CPU vs GPU</u></b>

user_proxy(CPU): 0s

rag_proxy(CPU): 28s

user_proxy(GPU): 0s

rag_proxy(GPU): 30-35s

In [6]:
# user_proxy.initiate_chat(critic, message=message)

In [7]:
ragproxyagent.initiate_chat(critic, problem=message)

Trying to create collection.
doc_ids:  [['doc_26', 'doc_2', 'doc_18', 'doc_10', 'doc_21', 'doc_29', 'doc_13', 'doc_5', 'doc_30', 'doc_22', 'doc_14', 'doc_32', 'doc_6', 'doc_4', 'doc_20', 'doc_12', 'doc_28', 'doc_33', 'doc_15', 'doc_31']]
[32mAdding doc_id doc_26 to context.[0m
[32mAdding doc_id doc_2 to context.[0m
[32mAdding doc_id doc_18 to context.[0m
[33mragproxyagent[0m (to critic):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.
If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
You must give as short an answer as possible.

User's question is: expand the following question to add more relevant questions. think of the 5 most relevant supplementary questions, select the top 1 question and add it to the original question. Only return the final question.


Question: What is my age?
# Question: 'Is my company's information kept conf

In [8]:
expanded_message = ragproxyagent.last_message(critic)['content']
print(expanded_message)


UPDATE CONTEXT


In [11]:
problem = f"""Always say if you are not sure of some parts of the question. Answer the question in a full sentence.
If you can't answer the question with or without the current context, you should reply exactly "Sorry, I am unable to answer your question given the current context. Would you mind providing more context on your query?"

Question: '{expanded_message}'
"""


problem

'Always say if you are not sure of some parts of the question. Answer the question in a full sentence.\nIf you can\'t answer the question with or without the current context, you should reply exactly "Sorry, I am unable to answer your question given the current context. Would you mind providing more context on your query?"\n\nQuestion: \'UPDATE CONTEXT\'\n'

Tested with Chat history in data and chat history in prompt

Chat history in prompt: UPDATE CONTEXT

Chat history in data: Granted till DDMMYY     or      Yes

In [12]:
assistant.reset() # it says always to reset, but havent read thru to find out more

ragproxyagent.initiate_chat(assistant, problem=problem)
answer = assistant.last_message(ragproxyagent)['content']

answer

# with open('data/chat_history.txt', 'a') as file:
#     file.write("Question: " + expanded_message + "\nAnswer: " + answer + "\n")



doc_ids:  [['doc_3', 'doc_19', 'doc_11', 'doc_27', 'doc_2', 'doc_26', 'doc_10', 'doc_18', 'doc_28', 'doc_4', 'doc_20', 'doc_12', 'doc_35', 'doc_34', 'doc_22', 'doc_32', 'doc_30', 'doc_6', 'doc_14', 'doc_13']]
[32mAdding doc_id doc_3 to context.[0m
[32mAdding doc_id doc_19 to context.[0m
[32mAdding doc_id doc_11 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.
If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
You must give as short an answer as possible.

User's question is: Always say if you are not sure of some parts of the question. Answer the question in a full sentence.
If you can't answer the question with or without the current context, you should reply exactly "Sorry, I am unable to answer your question given the current context. Would you mind providing more context on your q

'Sorry, I am unable to answer your question given the current context. Would you mind providing more context on your query?'