In [1]:
import autogen

from autogen import AssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent

import nest_asyncio
nest_asyncio.apply()


# Load the endpoint definitions from the OAI_CONFIG_LIST source (looked up with
# file_location=".."), keeping only the entries whose "model" is listed below.
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    file_location="..",
    filter_dict={"model": ["gpt-3.5-turbo", "gpt-35-turbo", "gpt-35-turbo-0613", "gpt-4", "gpt4"]},
)

# Show which models survived the filter.
print("LLM models: ", [entry["model"] for entry in config_list])

# Termination message definition
def termination_msg(x):
    """Return True when message ``x`` signals the end of the conversation.

    A message terminates the chat when it is a dict whose ``"content"``,
    converted to ``str`` and stripped of trailing whitespace, ends with
    ``TERMINATE`` (case-insensitive). Matching the end of the text rather than
    the whole text is deliberate: the agent prompts ask for 'TERMINATE' "in the
    end" of a reply, so the keyword normally follows other text.

    Args:
        x: The message to inspect; anything that is not a dict is rejected.

    Returns:
        bool: True if the message ends with the termination keyword.
    """
    if not isinstance(x, dict):
        return False
    # str() keeps a None content harmless: "None" does not end with TERMINATE.
    content = str(x.get("content", ""))
    return content.rstrip().upper().endswith("TERMINATE")



LLM models:  ['gpt-4']


In [2]:
# Settings shared by the group chat manager and by every LLM-backed agent.
manager_config = {
    "config_list": config_list,  # built in the first cell
    "timeout": 60,
    "seed": 42,
}

# Agent configuration: the JSON schema that advertises the `retrieve_content`
# function to the model, followed by the shared settings above.
llm_config = {
    "functions": [
        {
            "name": "retrieve_content",
            "description": "retrieve content for code question answering.",
            "parameters": {
                "type": "object",
                "properties": {
                    "message": {
                        "type": "string",
                        "description": "Refined message which keeps the original meaning and can be used to retrieve content from research papers for reference.",
                    }
                },
                "required": ["message"],
            },
        },
    ],
    **manager_config,
}


In [8]:
import asyncio
from typing import Dict, List, Optional, Union, Callable
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from autogen.formatting_utils import colored
from typing_extensions import Annotated

import arxiv

class ArxivUserProxyAgent(RetrieveUserProxyAgent):
    """Retrieval user-proxy agent whose document source is the arXiv search API.

    ``retrieve_docs`` queries arXiv for the problem statement and stores the
    hits on ``self._results``; ``_get_context`` turns those hits (paper
    abstracts) into the context string.

    The attributes ``_doc_idx``, ``_doc_ids``, ``_doc_contents``,
    ``_context_max_tokens``, ``_model``, ``n_results`` and
    ``custom_token_count_function`` are read here but not defined in this
    class; they are presumably provided by ``RetrieveUserProxyAgent`` --
    TODO confirm against the installed autogen version.
    """

    def __init__(
        self,
        name: str = "RetrieveChatAgent",
        human_input_mode: Optional[str] = "NEVER",
        is_termination_msg: Optional[Callable[[Dict], bool]] = None,
        retrieve_config: Optional[Dict] = None,
        **kwargs
    ):
        """Forward every argument to ``RetrieveUserProxyAgent`` unchanged.

        Args:
            name: Agent name.
            human_input_mode: Human input mode passed to the base class.
            is_termination_msg: Optional predicate deciding whether a message
                ends the conversation.
            retrieve_config: Optional retrieval configuration dict.
            **kwargs: Any further keyword arguments accepted by the base class.
        """
        super().__init__(
            name=name,
            human_input_mode=human_input_mode,
            is_termination_msg=is_termination_msg,
            retrieve_config=retrieve_config,
            **kwargs
        )
        # Kept for callers that may read it; nothing in this class uses it.
        self.max_results = 10  # Default maximum results

    def _get_context(self, results: Dict[str, Union[List[str], List[List[str]]]]) -> str:
        """Assemble the prompt context from retrieved arXiv results.

        Documents are taken in order, skipping those before ``self._doc_idx``
        and those whose id is already in ``self._doc_ids``. A document that on
        its own exceeds ``self._context_max_tokens`` is skipped; collection
        stops when the next document would overflow the token budget or when
        ``self.n_results`` documents have been added.

        Args:
            results: Mapping shaped like the return value of
                ``df_query_arxiv_db``: ``"ids"`` and ``"documents"`` hold one
                inner list each, ``"metadata"`` is a flat list of dicts whose
                ``"title"`` value is a one-element list.

        Returns:
            The concatenation of ``"<title> : <id>\\n<document>\\n"`` for every
            document added (empty string if none was added).

        Side effects:
            Prints a coloured line per skipped/added document, advances
            ``self._doc_idx`` and appends to ``self._doc_ids`` and
            ``self._doc_contents``.
        """
        doc_contents = ""
        current_tokens = 0
        _tmp_retrieve_count = 0

        for idx, doc in enumerate(results["documents"][0]):
            if idx < self._doc_idx:
                continue
            doc_id = results["ids"][0][idx]
            if doc_id in self._doc_ids:
                continue

            doc_tokens = self.custom_token_count_function(doc, self._model)
            if doc_tokens > self._context_max_tokens:
                # This document can never fit, even alone: skip it for good.
                func_print = f"Skip doc_id {doc_id} as it is too long to fit in the context."
                print(colored(func_print, "green"), flush=True)
                self._doc_idx = idx
                continue

            if current_tokens + doc_tokens > self._context_max_tokens:
                # Budget exhausted; the remaining documents wait for a later call.
                break

            func_print = f"Adding doc_id {doc_id} to context."
            print(colored(func_print, "green"), flush=True)
            current_tokens += doc_tokens

            # Each metadata entry stores the title as a one-element list.
            title = results["metadata"][idx]["title"][0]
            doc_contents += title + " : " + doc_id + "\n" + doc + "\n"
            self._doc_idx = idx
            self._doc_ids.append(doc_id)
            self._doc_contents.append(doc)
            _tmp_retrieve_count += 1

            # Stop once exactly n_results documents were added (the previous
            # `>` comparison let one extra document through).
            if _tmp_retrieve_count >= self.n_results:
                break

        return doc_contents

    async def df_query_arxiv_db(self, query_texts: List[str],
                                n_results: int = 10,
                                sort_by: arxiv.SortCriterion = arxiv.SortCriterion.Relevance
                                ) -> Dict[str, Union[List[str], List[List[str]]]]:
        """Search arXiv for every query text and merge the hits.

        The coroutine contains no ``await``; the arXiv requests run
        synchronously inside it.

        Args:
            query_texts: Query strings, each searched separately.
            n_results: Maximum number of papers requested per query.
            sort_by: arXiv sort criterion for the search.

        Returns:
            A dict with ``"ids"`` (one inner list of PDF URLs), ``"documents"``
            (one inner list of paper summaries) and ``"metadata"`` (a flat list
            with one dict per paper holding ``"authors"``, ``"published"`` and
            ``"title"`` lists). Hits of all queries are concatenated in query
            order.
        """
        # One client for all queries instead of a new one per query.
        client = arxiv.Client()
        results = []
        for query_text in query_texts:
            search = arxiv.Search(
                query=query_text,
                max_results=n_results,
                sort_by=sort_by
            )
            results.extend(client.results(search))

        return {
            "ids": [[result.pdf_url for result in results]],
            "documents": [[result.summary for result in results]],
            "metadata": [{
                "authors": [str(author) for author in result.authors],
                "published": [result.published],
                "title": [result.title if result.title else "No title"]
                } for result in results]
        }

    def retrieve_docs(self, problem: str, n_results: int = 10,
                      search_string: str = "") -> Dict[str, Union[List[str], List[List[str]]]]:
        """Run the arXiv search for ``problem`` and cache the hits on the agent.

        Args:
            problem: Problem statement used as the single arXiv query.
            n_results: Maximum number of papers to request.
            search_string: Accepted but not used by this implementation.

        Returns:
            The result dict produced by ``df_query_arxiv_db``; the same object
            is stored on ``self._results``.
        """
        # The notebook calls nest_asyncio.apply() in its first cell, which is
        # presumably what allows run_until_complete on the kernel's loop here.
        loop = asyncio.get_event_loop()
        results = loop.run_until_complete(self.df_query_arxiv_db([problem], n_results=n_results))
        self._results = results
        return results
    

####################################################################################################
# Prompt template handed to the retrieval agent through
# retrieve_config["customized_prompt"] below. The {input_question} and
# {input_context} placeholders are presumably filled in by RetrieveUserProxyAgent
# -- TODO confirm against the installed autogen version.
# NOTE: QNA_PROMPT is assigned again, without the 'UPDATE CONTEXT' instruction,
# in the agent-definition cell further down the notebook.
QNA_PROMPT = """Assistant helps the researchers with searching information from the arxiv API. Be brief in your answers.
Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, you should reply exactly 'UPDATE CONTEXT'. Do not generate answers that don't use the sources below. 
In your role, you have the autonomy to question the provided content or the process presented in this group chat and can request corrections or seek clarification if there is something that appears to be missing or unclear after executing a given task. If at any point you find yourself confused or in need of assistance, do not hesitate to reach out to the group chat manager, who can guide you or delegate the task to another qualified participant.
For tabular information return it as an html table. Do not return markdown format. If the question is not in English, answer in the language used in the question.
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately.
User's question is: {input_question}

Context is: {input_context}"""
# Retrieval agent used by the retrieve_content helper in this cell. It relies on
# termination_msg and llm_config from the earlier cells; "docs_path" is None
# because ArxivUserProxyAgent fetches its documents from arXiv.
arxiv_rag_agent = ArxivUserProxyAgent(
    name="ResearchUser",
    is_termination_msg=termination_msg,  
    system_message="Assistant who has extra content retrieval power for getting information from data from a arxiv paper store with research papers.",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=3,
    llm_config=llm_config,  # llm_config must be defined or imported
    retrieve_config={"customized_prompt": QNA_PROMPT, "docs_path": None},  
    code_execution_config=False
)

# The ArxivUserProxyAgent instance created above is the retrieval agent used by the helper below.

# In this case, we will have multiple user proxy agents and we don't initiate the chat
# with RAG user proxy agent.
# In order to use RAG user proxy agent, we need to wrap RAG agents in a function and call
# it from other agents.
def retrieve_content(
    message: Annotated[
        str,
        "Refined message which keeps the original meaning and can be used to retrieve content for code generation and question answering.",
    ],
    n_results: Annotated[int, "number of results"] = 3,
) -> str:
    """Fetch retrieval context for ``message`` through ``arxiv_rag_agent``.

    Args:
        message: Query text; also returned unchanged when the agent produces
            an empty reply.
        n_results: Number of results; stored on the agent before retrieving.

    Returns:
        The agent's reply, or ``message`` itself if that reply is falsy.
    """
    rag = arxiv_rag_agent
    rag.n_results = n_results

    # The agent reports two separate "update context" conditions.
    case1, case2 = rag._check_update_context(message)
    wants_update = (case1 or case2) and rag.update_context

    if not wants_update:
        # Regular path: generate the prompt for a new problem.
        reply = rag.message_generator(rag, None, {"problem": message, "n_results": n_results})
    else:
        # Context refresh: keep an already recorded problem, otherwise adopt this one.
        if not hasattr(rag, "problem"):
            rag.problem = message
        _, reply = rag._generate_retrieve_user_reply(message)

    return reply or message

# Smoke test: call the retrieval helper directly, outside any group chat.
# The returned string is displayed as this cell's output.
message = "Overview of time series forecasting methods"
retrieve_content(message, n_results=3)

[32mAdding doc_id http://arxiv.org/pdf/1303.0117v1 to context.[0m
[32mAdding doc_id http://arxiv.org/pdf/2401.03006v2 to context.[0m
[32mAdding doc_id http://arxiv.org/pdf/2309.10613v1 to context.[0m


"Assistant helps the researchers with searching information from the arxiv API. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, you should reply exactly 'UPDATE CONTEXT'. Do not generate answers that don't use the sources below. \nIn your role, you have the autonomy to question the provided content or the process presented in this group chat and can request corrections or seek clarification if there is something that appears to be missing or unclear after executing a given task. If at any point you find yourself confused or in need of assistance, do not hesitate to reach out to the group chat manager, who can guide you or delegate the task to another qualified participant.\nFor tabular information return it as an html table. Do not return markdown format. If the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, 

In [4]:
# System message constants for different roles

# System message of the ResearchCoordinator agent.
COORDINATOR = """You are a Research coordinator: This is the person who coordinates the various aspects of the research project. 
The role is also responsible to rephrase research questions into key word queries for the arxiv api. 
if researcher or critic needs help, you can help them to find the information from the arxiv API.
"""
# System message of the Researcher agent.
RESEARCHER = """You are a Researcher: This is the person who performs the research of paper's summaries. 
You MUST take to account the feedback from the critic to improve the quality of the work.
In your role, you have the autonomy to question the provided content and can request corrections or seek clarification if there is something that appears to be missing or unclear after executing a given task. If at any point you find yourself confused or in need of assistance, do not hesitate to reach out to the group chat manager.
"""
# System message of the Sub-I agent; it asks the agent to finish with 'TERMINATE'.
SUB1 = """You are a Sub-investigator (Sub-I): This is the assistant to the PI, who helps with the tasks of the PI with a step wise research plan with sub-research topics.
you MUST help PI to ensure that the research team is on the right track and the research is going in the right direction. 
In your role, you have the autonomy to question the provided content or the process presented in this group chat and can request corrections or seek clarification if there is something that appears to be missing or unclear after executing a given task. If at any point you find yourself confused or in need of assistance, do not hesitate to reach out to the group chat manager, who can guide you or delegate the task to another qualified participant.
Reply 'TERMINATE' in the end when everything is done.
"""

# Retrieval prompt template for the agent defined below. Compared with the
# QNA_PROMPT assigned in the earlier cell, this version omits the sentence
# "If there isn't enough information below, you should reply exactly 'UPDATE CONTEXT'."
QNA_PROMPT = """Assistant helps the researchers with searching information from the arxiv API. Be brief in your answers.
Answer ONLY with the facts listed in the list of sources below. Do not generate answers that don't use the sources below. 
In your role, you have the autonomy to question the provided content or the process presented in this group chat and can request corrections or seek clarification if there is something that appears to be missing or unclear after executing a given task. If at any point you find yourself confused or in need of assistance, do not hesitate to reach out to the group chat manager, who can guide you or delegate the task to another qualified participant.
For tabular information return it as an html table. Do not return markdown format. If the question is not in English, answer in the language used in the question.
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately.
User's question is: {input_question}

Context is: {input_context}"""

# Termination message definition
def termination_msg(x):
    """Return True when message ``x`` signals the end of the conversation.

    A message terminates the chat when it is a dict whose ``"content"``,
    converted to ``str`` and stripped of trailing whitespace, ends with
    ``TERMINATE`` (case-insensitive). Matching the end of the text rather than
    the whole text is deliberate: the agent prompts ask for 'TERMINATE' "in the
    end" of a reply, so the keyword normally follows other text.

    Args:
        x: The message to inspect; anything that is not a dict is rejected.

    Returns:
        bool: True if the message ends with the termination keyword.
    """
    if not isinstance(x, dict):
        return False
    # str() keeps a None content harmless: "None" does not end with TERMINATE.
    content = str(x.get("content", ""))
    return content.rstrip().upper().endswith("TERMINATE")

# Agent definitions
# PI: the agent that opens the group chat (see call_rag_chat) and whose
# chat_messages are returned to the caller.
principalInvestigator = autogen.UserProxyAgent(
    name="PI",
    is_termination_msg=termination_msg,
    human_input_mode="TERMINATE",
    max_consecutive_auto_reply=1,
    system_message="""You are a Principal investigator (PI): You are the leader of the research team who asks the questions and gives task.
    You MUST make sure that the research team is on the right track and the research is going in the right direction. 
    reply 'TERMINATE' in the end when everything is done.
    """,
    llm_config=llm_config, 
    code_execution_config=False,
    description="Principal investigator (PI) is the leader of the research team who asks the questions and gives task."
)

# Planner: group chat member that proposes and revises the plan.
planner = autogen.AssistantAgent(
    name="Planner",
    system_message="""Planner. Suggest a plan. Revise the plan based on feedback from PI and critic, until PI approval.
The plan may involve a Research Coordinator to to rephrase research questions into key word queries for the arxiv api and a Researcher who could performs the research of paper's summaries. 
Explain the plan first. Be clear which step is performed by an Research Coordinator, and which step is performed by a scientist.
""",
    llm_config=llm_config,
    description="Planner suggests a plan and revises the plan based on feedback from PI and critic, until PI approval."
)

# Sub-I: defined and reset by _reset_agents, but not part of the agent list
# passed to the GroupChat in call_rag_chat.
subInvestigator = autogen.AssistantAgent(
    name="Sub-I",
    is_termination_msg=termination_msg,
    system_message=SUB1,
    llm_config=llm_config,
    description="Sub-investigator (Sub-I) is the assistant to the PI, who helps with the tasks of the PI with a step wise research plan with sub-research topics."
)

# Critic: group chat member that reviews the other agents' work.
critic = autogen.AssistantAgent(
    name="Critic",
    system_message="Critic. Double check the work of researcher, coordinator and Planner and provide feedback to improve the quality of the work",
    llm_config=llm_config,
    description="Critic is responsible for double checking the work of researcher, coordinator and Planner and provide feedback to improve the quality of the work"
)


# Retrieval agent. This rebinds the arxiv_rag_agent name created in the earlier
# cell, now using the QNA_PROMPT from this cell and adding a description. It is
# not a group chat member; it is reached through the retrieve_content function.
arxiv_rag_agent = ArxivUserProxyAgent(
    name="ResearchUser",
    is_termination_msg=termination_msg,  
    system_message="Assistant who has extra content retrieval power for getting information from data from a arxiv paper store with research papers.",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=3,
    llm_config=llm_config,  # llm_config must be defined or imported
    retrieve_config={"customized_prompt": QNA_PROMPT, "docs_path": None},  
    code_execution_config=False,
    description="Assistant who has extra content retrieval power for getting information from data from a arxiv paper store with research papers."
)

# Research coordinator: the agent that retrieve_content is registered on for
# LLM use in call_rag_chat.
researchCoordinator = autogen.AssistantAgent(
    name="ResearchCoordinator",
    is_termination_msg=termination_msg,
    system_message=COORDINATOR,  # COORDINATOR should be a predefined string variable
    llm_config=llm_config,
    description="Research coordinator is the person who rephrase research questions into key word queries for the arxiv api."
)

# Proxy named "ResearchCoordinator_proxy": retrieve_content is registered on it
# for execution in call_rag_chat. Its termination check is a case-sensitive
# match on a trailing "TERMINATE" (after stripping trailing whitespace), unlike
# the shared termination_msg used by the other agents.
RC_proxy = autogen.UserProxyAgent(
    name="ResearchCoordinator_proxy",
    human_input_mode="NEVER",
    is_termination_msg=lambda x: "content" in x
    and x["content"] is not None
    and x["content"].rstrip().endswith("TERMINATE"),
    code_execution_config={
        "work_dir": "ResearchCoordinator",
        "use_docker": False,
    },  # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.
    description="assist Research coordinator to query for the arxiv api."
)

# Researcher: group chat member that works on the paper summaries.
researcher = autogen.AssistantAgent(
    name="Researcher",
    is_termination_msg=termination_msg,
    system_message=RESEARCHER, 
    llm_config=llm_config,
    description="Researcher is the person who performs the research of paper's summaries."
)



In [6]:
def _reset_agents():
    """Reset every agent defined in this notebook before a new chat run.

    Covers all group chat participants (PI, Planner, ResearchCoordinator,
    Researcher, Critic, ResearchCoordinator_proxy) as well as the Sub-I and the
    retrieval agent, so that no agent carries state from a previous
    ``call_rag_chat`` run. Previously the Planner, the Critic and the
    ResearchCoordinator proxy were left out although they take part in the chat.
    """
    for agent in (
        principalInvestigator,
        planner,
        subInvestigator,
        critic,
        arxiv_rag_agent,
        researchCoordinator,
        RC_proxy,
        researcher,
    ):
        agent.reset()

def call_rag_chat(PROBLEM):
    """Run one group chat on ``PROBLEM`` with arXiv retrieval exposed as a function.

    The retrieval agent does not join the chat. Its lookup is wrapped in
    ``retrieve_content``, registered on the research coordinator for LLM use
    and on the coordinator's proxy for execution.

    Args:
        PROBLEM: The message the principal investigator opens the chat with.

    Returns:
        ``principalInvestigator.chat_messages`` after the chat has finished.
    """
    # Start from a clean slate.
    _reset_agents()

    def retrieve_content(
        message: Annotated[
            str,
            "Refined message which keeps the original meaning and can be used to retrieve content for code generation and question answering.",
        ],
        n_results: Annotated[int, "number of results"] = 3,
    ) -> str:
        rag = arxiv_rag_agent
        rag.n_results = n_results  # number of results to retrieve

        # Decide between refreshing the context and starting a new retrieval.
        case1, case2 = rag._check_update_context(message)
        wants_update = (case1 or case2) and rag.update_context

        if not wants_update:
            reply = rag.message_generator(rag, None, {"problem": message, "n_results": n_results})
        else:
            if not hasattr(rag, "problem"):
                rag.problem = message
            _, reply = rag._generate_retrieve_user_reply(message)

        return reply or message

    # The retrieval agent only fetches content, so it never asks for human input.
    arxiv_rag_agent.human_input_mode = "NEVER"

    # The coordinator may propose the call; its proxy is the one that executes it.
    llm_tool = researchCoordinator.register_for_llm(
        description="retrieve content for code generation and question answering.", api_style="function"
    )(retrieve_content)
    RC_proxy.register_for_execution()(llm_tool)

    participants = [principalInvestigator, planner, researchCoordinator, researcher, critic, RC_proxy]
    chat = autogen.GroupChat(
        agents=participants,
        messages=[],
        max_round=35,
        speaker_selection_method="auto",
        allow_repeat_speaker=False,
    )
    chat_manager = autogen.GroupChatManager(groupchat=chat, llm_config=manager_config)

    # The principal investigator opens the conversation with the problem statement.
    principalInvestigator.initiate_chat(chat_manager, message=PROBLEM)

    return principalInvestigator.chat_messages

# Example run: ask the agent team for slide content and print the PI's
# chat_messages returned by call_rag_chat.
PROBLEM = "Can you create an overview of the modelling of reliability and safety mechanisms. As an output, I need the content for a slide I plan to use in a presentation."
messages = call_rag_chat(PROBLEM)
print(messages)


[33mPI[0m (to chat_manager):

Can you create an overview of the modelling of reliability and safety mechanisms. As an output, I need the content for a slide I plan to use in a presentation.

--------------------------------------------------------------------------------


[33mPlanner[0m (to chat_manager):

To accomplish the task of creating an overview of the modeling of reliability and safety mechanisms for a presentation slide, we will follow a multi-step plan that involves a Research Coordinator and a Researcher. The plan is detailed below:

1. **Understanding the Task (Research Coordinator)**: The Research Coordinator will first understand the specifics and scope of what is needed for the presentation slide. They will clarify with the Principal Investigator (PI) if the focus is on a specific industry, type of system, or any particular methodologies of interest.

2. **Initial Research Question Formulation (Research Coordinator)**: Based on the provided information, the Research Coordinator will formulate initial research questions that will serve as the basis for literature search. For instance, "What are the current models for assessing reliability in engineered systems?" or "What safety mechanisms are commonly modeled in high-risk industries?"

3

In [7]:
# Second example run: a blog-post request. This rebinds PROBLEM and messages
# from the previous cell.
PROBLEM = "Write a comperhansive blog post about the modelling of reliability and safety mechanisms in AI system. The focus should be on Large Language Models."
messages = call_rag_chat(PROBLEM)
print(messages)

[33mPI[0m (to chat_manager):

Write a comperhansive blog post about the modelling of reliability and safety mechanisms in AI system. The focus should be on Large Language Models.

--------------------------------------------------------------------------------


[33mResearchCoordinator[0m (to chat_manager):

Title: Ensuring Trustworthiness: Modeling Reliability and Safety in Large Language Models

Introduction:
The era of Artificial Intelligence (AI) has ushered in remarkable technological advancements, particularly in the field of Large Language Models (LLMs) — AI systems engineered to understand, generate, and interact with human language at an unprecedented scale. Alongside their immense potential, these systems also raise significant reliability and safety challenges. In this blog post, we will explore how the AI research community is approaching the modeling of reliability and safety mechanisms in LLMs, with a focus on ensuring these models can be trusted by users and leveraged safely across various applications.

Defining Reliability and Safety in AI:
Before diving into modeling techniques, it's essential to clarify what we mean by reliability and safety in the context of LLMs. Reliability refers to the consistent performance of the mo

In order to ensure reliability and safety in Large Language Models (LLMs), several strategies and approaches are highlighted in recent research:

1. **General Safety Evaluation Framework**:
   - The survey provides a framework for evaluating and improving safety in large models, emphasizing the importance of a comprehensive approach to managing safety risks [Towards Safer Generative Language Models: A Survey on Safety Risks, Evaluations, and Improvements].

2. **Adversarial Training and Detection**:
   - Research introduces the Adversarial Prompt Shield (APS), which is designed to improve detection accuracy and resist adversarial prompts that aim to elicit harmful responses from LLMs [Robust Safety Classifier for Large Language Models: Adversarial Prompt Shield].
   - Novel strategies for creating adversarial training data, such as the Bot Adversarial Noisy Dialogue (BAND) datasets, have been proposed to enhance the robustness of safety classifiers [Robust Safety Classifier for Large Language Models: Adversarial Prompt Shield].

3. **Risk Factors and Guidelines for Safety-Critical Tasks**:
   - For safety-critical tasks, it is essential to focus on failure and out-of-distribution (OOD) detection, overfitting identification, uncertainty quantification, and robustness to data perturbations [Building Safe and Reliable AI systems for Safety Critical Tasks with Vision-Language Processing].
   - Improvements in model uncertainty quantification are regarded as vital, and ongoing work endeavours to develop current techniques further to ensure accuracy in model uncertainty for safety-related tasks [Building Safe and Reliable AI systems for Safety Critical Tasks with Vision-Language Processing].

4. **Agent Constitution-Based Framework for Trustworthiness**:
   - An Agent-Constitution-based agent framework named TrustAgent has been explored to improve the trustworthiness of LLM-based agents. This includes pre-planning strategies to embed safety knowledge, in-planning strategies to bolster safety during plan generation, and post-planning strategies for safety inspection [TrustAgent: Towards Safe and Trustworthy LLM-based Agents through Agent Constitution].

The challenges related to the alignment problem, where large language models should consistently behave in ways that align with human values while remaining useful and versatile, are also highlighted. The current strategies for alignment appear to be insufficient, as LLMs are vulnerable to adversarial attacks, which is deeply tied to their functionality and flexibility [The Alignment Problem in Context].

In summary, ensuring the reliability and safety of LLMs is multifaceted, requiring robust training, adversarial training and detection, and careful consideration of the factors listed above. These strategies are at the forefront of ongoing research efforts to build AI systems that are not only advanced but also aligned with safety and ethical standards.
