## Initialize the project 

In [1]:

from typing import Dict, List, Optional, Union, Callable, Literal
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from autogen.formatting_utils import colored
from typing_extensions import Annotated
import autogen
from autogen import Agent
from autogen.token_count_utils import count_token, get_max_token_limit
from autogen.agentchat.contrib.capabilities import transform_messages, transforms

from teachability import Teachability
from concurrent.futures import ThreadPoolExecutor, as_completed

import arxiv

import requests


import os
import shutil
import requests
import pickle
import re
from pathlib import Path

import nest_asyncio
nest_asyncio.apply()

import sqlite3
from utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(autogen.__version__)

0.2.24


### parameters

In [3]:
version = "0.1.2"
ProjectID = "summarization_Metrics"
initiate_db = False

config_file = "OAI_CONFIG_LIST"
max_round = 30
silent = False
recall_threshold = 1.2 
# config_file = "OAI_CONFIG_LIST"

topic = "A Comprehensive Guidance on Assessing LLM Models appilication: Evaluating Relevance, Completeness, Clarity, Accuracy, Coherence, and Engagement."

task = """
As an esteemed authority in the realm of Natural Language Processing (NLP) and Large Language Models (LLMs), we cordially invite you to share your enlightened perspectives through a scientifically-rigorous article titled, '{topic}'

The article should be structured into a maximum of seven sections, with at least three centering on an in-depth discussion of technical methodologies. Your expertise will provide insightful navigation for our audience on the complexities of ensuring AI operates within secure and reliable parameters, specifically focusing on LLMs.

Here is a structural blueprint for your incisive contribution:

- **Main Tenet:** Ground your discussion in the realm of Large Language Models, underlining their vital role in the prevailing AI setting and emphasizing why their safety and reliability are of utmost importance.

- **Trailblazing Progress:** Discuss the latest methodologies and innovations that are leading the way in AI safety and reliability. Highlight, with specific references to original research, how these groundbreaking developments are molding the future of accountable AI cultivation and deployment.

- **Comprehensible Understanding:** Although your post will be data-rich, it needs to present complex ideas in a digestible format for those outside the tech industry. The prime objective is to educate and enlighten without overwhelming.

- **Authoritative Sources:** It's crucial to fortify your narrative with references from research, studies, and other recognized resources that have informed your insights. Also, include these references for readers who wish to delve deeper into the subject matter.

- **Current Outlook:** Present your insights in line with the vanguard of the AI field by incorporating up-to-the-minute findings and research. Your post should serve as the go-to resource for anyone searching for the current state of AI safety and dependability mechanisms.

This article serves as a chance to disseminate knowledge, foster a profound understanding, and raise appreciation for persistent efforts in crafting reliable and safer AI systems. Your input will, undeniably, light the way for those unraveling the intricacies of AI in our progressively digitalizing world.

Remember, you are equipped with a function that can read arxiv's papers (full and summary) for you. If you need information to supplement your existing knowledge base, do not hesitate to use it.

"""


Project_dir = Path(f"./{ProjectID}/{version}")

if not os.path.exists(Project_dir): initiate_db = True

output_dir = f'{Project_dir}/pdf_output'
if not os.path.exists(output_dir): 
    os.makedirs(output_dir)
    os.makedirs(f"{output_dir}/json")
    os.makedirs(f"{output_dir}/markdown")


db_dir = f'{Project_dir}/memo-db/'


if initiate_db:


    if not os.path.exists(Project_dir): 
        shutil.rmtree(Project_dir)
        os.makedirs(Project_dir)
    if os.path.exists(db_dir): shutil.rmtree(db_dir)

    # create a table of papers and abstracts that have been read and saved it in a pickle file
    init_db(Project_dir)

config_list = autogen.config_list_from_json(
    config_file,
    file_location=".",
    filter_dict={
        "model": ["gpt-4-32k", "gpt-4"]#, "gpt4", "gpt-35-turbo-16k", "gpt-4-0613", "gpt-3.5-turbo", "gpt-35-turbo", "gpt-35-turbo-0613"]
    },
)

print("LLM models: ", [config_list[i]["model"] for i in range(len(config_list))])

# Configuration for the Language Model (LLM)
llm_config = {
    "config_list": config_list,  # config_list should be defined or imported
    "timeout": 120,
    # "seed": 42,
}

# Configuration for the manager using the same config_list as llm_config
manager_config = {
    "config_list": config_list,  # config_list should be defined or imported
    "timeout": 60,
    # "seed": 42,
}

# Termination message definition
termination_msg = (
    lambda x: isinstance(x, dict)
    and str(x.get("content", "")).upper() == "TERMINATE"
)



LLM models:  ['gpt-4', 'gpt-4-32k']


## Helper function

### database helper function

In [4]:


def pdf2md_chunck(url):
    if url[-4:] != ".pdf":
        pdf_filename = url.split('/')[-1] + ".pdf"
    else:
        pdf_filename = url.split('/')[-1]

    if url.startswith("http"):
        pdf_path = os.path.join(output_dir, pdf_filename)
        # Download the PDF
        download_pdf(url, pdf_path)
    else:
        pdf_path = url

    data = analyze_and_save_pdf(f"file://{pdf_path}", f"{output_dir}/json")

    docs, pagecontent, fullmdtext = create_docs(data, 3000, pdf_filename)

    # write fullmdtext to a file
    with open(f"{output_dir}/markdown/{pdf_filename}.md", "w") as f:
        f.write(fullmdtext)

    return docs

url = "https://arxiv.org/pdf/2404.05993v1.pdf"
# docs = pdf2md_chunck(url)


## teach agent for some skills

In [5]:
def create_teachable_groupchat(assitant_name, user_name, db_dir, config_list, verbosity=0):
    
    # Start by instantiating any agent that inherits from ConversableAgent.
    assistant = autogen.ConversableAgent(
        name=assitant_name,  # The name is flexible, but should not contain spaces to work in group chat.
        llm_config={"config_list": config_list, "timeout": 120, "cache_seed": None},  # Disable caching.
    )

    # Instantiate the Teachability capability. Its parameters are all optional.
    teachability = Teachability(
        verbosity=verbosity,  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
        reset_db=False,  
        path_to_db_dir=db_dir,
        recall_threshold=1.5,  # Higher numbers allow more (but less relevant) memos to be recalled.
    )

    # Now add the Teachability capability to the agent.
    teachability.add_to_agent(assistant)

    user = autogen.UserProxyAgent(
        name=user_name,
        human_input_mode="NEVER",
        is_termination_msg=termination_msg,
        max_consecutive_auto_reply=0,
        code_execution_config={"use_docker": False},
    )

    return assistant, user

In [6]:
if initiate_db:
    prompt = "For each memorization task, initiate your process with 'MEMORIZE_ARTICLE:'  \n\n' Delve into the passage to discern and assess its key insights. If the content presents noteworthy information, make a point to memorize these details. Conversely, if the passage does not offer significant insights, there's no need to commit it to memory. Upon choosing to memorize, you MUST finalize your notes by including both the article's title and its URL, employing the format '[source: article_title, article_url]' for efficient future access and verification."

    instract_assistant, instract_user = create_teachable_groupchat("instract_assistant", "instract_user", db_dir, config_list, verbosity=3)

    instract_user.initiate_chat(instract_assistant, silent=True, message=prompt)

## Define functions

### Arxiv funcs

In [7]:

text = "Human-Centred Learning Analytics and AI in Education: a Systematic Literature Review"
# arxiv_search(query=text)

In [8]:


# get_paper_metadata('https://arxiv.org/abs/1810.04805')
# get_paper_metadata('https://arxiv.org/pdf/1810.04805.pdf')
# get_paper_metadata('1810.04805')

### arxiv retrieval

In [9]:
from utils import _arxiv_search

def initiate_chat_with_paper_info(paper, query):

    # Create a TeachableAgent and UserProxyAgent to represent the researcher and the user, respectively.
    arxiver, arxiver_user = create_teachable_groupchat("arxiver", "arxiver_user", db_dir, config_list, verbosity=0)
    try:
        arxiver_user.initiate_chat(arxiver,
                        silent=True,
                        message=f"The following article is one of the articles that I found for '{query}' topic: \n\n '{paper.title}' by {paper.authors} updated on {paper.updated}: {paper.pdf_url} \nsummary: {paper.summary} \n?")
        
        add_paper_to_db(paper.pdf_url, "read_abstracts", Project_dir)  # Add paper to the database after initiating the chat
        return f"Title: {paper.title} Authors: {paper.authors} URL: {paper.pdf_url} os added to MEMOS\n\n "
        
    except Exception as e:
        print(f"Error: {e}")

def process_query(query, n_results):
    """Function to process each query and initiate chats for each paper found."""
    papers = _arxiv_search(query, n_results=n_results)

    # check if the abstract has been read before
    papers = [paper for paper in papers if not check_paper_in_db(paper.pdf_url, "read_abstracts", Project_dir)]

    
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(initiate_chat_with_paper_info, paper, query) for paper in papers]
        for future in as_completed(futures):
            future.result()

def arxiv_retriever(queries: Annotated[List[str], "The list of query texts to search for."], 
                    n_results: Annotated[int, "The number of results to retrieve for each query."] = 10,
                    ) -> str:

    
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(process_query, query_text, n_results) for query_text in queries]
        for future in as_completed(futures):
            future.result()

    # Instantiate a UserProxyAgent to represent the user. But in this notebook, all user input will be simulated.
    return f"Dear Researcher, Database updated with on the following topics: {', '.join(list(queries))}. Please go ahead with your task."
    # return message

message = ["Large Language Models", "Assessing Language Models", "AI safety and reliability"]
if initiate_db:
    arxiv_retriever(message, n_results=10)

# arxiv_retriever(message, n_results=3)

### read pdfs

In [10]:
def check_reasoning(reason, summary):
    
    # Start by instantiating any agent that inherits from ConversableAgent.
    assistant = autogen.AssistantAgent(
        name="reasoning_checker",  # The name is flexible, but should not contain spaces to work in group chat.
        llm_config={"config_list": config_list, "timeout": 120, "cache_seed": None},  # Disable caching.
    )

    user = autogen.UserProxyAgent(
        name="user",
        human_input_mode="NEVER",
        is_termination_msg=termination_msg,
        max_consecutive_auto_reply=0,
        code_execution_config={"use_docker": False},
    )

    chat_hist = user.initiate_chat(assistant, silent=True, message=f"check if \"{reason} is a good reason is to read a paper with the following summary: {summary} /n/n answer only with 'yes' or 'no'")
    return chat_hist.chat_history[-1]['content']

def initiate_chat_read_paper(text, article):
    paper_reader, reader_user = create_teachable_groupchat("paper_reader", "reader_user", db_dir, config_list, verbosity=0)
    try:
        reader_user.initiate_chat(paper_reader,
                        silent=True,
                        message=f"MEMORIZE_ARTICLE: The following passage is extracted from an article titled '{article}': \n\n {text}."
                        )
    except Exception as e:
        print(f"Error: {e}")
        print(colored(f"text: {text}", "red"))
    
def chunk_pdf(url, title):
    
    chunked_elements = pdf2md_chunck(url)

    # find checked_elemnt that includes "REFERENCES" in the second half of the text

    half_length = len(chunked_elements) // 2
    for i, chunk in enumerate(chunked_elements[half_length:], start=half_length):
        chunk_text_upper = chunk.page_content.upper()
        if re.search(r'\bREFERENCE\b', chunk_text_upper) or re.search(r'\bREFERENCES\b', chunk_text_upper):
            # remove the chunck with '\bREFERENCE\b' from chuncked_elements list
            chunked_elements = chunked_elements[:i]
            break

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(initiate_chat_read_paper, chunk.page_content, title) for chunk in chunked_elements if len(chunk.page_content.split()) > 30]
        for future in as_completed(futures):
            future.result()

    add_paper_to_db(url, "read_papers", Project_dir)  # Add paper to the database 


"""
This `get_pdfss` function is designed to download a PDF from a given URL, extract its content, 
partition the content into chunks based on titles, and then initiate a chat to share and memorize 
each chunk of the article with a teachable agent and a user.
"""
def get_pdfs(urls: Annotated[List[str], "The list of URLs of the papers to read."],
            reasons: Annotated[List[str], "The list of reasons for reading the papers. it should be same size as urls list."]
            ) -> str:
    
    urls_list = []
    titles_list = []
    message = ''
    for url in urls:

        title, link, updated, summary, pdf_url, paper_id, _ = get_paper_metadata(url)
        
        title = f"{title} [{pdf_url}] updated {updated}"
        
        
        if check_paper_in_db(pdf_url, "read_papers", Project_dir):
            print(f"The article, '{title}', has already been read and shared with you in your memory.")
            message += f"The article, '{title}', has already been read and shared with you in your memory.\n"
            continue
        else:
            if not initiate_db:
                check_reason = check_reasoning(reasons[urls.index(url)], summary)
                if 'no' in check_reason.lower():
                    print(f"The article, '{title}', does not meet the criteria for reading.")
                    message += f"The article, '{title}', does not meet the criteria for reading.\n"
                    continue
            urls_list.append(pdf_url)
            titles_list.append(title)

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(chunk_pdf, url, title) for url, title in zip(urls_list, titles_list)]
        for future in as_completed(futures):
            future.result() 

    num_papers = count_papers_in_db("read_papers", Project_dir)
    print(f"{num_papers} articles have been read, so far.")
    message += f"The articles {', and '.join(titles_list)}  has been read and the content has been shared with you in your memory."
    return message

# Example usage
args = {
"urls": ["http://arxiv.org/pdf/2305.13267v1", "http://arxiv.org/pdf/2305.06530v1"],
"reasons": ['factual_check'] * 40
# ["To understand how the safety performance of LLMs is assessed in typical safety scenarios and instruction attacks.", "To explore the landscape of AI deception focusing on LLMs and the strategies to navigate deceptive behaviors.", "To gain insights into the safety issues, evaluation methods, and enhancement strategies concerning large models.", "To examine the impact of moderation on user enjoyment of AI systems.", "To comprehend methods for robust safety evaluation of LLMs and uncover safety concerns.", "To learn about the reliability of LLMs in generalizability, social biases, calibration, and factuality.", "To uncover the alignment problem in LLMs and its implications for the safety of AI systems.", "To evaluate the safety of VLMs and their vulnerability to jailbreaking attacks.", "To comprehend the framework for evaluating the capability of LLMs in Chinese Journalistic Writing Proficiency and their Safety Adherence.", "To assess the risk taxonomy of AI content and the effectiveness of the AEGIS model.", "To understand how NeuroSymbolic AI approach helps in creating trustworthy AI systems."]
}
if initiate_db:
    for i in range(0, len(args['urls']), 10):
        get_pdfs(args['urls'][i:i+5], args['reasons'][i:i+5])
        
get_pdfs(**args)

The article, 'Enhance Reasoning Ability of Visual-Language Models via Large Language Models [http://arxiv.org/pdf/2305.13267v1] updated 2023-05-22 17:33:44+00:00', has already been read and shared with you in your memory.


The article, 'How Good are Commercial Large Language Models on African Languages? [http://arxiv.org/pdf/2305.06530v1] updated 2023-05-11 02:29:53+00:00', has already been read and shared with you in your memory.
(13,) articles have been read, so far.


"The article, 'Enhance Reasoning Ability of Visual-Language Models via Large Language Models [http://arxiv.org/pdf/2305.13267v1] updated 2023-05-22 17:33:44+00:00', has already been read and shared with you in your memory.\nThe article, 'How Good are Commercial Large Language Models on African Languages? [http://arxiv.org/pdf/2305.06530v1] updated 2023-05-11 02:29:53+00:00', has already been read and shared with you in your memory.\nThe articles   has been read and the content has been shared with you in your memory."

### read pdf

In [11]:
PartChoice = Literal['summary', 'full']

def _momorized_paper_summary(title, updated, summary, pdf_url, authors):

    # Create a TeachableAgent and UserProxyAgent to represent the researcher and the user, respectively.
    arxiver, arxiver_user = create_teachable_groupchat("arxiver", "arxiver_user", db_dir, config_list, verbosity=0)
    try:
        arxiver_user.initiate_chat(arxiver,
                        silent=True,
                        message=f"MEMORIZE_ARTICLE: \n\n '{title}' by {authors} updated on {updated}: {pdf_url} \nsummary: {summary} \n?")
        
        return f"Title: {title} Authors: {authors} URL: {pdf_url} os added to MEMOS\n\n "
    except Exception as e:
        print(f"Error: {e}")

def get_pdf(url: Annotated[str, "The URL of the paper to read."],
            reason: Annotated[str, "reason for reading the paper."],
            part: Annotated[PartChoice, "choose do you need entire paper ('full') or a summary is enough."],
            ) -> str:

    message = ''
    title, link, updated, summary, pdf_url, paper_id, authors= get_paper_metadata(url)

    if part == 'summary':
        _momorized_paper_summary(title, updated, summary, pdf_url, authors)
        return f"Title: {title} Authors: {authors} URL: {pdf_url} \n\n Summary: {summary}"

    title = f"{title} [{pdf_url}] updated {updated}"
        

    if check_paper_in_db(pdf_url, "read_papers", Project_dir):
        print(f"The article, '{title}', has already been read and shared with you in your memory.")
        message += f"The article, '{title}', has already been read and shared with you in your memory.\n"
    else:
        if reason != 'factual_check':
            check_reason = check_reasoning(reason, summary)
            if 'no' in check_reason.lower():
                return f"The article, '{title}', does not meet the criteria for reading."
            
        chunk_pdf(pdf_url, title)

    md_filename = f"{get_paper_id(pdf_url)}.pdf.md"
    md_path = os.path.join(f"{output_dir}/markdown", md_filename)

    with open(md_path, "r") as f:
        content = f.read()

    return content

# Example usage
# get_pdf("http://arxiv.org/pdf/2312.01090v2", "Verify study findings on LLM-based agents in wargames.", "full")


### factual check

In [12]:
def url_check(paper_url: Annotated[str, "The URL of the paper to check."],
            paper_title: Annotated[str, "The title of the paper to be used for fact checking."],
            ):
    if paper_url.find('arxiv.org') == -1:
        return False, f"The provided paper URL, {paper_url}, is not from arxiv.org. Please provide a valid arxiv URL."

    title, link, updated, summary, pdf_url, paper_id, _ = get_paper_metadata(paper_url)
    if title != paper_title:
        return False, f"The provided paper URL, {paper_url}, is not for the paper titled '{paper_title}'. Please provide a valid arxiv URL for the paper."
    
    return True, f"The provided paper URL is from arxiv.org and is for the paper titled '{paper_title}'."

def factual_check(text: Annotated[str, "The writer text to be factually checked."],
                    paper_title: Annotated[str, "The title of the paper to be used for fact checking."],
                    paper_url: Annotated[str, "The arxiv URL of the paper to be used for fact checking."],
                    reason: Annotated[str, "The reason for reading the paper."],
                    paper_authors: Annotated[Optional[str], "The authors of the paper to be used for fact checking."]=None,
                    ) -> str:
    
    url_check_res, message = url_check(paper_url, paper_title)
    if not url_check_res:
        return message

    paper_content = get_pdf(paper_url, reason='factual_check', part='full')

    factual_checker_prompt = """
Below, you will find a passage labeled "TEXT" that references a specific paper: '{paper}' alongside its corresponding "PAPER_CONTENT." Your task is to read the "PAPER_CONTENT" and verify the factual accuracy of the "TEXT" as it pertains to the paper.

Once you have assessed the factual accuracy, you MUST provide feedback, begining with 'FEEDBACK:'. Following your assessment, please write a summary of the paper. Begin this summary with 'Summary of {paper}: '

TEXT:
{text}

PAPER_CONTENT:
{paper_content}
"""

    # Start by instantiating any agent that inherits from ConversableAgent.
    factual_checker = autogen.AssistantAgent(
        name="factual_checker",  # The name is flexible, but should not contain spaces to work in group chat.
        llm_config={"config_list": config_list, "timeout": 120, "cache_seed": None},  # Disable caching.
        system_message = "You are a factual_check AI assistant. You are responsible for verifying the factual accuracy of the text provided in relation to the paper content."
        )

    # create a UserProxyAgent instance named "user_proxy"
    factual_checker_user = autogen.UserProxyAgent(
        name="factual_checker_user",
        human_input_mode="NEVER",
        is_termination_msg=termination_msg,
        code_execution_config=False,
    )

    # let check token limit
    limit = 4096 - 1024
    try:
        limit = get_max_token_limit(factual_checker.llm_config["config_list"][1]["model"]) - 1024  # type: ignore[index]
    except ValueError:
        pass  # limit is unknown
    except TypeError:
        pass  # limit is unknown

    # Limit the token limit per message to avoid exceeding the maximum token limit
    # suppose this capability is not available
    context_handling = transform_messages.TransformMessages(
        transforms=[
            transforms.MessageTokenLimiter(max_tokens=limit, model=factual_checker.llm_config["config_list"][1]["model"]),
        ]
    )
    print(f"factual_check model: {factual_checker.llm_config['config_list'][1]['model']}")
    context_handling.add_to_agent(factual_checker)

    if paper_authors:
        paper = f"{paper_title} [{paper_url}] by {', '.join(list(paper_authors.split(',')))}"
    else:
        paper = f"{paper_title} [{paper_url}]"


    chat = factual_checker_user.initiate_chat(factual_checker, silent=False, max_turns=1,
                                              message=factual_checker_prompt.format(text=text, paper_content=paper_content, paper=paper))

    return chat.chat_history[-1]['content']

args = []
# factual_check(**args[1])

## Define Agents

## add functions to agents

In [13]:
funcs = [
    ("arxiv_retriever", arxiv_retriever, "Retrieve summeries of papers from arxiv for give query."),
    ("get_pdfs", get_pdfs, "Retrieve the content of the pdf files from the urls list."),
    ("get_pdf", get_pdf, "Retrieve the content of the pdf file from the url."),
    ("factual_check", factual_check, "Check the factual accuracy of a given text based on a paper."),
    ("arxiv_search", arxiv_search, "retrun the pdf url from arxiv for the given paper title."),
]


def add_func_to_agents(assignments, funcs=funcs):

    # example input 
    # assignments = [(assistants, users, "arxiv_retriever"), (assistants, users, "get_pdfs") ]
    # funcs = [("arxiv_retriever", arxiv_retriever, "Retrieve content for question answering from arxiv."),
    #          ("get_pdfs", get_pdfs, "Retrieve the content of the pdf file from the url.")]

    func_dict = {}
    func_disc_dict = {}
    for func_name, func, func_disc in funcs:
        func_dict[func_name] = func
        func_disc_dict[func_name] = func_disc

    for assignment in assignments:
        caller, executor, func_name = assignment
        autogen.agentchat.register_function(
            func_dict[func_name],
            caller=caller,
            executor=executor,
            name=func_name,
            description=func_disc_dict[func_name]
        )


    return f"Functions {', '.join([func_name for func_name, _, _ in funcs])} are added to the agents."

### Write sections

In [14]:
Section_writer_SP = """
You are now part of a group chat dedicated to completing a collaborative blog project. As a data_research_writer, your role is to develop a well-researched section of a blog post on a specified topic. You will follow a detailed brief that outlines the necessary content for each part of the section.

Guidelines:

1. Ensure all content is thoroughly researched and supported by data from our database. Verify all information using the MEMOS tool to confirm accuracy and completeness.
2. Each draft segment must include citations. Please list the title, URL, and authors of each cited paper at the end of your section.
3. If you encounter any uncertainties or need clarification, contact the group chat manager for immediate assistance. Additional help from other participants may be provided if necessary.
4. Your responsibilities include maintaining strong communication, showcasing precise research skills, paying meticulous attention to detail, and proactively seeking assistance when needed.
5. Incorporate any team feedback into your revisions promptly. This is crucial to ensure that the final text is polished and meets our editorial standards.

Formatting Requirements:

Start your text with 'TXT:' and end with 'END_TXT'. This format is crucial for the group chat manager to accurately identify your contributions.
You MUST mention the listion of citation at enad of your section and each citation MUST include the title of the paper, its URL, and authors.
Upon completing your section, integrating all feedback, and ensuring all parts are reviewed and properly referenced, signify your completion by typing "TERMINATE" in the group chat.
"""

section_content_reviwer_sp = """
You are now in a group chat tasked with completing a specific project. As a Content Review Specialist, your primary goal is to ensure the quality, accuracy, and integrity of the content produced by the data_research_writer, aligning with the data from our database. Your responsibilities include:

1. Overseeing the structure and content of the blog post to ensure each section is well-defined and adheres to the overarching theme.
2. Collaborating closely with the Writer to understand the breakdown and specific requirements of the blog text.
3. Reviewing drafts with the Writer to confirm factual accuracy, high-quality writing, and inclusion of references to pertinent data in the database. Utilize the 'factual_check' function to verify all textual references. Calling 'factual_check' function, provide you with a summery of the paper, please print the summeries afer your feedbacks.
4. Cross-checking content against your MEMOS to identify any discrepancies or missing data, requesting updates from the manager if necessary.
5. Offering constructive feedback to the writers and ensuring revisions are made swiftly to adhere to the publishing timeline.
6. Ensuring content integrity by verifying proper citations and the use of credible sources.
7. Seeking clarification or assistance from the group chat manager if uncertainties or confusion arise during the review process, allowing for additional participant support if needed.
8. Motivating the writing team to conclude the task only when the content meets all quality standards and fully satisfies the task requirements. Participants should signal the completion of their roles by typing "TERMINATE" in the group chat to indicate that the review process is concluded and the blog post is ready for publication.
"""

def write_section(title: Annotated[str, "The title of the section."], 
                  brief: Annotated[str, "a clear, detailed brief about what section should be included."],
                  silent: Annotated[bool, "it should be always True."]=True
                  ) -> str:
    
    # Start by instantiating any agent that inherits from ConversableAgent.
    data_research_writer = autogen.AssistantAgent(
        name="data_research_writer",  # The name is flexible, but should not contain spaces to work in group chat.
        llm_config={"config_list": config_list, "timeout": 120, "cache_seed": None},  # Disable caching.
        system_message=Section_writer_SP,
        description="data_research_writer, crafts detailed sections of a blog post based on a specific topic outlined in a brief. They ensure content is well-researched, referenced, and integrates database information."
    )

    # create a UserProxyAgent instance named "user_proxy"
    writer_user = autogen.UserProxyAgent(
        name="writer_user",
        human_input_mode="NEVER",
        is_termination_msg=termination_msg,
        code_execution_config={
            "work_dir": "section_writing",
            "use_docker": False,
        },
    )

    content_review_specialist = autogen.AssistantAgent(
                                    name="content_review_specialist",
                                    is_termination_msg=termination_msg,
                                    system_message=section_content_reviwer_sp, 
                                    llm_config=llm_config,
                                    description="The content review specialist is a critical thinker who ensures the accuracy and quality of information shared within the group chat. This individual should possess strong analytical skills to review previous messages for errors or misunderstandings and must be able to articulate the correct information effectively. Additionally, if the role involves reviewing Python code, the specialist should also have a solid understanding of Python to provide corrected code when necessary."
                                )
    
    teachability = Teachability(
                                verbosity=0,  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
                                reset_db=False,
                                path_to_db_dir=db_dir,
                                recall_threshold=recall_threshold,  # Higher numbers allow more (but less relevant) memos to be recalled.
                            )

    # Now add the Teachability capability to the agent.
    teachability.add_to_agent(data_research_writer)
    teachability.add_to_agent(content_review_specialist)

    add_func_to_agents([(content_review_specialist, writer_user, "arxiv_retriever"), 
                        (content_review_specialist, writer_user, "factual_check"),
                        (content_review_specialist, writer_user, "arxiv_search"),
                        (content_review_specialist, writer_user, "get_pdf"),
                        ])

    groupchat = autogen.GroupChat(
        agents=[data_research_writer, writer_user, content_review_specialist],
        messages=[],
        speaker_selection_method="auto",  # With two agents, this is equivalent to a 1:1 conversation.
        allow_repeat_speaker=True,
        max_round=max_round,
    )

    manager = autogen.GroupChatManager(
                groupchat=groupchat,
                is_termination_msg=termination_msg,
                llm_config=manager_config,
                code_execution_config={
                    "work_dir": "coding",
                    "use_docker": False,
                },
            )

    chat_hist = writer_user.initiate_chat(manager, 
                                          silent=silent, 
                                          message=f"Compose a blog section with the following guidelines: \n\n Title: {title}, \n\n Brief: {brief} \n\n Please ensure your writing aligns closely with the brief provided, capturing the essence of the topic while engaging the reader. The section should be coherent, well-structured, and reflective of the main themes outlined in the brief.")
    # prepare the response\n",
    writer_messages = [mes for mes in chat_hist.chat_history if 'TXT:' in mes['content']]

    output = writer_messages[-1]['content'] if writer_messages else "No response from the writer."

    # write output in f"{Project_dir}/section_{title}.txt"
    with open(f"{Project_dir}/section_{title}.txt", "w") as f:
        f.write(output)
    
    return output


funcs.append(("write_section", write_section, "Write a section of a blog post based on a given title and brief."))

arg = [
    {
        "title": "Embracing Large Language Models (LLMs): A Preamble",
        "brief": "Discuss the scale, data training needs, and applications of LLMs across various industries. Highlight the critical importance of safety measures and the need for reliable performance.",
        "silent": True
        },
    ]

# write_section(**arg[0])

### editorial planning

In [15]:
# If you discover that some data is missing during your research, it is your responsibility to initiate a request to fill in the gaps by using the \"arxiv_retriever\" function to enrich the database.
# If a complete review of a paper is necessary, use the \"get_pdfs\" function to access the document. This will enable you to provide detailed insights and ensure the accuracy of the information presented in the blog post.

# 1. Ensure all content is thoroughly researched and supported by data from our database. Verify all information using the MEMOS tool to confirm accuracy and completeness.

CONTENT_REVIEWER = """
You are now in a group chat. You need to complete a task with other participants. As a Content Review Specialist, your main objective is to ensure the quality, accuracy, and integrity of the content produced by the data_research_writer, in line with the data provided in the database. You will:

1. Oversee the structure and content of the blog post to ensure each section is well-defined and adheres to the overall topic.
2. Collaborate with the Writer to understand the division of the blog text and the specific requirements for each part.
3. Work with the writer to review the drafts, ensuring that the content is factually correct, well-written, and includes references to the relevant data in the database.
4. Cross-verify the content against your MEMOS to identify any missing data or discrepancies. If some data is missing, ask manager to update you MEMO
5. If a complete review of a paper is necessary, use the 'get_pdf' function to access the document, enabling you to provide detailed and informed feedback to the writer.
6. Provide constructive feedback to the writers, ensuring any revisions are completed promptly to maintain the publishing schedule.
7. Uphold the integrity of the content by checking for proper citations and the use of verifiable sources.
8. If uncertainty or confusion arises during the review process, do not hesitate to ask for clarification or assistance from the group chat manager so that another participant may step in to support.
9. Encourage the writer team to conclude the task only when the content meets all quality standards and the task requirements are fully satisfied. The participants should reply \"TERMINATE\" when they believe the task is completed to notify that the review process is concluded, and the blog post is ready for publication.
"""

COORDINATOR = """You are a Research coordinator: This is the person who coordinates the various aspects of the research project. 
you are equipped wih a tool that could help you to query for the arxiv api. 
You MUST rephrase research questions into a list of queries (at least 5) for the arxiv api that cover the key aspects of the research questions. 
"""



In [16]:
BLOG_EDITOR = """
You are now part of a group chat dedicated to crafting a data-driven, well-structured blog post. As the blog editor, your leadership is key in coordinating the creation process. Here’s a breakdown of your main responsibilities:

1. Analyze the Topic: Evaluate the topic comprehensively to pinpoint essential points that the blog post should cover.
2. Structure the Content: Organize the blog into up to seven distinct sections. Collaborate with a critic to refine the outline and provide detailed briefs to the Data Research Writers about the needed content for each section. Before delegating tasks to the writers, ensure the critic approves the outline.
3. Coordinate with Writers: Collect drafts from the Data Research Writers. Collaborate with the Chief Writer to weave these drafts into the final blog post.
4. Handle Uncertainties: Actively resolve any issues such as missing data or technical challenges by consulting the group chat. If issues remain unresolved, escalate them to the group chat manager.
5. Facilitate Communication: Keep the lines of communication open for feedback and updates, ensuring all team members are well-informed of the blog post’s progress.

Note: This role centers on content creation, data analysis, and team management, without requiring programming skills.

Formatting Requirements:
Always include a structured outline of the blog post in your responses:
Start with OUTLINE:
Structure the outline with clear headings and subheadings, each labeled with a number, followed by 'TITLE:' and 'BRIEF:'.
Conclude the outline with END_OUTLINE.
Type 'TERMINATE' when you have completed outlining the blog post.
"""
CRITICS_SP = """
As a critic, your role is integral to refining the content quality and structure of our blog post. Working closely with the blog editor, your responsibilities include:

Review Outlines: Examine the structure and outline of the blog post provided by the editor to ensure it logically flows and adequately covers the designated topic.
Evaluate Content: Critically assess each section drafted by the writers for coherence, relevance, and alignment with the overall topic. Suggest improvements or modifications where necessary.
Ensure Depth and Precision: Verify that the content is not only factually accurate but also insightful and engaging. Check for depth of analysis and argumentation within each section.
Provide Constructive Feedback: Offer detailed feedback to the editor and writers to enhance the clarity, impact, and readability of the blog post.
Maintain Communication: Stay active in the group chat, providing timely and actionable feedback. Collaborate effectively with the editor to address any discrepancies or gaps in content.
Final Approval: Contribute to the final review process, ensuring that the content meets all specified criteria before publication. Recommend final adjustments if necessary.
Your role requires a keen eye for detail and a deep understanding of content quality and structure. By providing expert critique and guidance, you help ensure the blog post is informative, engaging, and ready for a successful publication.
"""

def craft_outline(task, silent=True, max_round=max_round):
    # Start by instantiating any agent that inherits from ConversableAgent.
    blog_editor = autogen.AssistantAgent(
        name="blog_editor",  # The name is flexible, but should not contain spaces to work in group chat.
        llm_config=llm_config,
        system_message=BLOG_EDITOR,
        description="The blog editor is central to orchestrating a collaborative blog project, leading the writer team to produce a cohesive, data-driven post. They analyze topics, structure content, coordinate contributions, and manage communications, ensuring the project adheres to editorial standards and is ready for successful publication."
    )

    critic = autogen.AssistantAgent(
        name="critic",
        system_message=CRITICS_SP,
        llm_config=llm_config,
        description="The critic collaborates with the blog editor to enhance the quality and structure of blog posts. They evaluate content, ensure depth, provide feedback, and assist in the final review to ensure the post is insightful, engaging, and publication-ready."
    )

    # create a UserProxyAgent instance named "user_proxy"
    editor_user = autogen.UserProxyAgent(
        name="editor_user",
        human_input_mode="NEVER",
        is_termination_msg=termination_msg,
        code_execution_config=False,
    )

    teachability = Teachability(
                                verbosity=0,  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
                                reset_db=False,
                                path_to_db_dir=db_dir,
                                recall_threshold=recall_threshold,  # Higher numbers allow more (but less relevant) memos to be recalled.
                            )

    teachability.add_to_agent(blog_editor)

    add_func_to_agents([(blog_editor, editor_user, "arxiv_retriever"), 
                        (blog_editor, editor_user, "arxiv_search"),
                        (blog_editor, editor_user, "get_pdf"),
                        (blog_editor, editor_user, "get_pdfs"),
                        (blog_editor, editor_user, "write_section"),
                        (critic, editor_user, "factual_check")
                        ])

    def custom_speaker_selection_func(last_speaker: Agent, groupchat: autogen.GroupChat):

        messages = groupchat.messages
        speakers = [m['name'] for m in messages]
        if len(messages) <= 1:
            # first, let the researchCoordinator retrieve relevant data populate db
            return blog_editor

        return critic if 'OUTLINE' in messages[-1]['content'] else 'auto'        

    groupchat = autogen.GroupChat(
        agents=[blog_editor, editor_user, critic],
        messages=[],
        speaker_selection_method=custom_speaker_selection_func,
        allow_repeat_speaker=True,
        max_round=max_round,
    )

    manager = autogen.GroupChatManager(
                groupchat=groupchat,
                is_termination_msg=termination_msg,
                llm_config=manager_config,
                code_execution_config={
                    "work_dir": "coding",
                    "use_docker": False,
                },
                system_message="""
You are the manager of the group chat. Your role is to oversee the collaborative creation of a blog post. 
Ensure that the blog editor and critic work together effectively to craft a well-structured, data-driven post. 
When you receive a message from the blog editor with the keyword 'OUTLINE,' promptly assign the critic to review the outline provided. If no such message is received, allow the blog editor to proceed with content creation. 
Monitor the progress, provide guidance, and address any issues that arise during the project.
"""
            )


    chat_hist = editor_user.initiate_chat(manager, silent=silent, message=task)
    # prepare the response\n",
    writer_messages = [mes for mes in chat_hist.chat_history if 'OUTLINE:' in mes['content']]

    return writer_messages[-1]['content'] if writer_messages else "NO outline from the editor.", chat_hist

logging_session_id = autogen.runtime_logging.start(config={"dbname": f"{output_dir}/logs.db"})
print(f"Logging session ID: {str(logging_session_id)}")

outline, chat_hist = craft_outline(task=task.format(topic=topic), silent=False, max_round=50)    

# End logging
autogen.runtime_logging.stop()

Logging session ID: 9b22261e-e57a-474d-87a2-1b78b5a14dbd
[96m********************************[0m
[96m* SYNC DISK MEMORY with VEC_DB *[0m
[96m********************************[0m
[92m    Location = summarization_Metrics/0.1.2/memo-db/uid_text_dict.pkl[0m


[33meditor_user[0m (to chat_manager):


As an esteemed authority in the realm of Natural Language Processing (NLP) and Large Language Models (LLMs), we cordially invite you to share your enlightened perspectives through a scientifically-rigorous article titled, 'A Comprehensive Guidance on Assessing LLM Models appilication: Evaluating Relevance, Completeness, Clarity, Accuracy, Coherence, and Engagement.'

The article should be structured into a maximum of seven sections, with at least three centering on an in-depth discussion of technical methodologies. Your expertise will provide insightful navigation for our audience on the complexities of ensuring AI operates within secure and reliable parameters, specifically focusing on LLMs.

Here is a structural blueprint for your incisive contribution:

- **Main Tenet:** Ground your discussion in the realm of Large Language Models, underlining their vital role in the prevailing AI setting and emphasizing why their safety and reliability are 

Add of existing embedding ID: 45
Add of existing embedding ID: 56
Add of existing embedding ID: 214


[33mblog_editor[0m (to chat_manager):

OUTLINE:

1. TITLE: Introduction to Large Language Models (LLMs)
   BRIEF: Define LLMs and elaborate on their critical role and their transformative potential across different sectors. Discuss the "black box" problem and the demand for safer and more reliable AI systems.

2. TITLE: Historical Progress in AI Safety and Reliability
   BRIEF: Discuss the evolution of safety measures being implemented in the realm of AI, including methodologies that have led to more reliable LLMs. Compare and contrast different studies focusing on the historical progress to show the advancements made in this domain.

3. TITLE: Making the Complex Understandable: LLMs in Layman's Terms
   BRIEF: Break down the fundamentals of LLMs and the concept of AI safety and reliability. Variables and technical concepts should be explained in an accessible way with the support of infographics and illustrations.

4. TITLE: Evidence Supporting AI Safety Initiatives
   BRIEF: Using 

PermissionDeniedError: Error code: 403 - {'error': {'code': '403', 'message': 'Access denied due to Virtual Network/Firewall rules.'}}

### chief writer

In [None]:
chief_writer_sp = """
As the chief_writer, your role involves developing the final blog post based on sections received from a team of writers and an outline provided by the editor.

Guidelines:

Review Drafts: Ensure each draft segment you receive includes necessary citations. At the end of your blog post, list each citation, including the title of the paper, its URL, and the authors.
Seek Clarification: If you encounter any uncertainties or require further information, contact the group chat manager for immediate assistance. Additional help from other participants may be arranged if necessary.
Communicate Effectively: Maintain strong communication, demonstrate precise research skills, and pay meticulous attention to detail. Proactively seek assistance whenever needed.
Incorporate Feedback: Promptly integrate any team feedback into your revisions to ensure the final text is polished and meets our editorial standards.
Formatting Requirements:

Text Identification: Begin your text with 'TXT:' and end with 'END_TXT'. This format is essential for the group chat manager to accurately identify your contributions.
Citation Details: Each citation must include the title of the paper, its URL, and authors. Ensure this list is complete and accurate.
Completion:

Once you have integrated all feedback and ensured that all parts are reviewed and properly referenced, signify the completion of your work by typing "TERMINATE" in the group chat.

"""

prompt = """
As a distinguished expert in enhancing the reliability and safety of AI systems, we invite you to share your valued insights with our AI community. Please author a blog post on the specified TOPIC, utilizing the detailed guidance provided in the CONTENT section below:

TOPIC:
{topic}

CONTENT:
{blog_sections}

Instructions:

Follow the Outline: Adhere strictly to the structure outlined in the 'CONTENT' section. This will help ensure that your blog post is organized, coherent, and systematically covers all critical aspects of the topic.
Ensure Quality: Craft content that is both engaging and well-articulated, maintaining a logical progression of ideas throughout the post. Your writing should reflect the depth of your expertise and the clarity of your thought processes.
Engage the Reader: Employ a compelling writing style that captures the reader’s interest from the start. Your approach should make complex topics accessible and engaging, appealing to both new learners and seasoned professionals in the field.
By adhering to these guidelines, your contribution will effectively convey the core messages while being structured in a way that captivates and educates our audience.
"""
def craft_blog_post(topic, sections, silent=True):
    chief_writer = autogen.AssistantAgent(
        name="chief_writer",  # The name is flexible, but should not contain spaces to work in group chat.
        llm_config={"config_list": config_list, "timeout": 120, "cache_seed": None},  # Disable caching.
        system_message=Section_writer_SP,
        description="The chief writer agent orchestrates the creation of a comprehensive blog post by compiling sections from various writers. They ensure each segment is well-researched, includes proper citations, and integrates feedback. This role emphasizes strong communication, meticulous attention to detail, and proactive problem-solving to meet editorial standards."
    )

    # create a UserProxyAgent instance named "user_proxy"
    writer_user = autogen.UserProxyAgent(
        name="writer_user",
        human_input_mode="NEVER",
        is_termination_msg=termination_msg,
        code_execution_config={
            "work_dir": "section_writing",
            "use_docker": False,
        },
    )

    content_review_specialist = autogen.AssistantAgent(
                                    name="content_review_specialist",
                                    is_termination_msg=termination_msg,
                                    system_message=section_content_reviwer_sp, 
                                    llm_config=llm_config,
                                    description="The content review specialist is a critical thinker who ensures the accuracy and quality of information shared within the group chat. This individual should possess strong analytical skills to review previous messages for errors or misunderstandings and must be able to articulate the correct information effectively. Additionally, if the role involves reviewing Python code, the specialist should also have a solid understanding of Python to provide corrected code when necessary."
                                )

    teachability = Teachability(
                                verbosity=0,  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
                                reset_db=False,
                                path_to_db_dir=db_dir,
                                recall_threshold=recall_threshold,  # Higher numbers allow more (but less relevant) memos to be recalled.
                            )

    # Now add the Teachability capability to the agent.

    teachability.add_to_agent(content_review_specialist)

    # add_func_to_agents([(content_review_specialist, writer_user, "arxiv_retriever"), 
                        # (content_review_specialist, writer_user, "factual_check"),
                        # (content_review_specialist, writer_user, "arxiv_search"),
                        # (content_review_specialist, writer_user, "get_pdf"),
                        # (chief_writer, writer_user, "arxiv_search"),
                        # ])

    def custom_speaker_selection_func(last_speaker: Agent, groupchat: autogen.GroupChat):
        
        messages = groupchat.messages

        if len(messages) <= 1:
            # first, let the researchCoordinator retrieve relevant data populate db
            return chief_writer
        
        return 'auto'

    groupchat = autogen.GroupChat(
        agents=[chief_writer, writer_user, content_review_specialist],
        messages=[],
        speaker_selection_method=custom_speaker_selection_func,
        allow_repeat_speaker=True,
        max_round=max_round,
    )

    manager = autogen.GroupChatManager(
                groupchat=groupchat,
                is_termination_msg=termination_msg,
                llm_config=manager_config,
                code_execution_config={
                    "work_dir": "coding",
                    "use_docker": False,
                },
            )

    chat_hist = writer_user.initiate_chat(manager, silent=silent, message=prompt.format(topic=topic, blog_sections="\n\n".join(sections)))
    # prepare the response\n",
    writer_messages = [mes for mes in chat_hist.chat_history if 'TXT:' in mes['content']]

    return writer_messages[-1]['content'] if writer_messages else "NO response from the writer."

## Orchestrator

In [None]:
initiate_db = False
# Start logging
logging_session_id = autogen.runtime_logging.start(config={"dbname": "logs.db"})
print(f"Logging session ID: {str(logging_session_id)}")

outline = craft_outline(task=task.format(topic=topic), silent=False)   

secs = list(outline.split('TITLE'))[1:]
titles = [sec.split('BRIEF')[0].replace(':', '').strip() for sec in secs]
briefs = [sec.split('BRIEF')[1].replace(':', '').replace("TERMINATE", "").strip() for sec in secs]

# write title and briefs in markdown file
with open(f'{Project_dir}/results-{logging_session_id}.md', 'w') as f:
    for title, brief in zip(titles, briefs):
        f.write(f"Title: {title}\n\nBrief: {brief}\n\n\n\n")

sections = []
with ThreadPoolExecutor() as executor:
        futures = [executor.submit(write_section, title=title, brief=brief) for title, brief in zip(titles, briefs)]
        for future in futures:
            sections.append(future.result())

blog_sections = "\n\n".join(f"{i}. {title} \n\n {section}" for i, (title, section) in enumerate(zip(titles, sections), start=1))

# remove "TXT", "TERMINATE", "END_TXT" from the blog_sections
blog_sections = re.sub(r'TXT:|TERMINATE|END_TXT:|TXT|END_TXT', '', blog_sections)
print(blog_sections)


craft_blog_post(topic=topic, sections=blog_sections, silent=False)

# End logging
autogen.runtime_logging.stop()


Logging session ID: 21e9710c-e5c7-4166-90c4-ee31b66a77a5
[92m    Location = AI_security/0.1.6/memo-db/uid_text_dict.pkl[0m
[33meditor_user[0m (to chat_manager):


As a recognized authority on enhancing the reliability and safety of AI systems, you're invited to illuminate our AI community with your insights through a scientific article titled "Survey on Reliability and Safety Mechanisms in AI Systems and the most recent advancement".

the blog post should be consist of at most 7 sections (at least 3 sections should be focus on discussing the methodologies in technical dept)

Your expertise will guide our audience through the nuances of ensuring AI operates within safe and reliable parameters, with a special focus on Large Language Models (LLMs). Here's how to structure your invaluable contribution:

- **Core Theme:** Anchor your discussion around Large Language Models, highlighting their significance in the current AI landscape and why reliability and safety are paramount.

- **Inn

[33mblog_editor[0m (to chat_manager):

[32m***** Suggested tool call (call_ZCqEgxn3378E19q63pfanyko): arxiv_retriever *****[0m
Arguments: 
{"queries":["Large Language Models safety and reliability","AI systems reliability methods","Advancements in AI safety and reliability"],"n_results":3}
[32m********************************************************************************[0m

--------------------------------------------------------------------------------
[35m
>>>>>>>> EXECUTING FUNCTION arxiv_retriever...[0m
[92m    Location = AI_security/0.1.6/memo-db/uid_text_dict.pkl[0m
[92m    Location = AI_security/0.1.6/memo-db/uid_text_dict.pkl[0m
[92m    Location = AI_security/0.1.6/memo-db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33meditor_user[0m (to chat_manager):

[33meditor_user[0m (to chat_manager):

[32m***** Response from calling tool (call_ZCqEgxn3378E19q63pfanyko) ***

Add of existing embedding ID: 1092
Insert of existing embedding ID: 1092


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mdata_research_writer[0m (to chat_manager):

TXT:

With the growth of the digital era, the importance of Large Language Models (LLMs) in artificial intelligence (AI) is continually rising. The ability of LLMs to understand, interpret, and generate human-like text revolutionizes the interaction with technology, making these systems foundationally important in numerous applications. These range from customer service chatbots to sophisticated autonomous vehicles. However, despite these strides, the safety and reliability of these AI models are also areas of grave concern.

Reliability in LLMs refers to their ability to deliver consistent and accurate outputs over time and across various situations. On the flip side, safety includes protection against external threats, preventing the generation of harmful or biased content, and the

GroupChat select_speaker failed to resolve the next speaker's name. This is because the speaker selection OAI call returned:
TERMINATE


[92m    Location = AI_security/0.1.6/memo-db/uid_text_dict.pkl[0m
[92m    Location = AI_security/0.1.6/memo-db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 1129
Insert of existing embedding ID: 1129


[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mfactual_checker[0m (to factual_checker_user):

FEEDBACK: The text provided does not accurately represent the content of the document 'Safety Cases: How to Justify the Safety of Advanced AI Systems [http://arxiv.org/pdf/2403.10462v2]'. The concept of fault tolerance is not at the center of the paper's discussion on safety cases. Instead, the paper explores the idea of presenting a 'safety case' to justify the safety of AI systems, focusing on four categories of arguments: inability to cause a catastrophe, control measures to prevent catastrophic outcomes, trustworthiness preventing harm, and deference to credible AI advisors. It also outlines a six-step framework for organizing a safety case and discusses the use of goal structuring notation (GSN) to visualize safety cases.

Summary of Safety Cases: How to Justify the Safety of Advanced AI Systems [http://arxiv.org/pdf/2403.10462v2]:

This paper presents a detailed discussion on how to justif

Add of existing embedding ID: 1230
Insert of existing embedding ID: 1230


[33mcontent_review_specialist[0m (to chat_manager):

This updated introduction concisely brings forth key concepts from the recent advancements in the field, highlighting the role of Explainable AI (XAI) in autonomus driving and the significance of interpretability and moderation to ensure safety and reliability in Large Language Models (LLMs). The integration of these concepts undoubtedly elevates the quality and relevance of the blog section, adhering to the overarching theme of AI ethics and operational trust.

The newly incorporated citations from Kuznietsov et al. (2024) provide a robust foundation for the claims made regarding the current research, enriching the narrative with recent findings on the subject. The text adeptly captures the essence of the topic, engaging the reader with the complexities and innovations inherent to the domain of AI safety, and fostering a deeper appreciation of the delicate balance between technological capabilities and ethical imperatives.

The ar

AuthenticationError: Error code: 401 - {'statusCode': 401, 'message': 'Unauthorized. Access token is missing, invalid, audience is incorrect (https://cognitiveservices.azure.com), or have expired.'}

# END

In [None]:
logging_session_id = '21e9710c-e5c7-4166-90c4-ee31b66a77a5'

import sqlite3

def cal_cost(session_id):
    db = sqlite3.connect("logs.db")
    query = f"SELECT sum(cost) FROM chat_completions WHERE session_id = '{session_id}'"
    cursor = db.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()
    return rows[0][0]

# list sessions
def list_sessions_id():
    db = sqlite3.connect("logs.db")
    query = "SELECT DISTINCT session_id FROM chat_completions"
    cursor = db.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()
    return rows

# get the earliest start_time for give session id
def start_time(session_id):
    db = sqlite3.connect("logs.db")
    query = f"SELECT min(start_time) FROM chat_completions WHERE session_id = '{session_id}'"
    cursor = db.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()
    return rows[0][0]


ls_session = list_sessions_id()
for session in ls_session:
    print(f"session: {session[0]}, cost: {cal_cost(session[0])}, start_time: {start_time(session[0])}")


session: caaafc16-56bc-4e30-a8e5-1d8c9d3c8eac, cost: 0.39459, start_time: 2024-04-23 14:30:16.510528


session: 13ff3970-b31b-4d0d-aeb5-0828415a2ccd, cost: 113.96298299999984, start_time: 2024-04-23 14:37:32.405582
session: 17058374-3c75-4183-919e-28fc60e8e4c2, cost: 49.47995999999994, start_time: 2024-04-23 17:57:10.750900
session: e21dfb1d-bcce-4dd0-96e8-6d2a7c5b0969, cost: 340.2099299999988, start_time: 2024-04-23 19:22:56.087433
session: db3016cd-1ec2-41d2-96fc-039ea0bce221, cost: 0.7607700000000001, start_time: 2024-04-26 08:08:22.487667
session: 90a085cd-ba3b-4e64-8a3a-e9fbf72cb291, cost: 3.5153099999999995, start_time: 2024-04-26 08:14:34.232976
session: 1d0630a4-1d7a-47c6-8e57-70e6305ec022, cost: 0.8195400000000002, start_time: 2024-04-27 05:21:36.723417
session: 20600f0f-256a-49f6-85be-4a6859c0672a, cost: 0.016409999999999998, start_time: 2024-04-27 05:38:11.443563
session: 015b2c4e-1248-4068-984b-fcacf7ebe3d3, cost: 0.3124799999999999, start_time: 2024-04-27 05:38:40.337434
session: d638424d-537d-4679-a948-e7f07683f536, cost: 0.69465, start_time: 2024-04-27 05:48:40.819916
ses

In [None]:
arxiv_search("2404.05993")

' \n\n 1. Title: AEGIS: Online Adaptive AI Content Safety Moderation with Ensemble of LLM Experts Authors: Shaona Ghosh, Prasoon Varshney, Erick Galinkin, Christopher Parisien Pulished at 2024-04-09 03:54:28+00:00 URL: http://arxiv.org/pdf/2404.05993v1'

In [None]:
titles

['Introduction The Importance of Reliable and Safe Large Language Models (LLMs)**\n   -',
 'Understanding the Landscape Challenges in Ensuring LLM Security**\n   -',
 'Enhancing Reliability Advanced Methodologies in AI Model Certification**\n   -',
 'Safeguarding AI Techniques for Risk Assessment and Safety in LLMs**\n   -',
 'Breaking New Ground Recent Advancements in Combined Safety and Reliability Measures for LLMs**\n   -',
 'Current Challenges and the Road Ahead for AI Systems**\n   -',
 'Conclusion The Imperative of Safe and Reliable AI Systems in the Digital Era**\n   -']

In [None]:
sections

["TXT:\n\nAs we continue our exploration into the importance of Large Language Models (LLMs) for artificial intelligence (AI), it is crucial to note the advancements in the field that aim at improving not only the performance but also the safety and reliability of these systems. In the realm of autonomous driving, for instance, recent research underscores the critical role of Explainable AI (XAI) in enhancing the safety and trustworthiness of AI decision-making processes. Advances in XAI help to create interpretable, transparent AI systems that allow for better human oversight and understanding of machine behavior (Kuznietsov et al., 2024).\n\nHighlighting the intersection of XAI with the safety and reliability of LLMs, new research categories have emerged. These include interpretable design, where AI algorithms are inherently understandable; interpretable surrogate models that help explain the outputs of more opaque models; interpretable monitoring for runtime safety checks; auxiliary

In [None]:
blog_sections

NameError: name 'blog_sections' is not defined