In [7]:
import os

import autogen
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json

# Location of the OpenAI config list (a JSON file). The default is the path this
# notebook was authored with; set the OAI_CONFIG_LIST_PATH environment variable
# to point at another file without editing this cell.
OAI_CONFIG_PATH = os.environ.get(
    "OAI_CONFIG_LIST_PATH",
    "/home/huydinh/study/autogen_demo/OAI_CONFIG_LIST.json",
)

# Both lists are loaded from the same file; filter_dict selects entries by their
# "model" value (see the autogen config_list_from_json documentation).
config_list_gpt3 = autogen.config_list_from_json(
    env_or_file=OAI_CONFIG_PATH,
    filter_dict={
        "model": {
            "gpt-3.5-turbo",
        }
    },
)
config_list_gpt4 = autogen.config_list_from_json(
    env_or_file=OAI_CONFIG_PATH,
    filter_dict={
        "model": {
            "gpt-4",
        }
    },
)

# Shared LLM settings handed to every LLM-backed agent and to the group chat manager.
gpt4_config = {
    "cache_seed": 42,  # change the cache_seed for different trials
    "temperature": 0,
    "config_list": config_list_gpt4,
    "timeout": 120,  # presumably seconds -- confirm against the autogen llm_config docs
}





In [8]:
# Admin: stands in for the human who discusses and approves the plan.
# Code execution is switched off for this agent.
user_proxy = UserProxyAgent(
    code_execution_config=False,
    system_message=(
        "A human admin. Interact with the planner to discuss the plan. "
        "Plan execution needs to be approved by this admin."
    ),
    name="Admin",
)

# Planner: proposes the plan and revises it on admin/critic feedback, stating
# which steps belong to the engineer and which to the scientist.
planner = AssistantAgent(
    llm_config=gpt4_config,
    name="Planner",
    system_message="""Planner. Suggest a plan. Revise the plan based on feedback from admin and critic, until admin approval.
The plan may involve an engineer who can write code and a scientist who doesn't write code.
Explain the plan first. Be clear which step is performed by an engineer, and which step is performed by a scientist.
""",
)


# Engineer: writes complete python/shell code for the approved plan and is told
# to re-issue full, corrected code whenever the executor reports an error.
engineer = AssistantAgent(
    name="Engineer",
    system_message="""Engineer. You follow an approved plan. You write python/shell code to solve tasks. Wrap the code in a code block that specifies the script type. The user can't modify your code. So do not suggest incomplete code which requires others to modify. Don't use a code block if it's not intended to be executed by the executor.
Don't include multiple code blocks in one response. Do not ask others to copy and paste the result. Check the execution result returned by the executor.
If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.
""",
    llm_config=gpt4_config,
)

# Executor: never asks for human input; it executes the code blocks found in the
# recent messages inside the "paper" working directory and reports the result.
executor = UserProxyAgent(
    name="Executor",
    system_message="Executor. Execute the code written by the engineer and report the result.",
    human_input_mode="NEVER",
    code_execution_config={
        "last_n_messages": 3,
        "work_dir": "paper",
        # State the execution mode explicitly. The recorded run warned on every
        # execution that use_docker was unspecified and that the native fallback
        # is "subject to change"; False pins the behaviour that run actually had.
        # NOTE(review): this executes LLM-generated code directly on the host.
        # Prefer True (needs the `docker` package and a running Docker daemon)
        # whenever isolation is available.
        "use_docker": False,
    },
)

# Scientist: categorizes papers from their printed abstracts; writes no code.
scientist = AssistantAgent(
    name="Scientist",
    system_message=(
        "Scientist. You follow an approved plan. "
        "You are able to categorize papers after seeing their abstracts printed. "
        "You don't write code."
    ),
    llm_config=gpt4_config,
)

# Critic: reviews the other agents' plan, claims and code, and checks that the
# plan asks for verifiable information such as source URLs.
critic = AssistantAgent(
    llm_config=gpt4_config,
    name="Critic",
    system_message=(
        "Critic. Double check plan, claims, code from other agents and provide feedback. "
        "Check whether the plan includes adding verifiable info such as source URL."
    ),
)


In [9]:
# All six agents share one group chat that starts with an empty message history
# and is capped at 50 rounds.
groupchat = autogen.GroupChat(
    agents=[
        user_proxy,
        engineer,
        scientist,
        planner,
        executor,
        critic,
    ],
    messages=[],
    max_round=50,
)

# The manager is the chat endpoint the Admin talks to; it gets the same GPT-4
# settings as the LLM-backed agents.
manager = autogen.GroupChatManager(
    llm_config=gpt4_config,
    groupchat=groupchat,
)

In [10]:

# Start the run: the Admin sends the task to the group chat manager.
# NOTE(review): the recorded output of this cell ends in a 429 "Request too large"
# error (41991 tokens requested against a 40000 TPM limit) right after the
# executor printed a very wide markdown table. Consider asking for a more compact
# result before re-running.
user_proxy.initiate_chat(
    recipient=manager,
    message="""
find papers on LLM applications from arxiv in the last week, create a markdown table of different domains.
""",
)

[33mAdmin[0m (to chat_manager):


find papers on LLM applications from arxiv in the last week, create a markdown table of different domains.


--------------------------------------------------------------------------------
[33mPlanner[0m (to chat_manager):

Plan:

1. Engineer: Write a script to scrape Arxiv for papers on LLM (Language Model) applications published in the last week. The script should extract the title, authors, abstract, and link of each paper.

2. Scientist: Review the scraped data to identify the different domains in which LLM is applied. This could be based on the content of the abstracts or the context of the papers.

3. Engineer: Write a script to convert the reviewed data into a markdown table. The table should include columns for the title, authors, domain, and link of each paper.

4. Scientist: Review the markdown table to ensure that the domains are correctly identified and the information is accurately represented.

5. Engineer: Make any necessary revisio

execute_code was called without specifying a value for use_docker. Since the python docker package is not available, code will be run natively. Note: this fallback behavior is subject to change


[33mExecutor[0m (to chat_manager):

exitcode: 1 (execution failed)
Code output: 
Traceback (most recent call last):
  File "", line 1, in <module>
    import feedparser
ModuleNotFoundError: No module named 'feedparser'


--------------------------------------------------------------------------------
[33mEngineer[0m (to chat_manager):

I apologize for the confusion. It seems that the 'feedparser' module is not installed in your environment. Unfortunately, I can't install packages in your environment. 

You can install it using pip, which is a package manager for Python. Here's how you can do it:

```python
!pip install feedparser
```

After installing 'feedparser', you should be able to run the script without any issues.

--------------------------------------------------------------------------------
[31m
>>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...[0m


execute_code was called without specifying a value for use_docker. Since the python docker package is not available, code will be run natively. Note: this fallback behavior is subject to change


[33mExecutor[0m (to chat_manager):

exitcode: 1 (execution failed)
Code output: 
  File "", line 1
    !pip install feedparser
    ^
SyntaxError: invalid syntax


--------------------------------------------------------------------------------
[31m
>>>>>>>> NO HUMAN INPUT RECEIVED.[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mAdmin[0m (to chat_manager):



--------------------------------------------------------------------------------
[33mEngineer[0m (to chat_manager):

I apologize for the confusion. The command to install the 'feedparser' module should be run in your terminal or command prompt, not in your Python environment. Here's how you can do it:

```bash
pip install feedparser
```

After installing 'feedparser', you should be able to run the script without any issues.

--------------------------------------------------------------------------------
[31m
>>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is bash)...[0m


execute_code was called without specifying a value for use_docker. Since the python docker package is not available, code will be run natively. Note: this fallback behavior is subject to change


[33mExecutor[0m (to chat_manager):

exitcode: 0 (execution succeeded)
Code output: 
Collecting feedparser
  Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 81.3/81.3 KB 2.5 MB/s eta 0:00:00
Collecting sgmllib3k
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py): started
  Building wheel for sgmllib3k (setup.py): finished with status 'done'
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6067 sha256=cebb5008e9e067fe652adaa99eebb4f0671c612ecf171d535d6e24ef07b32532
  Stored in directory: /home/huydinh/.cache/pip/wheels/f0/69/93/a47e9d621be168e9e33c7ce60524393c0b92ae83cf6c6e89c5
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser
Successfully installed feedparser-6.0.11 sgmllib3k-1.0.0


---

execute_code was called without specifying a value for use_docker. Since the python docker package is not available, code will be run natively. Note: this fallback behavior is subject to change


[33mExecutor[0m (to chat_manager):

exitcode: 1 (execution failed)
Code output: 
Traceback (most recent call last):
  File "/home/huydinh/study/venv/lib/python3.10/site-packages/pandas/compat/_optional.py", line 132, in import_optional_dependency
    module = importlib.import_module(name)
  File "/usr/lib/python3.10/importlib/__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1004, in _find_and_load_unlocked
ModuleNotFoundError: No module named 'tabulate'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "", line 37, in <module>
    print(df.to_markdown(index=False))
  File "/home/huydinh/study/venv/lib/python3.10/site-packages/pandas/core/frame.py", line 2836, in to_markdown
    tabulate = im

execute_code was called without specifying a value for use_docker. Since the python docker package is not available, code will be run natively. Note: this fallback behavior is subject to change


[33mExecutor[0m (to chat_manager):

exitcode: 0 (execution succeeded)
Code output: 
Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0


--------------------------------------------------------------------------------
[33mEngineer[0m (to chat_manager):

Great! Now that the 'tabulate' module is installed, you should be able to run the script to scrape Arxiv for papers on LLM applications published in the last week. Here's the script again:

```python
import feedparser
from datetime import datetime, timedelta
import pandas as pd

# Define the search parameters
search_query = 'all:LLM'
start_index = 0
max_results = 100

# Define the date range (last week)
end_date = datetime.now()
start_date = end_date - timedelta(days=7)
date_range = f'[{start_date.strftime("%Y%m%d%H%M%S")} TO {end_date.strftime("%Y%m%d%H%M%S")}]'

# Construct the query URL
base_url = 'http://export.arxiv.org/api/quer

execute_code was called without specifying a value for use_docker. Since the python docker package is not available, code will be run natively. Note: this fallback behavior is subject to change


[33mExecutor[0m (to chat_manager):

exitcode: 0 (execution succeeded)
Code output: 
| Title                                                                    | Authors                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              

RateLimitError: Error code: 429 - {'error': {'message': 'Request too large for gpt-4 in organization org-iCuLxTYnATT7RetwXYBuifrl on tokens per min (TPM): Limit 40000, Requested 41991. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}