In [1]:
import json
import os
from datetime import datetime, timedelta
from pprint import pprint
from urllib.parse import urlparse

import autogen
import groq
import requests
from bs4 import BeautifulSoup

In [2]:
from configs import model_config

In [3]:
# Pull both API credentials from the environment; each is None when its variable is unset.
GROQ_API_KEY, SERPER_API_KEY = (
    os.environ.get(var_name) for var_name in ('GROQ_API_KEY', 'SERPER_API_KEY')
)

In [4]:
# One endpoint entry: the Groq-hosted model, its base URL and the API key read earlier.
config_list = [
    dict(
        model=model_config.MODEL_NAME_GROQ,
        base_url=model_config.BASE_URL_GROQ,
        api_key=GROQ_API_KEY,
    )
]

# LLM settings handed to every agent defined below.
llama_config = dict(config_list=config_list)

### Initialize agents

In [5]:
# Today's date as DD.MM.YYYY; it is interpolated into the Agent 3 prompt header.
today_date = f"{datetime.today():%d.%m.%Y}"

In [6]:
# Echo the value to check the DD.MM.YYYY formatting.
today_date

'20.05.2024'

The date is formatted as `DD.MM.YYYY`, the style needed for the report header that Agent 3 produces.

In [7]:
# Agent 1: extracts the topic from the user's question (see the examples in its prompt).
parse_question = autogen.ConversableAgent(
    name="Agent 1: Retrieve topic from the question",
    llm_config=llama_config,
    human_input_mode="NEVER",  # never ask a human for input
    system_message="""
    You retrieve topic of the question from the question provided by the user.

    Examples:
    Question: What are the latest news about fashion?
    Topic: Fashion

    Question: Tell me about latest cryptocurrency trends.
    Topic: Cryptocurrency
    """,
)

# Agent 2: asked to query the Serper API and hand back the stories in the
# {"top_stories": [...]} layout spelled out in its prompt.
search_sources = autogen.ConversableAgent(
    name="Agent 2: Make a request with topic to the api to retieve top stories on the said topic.",
    llm_config=llama_config,
    human_input_mode="NEVER",  # never ask a human for input
    system_message="""
    You make request to the Serper API to retrieve results.

    Return output in a format:

    {
        "top_stories": [
            {
                "title": <title>,
                "source": <link>,
                "text": <text>
            },
            {
                "title": <title>,
                "source": <link>,
                "text": <text>
            },
            {
                "title": <title>,
                "source": <link>,
                "text": <text>
            }
            ...
        ]
    }

    when the task is done.
    """,
)

# Agent 3: summarises the scraped stories, groups them into categories and
# renders the final report; today_date is baked into the prompt header.
categorize_stories = autogen.ConversableAgent(
    name="Agent 3: Analyze top stories, divide them into categories and format text into needed style",
    llm_config=llama_config,
    human_input_mode="NEVER",  # never ask a human for input
    system_message=f"""
    You analyze top stories texts, combine text with the title and summarize them into 1-2 sentences (these 1-2 sentences should contain the most representative information about the article and sound like a news title that summarizes the article)
    Then you divide them into 2-4 categories by theme
    If there is no titles in the category, do not include it in the returned result text
    Do not duplicate sources
    
    Return output in the format (where topic is the topic that was questioned by the user), respond only with formatted text, no additional text:

    *Topic news {today_date}:*

    Category Name:
    - Title
    - Title
    - ...

    Category Name:
    - Title
    - Title
    - ...

    Category Name:
    - ...

    Sources:
    - www.source.com
    - ...

    when the task is done
    """,
)

# categorize_stories = autogen.ConversableAgent(
#     name="Agent 3: Analyze top stories, divide them into categories and format text into needed style",
#     system_message=f"""
#     You analyze top stories titles and divide them into 2-4 categories by theme, then format into needed style

#     You can Rephrase titles for them to sound more complete if necessary.
    
#     Needed formatting style for reference:

#     *Topic news {today_date}:*

#     Category Name:
#     - Title
#     - Title
#     - ...

#     Category Name:
#     - Title
#     - Title
#     - ...

#     Category Name:
#     - ...

#     Sources:
#     - www.source.com
#     - www.source.com
#     - ...
#     """,
#     llm_config=llama_config,
#     human_input_mode="NEVER",
# )


In [8]:
# Stand-in for the human user: starts each chat and executes registered tools.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    human_input_mode="NEVER",  # never ask a human for input
    code_execution_config={"use_docker": False},
    system_message="A proxy for the user for initiating chat with provided question.",
    # A chat ends once a message's content (ignoring trailing whitespace) ends with "TERMINATE".
    is_termination_msg=lambda msg: msg.get("content", "") and msg.get("content", "").rstrip().endswith("TERMINATE"),
)

### Initialize and register tools

In [9]:
def scrape_data(url, timeout=10):
    """Download a web page and return the text of its <body>.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests.get` may block indefinitely on a stalled host.

    Returns:
        The body text, with each fragment stripped and the fragments joined by
        single spaces. An empty string is returned when the server answers
        with an error status or the document has no <body> element.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, invalid URL).
    """
    response = requests.get(url, timeout=timeout)

    # An error page (403, 404, ...) is not article text; report "nothing scraped".
    if not response.ok:
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')

    # `soup.body` is None for empty or non-HTML responses.
    if soup.body is None:
        return ""

    return soup.body.get_text(separator=' ', strip=True)

In [10]:
def request_serper_api(topic: str) -> str:
    """Fetch the top news stories for a topic from Serper and scrape each one.

    Args:
        topic: Subject to search for; the query sent to Serper is "<topic> news".

    Returns:
        A JSON string (indent 4) shaped as
        {"top_stories": [{"title": ..., "source": ..., "text": ...}, ...]}.
        "source" is the scheme and host of the story link, and "text" is up to
        1000 characters taken from around the middle of the scraped page.
        The list is empty when the response has no "topStories" entry.

    Raises:
        requests.HTTPError: If Serper answers with an error status.
        requests.RequestException: If the request to Serper itself fails.
    """
    # Kept out of the signature because this function is registered as an LLM
    # tool and should expose `topic` as its only argument.
    request_timeout_s = 10
    excerpt_len = 1000

    response = requests.post(
        "https://google.serper.dev/search",
        headers={
            'X-API-KEY': SERPER_API_KEY,
            'Content-Type': 'application/json',
        },
        data=json.dumps({"q": f"{topic} news"}),
        timeout=request_timeout_s,
    )
    # Surface a bad key or quota error directly instead of parsing the error body.
    response.raise_for_status()

    # "topStories" may be absent from the response; treat that as no stories.
    stories = response.json().get("topStories") or []

    retrieved_lst = []
    for entry in stories:
        link = entry['link']

        try:
            article_text = scrape_data(link)
        except requests.RequestException:
            # One unreachable article should not discard the remaining stories.
            article_text = ""

        # Take the excerpt from around the midpoint of the page text.
        start = max(0, len(article_text) // 2 - excerpt_len // 2)

        # Scheme + host works for any domain; splitting on ".com" mangled
        # links such as https://www.bbc.co.uk/... into "<whole url>.com".
        parsed_link = urlparse(link)

        retrieved_lst.append({
            "title": entry['title'],
            "source": f"{parsed_link.scheme}://{parsed_link.netloc}",
            "text": article_text[start:start + excerpt_len],
        })

    return json.dumps({"top_stories": retrieved_lst}, indent=4)

In [11]:
# Advertise the tool to Agent 2's LLM so it can propose calls to it.
search_sources.register_for_llm(name="request_serper_api", description="A tool for request to Serper API")(request_serper_api)
# Let the user proxy execute calls to the tool under the same name.
user_proxy.register_for_execution(name="request_serper_api")(request_serper_api)

<function __main__.request_serper_api(topic: str) -> str>

### Initialize chat

In [12]:
# Sample user questions, keyed 0..4 by their position in the tuple.
test_questions = dict(enumerate((
    "Tell me about the latest news in fashion industry",
    "What are the trends in cryptocurrency now?",
    "What are the latest news in the music industry?",
    "What is happening in the art world today?",
    "Tell me about latest updates on the renewable energy sources",
)))

In [13]:
# Choose which sample question to run through the pipeline (key into test_questions).
user_question = test_questions[2]

In [14]:
# Run the three chats in order: topic extraction, story retrieval, formatting.
# Every chat is summarised by its last message; only the retrieval chat gets two turns.
chat_results = user_proxy.initiate_chats(
    [
        {
            "recipient": agent,
            "message": opening_message,
            "max_turns": turns,
            "summary_method": "last_msg",
        }
        for agent, opening_message, turns in (
            (parse_question, user_question, 1),
            (
                search_sources,
                "This is the topic that you should retrieve information about from the Serper API",
                2,
            ),
            (
                categorize_stories,
                "These are the top stories on asked topic that you shoud categorize and format",
                1,
            ),
        )
    ]
)

[34m
********************************************************************************[0m
[34mStarting a new chat....[0m
[34m
********************************************************************************[0m
[33muser_proxy[0m (to Agent 1: Retrieve topic from the question):

What are the latest news in the music industry?

--------------------------------------------------------------------------------
[33mAgent 1: Retrieve topic from the question[0m (to user_proxy):

Topic: Music

--------------------------------------------------------------------------------
[34m
********************************************************************************[0m
[34mStarting a new chat....[0m
[34m
********************************************************************************[0m
[33muser_proxy[0m (to Agent 2: Make a request with topic to the api to retieve top stories on the said topic.):

This is the topic that you should retrieve information about from the Serper API
Context: 
T

In [25]:
# Show the content of the last message of the last chat (the categorize_stories chat).
print(chat_results[-1].chat_history[-1]['content'])

Here is the formatted output:

*Music news 20.05.2024:*

New Releases:
- "Taylor Swift's New Album Breaks Records with 1.5 Million Copies Sold in One Day" 
- "Kendrick Lamar Drops Surprise Album, Fans Go Wild"

Music Festivals:
- "Coachella 2024 Lineup Announced, Headliners Include Billie Eilish and The Weeknd" 
- "Bonnaroo 2024 Tickets Sell Out in Record Time, Fans Disappointed"

Music Industry:
- "Spotify Reveals Top 10 Most Streamed Artists of 2023, Drake Takes the Top Spot" 
- "Music Streaming Services Face Backlash Over Low Royalties for Artists"

Sources:
- www.billboard.com
- www.rollingstone.com
- www.nme.com
- www.theguardian.com
