In [6]:
pip install --upgrade pip


Collecting pip
  Using cached pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Using cached pip-24.3.1-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.2
    Uninstalling pip-24.2:
      Successfully uninstalled pip-24.2
Successfully installed pip-24.3.1


In [4]:
 pip install flaml




[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [2]:
pip install autogen

Collecting autogenNote: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip



  Using cached autogen-0.3.1-py3-none-any.whl.metadata (27 kB)
Collecting diskcache (from autogen)
  Using cached diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting docker (from autogen)
  Using cached docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting flaml (from autogen)
  Using cached FLAML-2.3.2-py3-none-any.whl.metadata (16 kB)
Collecting termcolor (from autogen)
  Using cached termcolor-2.5.0-py3-none-any.whl.metadata (6.1 kB)
Using cached autogen-0.3.1-py3-none-any.whl (350 kB)
Using cached diskcache-5.6.3-py3-none-any.whl (45 kB)
Using cached docker-7.1.0-py3-none-any.whl (147 kB)
Using cached FLAML-2.3.2-py3-none-any.whl (313 kB)
Using cached termcolor-2.5.0-py3-none-any.whl (7.8 kB)
Installing collected packages: termcolor, flaml, diskcache, docker, autogen
Successfully installed autogen-0.3.1 diskcache-5.6.3 docker-7.1.0 flaml-2.3.2 termcolor-2.5.0


In [31]:
import os
import json
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from docx import Document as DocxDocument
from PyPDF2 import PdfReader
from dotenv import load_dotenv
from autogen import AssistantAgent, UserProxyAgent
import autogen

In [32]:

# -------------------- Load Environment Variables --------------------
load_dotenv()

api_version = os.getenv("AZURE_OPENAI_API_VERSION")
endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
api_key = os.getenv("AZURE_OPENAI_API_KEY")
deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
#llm_model = os.getenv("LLM_MODEL")

if not all([api_version, endpoint, api_key, deployment_name]):
    raise ValueError("Some environment variables are missing. Check your .env file.")

In [33]:
# Configure Autogen LLM
llm_config = {
    "timeout": 600,
    "config_list": autogen.config_list_from_json("OAI_CONFIG_LIST"),
    "temperature": 0,
}

In [34]:
# -------------------- Helper Functions --------------------
def extract_text_from_docx(docx_path):
    doc = DocxDocument(docx_path)
    return "\n".join([para.text for para in doc.paragraphs])

def extract_text_from_pdf(pdf_path):
    reader = PdfReader(pdf_path)
    return "\n".join(page.extract_text() for page in reader.pages)

def read_document(file_path):
    if not file_path or not os.path.exists(file_path):
        return None

    if file_path.endswith(".txt"):
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    elif file_path.endswith(".docx"):
        return extract_text_from_docx(file_path)
    elif file_path.endswith(".pdf"):
        return extract_text_from_pdf(file_path)
    else:
        return "Unsupported file format. Please provide a valid TXT, DOCX, or PDF file."

In [45]:
# -------------------- Define Data Collection Agent with Combined Prompts --------------------
data_collection_agent = AssistantAgent(
    name="data_collection_agent",
    system_message=(
        "You are responsible for gathering comprehensive project details. "
        "Your tasks include collecting data for Work Breakdown Structure (WBS), effort estimation, project assumptions, "
        "resource cost estimation, tech stack costs, infrastructure costs, total ownership cost, cost estimation for an Excel artifact, "
        "resource types, and expected user volume. "
        "Ask one question at a time, covering each area systematically. "
        "Ensure you clarify details sufficiently before moving to the next section. "
        "Begin by asking for general project details, then dive into each area step-by-step."
    ),
    llm_config=llm_config,
    human_input_mode="ALWAYS"  # Allows the agent to ask questions interactively, one at a time
)

In [35]:
# -------------------- Define Display-Only Agents for Each Module --------------------
# Each of these agents will process the collected data to show relevant outputs.
# human_input_mode is set to "NEVER" to ensure they only display results based on data collected by data_collection_agent.

wbs_agent = AssistantAgent(
    name="wbs_agent",
    system_message="Generate the Work Breakdown Structure (WBS) and effort estimation based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)

assumptions_agent = AssistantAgent(
    name="assumptions_agent",
    system_message="Display the assumptions for project planning based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)

resource_cost_agent = AssistantAgent(
    name="resource_cost_agent",
    system_message="Calculate resource cost estimation based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)

tech_stack_cost_agent = AssistantAgent(
    name="tech_stack_cost_agent",
    system_message="Estimate tech stack costs based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)

infrastructure_cost_agent = AssistantAgent(
    name="infrastructure_cost_agent",
    system_message="Estimate infrastructure costs based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)

total_ownership_cost_agent = AssistantAgent(
    name="total_ownership_cost_agent",
    system_message="Calculate the total cost of ownership over three years based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)

excel_cost_estimation_agent = AssistantAgent(
    name="excel_cost_estimation_agent",
    system_message="Create a detailed cost estimation artifact in Excel format based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)

resource_types_agent = AssistantAgent(
    name="resource_types_agent",
    system_message="Identify resource types required for the project based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)

user_volume_agent = AssistantAgent(
    name="user_volume_agent",
    system_message="Estimate user volume and deployment requirements based on the collected data.",
    llm_config=llm_config,
    human_input_mode="NEVER"
)


In [51]:
# -------------------- Define User Proxy Agent --------------------
# user_proxy = UserProxyAgent(
#     name="user_proxy",
#     human_input_mode="ALWAYS",
#     llm_config=llm_config,
#     system_message="You are a helpful assistant.",
#     code_execution_config={
#         "last_n_messages": 1,
#         "work_dir": "tasks",
#         "use_docker": False,
#     },
# )

user_proxy = UserProxyAgent(
    name="user_proxy",
    human_input_mode="ALWAYS",
    llm_config=llm_config,
    code_execution_config=False,
    system_message="You are a helpful assistant."
)


In [46]:
# -------------------- Main Logic --------------------
def process_document_or_summary(doc_path=None):
    """Process document or initiate with user-provided summary."""
    # Extract content from the document if provided; otherwise, prompt user for a summary
    content = read_document(doc_path)

    if content:
        print(f"\nExtracted Content from '{doc_path}':\n{content}\n")
    else:
        print("No document provided. Please enter a summary.\n")
        content = input("Enter a summary of the process: ")

    # Define tasks, starting with data_collection_agent to ask questions based on the content
    tasks = [
        {
            "recipient": data_collection_agent,
            "message": f"Please ask questions based on this content to gather detailed information: '{content}'",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": wbs_agent,
            "message": "Provide the Work Breakdown Structure (WBS) and effort estimation based on the data collected.",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": assumptions_agent,
            "message": "Generate project assumptions based on the collected data.",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": resource_cost_agent,
            "message": "Provide resource cost estimation based on the collected data.",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": tech_stack_cost_agent,
            "message": "Estimate tech stack costs based on the collected data.",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": infrastructure_cost_agent,
            "message": "Estimate infrastructure costs based on the collected data.",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": total_ownership_cost_agent,
            "message": "Provide total cost of ownership details based on the collected data.",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": excel_cost_estimation_agent,
            "message": "Generate detailed cost estimation data for an Excel artifact based on the collected data.",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": resource_types_agent,
            "message": "Identify types of resources required for the project based on the collected data.",
            "summary_method": "reflection_with_llm",
        },
        {
            "recipient": user_volume_agent,
            "message": "Provide user volume and deployment scope estimation based on the collected data.",
            "summary_method": "reflection_with_llm",
        },
    ]

    # Initiate chats sequentially with each agent
    chat_results = user_proxy.initiate_chats(tasks)


In [47]:
# -------------------- Execution --------------------
if __name__ == "__main__":
    doc_path = input("Enter the document path (or press Enter to skip): ").strip()
    process_document_or_summary(doc_path)

No document provided. Please enter a summary.

[34m
********************************************************************************[0m
[34mStarting a new chat....[0m
[34m
********************************************************************************[0m
[33muser_proxy[0m (to data_collection_agent):

Please ask questions based on this content to gather detailed information: 'I want to extract content from HIMSS website, all the news articles should be extracted. And summarize the content and find the sentiment analysis of it. and save the data in cosmosdb'

--------------------------------------------------------------------------------
[31m
>>>>>>>> NO HUMAN INPUT RECEIVED.[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mdata_collection_agent[0m (to user_proxy):

Great! Let's start by gathering some general project details. 

1. **Project Overview**: Can you provide a brief description of the project, including its objectives and any specific goals you have in mind for extr

In [40]:
# Retrieve the final response messages from each agent and store them in the final data dictionary
test = wbs_agent.chat_messages
print(test)

defaultdict(<class 'list'>, {<autogen.agentchat.user_proxy_agent.UserProxyAgent object at 0x000002BCB3C6FD10>: [{'content': 'I want to extract the data from a website. once i extract the data, i want to summarize it and find the sentiment analysis of it. website is HIMSS, extract using beautiful soup, need simple summary, and need basic sentiment analysis like positive, negative and neutral', 'role': 'user', 'name': 'user_proxy'}, {'content': "To create a Work Breakdown Structure (WBS) and estimate the effort for your project, I need to gather more details. Here are some questions to clarify your requirements:\n\n1. **Scope of Data Extraction:**\n   - What specific data do you want to extract from the HIMSS website? (e.g., articles, news, reports, etc.)\n   - Are there specific sections or pages of the website you are interested in, or do you want to scrape the entire site?\n\n2. **Data Volume:**\n   - How much data do you expect to extract? (e.g., number of pages, articles, or data po

In [44]:
print(assumptions_agent.chat_messages)

defaultdict(<class 'list'>, {<autogen.agentchat.user_proxy_agent.UserProxyAgent object at 0x000002BCB3C6FD10>: [{'content': 'Please gather project assumptions based on this data.\nContext: \nThe user wants to extract data from the HIMSS website using Beautiful Soup, summarize the data, and perform basic sentiment analysis. A Work Breakdown Structure (WBS) was provided, outlining tasks such as project planning, data extraction, data processing, output generation, testing, and documentation. The total estimated effort for the project is 34 hours, with a timeline of approximately 2 weeks for completion. The user found the provided information satisfactory.', 'role': 'user', 'name': 'user_proxy'}, {'content': "To gather comprehensive assumptions for your project involving data extraction from the HIMSS website, summarization, and sentiment analysis, I will ask a series of detailed questions. Your responses will help clarify the project scope and ensure all relevant assumptions are document

In [None]:


    # Display final collected and processed data
    final_data = {}
    
    # Collect responses from each agent's chat messages
    #final_data["summary"] = content  # Add the initial content summary

    # Retrieve the final response messages from each agent and store them in the final data dictionary
    test = wbs_agent.chat_messages
    print(test)

    test2 = chat_results
    print(test2)

    test3 = chat_results.summary
    print(test3)
    
    # final_data["WBS"] = wbs_agent.chat_messages#[user_proxy][-2]["content"]
    # final_data["Assumptions"] = assumptions_agent.chat_messages[user_proxy][-2]["content"]
    # final_data["Resource Cost"] = resource_cost_agent.chat_messages[user_proxy][-2]["content"]
    # final_data["Tech Stack Cost"] = tech_stack_cost_agent.chat_messages[user_proxy][-2]["content"]
    # final_data["Infrastructure Cost"] = infrastructure_cost_agent.chat_messages[user_proxy][-2]["content"]
    # final_data["Total Ownership Cost"] = total_ownership_cost_agent.chat_messages[user_proxy][-2]["content"]
    # final_data["Excel Cost Estimation"] = excel_cost_estimation_agent.chat_messages[user_proxy][-2]["content"]
    # final_data["Resource Types"] = resource_types_agent.chat_messages[user_proxy][-2]["content"]
    # final_data["User Volume"] = user_volume_agent.chat_messages[user_proxy][-2]["content"]

    # Print the final data for verification
    print("\n*************************Final Project Estimation*****************************")

    print(json.dumps(final_data, indent=2))



In [None]:
# Display the results of each agent
    for result in chat_results:
        agent_name = result["recipient"].name
        agent_response = result["response"]
        print(f"\n{agent_name} Results:\n{agent_response}\n")

    return chat_results