#### Automated Project: Planning, Estimation, and Allocation

In [6]:
# Warning control
import warnings
warnings.filterwarnings('ignore')

# Load environment variables
from helper import load_env
load_env()

import os
import yaml
from crewai import Agent, Task, Crew

#### Set OpenAI Model

In [2]:
os.environ['OPENAI_MODEL_NAME'] = 'gpt-4o-mini'

#### Loading Tasks and Agents YAML files

In [11]:
# Define file paths for YAML configurations
files = {
    'agents': '../../configs/agents.yaml',
    'tasks': '../../configs/tasks.yaml'
}

# Load configurations from YAML files
configs = {}
for config_type, file_path in files.items():
    with open(file_path, 'r') as file:
        configs[config_type] = yaml.safe_load(file)

# Assign loaded configurations to specific variables
agents_config = configs['agents']
tasks_config = configs['tasks']

#### Create Pydantic Models for Structured Output

In [12]:
from typing import List
from pydantic import BaseModel, Field

class TaskEstimate(BaseModel):
    task_name: str = Field(..., description="Name of the task")
    estimated_time_hours: float = Field(..., description="Estimated time to complete the task in hours")
    required_resources: List[str] = Field(..., description="List of resources required to complete the task")

class Milestone(BaseModel):
    milestone_name: str = Field(..., description="Name of the milestone")
    tasks: List[str] = Field(..., description="List of task IDs associated with this milestone")

class ProjectPlan(BaseModel):
    tasks: List[TaskEstimate] = Field(..., description="List of tasks with their estimates")
    milestones: List[Milestone] = Field(..., description="List of project milestones")

## Create Crew, Agents and Tasks

In [13]:
# Creating Agents
project_planning_agent = Agent(
  config=agents_config['project_planning_agent']
)

estimation_agent = Agent(
  config=agents_config['estimation_agent']
)

resource_allocation_agent = Agent(
  config=agents_config['resource_allocation_agent']
)

# Creating Tasks
task_breakdown = Task(
  config=tasks_config['task_breakdown'],
  agent=project_planning_agent
)

time_resource_estimation = Task(
  config=tasks_config['time_resource_estimation'],
  agent=estimation_agent
)

resource_allocation = Task(
  config=tasks_config['resource_allocation'],
  agent=resource_allocation_agent,
  output_pydantic=ProjectPlan # This is the structured output we want
)

# Creating Crew
crew = Crew(
  agents=[
    project_planning_agent,
    estimation_agent,
    resource_allocation_agent
  ],
  tasks=[
    task_breakdown,
    time_resource_estimation,
    resource_allocation
  ],
  verbose=True
)

d:\Anaconda\envs\openai-4o\lib\site-packages\pydantic\_internal\_config.py:291: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.9/migration/


## Crew's Inputs

In [14]:
from IPython.display import display, Markdown

project = 'RAG (Retrieval-Augmented Generation) LLM'
industry = 'AI/Technology'
project_objectives = 'Build a RAG-based system to generate contextually accurate and detailed responses by augmenting a language model with external information retrieved in real-time.'
team_members = """
- Jack White (Project Manager)
- Olivia Stone (AI Researcher)
- Ethan Clark (ML Engineer)
- Maria Green (Data Engineer)
- Sofia Patel (NLP Engineer)
- Liam Allen (QA Engineer)
"""
project_requirements = """
- Integrate a large pre-trained language model (e.g., GPT-4, T5) with a retrieval mechanism (e.g., Elasticsearch, FAISS, Pinecone)
- Implement an efficient data retrieval system to fetch contextually relevant documents or knowledge
- Use the retrieved data to augment the model's generation process for more accurate and relevant responses
- Implement document ranking and relevance scoring for retrieved data
- Ensure the system can process large volumes of unstructured text from diverse sources (web, APIs, databases)
- Fine-tune the LLM on specific use-cases, e.g., customer support, technical assistance, etc.
- Evaluate and optimize the retrieval strategy to minimize latency while ensuring accurate and diverse responses
- Test the LLM’s response quality, ensuring the output is context-aware and doesn't rely solely on training data
- Implement an interface to manage the retrieval and generation pipeline for easy user interaction
- Ensure scalability to handle multiple simultaneous user requests and queries
"""

# Format the dictionary as Markdown for a better display in Jupyter Lab
formatted_output = f"""
**Project Type:** {project}

**Project Objectives:** {project_objectives}

**Industry:** {industry}

**Team Members:**
{team_members}

**Project Requirements:**
{project_requirements}
"""

# Display the formatted output as Markdown
display(Markdown(formatted_output))


**Project Type:** RAG (Retrieval-Augmented Generation) LLM

**Project Objectives:** Build a RAG-based system to generate contextually accurate and detailed responses by augmenting a language model with external information retrieved in real-time.

**Industry:** AI/Technology

**Team Members:**

- Jack White (Project Manager)
- Olivia Stone (AI Researcher)
- Ethan Clark (ML Engineer)
- Maria Green (Data Engineer)
- Sofia Patel (NLP Engineer)
- Liam Allen (QA Engineer)


**Project Requirements:**

- Integrate a large pre-trained language model (e.g., GPT-4, T5) with a retrieval mechanism (e.g., Elasticsearch, FAISS, Pinecone)
- Implement an efficient data retrieval system to fetch contextually relevant documents or knowledge
- Use the retrieved data to augment the model's generation process for more accurate and relevant responses
- Implement document ranking and relevance scoring for retrieved data
- Ensure the system can process large volumes of unstructured text from diverse sources (web, APIs, databases)
- Fine-tune the LLM on specific use-cases, e.g., customer support, technical assistance, etc.
- Evaluate and optimize the retrieval strategy to minimize latency while ensuring accurate and diverse responses
- Test the LLM’s response quality, ensuring the output is context-aware and doesn't rely solely on training data
- Implement an interface to manage the retrieval and generation pipeline for easy user interaction
- Ensure scalability to handle multiple simultaneous user requests and queries



## Kicking off the crew

In [15]:
# The given Python dictionary
inputs = {
  'project_type': project,
  'project_objectives': project_objectives,
  'industry': industry,
  'team_members': team_members,
  'project_requirements': project_requirements
}

# Run the crew
result = crew.kickoff(
  inputs=inputs
)

[1m[95m# Agent:[00m [1m[92mThe Ultimate Project Planner[00m
[95m## Task:[00m [92mLook at the RAG (Retrieval-Augmented Generation) LLM project requirements and break them down into smaller tasks. Define what each task involves, set achievable timelines, and make sure to consider all dependencies:

- Integrate a large pre-trained language model (e.g., GPT-4, T5) with a retrieval mechanism (e.g., Elasticsearch, FAISS, Pinecone)
- Implement an efficient data retrieval system to fetch contextually relevant documents or knowledge
- Use the retrieved data to augment the model's generation process for more accurate and relevant responses
- Implement document ranking and relevance scoring for retrieved data
- Ensure the system can process large volumes of unstructured text from diverse sources (web, APIs, databases)
- Fine-tune the LLM on specific use-cases, e.g., customer support, technical assistance, etc.
- Evaluate and optimize the retrieval strategy to minimize latency while ensur

## Usage Metrics and Costs

Let’s see how much it would cost each time if this crew runs at scale.

In [16]:
import pandas as pd

costs = 0.150 * (crew.usage_metrics.prompt_tokens + crew.usage_metrics.completion_tokens) / 1_000_000
print(f"Total costs: ${costs:.4f}")

# Convert UsageMetrics instance to a DataFrame
df_usage_metrics = pd.DataFrame([crew.usage_metrics.dict()])
df_usage_metrics

Total costs: $0.0013


d:\Anaconda\envs\openai-4o\lib\site-packages\pydantic\main.py:1114: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.9/migration/


Unnamed: 0,total_tokens,prompt_tokens,completion_tokens,successful_requests
0,8525,4772,3753,4


## Result

In [17]:
result.pydantic.dict()

d:\Anaconda\envs\openai-4o\lib\site-packages\pydantic\main.py:1114: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.9/migration/


{'tasks': [{'task_name': 'Integrate a Large Pre-trained Language Model',
   'estimated_time_hours': 14.0,
   'required_resources': ['Ethan Clark (ML Engineer)']},
  {'task_name': 'Implement an Efficient Data Retrieval System',
   'estimated_time_hours': 14.0,
   'required_resources': ['Maria Green (Data Engineer)']},
  {'task_name': 'Augment Model Generation Process',
   'estimated_time_hours': 14.0,
   'required_resources': ['Olivia Stone (AI Researcher)',
    'Ethan Clark (ML Engineer)']},
  {'task_name': 'Implement Document Ranking and Relevance Scoring',
   'estimated_time_hours': 14.0,
   'required_resources': ['Sofia Patel (NLP Engineer)']},
  {'task_name': 'Process Unstructured Text from Diverse Sources',
   'estimated_time_hours': 152.0,
   'required_resources': ['Maria Green (Data Engineer)']},
  {'task_name': 'Fine-tune the LLM on Specific Use-Cases',
   'estimated_time_hours': 84.0,
   'required_resources': ['Olivia Stone (AI Researcher)',
    'Ethan Clark (ML Engineer)']},


## Inspect further

In [18]:
tasks = result.pydantic.dict()['tasks']
df_tasks = pd.DataFrame(tasks)

# Display the DataFrame as an HTML table
df_tasks.style.set_table_attributes('border="1"').set_caption("Task Details").set_table_styles(
    [{'selector': 'th, td', 'props': [('font-size', '120%')]}]
)

Unnamed: 0,task_name,estimated_time_hours,required_resources
0,Integrate a Large Pre-trained Language Model,14.0,['Ethan Clark (ML Engineer)']
1,Implement an Efficient Data Retrieval System,14.0,['Maria Green (Data Engineer)']
2,Augment Model Generation Process,14.0,"['Olivia Stone (AI Researcher)', 'Ethan Clark (ML Engineer)']"
3,Implement Document Ranking and Relevance Scoring,14.0,['Sofia Patel (NLP Engineer)']
4,Process Unstructured Text from Diverse Sources,152.0,['Maria Green (Data Engineer)']
5,Fine-tune the LLM on Specific Use-Cases,84.0,"['Olivia Stone (AI Researcher)', 'Ethan Clark (ML Engineer)']"
6,Evaluate and Optimize Retrieval Strategy,84.0,"['Sofia Patel (NLP Engineer)', 'Ethan Clark (ML Engineer)']"
7,Test the LLM’s Response Quality,84.0,['Liam Allen (QA Engineer)']
8,Implement User Interface,56.0,"['Ethan Clark (ML Engineer)', 'Maria Green (Data Engineer)']"
9,Ensure Scalability,56.0,"['Jack White (Project Manager)', 'Ethan Clark (ML Engineer)']"


### Inspecting Milestones

In [19]:
milestones = result.pydantic.dict()['milestones']
df_milestones = pd.DataFrame(milestones)

# Display the DataFrame as an HTML table
df_milestones.style.set_table_attributes('border="1"').set_caption("Task Details").set_table_styles(
    [{'selector': 'th, td', 'props': [('font-size', '120%')]}]
)

d:\Anaconda\envs\openai-4o\lib\site-packages\pydantic\main.py:1114: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.9/migration/


Unnamed: 0,milestone_name,tasks
0,Model Integration and Data Retrieval Setup,"['1', '2']"
1,Model Generation and Document Scoring,"['3', '4']"
2,Data Processing and Model Fine-tuning,"['5', '6']"
3,Evaluation and User Interface Implementation,"['7', '8', '9']"
4,Project Scalability and Conclusion,['10']


In [21]:
res = result.raw

### Extact Gann Chart 

In [25]:
import re
# Split the response into lines, and extract the relevant part (table content)
lines = res.strip().split('\n')

# Skip the first few lines to get to the table data
table_lines = lines[2:]  # Skip "Agent" and "Final Answer" header lines

# Clean up each line to extract task details
tasks_data = []
for line in table_lines:
    if line.startswith("|"):  # Only process lines starting with a table
        # Remove the initial `|` and split by `|` symbol
        columns = [col.strip() for col in line.split('|') if col.strip()]
        
        if len(columns) == 5:  # Ensure the line is correctly formatted
            tasks_data.append({
                "Task": columns[0],
                "Assigned Team Member": columns[1],
                "Start Date": columns[2],
                "End Date": columns[3],
                "Reason for Assignment": columns[4]
            })

# Create a pandas DataFrame from the extracted data
df = pd.DataFrame(tasks_data)
df["Task"] = df["Task"].apply(lambda x: re.sub(r'^\d+\.\s*', '', x))
# Display the DataFrame
print(df)

                                                Task  \
0                                               Task   
1   ------------------------------------------------   
2       Integrate a Large Pre-trained Language Model   
3       Implement an Efficient Data Retrieval System   
4                   Augment Model Generation Process   
5   Implement Document Ranking and Relevance Scoring   
6     Process Unstructured Text from Diverse Sources   
7            Fine-tune the LLM on Specific Use-Cases   
8           Evaluate and Optimize Retrieval Strategy   
9                    Test the LLM’s Response Quality   
10                          Implement User Interface   
11                                Ensure Scalability   

                                 Assigned Team Member      Start Date  \
0                                      Team Member(s)      Start Date   
1    ------------------------------------------------  --------------   
2                           Ethan Clark (ML Engineer

In [28]:
df = df.drop([1])

In [32]:
df = df.drop([0]).reset_index(drop=True)

In [33]:
df

Unnamed: 0,Task,Assigned Team Member,Start Date,End Date,Reason for Assignment
0,Integrate a Large Pre-trained Language Model,Ethan Clark (ML Engineer),2023-10-02,2023-10-16,Ethan is skilled in machine learning and has p...
1,Implement an Efficient Data Retrieval System,Maria Green (Data Engineer),2023-10-02,2023-10-16,Maria has a strong background in data engineer...
2,Augment Model Generation Process,"Olivia Stone (AI Researcher), Ethan Clark (ML ...",2023-10-16,2023-10-30,Both Olivia's and Ethan's combined skills will...
3,Implement Document Ranking and Relevance Scoring,Sofia Patel (NLP Engineer),2023-10-16,2023-10-30,"Sofia specializes in NLP tasks, making her wel..."
4,Process Unstructured Text from Diverse Sources,Maria Green (Data Engineer),2023-10-30,2023-11-20,Maria’s expertise in data handling is crucial ...
5,Fine-tune the LLM on Specific Use-Cases,"Olivia Stone (AI Researcher), Ethan Clark (ML ...",2023-10-30,2023-11-13,Both team members can draw from their previous...
6,Evaluate and Optimize Retrieval Strategy,"Sofia Patel (NLP Engineer), Ethan Clark (ML En...",2023-11-13,2023-11-20,Their collaboration ensures comprehensive opti...
7,Test the LLM’s Response Quality,Liam Allen (QA Engineer),2023-11-13,2023-11-20,Liam's testing expertise makes him the right c...
8,Implement User Interface,"Ethan Clark (ML Engineer), Maria Green (Data E...",2023-11-20,2023-11-27,The combined efforts of Ethan and Maria will e...
9,Ensure Scalability,"Jack White (Project Manager), Ethan Clark (ML ...",2023-11-27,2023-12-04,Jack's project management skills paired with E...
