In [3]:
# Warning control
import warnings
import gc

warnings.filterwarnings('ignore')
gc.collect()

0

In [20]:
%pwd

'/Users/bhargobdeka/Desktop/Projects/GenAI/multi-agent-apps/data-science-project'

In [4]:
from crewai import Crew, Task
from textwrap import dedent

from data_agents import DataAgents


agents = DataAgents()

# Agents
data_collection_agent = agents.data_collection_agent() # data collection agent
data_preprocessing_agent = agents.data_processing_agent() # data processing agent


# Tasks
data_collection_task = Task(
  description="""
  Search for three appropriate datasets on the topic of {topic} and download one using the Kaggle Dataset Downloader.
  You can search for datasets using refined queries. Note that the Kaggle Dataset Downloader only requires one input, i.e., the URL.
  """,
  expected_output = 'Provide the full description of the downloaded dataset.',
  agent=data_collection_agent,
  # human_input=True,
)

data_preprocessing_task = Task(
  description="""
  Load the file, handle missing values, remove duplicates, and convert categorical variables to numerical values to make the dataset model-ready.
  """,
  expected_output='Processed dataset saved successfully',
  agent=data_preprocessing_agent,
  # output_file='processed_data/preprocessed_housing.csv',
  create_directory=True
)

In [None]:
from crewai.process import Process
crew = Crew(
    agents=[data_collection_agent, data_preprocessing_agent],
    tasks=[data_collection_task, data_preprocessing_task],
    process=Process.sequential,
    verbose=2
)

result = crew.kickoff(inputs={'topic': 'Housing'})

In [11]:
## Model Training

# agent
model_training_agent = agents.model_training_agent() # model training agent

# task
target_variable= input(
    dedent('price'))

model_training_task = Task(
    description=dedent(f"""
    Load the processed data from the directory. Train a Random Forest model and save the trained model.
    Note that TrainingModelTool._run() has one positional argument which is file_path and the {target_variable}
    """),
    expected_output="Model trained successfully",
    output_file='reports/training_report.txt',
    agent=model_training_agent,
    create_directory=True
)

crew = Crew(
    agents=[model_training_agent],
    tasks=[model_training_task],
    verbose=2
)

result = crew.kickoff()





[1m[95m [2024-07-20 14:54:12][DEBUG]: == Working Agent: Random Forest Model Trainer[00m
[1m[95m [2024-07-20 14:54:12][INFO]: == Starting Task: 
Load the processed data from the directory. Train a Random Forest model and save the trained model.
Note that TrainingModelTool._run() has one positional argument which is file_path and the 
[00m
[95m 

File paths: 
-processed_data/.csv
[00m
[95m 

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
13300000,7420,4,2,3,1,0,0,0,1,2,1,0
12250000,8960,4,4,4,1,0,0,0,1,3,0,0
12250000,9960,3,2,2,1,0,1,0,0,2,1,1
12215000,7500,4,2,2,1,0,1,0,1,3,1,0
11410000,7420,4,1,2,1,1,1,0,1,2,0,0
10850000,7500,3,3,1,1,0,1,0,1,2,1,1
10150000,8580,4,3,4,1,0,0,0,1,2,1,1
10150000,16200,5,3,2,1,0,0,0,0,0,0,2
9870000,8100,4,1,2,1,1,1,0,1,2,1,0
9800000,5750,3,2,4,1,1,0,0,1,1,1,2
9800000,13200,3,1,2,1,0,1,0,1,2,1,0
9681000,6000,4,3,2,1,1,1,1,0,2,0,1
9310000,6550,4,2,2,1,0,0,0,1,1,1,1
9

In [13]:

# model evaluation

# agent
model_evaluation_agent = agents.model_evaluation_agent()

# task
target_variable= input(
    dedent('price'))

model_evaluation_task = Task(
    description=dedent(f"""Evaluate the saved Random Forest model using the test data and generate a detailed evaluation report.
                       Note that the ModelEvaluationTool._run() has one positional argument which is the {target_variable}.
    """),
    agent=model_evaluation_agent,
    expected_output="Model evaluated successfully and the evaluation report is generated",
    output_file='reports/evaluation_report.txt',
    create_directory=True
)

crew = Crew(
    agents=[model_evaluation_agent],
    tasks=[model_evaluation_task],
    verbose=2
)

result = crew.kickoff()



[1m[95m [2024-07-20 15:21:37][DEBUG]: == Working Agent: Model Evaluator[00m
[1m[95m [2024-07-20 15:21:37][INFO]: == Starting Task: Evaluate the saved Random Forest model using the test data and generate a detailed evaluation report.
                       Note that the ModelEvaluationTool._run() has one positional argument which is the .
[00m
[95m 

Model Evaluation Report:

Root Mean Squared Error (RMSE): $1400765.84
Mean Absolute Error (MAE): $1026699.69
R-squared Score: 0.6118

Residual Statistics:
Mean Residual: $209037.30
Std Deviation of Residuals: $1391478.26
Min Residual: $-3065650.00
Max Residual: $5664330.00

Top 5 Important Features:
- area: 0.4704
- bathrooms: 0.1527
- airconditioning: 0.0623
- parking: 0.0566
- stories: 0.0548

[00m
[1m[92m [2024-07-20 15:21:40][DEBUG]: == [Model Evaluator] Task output: Model evaluated successfully and the evaluation report is generated. Here is the complete content of the evaluation report:

Model Evaluation Report:

Root Mean S

In [13]:
## FastAPI code generation

# agent
code_generating_agent = agents.code_generation_agent()

# task

# Create a task for the agent
code_generation_task = Task(
    description="""
    Analyze the saved Random Forest model and generate a main.py file for a FastAPI instance. 
    Return only the Python code in between ```python and ``` and comment any other generated text.
    """,
    agent=code_generating_agent,
    expected_output="Python code only for the main.py file.",
    output_file='app/main.py',
    create_directory=True
)

# Create a crew with the agent
generation_crew = Crew(
    agents=[code_generating_agent],
    tasks=[code_generation_task],
    verbose=2
)

# Run the generation crew
generation_result = generation_crew.kickoff()




[1m[95m [2024-07-20 16:37:56][DEBUG]: == Working Agent: FastAPI Code Generator[00m
[1m[95m [2024-07-20 16:37:56][INFO]: == Starting Task: 
    Analyze the saved Random Forest model and generate a main.py file for a FastAPI instance. 
    Return only the Python code in between ```python and ``` and comment any other generated text.
    [00m
[91m 

I encountered an error while trying to use the tool. This was the error: ModelEvaluationTool._run() got an unexpected keyword argument 'model_path'.
 Tool Model Evaluator accepts these inputs: Model Evaluator() - Evaluates the saved Random Forest model using the test data and generates an evaluation report
[00m
[1m[92m [2024-07-20 16:38:03][DEBUG]: == [FastAPI Code Generator] Task output: ```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import pickle
import numpy as np

# Load the saved Random Forest model
model_path = "random_forest_model.pkl"
with open(model_path, "rb") as model_file:
    model = 