<a href="https://colab.research.google.com/github/alexfazio/crewAI-quickstart/blob/main/crewai_sequential_CSVSearchTool_quickstart.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# crewai-sequential-CSVSearchTool

📂 Github repo: [alexfazio/crewAI-quickstart](https://github.com/alexfazio/crewAI-quickstart)

Simplified and tested template of a **sequential** CrewAI crew performing **RAG searches over a CSV file** with `CSVSearchTool`.


In [1]:
# @title 👨‍🦯 Run this cell to hide all warnings (optional)
# Warning control
import warnings
warnings.filterwarnings('ignore')

# To avoid the restart session warning in Colab, exclude the PIL and
# pydevd_plugins packages from being imported. This is fine because
# we didn't execute the code in the kernel session afterward.

# import sys
# sys.modules.pop('PIL', None)

In [2]:
# @title ⬇️ Install project dependencies by running this cell
# @markdown  🔄 Restart the session and rerun the cell **if Colab requires it**.

# %pip install crewai_tools  --quiet
# print("---")
# %pip show crewAI crewai_tools

In [3]:
import re

PROJECT_ID = !(gcloud config get-value core/project)
PROJECT_ID = PROJECT_ID[0]

SVC_ACC = !(gcloud config get-value core/account)
SVC_ACC = SVC_ACC[0]

PROJECT_NUMBER=str(re.search(r'\d+', SVC_ACC).group())

LOCATION="asia-southeast1"

FOLDER_NAME="."

In [4]:
# @title 📦 Import libraries and create the output directory by running this cell

from crewai import Agent, Task, Crew, Process
from crewai_tools import tool
from crewai_tools.tools import FileReadTool
import os, requests, re, mdpdf, subprocess

from langchain_community.vectorstores import Chroma
from langchain_community.tools import DuckDuckGoSearchRun

import os
from getpass import getpass
from crewai import Agent, Task, Crew, Process
from textwrap import dedent

# Make sure the 'output-files' directory exists. exist_ok=True keeps the cell
# idempotent on re-runs and avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs('output-files', exist_ok=True)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/jupyter/.config/sagemaker/config.yaml


In [5]:
from crewai import Agent, Task, Crew, Process
from crewai_tools import tool
# from langchain_vertexai import ChatGemini
from crewai_tools.tools import FileReadTool
import os, requests, re, mdpdf, subprocess
from vertexai.preview.vision_models import ImageGenerationModel
from langchain_google_vertexai import ChatVertexAI
import uuid, os

# Initialize the Gemini LLM served through Vertex AI.
llm = ChatVertexAI(
    model_name='gemini-1.5-pro-002',  # Replace with your desired Gemini model, e.g. "gemini-1.0-pro-002"
    # Pass the project ID read from gcloud directly. The previous
    # os.getenv(PROJECT_ID) looked up an environment variable *named after*
    # the project ID and therefore evaluated to None.
    project=PROJECT_ID,  # Your Vertex AI project ID
    location="us-central1",  # Your Vertex AI location
)


In [6]:
# @title ⬇️ Download Sample .csv Dataset

import os
import requests

# Folder (relative to the working directory) that receives the downloaded file
folder_path = 'dataset'

# Create the folder if it doesn't exist; exist_ok=True makes re-runs a no-op
os.makedirs(folder_path, exist_ok=True)

# Sample .csv data from IMDB.com
url = 'https://phidata-public.s3.amazonaws.com/demo_data/IMDB-Movie-Data.csv'
# A timeout keeps a stalled connection from hanging the notebook forever
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the CSV
response.raise_for_status()

# Full path of the saved file, including the folder
csv_file_path = os.path.join(folder_path, 'IMDB-Movie-Data.csv')

with open(csv_file_path, 'wb') as file:
    file.write(response.content)

print(f"CSV file downloaded and saved to: {csv_file_path}")

CSV file downloaded and saved to: dataset/IMDB-Movie-Data.csv


In [9]:
# @title 🧾 Instantiate `CSVSearchTool` with a `.csv` File

# @markdown By default, the tool uses OpenAI for both embeddings and summarization.

# @markdown To customize the model, you can use a config dictionary. See how [here](https://docs.crewai.com/tools/CSVSearchTool/#installation).

from crewai import Agent, Task, Crew, Process
from crewai_tools import CSVSearchTool

# Instantiate tools

# This tool is used to perform a RAG (Retrieval-Augmented Generation) search within a CSV file's content.

# Initialize the tool with a specific CSV file. This setup allows the agent to only search the given CSV file.

# Tool bound to the downloaded CSV file; both the LLM and the embedder are
# switched to Vertex AI through the `config` mapping.
csv_search_tool = CSVSearchTool(
    csv_file_path,
    config={
        "llm": {
            "provider": "vertexai",  # or google, openai, anthropic, llama2, ...
            "config": {
                "model": "gemini-pro",
                "temperature": 0.5,
                # "top_p": 1,
                # "stream": True,
            },
        },
        "embedder": {
            "provider": "vertexai",
            "config": {
                "model": "textembedding-gecko",
                # "task_type": "retrieval_document",
                # "title": "Embeddings",
            },
        },
    },
)

# OR

# Initialize the tool without a specific CSV file. The agent will need to provide the CSV path at runtime.

# tool = CSVSearchTool()

Inserting batches in chromadb: 100%|██████████| 21/21 [00:21<00:00,  1.04s/it]


## Define Agents
In CrewAI, agents are autonomous entities designed to perform specific roles and achieve particular goals. Each agent uses a language model (LLM) and may have specialized tools to help execute tasks.

In [11]:
# @title 🕵🏻 Define your agents

# from langchain_groq import ChatGroq
# ↑ Uncomment to use Groq's API
# from langchain_anthropic import ChatAnthropic
# ↑ Uncomment to use Anthropic's API

# First crew member. The role / goal / backstory texts are template
# placeholders — replace them with your own wording.
agent_1 = Agent(
    # Job title: the function this agent fulfils inside the crew
    role=dedent(
        """
        Defines the agent's function within the crew. It determines the kind of tasks the agent is best suited for.
        """
    ),
    # Objective the agent is trying to achieve
    goal=dedent(
        """
        The individual objective that the agent aims to achieve. It guides the agent's decision-making process.
        """
    ),
    # Background that helps the agent understand the context of its task
    backstory=dedent(
        """
        Provides context to the agent's role and goal, enriching the interaction and collaboration dynamics.
        """
    ),
    llm=llm,  # the `llm` object created earlier in this notebook
    tools=[csv_search_tool],
    max_iter=3,  # iterations allowed before the agent is forced to give its best answer
    max_rpm=100,  # maximum number of requests per minute to the language model
    allow_delegation=False,
    verbose=True,  # run the agent in verbose mode
    # Alternative models: uncomment the matching import at the top of this
    # cell and use one of these lines in place of `llm=llm`.
    # llm=ChatGroq(temperature=0.8, model_name="mixtral-8x7b-32768"),  # Groq's API + "mixtral-8x7b-32768"
    # llm=ChatGroq(temperature=0.6, model_name="llama3-70b-8192"),  # Groq's API + "llama3-70b-8192"
    # llm=ChatAnthropic(model='claude-3-opus-20240229', temperature=0.8),  # Anthropic's API + "claude-3-opus-20240229"
)

# Second crew member. The role / goal / backstory texts are template
# placeholders — replace them with your own wording.
agent_2 = Agent(
    # Job title: the function this agent fulfils inside the crew
    role=dedent(
        """
        Defines the agent's function within the crew. It determines the kind of tasks the agent is best suited for.
        """
    ),
    # Objective the agent is trying to achieve
    goal=dedent(
        """
        The individual objective that the agent aims to achieve. It guides the agent's decision-making process.
        """
    ),
    # Background that helps the agent understand the context of its task
    backstory=dedent(
        """
        Provides context to the agent's role and goal, enriching the interaction and collaboration dynamics.
        """
    ),
    llm=llm,  # the `llm` object created earlier in this notebook
    tools=[csv_search_tool],
    max_iter=3,  # iterations allowed before the agent is forced to give its best answer
    max_rpm=100,  # maximum number of requests per minute to the language model
    allow_delegation=False,
    verbose=True,  # run the agent in verbose mode
    # Alternative models: uncomment the matching import at the top of this
    # cell and use one of these lines in place of `llm=llm`.
    # llm=ChatGroq(temperature=0.8, model_name="mixtral-8x7b-32768"),  # Groq's API + "mixtral-8x7b-32768"
    # llm=ChatGroq(temperature=0.6, model_name="llama3-70b-8192"),  # Groq's API + "llama3-70b-8192"
    # llm=ChatAnthropic(model='claude-3-opus-20240229', temperature=0.8),  # Anthropic's API + "claude-3-opus-20240229"
)

# Third crew member. The role / goal / backstory texts are template
# placeholders — replace them with your own wording.
agent_3 = Agent(
    # Job title: the function this agent fulfils inside the crew
    role=dedent(
        """
        Defines the agent's function within the crew. It determines the kind of tasks the agent is best suited for.
        """
    ),
    # Objective the agent is trying to achieve
    goal=dedent(
        """
        The individual objective that the agent aims to achieve. It guides the agent's decision-making process.
        """
    ),
    # Background that helps the agent understand the context of its task
    backstory=dedent(
        """
        Provides context to the agent's role and goal, enriching the interaction and collaboration dynamics.
        """
    ),
    llm=llm,  # the `llm` object created earlier in this notebook
    tools=[csv_search_tool],
    max_iter=3,  # iterations allowed before the agent is forced to give its best answer
    max_rpm=100,  # maximum number of requests per minute to the language model
    allow_delegation=False,
    verbose=True,  # run the agent in verbose mode
    # Alternative models: uncomment the matching import at the top of this
    # cell and use one of these lines in place of `llm=llm`.
    # llm=ChatGroq(temperature=0.8, model_name="mixtral-8x7b-32768"),  # Groq's API + "mixtral-8x7b-32768"
    # llm=ChatGroq(temperature=0.6, model_name="llama3-70b-8192"),  # Groq's API + "llama3-70b-8192"
    # llm=ChatAnthropic(model='claude-3-opus-20240229', temperature=0.8),  # Anthropic's API + "claude-3-opus-20240229"
)

NameError: name 'ChatOpenAI' is not defined

## Define Tasks
Tasks in CrewAI are specific assignments given to agents, detailing the actions they need to perform to achieve a particular goal. Tasks can have dependencies and context, and can be executed asynchronously to ensure an efficient workflow.

In [None]:
# @title 📝 Define your tasks

import datetime

# First task of the sequence, assigned to agent_1. The description and
# expected output are template placeholders; the "{var_N}" markers match the
# keys of the `inputs` mapping handed to `crew.kickoff`.
task_1 = Task(
    agent=agent_1,
    description=dedent(
        """
        A clear, concise statement of what the task entails.
        ---
        VARIABLE 1: "{var_1}"
        VARIABLE 2: "{var_2}"
        VARIABLE 3: "{var_3}"
        Add more variables if needed...
        """
    ),
    expected_output=dedent(
        """
        A detailed description of what the task's completion looks like.
        """
    ),
    # Markdown file that receives this task's output; the timestamp in the
    # name is taken when this cell runs.
    output_file=f'output-files/agent_1-output_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}.md',
)

# Second task of the sequence, assigned to agent_2. The description and
# expected output are template placeholders; the "{var_N}" markers match the
# keys of the `inputs` mapping handed to `crew.kickoff`.
task_2 = Task(
    agent=agent_2,
    # task_1's output is supplied to this task as context
    context=[task_1],
    description=dedent(
        """
        A clear, concise statement of what the task entails.
        ---
        VARIABLE 1: "{var_1}"
        VARIABLE 2: "{var_2}"
        VARIABLE 3: "{var_3}"
        Add more variables if needed...
        """
    ),
    expected_output=dedent(
        """
        A detailed description of what the task's completion looks like.
        """
    ),
    # Markdown file that receives this task's output; the timestamp in the
    # name is taken when this cell runs.
    output_file=f'output-files/agent_2-output_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}.md',
)

# Third task of the sequence, assigned to agent_3. The description and
# expected output are template placeholders; the "{var_N}" markers match the
# keys of the `inputs` mapping handed to `crew.kickoff`.
task_3 = Task(
    agent=agent_3,
    # task_2's output is supplied to this task as context
    context=[task_2],
    description=dedent(
        """
        A clear, concise statement of what the task entails.
        ---
        VARIABLE 1: "{var_1}"
        VARIABLE 2: "{var_2}"
        VARIABLE 3: "{var_3}"
        Add more variables if needed...
        """
    ),
    expected_output=dedent(
        """
        A detailed description of what the task's completion looks like.
        """
    ),
    # Markdown file that receives this task's output; the timestamp in the
    # name is taken when this cell runs.
    output_file=f'output-files/agent_3-output_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}.md',
)

In [None]:
# @title ⌨️ Define any variables you have and input them
# Ask the user for the three values that are passed to the crew as
# "var_1", "var_2" and "var_3".
print("## Welcome to the YOUR_CREW_NAME")
print('-------------------------------------------')
# No trailing comma after input(...): `x = input(...),` binds a 1-tuple
# such as ('text',), which would then be rendered into the task prompts
# as "('text',)" instead of the text the user typed.
# Each prompt names its variable so the three questions can be told apart.
var_1 = input("What is the value of var_1 to pass to your crew?\n")
var_2 = input("What is the value of var_2 to pass to your crew?\n")
var_3 = input("What is the value of var_3 to pass to your crew?\n")
print("-------------------------------")

In [None]:
# @title 🚀 Get your crew to work!
def main():
    """Build the three-agent crew, run it sequentially and return its result.

    Uses the module-level agents (agent_1..agent_3), tasks (task_1..task_3)
    and user inputs (var_1..var_3). The result is printed below a short
    banner and then returned to the caller.
    """
    sequential_crew = Crew(
        agents=[agent_1, agent_2, agent_3],
        tasks=[task_1, task_2, task_3],
        # process flow the crew follows (e.g., sequential, hierarchical)
        process=Process.sequential,
        # logging verbosity during execution; can be True or False
        verbose=True,
    )

    # Values handed to the crew under the keys the task descriptions use
    crew_inputs = {"var_1": var_1, "var_2": var_2, "var_3": var_3}

    outcome = sequential_crew.kickoff(inputs=crew_inputs)

    banner_lines = (
        "\n\n########################",
        "## Here is your custom crew run result:",
        "########################\n",
    )
    for banner_line in banner_lines:
        print(banner_line)
    print(outcome)

    return outcome

# Run the crew when this cell is executed directly; `result` is kept at
# module level so that later cells can read it.
if __name__ == "__main__":
    result = main()

In [None]:
# @title 🖥️ Display the results of your crew as markdown
from IPython.display import display, Markdown

# Text of the crew result; adjust the attribute name if your result object
# exposes its text under a different one.
markdown_text = result.raw

# Render that text as Markdown in the cell output
display(Markdown(data=markdown_text))