In [1]:
# Warning control
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Local LLM: the agents use an Ollama-backed model instead of a hosted API
from langchain_community.llms import Ollama

OLLAMA_MODEL = "llama3"  # model name handed to the Ollama wrapper
llm = Ollama(model=OLLAMA_MODEL)

In [8]:
import os

from crewai_tools import SerperDevTool, ScrapeWebsiteTool, FileReadTool, DirectoryReadTool
from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file and load its variables into the process
# environment; without this call the API keys may not be visible to os.getenv.
load_dotenv(find_dotenv())

# Credentials this notebook expects to find in the environment / .env file.
REQUIRED_ENV_VARS = ('OPENAI_API_KEY', 'SERPER_API_KEY', 'KAGGLE_USERNAME', 'KAGGLE_KEY')

# Fail fast with a readable message. Assigning os.getenv(name) back into
# os.environ adds nothing when the key is set and raises an opaque
# "TypeError: str expected, not NoneType" when it is missing, so the check is
# done explicitly instead.
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your .env file or export them before running this cell."
    )

# Tools handed to the agents
DATA_DIR = './data'  # directory exposed to the directory-listing tool

read_tool = FileReadTool()                         # reads the contents of a file
serper_search_tool = SerperDevTool()               # web search
docs_tool = DirectoryReadTool(directory=DATA_DIR)  # lists the file paths under DATA_DIR


In [10]:
from crewai import Agent, Task, Crew
import pandas as pd
import numpy as np
# Preprocessing agent: runs on the local LLM and is allowed to execute code
data_preprocessing_agent = Agent(
    llm=llm,
    role="Data Preprocessing Specialist",
    goal="Load, clean, and perform initial transformations on datasets",
    backstory="Expert in data cleaning and preprocessing using pandas and numpy",
    allow_code_execution=True,
    # directory listing, file reading and web search, in that order
    tools=[docs_tool, read_tool, serper_search_tool],
)

# Preprocessing task for the agent above.
# The {path} placeholder in the description is filled from the `inputs`
# passed to `kickoff`.
# `output_file` stores the task's final answer on disk, so the expected output
# must be the cleaned CSV itself. Asking only for a status sentence such as
# "File saved successfully" would leave that sentence in the .csv instead of data.
data_analysis_task = Task(
    description="""
    Load the downloaded dataset from {path} using Pandas data loader, clean it, handle missing values, and perform initial data transformations using pandas, if necessary.
    """,
    expected_output=(
        "The complete cleaned dataset as raw CSV text: a header row followed by "
        "one line per record, with no commentary and no Markdown code fences"
    ),
    agent=data_preprocessing_agent,
    output_file='processed_data/preprocessed_housing.csv',
    # presumably creates processed_data/ when it does not exist yet —
    # TODO confirm against the installed crewAI version
    create_directory=True
)

# Assemble the crew: a single agent working on a single task
analysis_crew = Crew(
    agents=[data_preprocessing_agent],
    tasks=[data_analysis_task],
    # Boolean instead of the integer level 2: newer crewAI releases declare
    # `verbose` as a bool and reject other integers.
    # NOTE(review): older releases appear to map True to the most detailed
    # log level — confirm on the installed version.
    verbose=True
)

# Run the crew; `inputs` supplies the value for the {path} placeholder
# used in the task description
kickoff_inputs = {'path': 'data/Housing.csv'}
result = analysis_crew.kickoff(inputs=kickoff_inputs)

print(result)



[1m[95m [2024-07-17 22:42:40][DEBUG]: == Working Agent: Data Preprocessing Specialist[00m
[1m[95m [2024-07-17 22:42:40][INFO]: == Starting Task: 
    Load the downloaded dataset from data/Housing.csv using Pandas data loader, clean it, handle missing values, and perform initial data transformations using pandas, if necessary.
    [00m
[95m 

File paths: 
-./data/Housing.csv
[00m
[95m 

Fail to read the file "./data/Housing.csv. Error: [Errno 2] No such file or directory: '"./data/Housing.csv'
[00m
[95m 

Fail to read the file "'./data/Housing.csv'. Error: [Errno 2] No such file or directory: '"\'./data/Housing.csv\''
[00m
[95m 


Search results: Title: pandas.read_csv FileNotFoundError: File b'\xe2\x80\xaa<etc ...
Link: https://stackoverflow.com/questions/42165649/pandas-read-csv-filenotfounderror-file-b-xe2-x80-xaaetc-despite-correct-pat
Snippet: I'm trying to load a .csv file using the pd.read_csv() function when I get an error despite the file path being correct and usi

KeyboardInterrupt: 