In [1]:
# Warning control
import warnings
warnings.filterwarnings('ignore')

In [6]:
from crewai import Agent, Task, Crew
from crewai_tools import BaseTool
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
# from xgboost import XGBRegressor

class TrainingModelTool(BaseTool):
    """CrewAI tool that trains a random-forest regressor on a housing CSV.

    The tool reads the CSV at ``data_path``, uses the ``price`` column as the
    target and every other column as a feature, holds out 20% of the rows for
    evaluation, and returns a plain-text report containing the RMSE, the
    R-squared score and the five most important features.
    """

    # The estimator fitted in `_run` is scikit-learn's RandomForestRegressor,
    # so the name and description exposed to the LLM must say so.
    name: str = "Random Forest Model Trainer"
    description: str = (
        "Trains a Random Forest model for house price prediction. "
        "Takes data_path, the path to a CSV file that contains a 'price' column."
    )

    def _run(self, data_path: str) -> str:
        """Train and evaluate the model, then return a text report.

        Args:
            data_path: Location of the CSV file, passed to ``pandas.read_csv``.
                The file must contain a ``price`` column (the target).

        Returns:
            A multi-line report with the hold-out RMSE, the R-squared score and
            the top five feature importances.

        Raises:
            KeyError: If the CSV has no ``price`` column.
        """
        # Load and prepare data
        data = pd.read_csv(data_path)
        if 'price' not in data.columns:
            raise KeyError(
                f"'price' column not found in {data_path!r}; "
                f"available columns: {list(data.columns)}"
            )

        # One-hot encode non-numeric feature columns so that the scaler and the
        # model receive purely numeric input; numeric columns pass through
        # unchanged, so all-numeric datasets behave exactly as before.
        X = pd.get_dummies(data.drop('price', axis=1), dtype=float)
        y = data['price']

        # Split the data (fixed seed keeps the hold-out set reproducible)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Scale the features; the scaler is fitted on the training split only
        # so that no statistics from the test split leak into training.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train the random-forest model
        model = RandomForestRegressor(random_state=42)
        model.fit(X_train_scaled, y_train)

        # Evaluate on the hold-out split
        y_pred = model.predict(X_test_scaled)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        r2 = r2_score(y_test, y_pred)

        # Feature importance, highest first, limited to the top five
        top_features = (
            pd.Series(model.feature_importances_, index=X.columns)
            .sort_values(ascending=False)
            .head(5)
        )

        report_lines = [
            "Random Forest Model Training Report:",
            "",
            f"Root Mean Squared Error: ${rmse:.2f}",
            f"R-squared Score: {r2:.4f}",
            "",
            "Top 5 Important Features:",
        ]
        report_lines.extend(
            f"- {feature}: {importance:.4f}"
            for feature, importance in top_features.items()
        )

        # Trailing newline keeps the report layout identical to a line-by-line build.
        return "\n".join(report_lines) + "\n"

# Instantiate the tool once; this instance is what gets passed to the agent's `tools` list.
model_training_tool = TrainingModelTool()

In [7]:
import os
from langchain_openai import ChatOpenAI

# Local LLM alternative (needs an Ollama import and a running Ollama server):
# llm = Ollama(model="llama3")

# The OpenAI client picks up OPENAI_API_KEY from the environment. Fail fast
# with a clear message when it is missing: assigning os.getenv(...) back into
# os.environ would raise an opaque TypeError for an unset variable and is a
# no-op for a set one.
if not os.environ.get("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; export it before running this notebook."
    )

## LLM model
llm = ChatOpenAI(model="gpt-4o", temperature=0)

In [None]:
# Location of the housing CSV handed to the training tool. The tool requires a
# `data_path` argument, so the task has to tell the agent which file to use.
# TODO: point this at the actual dataset; "housing.csv" is a placeholder.
DATA_PATH = "housing.csv"

# Create the Random Forest model training agent
model_training_agent = Agent(
    role="Random Forest Model Trainer",
    goal="Train a Random Forest model to predict house prices accurately",
    backstory="You are an expert in machine learning, specializing in Random Forest for regression tasks.",
    tools=[model_training_tool],
    llm=llm
)

# Create a task for the agent. The description names the same model family as
# the agent and states the dataset path so the tool call is unambiguous.
model_training_task = Task(
    description=(
        f"Train a Random Forest model using the housing dataset at '{DATA_PATH}' "
        "and provide a performance report"
    ),
    expected_output=(
        "A model performance report containing the RMSE, the R-squared score "
        "and the top 5 most important features"
    ),
    agent=model_training_agent
)

# Create a crew with the agent
crew = Crew(
    agents=[model_training_agent],
    tasks=[model_training_task]
)

# Run the crew
result = crew.kickoff()
print(result)