In [1]:
from config import agents_config, tasks_config

In [2]:
# Sanity check: list the agent definitions loaded from config.
print(agents_config.keys())

dict_keys(['schema_analyzer', 'query_builder', 'data_analyst'])


In [3]:
# Inspect which fields the schema-analysis task definition provides.
tasks_config['schema_analysis_task'].keys()

dict_keys(['description', 'expected_output', 'agent', 'context'])

In [4]:
import os
from dotenv import load_dotenv
# Load the .env file first so the lookups below can see its variables.
load_dotenv()

# Each key is None when its environment variable is not set.
groq_key = os.environ.get('GROQ_API_KEY')
openai_api_key = os.environ.get('OPENAI_API_KEY')

In [5]:
from langchain_groq import ChatGroq

# Groq-backed chat model configured for tool use.
# NOTE(review): tool_llm is not referenced by any agent in the cells shown
# here — confirm whether it is still needed.
tool_llm = ChatGroq(
    model='groq/llama3-groq-70b-8192-tool-use-preview',
    temperature=0.3,
    max_tokens=8000,
    api_key=groq_key,
)

In [6]:
from langchain_openai import ChatOpenAI

# OpenAI chat model shared by all three agents below; temperature 0.0
# requests the least-random sampling.
llm = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0.0,
    max_tokens=16384,
    api_key=openai_api_key,
)

In [7]:
# Question the crew is asked to answer.
user_query = "What are sales for Mac n Cheese sales last 7 months?"

# Connection target. The URI comes from the environment and is None when
# the `mongodb_uri` variable is not set.
database_name = "appetit_db"
mongodb_uri = os.environ.get('mongodb_uri')

# Collections offered to the agents, passed on unchanged as a crew input.
collection_names = [
    "categories", "delivery_fees", "locations",
    "merchant_logs", "merchant_settings", "orders",
    "product_categories", "products", "stores",
    "store_tables", "users",
]

In [9]:
from crewai import Agent, Task, Crew
from tools.mongodb_tools import analyze_mongodb_schema
from tools.python_executor import execute_python_code
from tools.items_finder import filter_items
from tools.schema_analysis import analyze_local_schema

In [10]:
# python_tool = PythonREPL()
# 
# code_tool = Tool(
#   name="Python REPL.",
#   func=python_tool.run,
#   description="Simulates a standalone Python REPL.",
# )

In [11]:
# Schema-analysis agent: role, goal and backstory come from agents_config;
# it is given the local-schema and item-filter tools.
schema_analyzer = Agent(
    role=agents_config['schema_analyzer']['role'],
    goal=agents_config['schema_analyzer']['goal'],
    backstory=agents_config['schema_analyzer']['backstory'],
    tools=[analyze_local_schema, filter_items],
    llm=llm,
    verbose=True,
    # Spelling fixed: this was `allow_deligation`, which is not an Agent
    # field, so the delegation setting was not being applied.
    allow_delegation=True,
    allow_code_execution=True,
)

# First task in the pipeline; description and expected output are templates
# from tasks_config.
schema_analysis_task = Task(
    description=tasks_config['schema_analysis_task']['description'],
    expected_output=tasks_config['schema_analysis_task']['expected_output'],
    agent=schema_analyzer,
)

In [12]:
# Query-building agent: role, goal and backstory come from agents_config;
# it is given no tools.
query_builder = Agent(
    role=agents_config['query_builder']['role'],
    goal=agents_config['query_builder']['goal'],
    backstory=agents_config['query_builder']['backstory'],
    llm=llm,
    verbose=True,
    # Spelling fixed: this was `allow_deligation`, which is not an Agent
    # field, so the delegation setting was not being applied.
    allow_delegation=True,
    # NOTE(review): the original also passed `allow_execution=True`, which is
    # not an Agent field either and had no effect, so it is dropped here.
    # If code execution was intended for this agent, the field name is
    # `allow_code_execution` — confirm before enabling.
)

# Second task in the pipeline; it lists the schema-analysis task as context.
query_building_task = Task(
    description=tasks_config['query_building_task']['description'],
    expected_output=tasks_config['query_building_task']['expected_output'],
    agent=query_builder,
    context=[schema_analysis_task],
)

In [13]:
# Data-analysis agent: role, goal and backstory come from agents_config.
# Built-in code execution is left off; the agent is given the
# execute_python_code tool instead.
data_analyst = Agent(
    role=agents_config['data_analyst']['role'],
    goal=agents_config['data_analyst']['goal'],
    backstory=agents_config['data_analyst']['backstory'],
    tools=[execute_python_code],
    llm=llm,
    verbose=True,
    # Spelling fixed: this was `allow_deligation`, which is not an Agent
    # field, so the delegation setting was not being applied.
    allow_delegation=True,
)

# Final task in the pipeline; it lists both earlier tasks as context.
data_analysis_task = Task(
    description=tasks_config['data_analysis_task']['description'],
    expected_output=tasks_config['data_analysis_task']['expected_output'],
    agent=data_analyst,
    context=[schema_analysis_task, query_building_task],
)


In [14]:
# Assemble the crew. Tasks are listed in the order
# schema analysis -> query building -> data analysis.
crew = Crew(
    agents=[schema_analyzer, query_builder, data_analyst],
    tasks=[schema_analysis_task, query_building_task, data_analysis_task],
    # process=Process.hierarchical,
    # manager_llm=schema_analyzer.llm,
    verbose=True,
)

# NOTE(review): the value of mongodb_uri is interpolated into the task prompt
# text and appears verbatim in the verbose output, so any credentials embedded
# in the URI reach the LLM provider and the notebook logs. Consider keeping
# the URI out of `inputs` and letting the tools read it from the environment.
result = crew.kickoff(
    inputs=dict(
        user_query=user_query,
        mongodb_uri=mongodb_uri,
        database_name=database_name,
        collection_names=collection_names,
    )
)

[1m[95m# Agent:[00m [1m[92mExpert MongoDB Schema Analyst and Python Developer[00m
[95m## Task:[00m [92mAnalyze the MongoDB collections of the 'Order Appetit' application to identify the necessary collections and fields required to address the following user query:
"What are sales for Mac n Cheese sales last 7 months?"

**Context:** - 'Order Appetit' is a local food delivery app in Buffalo. - The associated database is named 'appetit_db'. - Available collections are:
  ['categories', 'delivery_fees', 'locations', 'merchant_logs', 'merchant_settings', 'orders', 'product_categories', 'products', 'stores', 'store_tables', 'users']
- **MongoDB URI**: mongodb://<REDACTED_USER>:<REDACTED_PASSWORD>@atlas-sql-643e3030d126783c73656b9e-qrodv.a.query.mongodb.net/?ssl=true&authSource=admin&appName=atlas-sql-643e3030d126783c73656b9e
**Instructions:** 1. **Identify Relevant Collections and Fields**:
   - From the provided list, determine which collections and fields are necessary to answer the user query.





[1m[95m# Agent:[00m [1m[92mSenior Data Analyst Specializing in MongoDB Data Interpretation[00m
[95m## Thought:[00m [92mI need to execute the provided Python code to run the MongoDB query that retrieves sales data for "Mac n Cheese" and its variations over the last 7 months. This will allow me to gather the necessary information to present in a detailed, granular, and tabular format.[00m
[95m## Using tool:[00m [92mPython Code Executor[00m
[95m## Tool Input:[00m [92m
"{\"code\": \"from pymongo import MongoClient\\nfrom datetime import datetime, timedelta\\nimport json\\n\\n# MongoDB connection\\nclient = MongoClient(\\\"mongodb://UB_6872023:UB_2024%21@atlas-sql-643e3030d126783c73656b9e-qrodv.a.query.mongodb.net/?ssl=true&authSource=admin&appName=atlas-sql-643e3030d126783c73656b9e\\\")\\ndb = client['appetit_db']\\n\\n# Define the date range for the last 7 months\\nend_date = datetime.now()\\nstart_date = end_date - timedelta(days=7*30)  # Approximation of 7 months\\n\\