# BD Agent Notebook

Notebook for BD Agent. Let's do this starting from search & summary node as tool to call, BD agent to decide what tools to use and advisor chain

## Prepare data for BD Agents (Schema Context, Business Profile)

In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import os
import glob

import sys

from dotenv import load_dotenv
load_dotenv()

_path = os.getcwd()
while "graphs" not in os.listdir(_path):

	_path = os.path.dirname(_path)

	if _path.replace("/", "").strip() == "":

		raise FileNotFoundError()
 
	if _path not in sys.path:

		sys.path.append(_path)

		print(f"Appending {_path} to path")

from graphs.answer_graph import build_answer_graph
from graphs.understand_graph import build_understand_graph  # if needed in later phases
from graphs.state import AgentState

import nest_asyncio
nest_asyncio.apply()

from utils.scraper import scrape_urls
from utils.sanitize import sanitize

from logger_config import setup_logger
logger = setup_logger()
logger.info("StratPilot started.")

Appending /mnt/d/Agentic Hackathon/agentic-app/workspace to path
Appending /mnt/d/Agentic Hackathon/agentic-app to path


2025-04-26 20:13:32,942 - stratpilot - INFO - StratPilot started.


In [2]:
#Prepare data (Read)

uploaded_files = glob.glob("/mnt/d/Agentic Hackathon/agentic-app/data/coffee-shop-sample-data/*")
datasets = {}
care = ["customer", "201904 sales reciepts", "product"]

for file in uploaded_files:
    if not any(c in file for c in care):
        continue

    df = pd.read_csv(file)
    dataset_label = os.path.basename(file).split(".")[0]

    inferred = {col: col for col in df.columns}
    column_descriptions = {}
    for col, guess in inferred.items():
        column_descriptions[col] = f"This column likely represents {guess}."

    datasets[sanitize(dataset_label)] = {
        "data": df,
        "description": dataset_label,
        "column_descriptions": column_descriptions
    }

all_dataframes = {}
all_column_schemas = {}

for dataset_name, bundle in datasets.items():
    description = bundle.get("description", "").strip()
    df = bundle["data"]
    column_descriptions = bundle.get("column_descriptions", {})

    all_dataframes[dataset_name] = df
    all_column_schemas[dataset_name] = column_descriptions

business_profile = {
  'type': 'Cafe',
  'details': 'Independent neighborhood cafe located in a busy urban area, serving specialty coffee and light meals. Targets young professionals and students. Medium foot traffic, strong competition from chain coffee shops. Focused on in-store experience and Instagram-friendly ambiance. Operates 7 days a week with peak hours in the morning and late afternoon. Interested in increasing repeat customers and promoting seasonal items.'
}

In [3]:
#Run business understanding graph
#Explore dataset agentically
state = AgentState(
    business_profile=business_profile,
    datasets={},
    new_datasets=datasets,
    schema_context="",
    explored_datasets=[],
    memory_log=[]
)

understand_graph = build_understand_graph()
result = understand_graph.invoke(state)

state = AgentState(**result)

## Try with search tools

In [4]:
from langchain_tavily import TavilySearch
from utils.scraper import postprocess_tavily

from agents.llm_config import bd_llm
from typing import List, Dict, Any
from agents.bd_agent.prompt.web_summary_prompt import web_summary_prompt, WebSummaryOutput
from langchain_core.tools import tool

# Initialize Tavily Search Tool
tavily_search_tool = TavilySearch(
    max_results=5,
    topic="general",
)

args = {"query": "What's a trendy menu now in coffee shop?"}

web_summary_chain = web_summary_prompt | bd_llm.with_structured_output(WebSummaryOutput)

In [5]:
from agents.bd_agent.graph.tools import search_summary

queries = [
    "What's a trendy drink now in coffee shop?",
    "What bakery is popular now in a cafe?",]

result = search_summary.invoke({"queries": queries, "show_progress": True})

100%|██████████| 2/2 [03:19<00:00, 99.80s/it] 


In [6]:
result

{'messages': "Question: What's a trendy drink now in coffee shop?\nSummaries: Some of the trendy drinks in coffee shops in 2024 include Sea Salt Latte, Dalgona Coffee (whipped coffee), Blue Butterfly Pea Tea, Cold Brew coffee, Iced Matcha Cortado, Nitro Cold Brews, Snapp-chilled Coffee, and Mushroom Coffee. These drinks are popular for their unique flavors, textures, and health benefits, making them fashionable choices among coffee lovers this year.\n  - A trendy drink now in coffee shops is cold coffee, especially cold brew and creative cold brew variations like draft cold brew combined with draft latte. Ready-to-drink cold espresso and non-alcoholic coffee cocktails are also gaining popularity, along with superfood-infused coffees featuring ingredients like turmeric, matcha, and adaptogenic mushrooms. Cafes are focusing on innovative and specialty drinks to offer unique experiences to customers.\n  - Trendy drinks in coffee shops right now include cold brew coffee, known for its smoo

## Understand ToolNode and How the ReACT agent would call it

In [None]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.tools import tool

from langgraph.prebuilt import ToolNode

@tool
def get_weather(location: str):
    """Call to get the current weather."""
    if location.lower() in ["sf", "san francisco"]:
        return {"messages": "It's 60 degrees and foggy.", "args": "dummy"}
    else:
        return "It's 90 degrees and sunny."


@tool
def get_coolest_cities():
    """Get a list of coolest cities"""
    return "nyc, sf"

tools = [get_weather, get_coolest_cities]
tool_node = ToolNode(tools)

message_with_single_tool_call = AIMessage(
    content="",
    tool_calls=[
        {
            "name": "get_weather",
            "args": {"location": "sf"},
            "id": "tool_call_id",
            "type": "tool_call",
        }
    ],
)

tmp = tool_node.invoke({"messages": [message_with_single_tool_call]})

In [18]:
tmp

{'messages': [ToolMessage(content='["It\'s 60 degrees and foggy.", {"args": "dummy"}]', name='get_weather', tool_call_id='tool_call_id')]}

In [9]:
tool_result = tmp["messages"][0]
actual_tool_result = eval(tool_result.content)
tool_result.content = actual_tool_result.pop("messages")


In [10]:
from IPython.display import display

display(tool_result)
display(actual_tool_result)

ToolMessage(content="It's 60 degrees and foggy.", name='get_weather', tool_call_id='tool_call_id')

{'args': 'dummy'}

In [15]:
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph import StateGraph
from langgraph.graph.message import AnyMessage, add_messages
from operator import add

# Step 1: Define State
class BDState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
    data_insights: Annotated[list[str], add]
    search_insights: Annotated[list[str], add]

# Step 2: Define Node Functions
def extract_data_insights(state: BDState) -> dict:
    print("🔍 Extracting data insights...")
    return {
        "data_insights": ["Sales increased 15% in Q1."]
    }

def search_case_studies(state: BDState) -> dict:
    print("🌐 Searching external case studies...")
    return {
        "search_insights": ["Bundling led to 25% more purchases."]
    }

def respond_to_user(state: BDState) -> dict:
    print("💬 Responding to user...")
    msg = AIMessage(content="Here are the insights I've found.")
    return {
        "messages": [msg]
    }

# Step 3: Create Graph
builder = StateGraph(BDState)

builder.add_node("extract_data", extract_data_insights)
builder.add_node("search_case_studies", search_case_studies)
builder.add_node("respond", respond_to_user)

# You can run in sequence or parallel (we'll do sequential for simplicity)
builder.set_entry_point("extract_data")
builder.add_edge("extract_data", "search_case_studies")
builder.add_edge("search_case_studies", "respond")

graph = builder.compile()

# Step 4: Run Graph
initial_state = {
    "messages": [HumanMessage(content = "Hello World")],
    "data_insights": [],
    "search_insights": []
}

final_state = graph.invoke(initial_state)

# Step 5: Show Final State
print("\n📊 Final State:")
print(final_state)

🔍 Extracting data insights...
🌐 Searching external case studies...
💬 Responding to user...

📊 Final State:
{'messages': [HumanMessage(content='Hello World', additional_kwargs={}, response_metadata={}, id='a02c8247-de8f-4952-b617-375a7374fda8'), AIMessage(content="Here are the insights I've found.", additional_kwargs={}, response_metadata={}, id='4b9a466b-48b7-4544-9c07-c52968619a54')], 'data_insights': ['Sales increased 15% in Q1.'], 'search_insights': ['Bundling led to 25% more purchases.']}


## Try with actual BD Agents

What it needs to do is -> Select tool calls. The agent can either call ask_da, do search_and_summarise, advice or answer.



In [7]:
from langchain_core.messages import HumanMessage

user_prompt = "What are trendy menus in Cafe recently"

from agents.bd_agent.graph.state import BDState

bd_state = BDState({"messages": [HumanMessage(content =  f"<question>{user_prompt}</question>")], "schema_context": state.schema_context, "business_profile": state.business_profile})

from agents.bd_agent.graph.nodes import call_model
result = call_model(bd_state)
result["messages"].tool_calls[0]["args"]["queries"] = result["messages"].tool_calls[0]["args"]["queries"][:2]
bd_state["messages"] += [result["messages"]]

In [9]:
from agents.bd_agent.graph.nodes import route_to_tools, call_tools

route_to_tools(bd_state)

'tools'

In [11]:
search_result = call_tools(bd_state)

{'name': 'search_summary', 'args': {'thought': 'To identify trendy menus in cafes recently, I will search for current popular cafe menu trends, focusing on specialty coffee and light meals, as well as seasonal and Instagram-friendly items that attract young professionals and students. This will help provide relevant and up-to-date insights for the cafe business.', 'queries': ['trendy cafe menus 2024', 'popular specialty coffee drinks 2024'], 'graph_state': {'messages': [HumanMessage(content='<question>What are trendy menus in Cafe recently</question>', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_smql0Swu3JSn1e2mE7rC45uH', 'function': {'arguments': '{"thought":"To identify trendy menus in cafes recently, I will search for current popular cafe menu trends, focusing on specialty coffee and light meals, as well as seasonal and Instagram-friendly items that attract young professionals and students. This will help provide 

100%|██████████| 2/2 [03:20<00:00, 100.43s/it]

[ToolMessage(content='{"messages": "Question: trendy cafe menus 2024\\nSummaries: Trendy cafe menus in 2024 feature several key trends: low- and no-alcohol drinks crafted with sophistication; increased snacking options blurring traditional meal times; immersive dining experiences with interactive and elevated presentations; the rise of Korean cuisine with ingredients like gochujang and kimchi and interactive dining formats; a shift towards mid-week and earlier dining times; and the popular use of sweet heat flavors such as hot honey and sweet-spicy sauces. These trends reflect consumer preferences for mindful drinking, convenience, experiential dining, global flavors, and bold tastes.\\n  - The top trendy cafe menu trends for 2024 include: 1) Flavor Shock - bold fusion and eclectic cooking styles; 2) Plant-Powered Proteins - flexitarian dishes focusing on plant-rich proteins; 3) Local Abundance - using and celebrating locally sourced ingredients; 4) Low-Waste Menus - creative use of in




In [54]:
tool_node

tools(tags=None, recurse=True, explode_args=False, func_accepts_config=True, func_accepts={'store': ('__pregel_store', None)}, tools_by_name={'search_summary': StructuredTool(name='search_summary', description='Search and summarize web content for a list of queries.', args_schema=<class 'langchain_core.utils.pydantic.search_summary'>, func=<function search_summary at 0x7ff7af92cd30>)}, tool_to_state_args={'search_summary': {'graph_state': None}}, tool_to_store_arg={'search_summary': None}, handle_tool_errors=True, messages_key='messages')

## Build BD Agent

In [None]:
from langgraph.graph import StateGraph
from agents.bd_agent.graph.nodes import call_model, call_tools, route_to_tools
from agents.bd_agent.graph.state import BDState

class BDAgent(object):
    def __init__(self):
        super().__init__()
        self.graph = self.create_graph()
        
    def create_graph(self):
        workflow = StateGraph(AgentState)
        workflow.add_node('agent', call_model)
        workflow.add_node('tools', call_tools)

        workflow.add_conditional_edges('agent', route_to_tools)

        workflow.add_edge('tools', 'agent')
        workflow.set_entry_point('agent')
        return workflow.compile()
    
bd_agent = BDAgent()

In [6]:
from agents.bd_agent.graph.state import BDState
from langchain_core.messages import HumanMessage


user_prompt = "What are the trendy menu I should add to my cafe's"
bd_state = BDState({"messages": [HumanMessage(content =  f"<question>{user_prompt}</question>")], "schema_context": state.schema_context, "business_profile": state.business_profile})

In [9]:
bd_state

{'messages': [HumanMessage(content="<question>What are the trendy menu I should add to my cafe's</question>", additional_kwargs={}, response_metadata={})],
 'schema_context': '📁 `df_201904_sales_reciepts` — 201904 sales reciepts\nLoaded datasets variable: df_201904_sales_reciepts_df\nDataframe columns:\n- `transaction_id`\n  • Description: This column likely represents transaction_id.\n  • Type: int64\n  • Nulls: 0\n  • Unique: 4203\n  • Min: 1.00, Max: 4203.00\n  • Sample: [7, 11, 19]\n- `transaction_date`\n  • Description: This column likely represents transaction_date.\n  • Type: object\n  • Nulls: 0\n  • Unique: 29\n  • Sample: [\'2019-04-01\', \'2019-04-02\', \'2019-04-03\']\n- `transaction_time`\n  • Description: This column likely represents transaction_time.\n  • Type: object\n  • Nulls: 0\n  • Unique: 26074\n  • Sample: [\'12:04:43\', \'15:54:39\', \'14:34:59\']\n- `sales_outlet_id`\n  • Description: This column likely represents sales_outlet_id.\n  • Type: int64\n  • Nulls: 0

In [8]:
bd_agent.graph.invoke({"state":bd_state})

2025-04-26 20:15:08,401 - stratpilot - INFO - [Current State] []


KeyError: 'schema_context'