# BD Agent Notebook

Notebook for prototyping the BD (business development) agent. We start with the search-and-summary node exposed as a callable tool, then move on to the BD agent that decides which tools to use, and finally the advisor chain.

## Prepare data for BD Agents (Schema Context, Business Profile)

In [5]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import os
import glob

import sys

from dotenv import load_dotenv
load_dotenv()

# Walk upward from the working directory until we reach the project root,
# identified as the first directory that contains a "graphs" entry.
_path = os.getcwd()
while "graphs" not in os.listdir(_path):
    _parent = os.path.dirname(_path)
    # os.path.dirname() returns its argument unchanged at the filesystem root,
    # so "no progress" means no ancestor contains the marker. Comparing with the
    # parent (instead of stripping "/") also terminates on Windows drive roots.
    if _parent == _path:
        raise FileNotFoundError(
            'Could not find a directory containing "graphs" above the working directory.'
        )
    _path = _parent

# Register only the project root. Appending inside the loop would also add
# every intermediate directory, which could shadow project modules on import.
if _path not in sys.path:
    sys.path.append(_path)
    print(f"Appending {_path} to path")

from graphs.answer_graph import build_answer_graph
from graphs.understand_graph import build_understand_graph  # if needed in later phases
from graphs.state import AgentState

import nest_asyncio
nest_asyncio.apply()

from utils.scraper import scrape_urls
from utils.sanitize import sanitize

from logger_config import setup_logger
logger = setup_logger()
logger.info("StratPilot started.")

2025-04-25 00:40:53,283 - stratpilot - INFO - StratPilot started.


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Prepare data (read): load the CSV files of interest and derive a naive schema.

uploaded_files = glob.glob("/mnt/d/Agentic Hackathon/agentic-app/data/coffee-shop-sample-data/*")
care = ["customer", "201904 sales reciepts", "product"]
datasets = {}

# Only paths containing one of the `care` substrings are loaded.
for file in (path for path in uploaded_files if any(key in path for key in care)):
    df = pd.read_csv(file)
    dataset_label = os.path.basename(file).split(".")[0]

    datasets[sanitize(dataset_label)] = {
        "data": df,
        "description": dataset_label,
        # Placeholder descriptions: each column is described by its own name.
        "column_descriptions": {
            col: f"This column likely represents {col}." for col in df.columns
        },
    }

# Split the bundles into per-dataset lookups that share the keys of `datasets`.
all_dataframes = {name: bundle["data"] for name, bundle in datasets.items()}
all_column_schemas = {
    name: bundle.get("column_descriptions", {}) for name, bundle in datasets.items()
}

# Business profile: a short type label plus a free-text description of the cafe.
business_profile = dict(
    type="Cafe",
    details=(
        "Independent neighborhood cafe located in a busy urban area, serving specialty coffee and light meals. "
        "Targets young professionals and students. "
        "Medium foot traffic, strong competition from chain coffee shops. "
        "Focused on in-store experience and Instagram-friendly ambiance. "
        "Operates 7 days a week with peak hours in the morning and late afternoon. "
        "Interested in increasing repeat customers and promoting seasonal items."
    ),
)

In [7]:
# Run the business-understanding graph so the datasets are explored agentically.
# The state starts with no known datasets; everything loaded above is handed
# over as `new_datasets`.
initial_state_fields = {
    "business_profile": business_profile,
    "datasets": {},
    "new_datasets": datasets,
    "schema_context": "",
    "explored_datasets": [],
    "memory_log": [],
}
state = AgentState(**initial_state_fields)

understand_graph = build_understand_graph()
result = understand_graph.invoke(state)

# Re-wrap the graph output as an AgentState for the cells that follow.
state = AgentState(**result)

## Try with search tools

In [19]:
from langchain_tavily import TavilySearch
from utils.scraper import postprocess_tavily

from agents.llm_config import bd_llm
from typing import List, Dict, Any
from agents.bd_agent.prompt.web_summary_prompt import web_summary_prompt, WebSummaryOutput
from langchain_core.tools import tool

# Tavily web-search tool: general-topic search, at most five results per call.
tavily_search_tool = TavilySearch(max_results=5, topic="general")

# Sample query payload.
args = dict(query="What's a trendy menu now in coffee shop?")

# Summarisation chain: the prompt piped into the LLM, whose output is
# structured as WebSummaryOutput.
structured_bd_llm = bd_llm.with_structured_output(WebSummaryOutput)
web_summary_chain = web_summary_prompt | structured_bd_llm

In [23]:
from tqdm import tqdm

def search_summary_single(query: str) -> List[str]:
    """Search the web for one query and summarise every scraped page.

    Args:
        query: The search query. It is sent to the Tavily search tool and is
            also passed to the summarisation chain as the question each page
            is summarised against.

    Returns:
        One summary string per page yielded by ``postprocess_tavily``, in the
        same order.
    """
    search_result = tavily_search_tool.invoke({"query": query})
    pages = postprocess_tavily(search_result)

    # Use the caller's query as the question. Reading the notebook-level `args`
    # here would summarise every page against one fixed sample question,
    # regardless of what was actually searched for.
    return [
        web_summary_chain.invoke({
            "question": query,
            "web_title": page["title"],
            "web_content": page["page_content"],
        }).summary
        for page in pages
    ]

@tool(parse_docstring=True)
def search_summary(queries: List[str], show_progress: bool = False) -> List[Dict[str, Any]]:
    """
    Search and summarize web content for a list of queries.

    Args:
        queries (List[str]): A list of search queries.
        show_progress (bool, optional): Whether to show progress bar. Defaults to False.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing web content summaries for each query. The keys include 'question' (The search query), 'summaries' (List of summaries)
    """
    # The docstring above is handed to @tool with parse_docstring=True, so its
    # wording is deliberately left untouched.

    # Wrap the queries in a progress bar only on request; iteration order is unchanged.
    iterable = tqdm(queries) if show_progress else queries

    return [
        {"question": query, "summaries": search_summary_single(query)}
        for query in iterable
    ]

In [24]:
queries = [
    "What's a trendy drink now in coffee shop?",
    "What bakery is popular now in a cafe?",
]

# Go through .invoke() rather than calling the tool object directly:
# direct calls on a LangChain tool are deprecated in favour of invoke().
result = search_summary.invoke({"queries": queries, "show_progress": True})

100%|██████████| 2/2 [03:40<00:00, 110.32s/it]


In [25]:
# Display the per-query summaries returned by search_summary.
result

[{'question': "What's a trendy drink now in coffee shop?",
  'summaries': ['Trendy menu items in coffee shops for 2024 include hot drinks like Sea Salt Latte, Dalgona Coffee (whipped coffee), and Blue Butterfly Pea Tea. Popular cold or alcoholic trendy drinks include various Cold Brew styles, Iced Matcha Cortado, Nitro Cold Brews, Snapp-chilled™ Coffee, and Mushroom Coffee. These drinks reflect current consumer interests in unique flavors, textures, and health-conscious options.',
   'Trendy coffee shop menus in 2025 feature smart espresso machines and tech-driven brewing methods, a surge in ready-to-drink cold coffee and non-alcoholic coffee cocktails, and a strong focus on cold coffee options favored by younger consumers. Superfood-infused coffees with ingredients like turmeric and mushrooms are gaining popularity, alongside creative and customizable DIY-style coffee drinks. Sustainability and ethically sourced coffee are increasingly important, and cafes are offering more innovative

## Try with actual BD Agents

The agent's job is to select tool calls: it can call `ask_da`, run `search_and_summarise`, advise, or answer.



In [None]:
# System prompt for the BD agent: sets the persona and lists the four kinds of
# action it chooses between (web search & summary, internal data lookup,
# recommendation, direct answer).
# NOTE: this text is sent to the model verbatim; edit the wording deliberately.
BD_SYSTEM_PROMPT = """You are a helpful business development. You are very good at deciding what to do to improve the business based on user's queries or answer the user's questions.
Your choice of actions always involve 4 things: 
1. Search and summarize web content to gather information
2. Gather information from business internal data
3. If the information is enough and the user asks for recommendation, give the recommendation based on the information
4. If the information is enough and the user asks for direct answer, give the direct answer based on the information.
"""

# Instruction prompt for the BD agent: describes the decision procedure
# (retrieve more information vs. recommend vs. answer directly).
# It refers to inputs tagged <schema_context>, <business_profile> and <question>;
# presumably the caller wraps those values in matching tags — TODO confirm,
# since no template placeholders for them appear in this string.
BD_INSTRUCTION_PROMPT = """Take a deep breath and think step-by-step. Think in gradually increasing complexity.

You are given a schema context, a business profile and a question or query posed by user under the tag <schema_context>, <business_profile> and <question>.
Your task is to decide what to do with the current information and decide what to do next using the provided tools.
First, you need to decide whether you should retrieve more information. If so, you need to decide whether you should search and summarize web content or gather information from business internal data.
Otherwise, you need to decide whether you should give the recommendation based on the information or give the direct answer based on the information. If the user asks for question with direct answer such as 'How much sales were generated last year?', you need to give the direct answer based on the information.
However, if the user asks for recommendation such as 'I want to add new menu to my cafe', you need to give the recommendation based on the information.

If you do this task well, I will tip you 200 US Dollars"""

In [27]:
from langchain_core.prompts import ChatPromptTemplate
from datetime import datetime

# System text for the prompt-template experiment. It expects `user_info` and
# `time` to be filled in when the template is rendered.
# NOTE(review): the wording still describes a Swiss Airlines support assistant
# rather than the BD agent — presumably a stand-in; confirm before real use.
_primary_system_text = (
    "You are a helpful customer support assistant for Swiss Airlines. "
    " Use the provided tools to search for flights, company policies, and other information to assist the user's queries. "
    " When searching, be persistent. Expand your query bounds if the first search returns no results. "
    " If a search comes up empty, expand your search before giving up."
    "\n\nCurrent user:\n<User>\n{user_info}\n</User>"
    "\nCurrent time: {time}."
)

# The "placeholder" entry is where the conversation passed as `messages` goes.
_primary_message_specs = [
    ("system", _primary_system_text),
    ("placeholder", "{messages}"),
]

# `time` is bound to the callable datetime.now, not to a fixed timestamp.
primary_assistant_prompt = ChatPromptTemplate.from_messages(
    _primary_message_specs
).partial(time=datetime.now)

In [33]:
# Smoke-test the template with dummy values and show the rendered messages.
messages = [("assistant", "dummy")]

rendered_prompt = primary_assistant_prompt.invoke(
    dict(messages=messages, user_info="dummy")
)
rendered_prompt.messages

[SystemMessage(content="You are a helpful customer support assistant for Swiss Airlines.  Use the provided tools to search for flights, company policies, and other information to assist the user's queries.  When searching, be persistent. Expand your query bounds if the first search returns no results.  If a search comes up empty, expand your search before giving up.\n\nCurrent user:\n<User>\ndummy\n</User>\nCurrent time: 2025-04-25 03:09:15.602935.", additional_kwargs={}, response_metadata={}),
 AIMessage(content='dummy', additional_kwargs={}, response_metadata={})]