In [1]:
import sys
import pprint

sys.path.append('../')

In [5]:
from langchain_tavily import TavilySearch
from langchain.chat_models import init_chat_model
from langgraph.graph import StateGraph
from config import settings
from src import prompts
from typing import TypedDict
from pydantic import BaseModel
import pandas as pd
import ast
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, AIMessage, ChatMessage



# chat model used by the LLM-backed nodes below; temperature is pinned to 0
model = init_chat_model(
    model="gemini-2.0-flash",
    model_provider="google_genai",
    temperature=0,
)


# web-search tool used by search_executor_node; the per-query result cap
# comes from project settings
tavily = TavilySearch(
    topic="general",
    max_results=settings.MAX_SEARCH_RESULTS_PER_QUESTION,
)


# define main state object
class AgentState(TypedDict):
    """Shared state handed from node to node in the graph.

    Every key a node returns must be declared here under exactly the
    spelling the node uses; otherwise the update does not map onto a state
    channel and downstream lookups fail.
    """
    # campaign request supplied by the caller
    initial_request: str
    # produced by query_generator_node
    search_queries: list[str]
    # produced by search_executor_node (one entry per search hit)
    search_results: list[str]
    # sales dataset supplied by the caller
    df: pd.DataFrame
    # produced by data_summarizer_node, consumed by insight_aggregator_node
    insights_summary: str
    # produced by insight_aggregator_node, printed by the script entry point
    final_report: str
    # TODO: expand state object as needed for other nodes


# define structured output formats
class SearchQueries(BaseModel):
    """Structured-output schema for the query planner: a list of search queries."""
    queries: list[str]


# define graph nodes
def query_generator_node(state: AgentState):
    """Turn the initial request into a list of web-search queries.

    Reads ``state['initial_request']`` and returns ``{'search_queries': [...]}``
    taken from the model's ``SearchQueries`` structured output.
    """
    planner = model.with_structured_output(SearchQueries)
    conversation = [
        SystemMessage(content=prompts.QUERY_PLANNER_PROMPT),
        HumanMessage(content=state['initial_request']),
    ]
    plan = planner.invoke(conversation)
    return {'search_queries': plan.queries}


def search_executor_node(state: AgentState):
    """Run every planned query through Tavily and collect the hit contents.

    Reads ``state['search_queries']`` and returns ``{'search_results': [...]}``
    with the ``content`` field of each result, in query order.
    """
    return {
        'search_results': [
            hit['content']
            for query in state['search_queries']
            for hit in tavily.invoke({'query': query})['results']
        ]
    }

def _parse_subcategory_list(text):
    """Recover a Python list literal from an LLM reply.

    The reply may wrap the list in prose or Markdown code fences, so only the
    span from the first ``[`` to the last ``]`` is parsed.

    Raises:
        ValueError: if no list literal can be recovered from *text*.
    """
    if not isinstance(text, str):
        raise ValueError("Response is not text.")
    start, end = text.find("["), text.rfind("]")
    if start == -1 or end < start:
        raise ValueError("No list literal found in response.")
    try:
        parsed = ast.literal_eval(text[start:end + 1])
    except (SyntaxError, TypeError, MemoryError, RecursionError) as err:
        raise ValueError("Response is not a valid Python literal.") from err
    if not isinstance(parsed, list):
        raise ValueError("Parsed response is not a list.")
    return parsed


def data_summarizer_node(state: AgentState):
    """Summarize year-over-year sales/profit for the sub-categories relevant to the request.

    Reads ``state['df']`` and ``state['initial_request']``. The dataframe is
    expected to carry the columns ``Category``, ``Sub-Category``,
    ``Order_Date``, ``Region``, ``Sales`` and ``Profit``.

    Returns:
        ``{"insights_summary": str}`` -- either the model-written summary or a
        short explanatory message when the analysis cannot be carried out.
    """
    df = state['df']
    initial_request = state['initial_request']

    # Step 1: Extract category values
    all_categories = df["Category"].dropna().unique().tolist()
    all_subcategories = df["Sub-Category"].dropna().unique().tolist()

    # Step 2: Use LLM to map prompt to subcategories.
    # The prompt is sent as a plain HumanMessage, the same way
    # insight_aggregator_node builds its prompt.
    category_prompt = f"""
You are helping analyze a sales dataset. The dataset includes:

Main Categories: {all_categories}
Sub-Categories: {all_subcategories}

Given this campaign prompt:
"{initial_request}"

Return a dedupped Python list of relevant Sub-Categories that match the prompt.
Example: ["Binders", "Art", "Appliances"]
"""
    response = model.invoke([HumanMessage(content=category_prompt)]).content

    try:
        matched_subcategories = _parse_subcategory_list(response)
    except ValueError:
        return {"insights_summary": f"LLM response could not be parsed:\n{response}"}

    if not matched_subcategories:
        return {"insights_summary": "No relevant sub-categories found by the LLM."}

    # Step 3: Filter data
    df_filtered = df[df["Sub-Category"].isin(matched_subcategories)].copy()
    df_filtered["Order_Date"] = pd.to_datetime(df_filtered["Order_Date"])
    df_filtered["year"] = df_filtered["Order_Date"].dt.year

    # Step 4: Identify the latest two years in the dataset
    latest_years = sorted(df_filtered["year"].dropna().unique())[-2:]
    if len(latest_years) < 2:
        return {"insights_summary": "Not enough years of data for YoY comparison."}

    year_old, year_new = latest_years

    # Step 5: YoY aggregation
    grouped = df_filtered.groupby(["Sub-Category", "Region", "year"]).agg({
        "Sales": "sum",
        "Profit": "sum"
    }).reset_index()

    df_new = grouped[grouped["year"] == year_new].set_index(["Sub-Category", "Region"])
    df_old = grouped[grouped["year"] == year_old].set_index(["Sub-Category", "Region"])

    # inner join: keep only sub-category/region pairs present in both years
    yoy = df_new.join(df_old, lsuffix=f"_{year_new}", rsuffix=f"_{year_old}", how="inner")
    if yoy.empty:
        return {"insights_summary": "No sub-category/region pairs have data in both years for YoY comparison."}

    for metric in ("Sales", "Profit"):
        current = yoy[f"{metric}_{year_new}"]
        previous = yoy[f"{metric}_{year_old}"]
        # Divide by the magnitude of the base so a negative base (a loss in
        # the earlier year) does not flip the sign of the change, and turn a
        # zero base into NaN instead of +/-inf.
        base = previous.abs().replace(0, float("nan"))
        yoy[f"{metric.lower()}_yoy"] = ((current - previous) / base) * 100
    yoy.reset_index(inplace=True)

    # Step 6: Generate insight summary with LLM
    yoy_data = yoy.to_csv(index=False)
    summary_prompt = f"""
You are a BI analyst. Summarize the following YoY data in clear bullet points.
Focus on major % increases or decreases in sales or profit by sub-category and region.

Data:
{yoy_data}
"""
    insight_summary = model.invoke([HumanMessage(content=summary_prompt)]).content

    return {"insights_summary": insight_summary}


def insight_aggregator_node(state: AgentState):
    """Combine web search results and the sales summary into one report.

    Reads ``search_results``, ``insights_summary`` and ``initial_request`` from
    the state, sends a single prompt to the model, and returns
    ``{"final_report": <model reply text>}``.
    """
    web_findings = state['search_results']
    sales_summary = state['insights_summary']
    request = state['initial_request']

    # one "- ..." bullet per search hit
    web_bullets = "\n".join(f"- {finding}" for finding in web_findings)

    report_prompt = f"""
You're a marketing analyst. Create a one-page summary report combining web insights and sales data insight summary for a {request}.
Focus on trends, regional patterns, and recommended actions.

Web insights:
{web_bullets}

Sales data summaries:
- {sales_summary}

Return output as:
1. Executive Summary
2. Key Insights
3. Recommended Actions
"""
    reply = model.invoke([HumanMessage(content=report_prompt)])
    return {"final_report": reply.content}



# build graph: a linear pipeline
#   query_generator -> search_executor -> data_summarizer -> insight_aggregator
graph_builder = StateGraph(AgentState)

graph_builder.add_node('query_generator', query_generator_node)
graph_builder.add_node('search_executor', search_executor_node)
graph_builder.add_node('data_summarizer', data_summarizer_node)
graph_builder.add_node('insight_aggregator', insight_aggregator_node)

graph_builder.add_edge('query_generator', 'search_executor')
graph_builder.add_edge('search_executor', 'data_summarizer')
graph_builder.add_edge('data_summarizer', 'insight_aggregator')

graph_builder.set_entry_point('query_generator')
# Declare the last node as the finish point explicitly so the graph's
# termination does not rely on the node simply having no outgoing edge.
graph_builder.set_finish_point('insight_aggregator')
graph = graph_builder.compile()

#execute
if __name__ == "__main__":

    # Forward slashes resolve on both Windows and POSIX; a backslash path
    # is only understood on Windows.
    df = pd.read_excel('../data/Superstore.xlsx')

    initial_state = {
        "initial_request": "plan a campaign on Furniture in northeast America",
        "df": df
    }

    final_state = graph.invoke(initial_state)
    print(final_state["final_report"])

AttributeError: partially initialized module 'pandas' from 'c:\Users\ChristopherEdwards\OneDrive - Blend 360\Documents\projects\other\marketing-campaign-agent\venv\Lib\site-packages\pandas\__init__.py' has no attribute 'core' (most likely due to a circular import)

In [7]:
import pandas as pd
# Forward slashes: in a non-raw string '\d' and '\S' are invalid escape
# sequences, and a backslash path only resolves on Windows.
df = pd.read_excel('../data/Superstore.xlsx')


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "C:\Users\SherryShen\anaconda3\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\SherryShen\anaconda3\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\SherryShen\anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\SherryShen\anaconda3\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
    app.start()
  File "C:\Users\SherryShe

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "C:\Users\SherryShen\anaconda3\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\SherryShen\anaconda3\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\SherryShen\anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\SherryShen\anaconda3\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
    app.start()
  File "C:\Users\SherryShe

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "C:\Users\SherryShen\anaconda3\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\SherryShen\anaconda3\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\SherryShen\anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\SherryShen\anaconda3\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
    app.start()
  File "C:\Users\SherryShe

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "C:\Users\SherryShen\anaconda3\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\SherryShen\anaconda3\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\SherryShen\anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\SherryShen\anaconda3\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
    app.start()
  File "C:\Users\SherryShe

AttributeError: _ARRAY_API not found