In [3]:
from openai import OpenAI
import os
from IPython.display import Markdown, display
import pandas as pd
from helper_cot import (
    # Helpers
    # Synthetic data
    get_competitor_pricing_data,
    get_promotions_data,
    get_sales_data,
    get_weather_data,
    print_in_box,
    display,
    display_responses,
    get_completion,
    get_completion_messages,
    call_weather_api
)

In [4]:
# Chat model used for every completion request in this notebook.
MODEL = "gpt-4o-mini"

# The API key is read from the OPENAI_API_KEY environment variable so that the secret
# never has to be typed into (or saved with) the notebook. When the variable is unset
# the key falls back to an empty string, which is what was hard-coded here before.
client = OpenAI(
    base_url="https://openai.vocareum.com/v1",
    api_key=os.environ.get("OPENAI_API_KEY", ""),
)

In [5]:
# Fetch the raw records for each dataset from the helper module.
sales_data = get_sales_data()
promotions_data = get_promotions_data()
weather_data = get_weather_data()
competitor_pricing_data = get_competitor_pricing_data()

# Wrap each raw dataset in a DataFrame; the raw objects stay available under their
# own names for later cells.
sales_df, promotions_df, weather_df, competitor_pricing_df = (
    pd.DataFrame(records)
    for records in (
        sales_data,
        promotions_data,
        weather_data,
        competitor_pricing_data,
    )
)

In [7]:
# Let's work on our calculator function!
# TODO: Replace parts marked with a **********

import re

import ast
import operator


def safe_eval(expr):
    """
    Evaluate a mathematical expression safely.

    We normally don't want to use eval() because it can execute arbitrary code, unless we are in a
    properly sandboxed environment. This function is a safe alternative for evaluating mathematical
    expressions: the expression is parsed with ast and only numeric constants combined with
    +, -, *, / and unary +/- are evaluated.

    Args:
        expr (str): The expression to evaluate, e.g. "(10 + 20) / 2.0".

    Returns:
        int | float: The result. Float results are rounded to 2 decimal places.

    Raises:
        SyntaxError: If ast.parse cannot parse the expression.
        TypeError: If the expression contains an unsupported node, operator or a
            non-numeric constant.
        ZeroDivisionError: If the expression divides by zero.
        RuntimeError: If the result is neither an int nor a float.
    """
    binary_operators = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
    }
    unary_operators = {
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    def eval_node(node):
        if isinstance(node, ast.Constant):
            # Reject strings, bytes, None, complex, ... up front: otherwise something like
            # 'a' * 10**9 would be computed before the result type is ever checked.
            if not isinstance(node.value, (int, float)):
                raise TypeError(f"Unsupported constant: {node.value!r}")
            return node.value
        elif isinstance(node, ast.BinOp):
            apply_binary = binary_operators.get(type(node.op))
            if apply_binary is None:
                # Report unsupported operators (e.g. ** or %) the same way as unsupported
                # nodes instead of leaking a KeyError from the lookup table.
                raise TypeError(f"Unsupported operator: {type(node.op)}")
            return apply_binary(eval_node(node.left), eval_node(node.right))
        elif isinstance(node, ast.UnaryOp):
            apply_unary = unary_operators.get(type(node.op))
            if apply_unary is None:
                raise TypeError(f"Unsupported operator: {type(node.op)}")
            return apply_unary(eval_node(node.operand))
        elif isinstance(node, ast.Expr):
            return eval_node(node.value)
        else:
            raise TypeError(f"Unsupported type: {type(node)}")

    result = eval_node(ast.parse(expr, mode="eval").body)

    if isinstance(result, float):
        return round(result, 2)
    elif isinstance(result, int):
        return result
    else:
        raise RuntimeError(f"Unsupported result type: {type(result)}")


def calculator(expression: str) -> float:
    """
    Compute the value of an arithmetic expression string.

    The expression is handed to safe_eval and the result is converted with float().

    Args:
        expression (str): The arithmetic expression, e.g. "10 + 10".

    Returns:
        float: The evaluated result.
    """
    value = safe_eval(expression)
    return float(value)


# Quick sanity check of the calculator tool.
actual = calculator("10 + 10")
assert actual == 20.0, f" ❌ Expected 20.0, got {actual}"

In [6]:

# System prompt for the ReAct agent: it fixes the THINK/ACT reply format, lists the
# available tools with example calls, and ends with one worked example conversation.
# NOTE(review): the tool examples below put the call on the same line as "ACT:", and the
# worked example calls final_answer with a single positional string, whereas the regexes
# in get_observation_message only match "ACT:" followed by a newline and a final_answer
# call with the four keyword arguments — confirm the model is not misled by the examples.
react_system_prompt = """
You are a meticulous Retail Demand Analyst that can solve any TASK in a multi-step process using tool calls and reasoning.

## Instructions:
- You will use step-by-step reasoning by
    - THINKING the next steps to take to complete the task and what next tool call to take to get one step closer to the final answer
    - ACTING on the single next tool call to take
- You will always respond with a single THINK/ACT message of the following format:
    THINK:
    [Carry out any reasoning needed to solve the problem not requiring a tool call]
    [Conclusion about what next tool call to take based on what data is needed and what tools are available]
    ACT:
    [Tool to use and arguments]
- As soon as you know the final answer, call the `final_answer` tool in an `ACT` message.
- ALWAYS provide a tool call, after ACT:, else you will fail.

## Available Tools
* `calculator(expression: str)`: Perform an arithmetic calculation
    - Example:
        - Input: `ACT: calculator(expression="(10 + 20) / 2.0")`
        - Output: `OBSERVE: 15.0`
* `get_sales_data()`: Get the sales data
    - Example:
        - Input: `ACT: get_sales_data()`
        - Output: `OBSERVE: {"date": "2024-01-10", "product_id": "P001", "product_name": "Product 1", "quantity": 255, "revenue": 15547.35}`
* `call_weather_api(date: str)`: Get weather data for a specific date. Call this for the date of each spike.
    - Example:
        - Input: `ACT: call_weather_api(date="2024-01-10")`
        - Output: `OBSERVE: {"date": "2024-01-10", "weather": "Sunny", "temperature": 72}`

* `final_answer(amount_after_spike: str, causes: list[str], date: str, percentage_spike: str)`: Return the final answer
    - Example:
        - Input: `ACT: final_answer(amount_after_spike="32", causes=["Competitor X offering a 29 discount boosting category interest", ...], date="2020-06-12", percentage_spike="20.00%")`
        - Output: `OBSERVE: {"amount_after_spike": "32", "causes": ["Competitor X offering a 29 discount boosting category interest", ...], "date": "2020-06-12", "percentage_spike": "20.00%"}`

You will not use any other tools.

Example:

```
--USER MESSAGE--
TASK:
Respond to the query "What was the weather one week ago?". Today is 2024-01-17.

--ASSISTANT MESSAGE--
THINK:
* I need to calculate the date one week ago from 2024-01-17.
* If today is 2024-01-17, then 7 days ago is 2024-01-10.
* I can call the `call_weather_api` tool to get the weather data for 2024-01-10.
* After that, if I have the weather data, I can return the final answer using the `final_answer` tool.
* Tool call needed: Call the `call_weather_api` tool for 2024-01-10.
ACT:
call_weather_api(date="2024-01-10")

--USER MESSAGE--
OBSERVE:
{"date": "2024-01-10", "weather": "Sunny"}

--ASSISTANT MESSAGE--
THINK:
* I have the weather data for 2024-01-10.
* I can return the final answer using the `final_answer` tool.
* Tool call needed: Call the `final_answer` tool with the weather data.
ACT:
final_answer("The weather on 2024-01-10 was sunny.")

--USER MESSAGE--
OBSERVE:
The weather on 2024-01-10 was sunny.
```
"""

# Task description sent to the agent as the first user message of the conversation.
user_prompt_analyze = """
TASK: Find the single largest sales spike according to the percentage increase with a short explanation for it
based on factors such as weather.
"""

print(f"Sending prompt to {MODEL} model...")

# Start the conversation with the system instructions followed by the task.
messages = [
    {"role": "system", "content": react_system_prompt},
    {"role": "user", "content": user_prompt_analyze},
]

react_response = get_completion_messages(messages=messages, model=MODEL, client=client)

# Keep the model's reply in the history as the assistant turn.
messages.append({"role": "assistant", "content": react_response})
print("Response received!\n")


# Print every turn except the system prompt.
for message in messages:
    if message["role"] != "system":
        print_in_box(message["content"], title=f"{message['role'].capitalize()}")

# The reply must contain an ACT section for the tool parser to work with.
assert "ACT:" in messages[-1]["content"], (
    " ❌ No ACT message found in response. Looking for: \n\n ACT:"
)

Sending prompt to gpt-4o-mini model...
Response received!


╔═════════════════════════════════════════════[ User ]═════════════════════════════════════════════╗
║ TASK: Find the single largest sales spike according to the percentage increase with a short      ║
║ explanation for it                                                                               ║
║ based on factors such as weather.                                                                ║
╚══════════════════════════════════════════════════════════════════════════════════════════════════╝

╔══════════════════════════════════════════[ Assistant ]═══════════════════════════════════════════╗
║ THINK:                                                                                           ║
║ * To find the single largest sales spike, I need to obtain the sales data first.                 ║
║ * Once I have the sales data, I can analyze it to identify any significant percentage increases  ║
║ in sales.                   

In [8]:
def get_observation_message(response: str) -> str:
    """
    Take a THINK/ACT response, run the tool call, and return the observation message.

    Args:
        response (str): The THINK/ACT response.

    Returns:
        str: The observation message, always starting with "OBSERVE:\\n".

    Uses regular expressions to match the tool call and run the corresponding tool.
    The tools are tried in this order: get_sales_data, call_weather_api, calculator,
    final_answer; the first pattern that matches wins.

    If the response is invalid, return an error message as a string that the agent can understand.
    The same applies when a recognised tool raises while running (for example a calculator
    expression that divides by zero, or a `causes` argument that is not a valid literal):
    the exception is reported back as an observation instead of propagating to the caller.
    """
    from ast import literal_eval

    # Each pattern requires the tool call on the line directly after "ACT:".
    SALES_DATA_REGEX = r"ACT:\nget_sales_data\(\)"
    WEATHER_REGEX = r"ACT:\ncall_weather_api\(date=\"(.*)\"\)"
    CALCULATOR_REGEX = r"ACT:\ncalculator\(expression=\"(.*)\"\)"
    FINAL_ANSWER_REGEX = r"ACT:\nfinal_answer\(amount_after_spike=\"(.*)\", causes=(.*), date=\"(.*)\", percentage_spike=\"(.*)\"\)"

    try:
        # TOOL 1: get_sales_data
        if re.search(SALES_DATA_REGEX, response):
            sales_data = get_sales_data(products=["P005"])
            # filter sales data to Product 5
            sales_data = [
                item for item in sales_data if item["product_name"] == "Product 5"
            ]
            return f"OBSERVE:\n{sales_data}"

        # TOOL 2: call_weather_api
        if weather_match := re.search(WEATHER_REGEX, response):
            (date,) = weather_match.groups()
            weather_data = call_weather_api(date)
            return f"OBSERVE:\n{weather_data}"

        # TOOL 3: calculator
        if calculator_match := re.search(CALCULATOR_REGEX, response):
            (expression,) = calculator_match.groups()
            return f"OBSERVE:\n{calculator(expression)}"

        # TOOL 4: final_answer
        if final_match := re.search(FINAL_ANSWER_REGEX, response):
            amount_after_spike, causes, date, percentage_spike = final_match.groups()
            # `causes` arrives as the source text of a list literal; parse it without eval().
            causes = literal_eval(causes)
            return f"OBSERVE:\namount_after_spike: {amount_after_spike}\ndate: {date}\npercentage_spike: {percentage_spike}\ncauses: {causes}"
    except Exception as error:
        # A failing tool must not abort the agent loop: report the failure to the agent
        # so that it can correct its next tool call.
        return f"OBSERVE:\nError while running the tool call: {type(error).__name__}: {error}"

    # Error: nothing matched
    return "OBSERVE:\nInvalid tool call or tool not supported."


# Test cases
# Every simulated agent reply shares the same THINK preamble; only the marker line and
# the tool-call line differ between cases.
_REPLY_TEMPLATE = "\nTHINK:\n[thinking here]\n{marker}\n{call}\n"

# Each case: (marker line, tool-call line, thunk building the expected observation).
# The expected value is built lazily so that it is computed after the call under test.
_observation_cases = [
    (
        "ACT:",
        "get_sales_data()",
        lambda: "OBSERVE:\n" + str(get_sales_data(products=["P005"])),
    ),
    (
        "ACT:",
        'call_weather_api(date="2024-01-12")',
        lambda: "OBSERVE:\n" + str(call_weather_api("2024-01-12")),
    ),
    (
        "ACT:",
        'final_answer(amount_after_spike="10", causes=["cause1", "cause2"], date="2024-01-12", percentage_spike="10%")',
        lambda: "OBSERVE:\namount_after_spike: 10\ndate: 2024-01-12\npercentage_spike: 10%\ncauses: ['cause1', 'cause2']",
    ),
    (
        "ACT:",
        'calculator(expression="10 + 10")',
        lambda: "OBSERVE:\n20.0",
    ),
    (
        "ACT:",
        "invalid_tool()",
        lambda: "OBSERVE:\nInvalid tool call or tool not supported.",
    ),
    (
        "ACT_TYPO:",
        "get_sales_data()",
        lambda: "OBSERVE:\nInvalid tool call or tool not supported.",
    ),
]

for _marker, _call, _build_expected in _observation_cases:
    actual = get_observation_message(
        _REPLY_TEMPLATE.format(marker=_marker, call=_call)
    )
    expected = _build_expected()
    assert actual == expected, f"{actual} != {expected}"

In [10]:
# Let's make the ReACT loop!
# Each iteration asks the model for one THINK/ACT message, runs the requested tool via
# get_observation_message, and feeds the observation back as the next user message.

# Upper bound on model calls so that an agent which never reaches a final answer
# cannot loop forever.
MAX_REACT_STEPS = 10

messages = [
    {"role": "system", "content": react_system_prompt},
    {"role": "user", "content": user_prompt_analyze},
]

# Show the conversation so far (the system prompt is skipped).
for message in messages:
    if message["role"] == "system":
        continue
    print_in_box(message["content"], title=f"{message['role'].capitalize()}")

observation_message = None
for num_react_steps in range(MAX_REACT_STEPS):
    react_response = get_completion_messages(messages=messages, model=MODEL, client=client)
    observation_message = get_observation_message(react_response)

    messages.append({"role": "assistant", "content": react_response})

    print_in_box(
        react_response, title=f"Assistant (Think + Act). Step {num_react_steps + 1}"
    )

    messages.append({"role": "user", "content": observation_message})

    # Stop only when the final_answer call was actually parsed: a successful final answer
    # observation starts with the amount_after_spike field. A malformed final_answer call
    # yields an error observation instead, which is fed back so the agent can retry.
    final_answer_parsed = (
        "ACT:\nfinal_answer" in react_response
        and observation_message.startswith("OBSERVE:\namount_after_spike:")
    )
    if final_answer_parsed:
        print_in_box(observation_message, title="FINAL ANSWER")
        break

    print_in_box(
        observation_message, title=f"User (Observe). Step {num_react_steps + 1}"
    )
else:
    # The loop ran out of steps without a parsed final answer.
    print("ERROR: Max number of React steps exceeded. Breaking.")

assert "date: 2024-01-12" in observation_message, "ReACT Loop did not find the spike date"
assert "percentage_spike: 200" in observation_message, "ReACT Loop did not find the spike percentage increase"


╔═════════════════════════════════════════════[ User ]═════════════════════════════════════════════╗
║ TASK: Find the single largest sales spike according to the percentage increase with a short      ║
║ explanation for it                                                                               ║
║ based on factors such as weather.                                                                ║
╚══════════════════════════════════════════════════════════════════════════════════════════════════╝

╔═══════════════════════════════[ Assistant (Think + Act). Step 1 ]════════════════════════════════╗
║ THINK:                                                                                           ║
║ * To find the single largest sales spike, I need to access sales data to identify the sales      ║
║ quantities and their corresponding dates.                                                        ║
║ * I will also need to analyze the weather data for those dates to see if weather condit

In [11]:
# Chain-of-thought variant: instead of calling tools, the model receives all four
# datasets inside a single prompt and is asked to reason step by step.

# System prompt: sets the analyst role and asks for step-by-step thinking.
system_prompt_cot = """
You are a meticulous Retail Demand Analyst.
Your task is to analyze provided sales data and promotion schedules to identify and explain significant sales spikes for specific SKUs.

Think in steps.
"""

# User prompt: instructions, the required output format (free-text analysis followed by a
# fenced JSON object), and the datasets loaded earlier interpolated as context.
# The doubled braces are literal braces inside the f-string.
# NOTE(review): this rebinds user_prompt_analyze, which the ReAct cells also read —
# re-running those cells after this one would send this prompt instead of the ReAct task.
user_prompt_analyze = f"""
## INSTRUCTIONS:

Analyze the data provided below and hypothesize causes for any observed sales spikes.

Instructions:
* Find all sales spikes for each product
* For each product, identify the following:
    * Date of the sales spike
    * Amount of the sales spike and percentage increase
    * Possible causes of the sales spike according to the provided
        * sales data
        * promotion schedule
        * weather conditions
        * competitor pricing data
* Start with your analysis
* Conclude with the single largest spike according to the percentage increase with a short explanation for it.

--

## OUTPUT FORMAT:

STRUCTURED ANALYSIS:
[Structured Analysis]

LARGEST SPIKE:
```json
{{
    "date": "YYYY-MM-DD",
    "amount_before_increase": "X.XX",
    "amount_after_increase": "X.XX",
    "percentage_increase": "X.XX%",
    "causes": [
        "Cause 1",
        "Cause 2",
        "Cause 3"
    ]
}}
```

---

## CONTEXT

Sales Data:
{sales_data}

Promotions Calendar:
{promotions_data}

Weather Data:
{weather_data}

Competitor Pricing Data:
{competitor_pricing_data}
"""

# Single completion request; the reply text is kept in cot_response for parsing below.
print(f"Sending prompt to {MODEL} model...")
cot_response = get_completion(system_prompt=system_prompt_cot, user_prompt=user_prompt_analyze,
                              model=MODEL, client=client)
print("Response received!\n")


def parse_analysis_and_largest_spike(response):
    """
    Split a model reply into its free-text analysis and the fenced JSON object.

    Args:
        response (str): The reply text, expected to contain a ```json fenced block.

    Returns:
        tuple: (analysis, data) where analysis is the stripped text before the first
        ```json fence and data is the object decoded from the text between that
        fence and the next ``` marker.

    Raises:
        RuntimeError: If the reply contains no ```json fence (the reply is printed first).
        json.JSONDecodeError: If the fenced text is not valid JSON.
    """
    import json

    before_fence, *after_fence = response.split("```json")
    if after_fence:
        # Only the first fenced block is used; cut it at the closing fence.
        fenced_text = after_fence[0].split("```", 1)[0]
        return before_fence.strip(), json.loads(fenced_text.strip())

    # No fence found: show the raw reply to help debugging, then fail.
    print()
    print(response)
    raise RuntimeError(
        " ❌ No LARGEST SPIKE found in response. Looking for: \n\n```json"
    )


# Split the reply into the narrative analysis and the decoded "largest spike" JSON object.
analysis, largest_spike = parse_analysis_and_largest_spike(cot_response)

# Render the narrative part as Markdown.
# NOTE(review): `display` is imported from both IPython.display and helper_cot in the
# import cell; the later helper_cot import is the binding in effect here — confirm it
# renders Markdown objects as intended.
display(Markdown(analysis))

Sending prompt to gpt-4o-mini model...
Response received!



### STRUCTURED ANALYSIS:

#### Product 1:
- **Sales Data Summary**:
  - 2024-01-10: 255 units
  - 2024-01-11: 235 units
  - 2024-01-12: 310 units
  - 2024-01-13: 302 units
  - 2024-01-14: 305 units
  - 2024-01-15: 301 units
  - 2024-01-16: 226 units

- **Sales Spikes**:
  - **Date**: 2024-01-12
    - **Amount Before Increase**: 235
    - **Amount After Increase**: 310
    - **Percentage Increase**: 31.91%
    - **Possible Causes**:
      - Heavy Rain and Flood Warning may have led consumers to stay indoors and shop online.
      - No promotions directly affecting this product.
      - Competitor pricing was mixed; however, competitors had lower prices that day.

#### Product 2:
- **Sales Data Summary**:
  - 2024-01-10: 65 units
  - 2024-01-11: 86 units
  - 2024-01-12: 80 units
  - 2024-01-13: 68 units
  - 2024-01-14: 84 units
  - 2024-01-15: 73 units
  - 2024-01-16: 80 units

- **Sales Spikes**:
  - **Date**: 2024-01-11
    - **Amount Before Increase**: 65
    - **Amount After Increase**: 86
    - **Percentage Increase**: 32.31%
    - **Possible Causes**:
      - Competitor B had a significant discount which likely drew more attention to the product.
      - A lack of promotion likely means consumers were motivated by competitor sales.

#### Product 3:
- **Sales Data Summary**:
  - 2024-01-10: 90 units
  - 2024-01-11: 79 units
  - 2024-01-12: 108 units
  - 2024-01-13: 96 units
  - 2024-01-14: 99 units
  - 2024-01-15: 89 units
  - 2024-01-16: 83 units

- **Sales Spikes**:
  - **Date**: 2024-01-12
    - **Amount Before Increase**: 79
    - **Amount After Increase**: 108
    - **Percentage Increase**: 36.71%
    - **Possible Causes**:
      - Similar to Product 1, the heavy rain may have led to more online shopping.
      - No promotions affecting this product, but competitor discounts were in play.

#### Product 4:
- **Sales Data Summary**:
  - 2024-01-10: 171 units
  - 2024-01-11: 145 units
  - 2024-01-12: 143 units
  - 2024-01-13: 130 units
  - 2024-01-14: 167 units
  - 2024-01-15: 126 units
  - 2024-01-16: 175 units

- **Sales Spikes**:
  - **Date**: 2024-01-14
    - **Amount Before Increase**: 130
    - **Amount After Increase**: 167
    - **Percentage Increase**: 28.46%
    - **Possible Causes**:
      - Clear weather conditions may have encouraged more shoppers in-store.
      - No promotions or competitive pricing influenced this spike.

#### Product 5:
- **Sales Data Summary**:
  - 2024-01-10: 96 units
  - 2024-01-11: 114 units
  - 2024-01-12: 342 units
  - 2024-01-13: 103 units
  - 2024-01-14: 104 units
  - 2024-01-15: 100 units
  - 2024-01-16: 125 units

- **Sales Spikes**:
  - **Date**: 2024-01-12
    - **Amount Before Increase**: 114
    - **Amount After Increase**: 342
    - **Percentage Increase**: 200.00%
    - **Possible Causes**:
      - Heavy rain likely led to an increase in online shopping.
      - Strong competitor pricing on similar products might have driven interest.
      - No promotion directly affecting this product.

### LARGEST SPIKE: