Chatbot that helps non-technical users interpret sales forecasts quickly and easily

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# import openai
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import anthropic

# Shared Anthropic client used by the cells below; the API key is read from
# the environment (populated from .env by load_dotenv() in the first cell).
client = anthropic.Anthropic(
    api_key=os.getenv("ANTHROPIC_API_KEY"),
)

In [None]:
# Custom tool: aggregate tomorrow's predicted quantities per product

def get_tomorrow_quantities(df, date_col="DATE", qty_col="Predicted_Next_Purchase_Volume", product_col="PRODUCT_ID", today=None):
    """
    Sum the predicted quantity per product for the day after ``today``.

    Parameters
    ----------
    df : pandas.DataFrame
        Forecast rows containing ``date_col``, ``qty_col`` and ``product_col``.
        The frame is left untouched (dates are parsed into a local Series).
    date_col : str
        Column holding the forecast date (anything ``pd.to_datetime`` accepts).
    qty_col : str
        Column holding the quantity to be summed.
    product_col : str
        Column identifying the product; one output line is produced per value.
    today : date, datetime, str or None, optional
        Reference day. When ``None`` it defaults to 2024-12-30 (a fixed date
        used for testing), which makes "tomorrow" 2024-12-31.

    Returns
    -------
    str
        A multi-line string such as ``"Product A: 120 units\\nProduct B: 75 units"``,
        or ``"No sales data found for <date>."`` when no row matches tomorrow.
    """
    # Parse into a local Series instead of assigning back into df, so the
    # caller's DataFrame is not silently modified by this helper.
    dates = pd.to_datetime(df[date_col])

    # "Today" is pinned to December 30, 2024 for testing unless overridden.
    if today is None:
        today = datetime(2024, 12, 30).date()
    else:
        today = pd.Timestamp(today).date()
    tomorrow = today + timedelta(days=1)
    print("Tomorrow: ", tomorrow)

    # Keep only the rows whose calendar date is tomorrow
    df_tomorrow = df[dates.dt.date == tomorrow]

    if df_tomorrow.empty:
        return f"No sales data found for {tomorrow}."

    # Aggregate (sum) quantities by product
    summary = (
        df_tomorrow
        .groupby(product_col)[qty_col]
        .sum()
        .reset_index()
    )

    print("Aggregated df shape:", summary.shape)

    # Iterate the two columns directly: iterrows() builds one Series per row
    # and would upcast integer product IDs to float next to float quantities.
    lines = [
        f"Product {product}: {quantity} units"
        for product, quantity in zip(summary[product_col], summary[qty_col])
    ]
    return "\n".join(lines)

In [None]:
# RAG Agent Function Definition
# Build prompt & call Claude
def rag_agent(csv_path, model="claude-3-haiku-20240307", max_tokens=500):
    """
    Load the sales CSV, aggregate quantities for 2024-12-31, and ask Claude
    to generate concise stock recommendations.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the forecast CSV; it is read with ``pd.read_csv``.
    model : str, optional
        Anthropic model identifier passed to ``client.messages.create``.
    max_tokens : int, optional
        Upper bound on the length of the generated answer.

    Returns
    -------
    str
        Claude's answer with surrounding whitespace stripped; the
        "No sales data found ..." message when there is nothing to aggregate;
        or an error message when the response contains no text.
    """
    df = pd.read_csv(csv_path)
    print("Loaded DF shape:", df.shape)

    agg_info = get_tomorrow_quantities(
        df,
        date_col="DATE",
        qty_col="Predicted_Next_Purchase_Volume",
        product_col="PRODUCT_ID"
    )

    # With no rows for tomorrow there is nothing to recommend; return the
    # helper's message instead of asking the model to invent quantities.
    if agg_info.startswith("No sales data found"):
        return agg_info

    human_text = (
        "Below is the aggregated sales data for tomorrow (2024-12-31):\n"
        f"{agg_info}\n\n"
        "User question: Which product quantities should be stocked tomorrow?\n"
        "Preserve the same line-by-line format in your answer. Round up all quantities to the nearest integer.\n"
    )

    response = client.messages.create(
        model=model,
        system="You are a retail demand forecasting assistant.",
        messages=[{"role": "user", "content": human_text}],
        max_tokens=max_tokens
    )

    # A truncated answer would silently drop products from the list, so make
    # it visible when generation stopped because the token limit was reached.
    if getattr(response, "stop_reason", None) == "max_tokens":
        print("Warning: response was cut off at max_tokens; the product list may be incomplete.")

    # Claude returns a list of content blocks; collect the text of every block
    # that carries text rather than looking at the first block only.
    texts = [block.text for block in (response.content or []) if hasattr(block, "text")]
    if texts:
        return "".join(texts).strip()
    return "Error: Unable to parse Claude’s response."

In [7]:
# Read forecasts
if __name__ == "__main__":
    # The forecast file location can be overridden through the
    # SALES_FORECAST_CSV environment variable (e.g. in .env), so the notebook
    # runs on other machines without editing this cell; the original path
    # remains the default.
    csv_path = os.environ.get(
        "SALES_FORECAST_CSV",
        "/Users/cypacyt/Documents/IAIAProject/data/train_df.csv",
    )
    result = rag_agent(csv_path)
    print("Agent Response:\n", result)


Loaded DF shape: (4627, 44)
Tomorrow:  2024-12-31
Aggregated df shape: (18, 2)
Agent Response:
 Based on the provided sales data, here are the product quantities that should be stocked tomorrow:

Product COCA COLA_250 ML: 10 units
Product DASANI WATER_1000 ML: 10 units
Product DASANI WATER_2250 ML: 17 units
Product DASANI WATER_250 ML: 14 units
Product DASANI WATER_300 ML: 19 units
Product DASANI WATER_500 ML: 8 units
Product DASANI WATER_750 ML: 9 units
Product DOVE BODY WASH_100 ML: 19 units
Product DOVE BODY WASH_1000 ML: 49 units
Product KELLOGS CORN FLAKES_500 G: 23 units
Product MOUNTAIN DEW_200 ML: 8 units
Product MOUNTAIN DEW_250 ML: 9 units
Product MOUNTAIN DEW_300 ML: 10 units
Product MOUNTAIN DEW_750 ML: 12 units
Product SPRITE_200 ML: 8 units
Product SPRITE_2250 ML: 13 units
Product SPRITE_300 ML: 9 units
Product SPRITE_750 ML: 11 units
