In [1]:
import pandas as pd
from sqlalchemy import create_engine
import os
from langchain.sql_database import SQLDatabase
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import AgentExecutor
import google.generativeai as genai
from google.oauth2 import service_account
import google.auth
from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType
from langchain_google_genai import ChatGoogleGenerativeAI


In [3]:
GOOGLE_CREDENTIALS_PATH = "euphoric-hull-441616-m7-3436c5674cc8.json"
credentials = service_account.Credentials.from_service_account_file(
    GOOGLE_CREDENTIALS_PATH,
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_CREDENTIALS_PATH
creds, project = google.auth.default()
print(f"Authenticated with Google Cloud Project: {project}")

GOOGLE_API_KEY = "AIzaSyBelGeSo459AIM9On_ko9YtyMwIbjO1hUE"
genai.configure(api_key=GOOGLE_API_KEY)

Authenticated with Google Cloud Project: euphoric-hull-441616-m7


In [5]:
DATABASE_URI = "postgresql://postgres:postgre@localhost:5432/sales_db"
engine = create_engine(DATABASE_URI)
db = SQLDatabase.from_uri(DATABASE_URI)

In [7]:
llm = ChatGoogleGenerativeAI(model="models/gemini-2.0-flash", temperature=0.3)
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

In [12]:
def summarize_segment_profiles(_: str) -> str:
    df = pd.read_sql('SELECT * FROM segment_profiles ORDER BY "Cluster"', engine)
    lines = []
    for _, r in df.iterrows():
        lines.append(
            f"Cluster {int(r.Cluster)} ({r.Customer_Type}): "
            f"{r.Region} region, Recency: {r.Recency:.1f} days, Frequency: {r.Frequency}, "
            f"Monetary: €{r.Monetary:.2f}, AOV: €{r.AOV:.2f}, "
            f"Avg Discount: {r.Avg_Discount:.2f}, Product Variety: {r.Product_Variety}, "
            f"Avg Purchase Interval: {r.Avg_Purchase_Interval:.1f} days, "
            f"Top Category: {r.Top_Category}."
        )
    return "\n".join(lines)

summarize_profiles_tool = Tool(
    name="summarize_segment_profiles",
    func=summarize_segment_profiles,
    description="Summarizes customer segments using updated RFM and behavioral metrics."
)

In [14]:
def best_selling(tool_input: str) -> str:
    region = tool_input.strip() or None
    df = pd.read_sql("SELECT * FROM forecast_aggregates", engine)
    if region:
        df = df[df.Region.str.lower() == region.lower()]
        if df.empty:
            return f"No forecast data for region '{region}'."
    best = df.loc[df.predicted_quantity.idxmax()]
    target = f"in {region}" if region else "overall"
    return (
        f"Best selling product {target}: "
        f"{best.Product} with {best.predicted_quantity:.0f} units "
        f"(€{best.predicted_revenue:,.2f})."
    )

best_selling_tool = Tool(
    name="best_selling",
    func=best_selling,
    description="Returns the best selling product and forecasted quantity/revenue for a given region."
)

In [28]:
def calculate_revenue(_: str) -> str:
    df = pd.read_sql("SELECT * FROM sales", engine)
    total_revenue = df["Total_Price"].sum()
    top_regions = df.groupby("Region")["Total_Price"].sum().nlargest(3)
    top_products = df.groupby("Product")["Total_Price"].sum().nlargest(3)
    return (
        f"Total revenue: €{total_revenue:,.2f}\n"
        f"Top regions by revenue:\n{top_regions.to_string()}\n\n"
        f"Top products by revenue:\n{top_products.to_string()}"
    )

revenue_tool = Tool(
    name="calculate_revenue",
    func=calculate_revenue,
    description="Calculates total revenue and shows top products and regions by earnings."
)

In [30]:
def customer_insight(customer_id: str) -> str:
    try:
        customer_id = int(customer_id)
    except ValueError:
        return "Invalid customer ID."
    df = pd.read_sql('SELECT * FROM customer_segmentation_rfm', engine)
    row = df[df.Customer_ID == customer_id]
    if row.empty:
        return "Customer not found."
    r = row.iloc[0]
    return (
        f"Customer {customer_id} is in Segment {r.Cluster} ({r.Segment_Label}).\n"
        f"Type: {r.Customer_Type}, Region: {r.Region}, Churn Flag: {r.Churn_Flag}\n"
        f"Recency: {r.Recency} days, Frequency: {r.Frequency}, Monetary: €{r.Monetary:.2f}\n"
        f"AOV: €{r.AOV:.2f}, Avg Discount: {r.Avg_Discount:.2f}, "
        f"Product Variety: {r.Product_Variety}, Avg Purchase Interval: {r.Avg_Purchase_Interval:.1f} days.\n"
        f"Top Category: {r.Top_Category}"
    )

customer_insight_tool = Tool(
    name="customer_insight",
    func=customer_insight,
    description="Provides detailed profile and RFM info for a given customer ID."
)

In [32]:
def smart_recommendations(_: str) -> str:
    df = pd.read_sql('SELECT * FROM segment_profiles', engine)
    lines = []
    for _, r in df.iterrows():
        if r.Recency > 30:
            action = "Send re-engagement emails."
        elif r.AOV > 500:
            action = "Promote high-end offers."
        elif r.Product_Variety < 3:
            action = "Offer bundle deals to encourage cross-sell."
        else:
            action = "Maintain engagement with loyalty rewards."
        lines.append(
            f"Cluster {int(r.Cluster)} ({r.Customer_Type}) in {r.Region}: {action}"
        )
    return "\n".join(lines)

recommendation_tool = Tool(
    name="smart_recommendations",
    func=smart_recommendations,
    description="Gives marketing recommendations per segment based on recency, AOV, and product variety."
)

In [34]:
prefix = """
You are SalesGPT, a smart assistant trained on sales, customer segmentation, forecasting, and product analytics.
Use the provided tools and database knowledge to generate clear, actionable, and accurate insights.
"""

tools = [
    *toolkit.get_tools(),
    summarize_profiles_tool,
    best_selling_tool,
    revenue_tool,
    customer_insight_tool,
    recommendation_tool
]

agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
    agent_kwargs={"prefix": prefix}
)

In [40]:

while True:
    query = input("You: ")
    if query.lower() in ("exit", "quit"):
        print("Goodbye 👋")
        break
    answer = agent.run(query)
    print("Agent:", answer)

You:  Give me marketing suggestions based on the segment profiles


Agent: Cluster 0 (B2C) in Sachsen: Maintain engagement with loyalty rewards.
Cluster 1 (B2C) in Bremen: Maintain engagement with loyalty rewards.
Cluster 2 (B2C) in Nordrhein-Westfalen: Maintain engagement with loyalty rewards.
Cluster 0 (B2B) in Bremen: Promote high-end offers.
Cluster 1 (B2B) in Bremen: Promote high-end offers.
Cluster 2 (B2B) in Hamburg: Promote high-end offers.


You:  Which segment should we target with a discount campaign?


Agent: The B2C segments, particularly those in Sachsen, Bremen, and Nordrhein-Westfalen, should be targeted with a discount campaign.


You:  desciribe each segment and give it an appropriate name 


Agent: *   **Sachsen's Beverage Enthusiasts (Cluster 0 B2C):** Sachsen region, Recency: 2.0 days, Frequency: 208.0, Monetary: €30794.31, AOV: €149.15, Avg Discount: 0.00, Product Variety: 47.0, Avg Purchase Interval: 1.2 days, Top Category: Alcoholic Beverages.
*   **Bremen's Hydration Partners (Cluster 0 B2B):** Bremen region, Recency: 2.0 days, Frequency: 206.0, Monetary: €161464.59, AOV: €780.47, Avg Discount: 0.08, Product Variety: 47.0, Avg Purchase Interval: 1.2 days, Top Category: Water.
*   **Bremen's Everyday Drinkers (Cluster 1 B2C):** Bremen region, Recency: 2.0 days, Frequency: 192.0, Monetary: €25285.41, AOV: €131.09, Avg Discount: 0.00, Product Variety: 47.0, Avg Purchase Interval: 1.3 days, Top Category: Water.
*   **Bremen's Juice Suppliers (Cluster 1 B2B):** Bremen region, Recency: 2.0 days, Frequency: 187.0, Monetary: €149599.47, AOV: €801.93, Avg Discount: 0.08, Product Variety: 47.0, Avg Purchase Interval: 1.3 days, Top Category: Juices.
*   **NRW's Juice Lovers (Cl

KeyboardInterrupt: Interrupted by user