# In [None]:
# Re-import necessary libraries as execution state was reset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a reproducible synthetic table of customer transactions.
np.random.seed(42)  # fixed seed so every run yields the same rows
num_customers = 200
num_transactions = 1000

# Customer pool: CUST0001 ... CUST0200 (zero-padded to four digits).
customer_ids = ["CUST{:04d}".format(n) for n in range(1, num_customers + 1)]


def _draw_transaction(pool):
    """Draw one random ``[customer_id, date, amount]`` row.

    The three draws use the global NumPy RNG in a fixed order (customer,
    day offset, amount), so the seeded random stream is consumed the same
    way on every run.
    """
    who = np.random.choice(pool)
    # randint's upper bound is exclusive: offsets 0-89 days from 2024-01-01.
    day_offset = np.random.randint(0, 90)
    when = pd.Timestamp("2024-01-01") + pd.to_timedelta(day_offset, unit="D")
    # Amount is drawn uniformly between 5 and 500, rounded to two decimals.
    amount = round(np.random.uniform(5, 500), 2)
    return [who, when, amount]


# One row per transaction.
data = [_draw_transaction(customer_ids) for _ in range(num_transactions)]

# Assemble the frame and order it by customer, then by date.
df = pd.DataFrame(
    data, columns=["Customer_ID", "Transaction_Date", "Transaction_Amount"]
).sort_values(by=["Customer_ID", "Transaction_Date"])

# Customer-level analysis of the transaction table `df`.
# Reads:  df["Customer_ID"], df["Transaction_Date"], df["Transaction_Amount"].
# Writes: df["Month"], customer_spending, customer_monthly_purchases,
#         avg_transactions_per_month, vip_threshold, customer_analysis.

# Cut-offs are named so they can be tuned in one place.
HIGH_SPENDER_MIN_TOTAL = 2000  # total spend strictly above this => "High-Spender"
VIP_QUANTILE = 0.90            # customers at or above this spend quantile are VIPs

# 1. Classify High-Spending vs Occasional Customers
#    (based on each customer's total spend over the whole dataset)
customer_spending = df.groupby("Customer_ID")["Transaction_Amount"].sum().reset_index()
customer_spending["Customer_Type"] = np.where(
    customer_spending["Transaction_Amount"] > HIGH_SPENDER_MIN_TOTAL,
    "High-Spender",
    "Occasional",
)

# 2. Identify Customer Buying Patterns (Avg Transactions per Month)
df["Month"] = df["Transaction_Date"].dt.to_period("M")
customer_monthly_purchases = (
    df.groupby(["Customer_ID", "Month"])
    .size()
    .reset_index(name="Transactions_Per_Month")
)

# The grouped table above has a row only for months in which a customer
# actually bought something. Taking the mean of those rows would ignore
# zero-activity months and overstate the monthly rate (e.g. 3 purchases in
# one month and none in the other two would average to 3 instead of 1).
# Instead, divide each customer's total count by the number of calendar
# months spanned by the data, so inactive months count as zero.
months_in_window = len(
    pd.period_range(df["Month"].min(), df["Month"].max(), freq="M")
)
avg_transactions_per_month = (
    customer_monthly_purchases.groupby("Customer_ID")["Transactions_Per_Month"]
    .sum()
    .div(months_in_window)
    .reset_index(name="Avg_Transactions_Per_Month")
)

# 3. Find VIP Customers for Marketing Campaigns (Top 10% Spenders)
vip_threshold = customer_spending["Transaction_Amount"].quantile(VIP_QUANTILE)
customer_spending["VIP_Customer"] = np.where(
    customer_spending["Transaction_Amount"] >= vip_threshold, 1, 0
)

# Merge the classifications into one row per customer. Both frames are
# derived from `df` grouped by Customer_ID, so keys must match one-to-one;
# `validate` makes pandas raise if that ever stops being true.
customer_analysis = customer_spending.merge(
    avg_transactions_per_month, on="Customer_ID", validate="one_to_one"
)

# Display the generated dataset with customer insights.
# `ace_tools` is not one of the libraries imported at the top of this file and
# may be unavailable outside the environment this cell was written in
# (NOTE(review): confirm where it is expected to be installed). Import it
# defensively so a missing module does not abort the rest of the script.
try:
    import ace_tools as tools
except ImportError:
    tools = None  # keep the name bound for any later code that checks it
    # Fallback: plain-text rendering of the same table under the same title.
    print("Customer Transaction Insights")
    print(customer_analysis.to_string(index=False))
else:
    tools.display_dataframe_to_user(name="Customer Transaction Insights", dataframe=customer_analysis)

# Histogram of total spend per customer, with the VIP cut-off marked.
fig, ax = plt.subplots(figsize=(10, 5))

# 20 equal-width bins over the per-customer totals.
ax.hist(
    customer_spending["Transaction_Amount"],
    bins=20,
    color="skyblue",
    edgecolor="black",
)

# Dashed red vertical line at the VIP spending threshold.
ax.axvline(
    vip_threshold,
    color="red",
    linestyle="dashed",
    linewidth=2,
    label="VIP Threshold",
)

ax.set_xlabel("Total Transaction Amount ($)")
ax.set_ylabel("Number of Customers")
ax.set_title("Customer Spending Distribution")
ax.legend()

plt.show()
