In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Legacy global seed: every draw below comes from this single stream, so the
# ORDER of the np.random calls is part of what makes the dataset reproducible.
SEED = 42
np.random.seed(SEED)

n_customers = 10000
# Zero-pad IDs to at least 4 digits; widen automatically if n_customers ever
# exceeds 10,000 so that all IDs keep the same length.
id_width = max(4, len(str(n_customers - 1)))
customer_ids = [f"CUST{i:0{id_width}d}" for i in range(n_customers)]

# Snapshot date from which recency is counted backwards.
REFERENCE_DATE = datetime(2025, 7, 28)

# Base features (recency, frequency, monetary)
recency_days = np.random.exponential(scale=100, size=n_customers).astype(int)
frequency = np.random.poisson(lam=5, size=n_customers)
monetary = np.round(np.random.gamma(shape=2, scale=150, size=n_customers), 2)
# Convert each numpy integer to a plain int before building the timedelta.
last_purchase_dates = [REFERENCE_DATE - timedelta(days=int(r)) for r in recency_days]

# Distractor (uninformative) features: drawn independently and never used in z.
region_code = np.random.choice(['North', 'South', 'East', 'West'], size=n_customers)
channel = np.random.choice(['Mobile', 'Web', 'InStore'], size=n_customers)

# Additional informative features: both of these DO enter z below.
preferred_discount = np.random.uniform(0, 0.5, size=n_customers)
loyalty_score = np.clip(np.random.normal(loc=0.5, scale=0.2, size=n_customers), 0, 1)

# Latent score for the target: recency and discount push it up, while
# frequency, spend and loyalty pull it down. The sqrt and power terms make the
# relationship nonlinear, and the Gaussian term adds per-customer noise.
z = (
    0.03 * recency_days
    - 0.4 * np.sqrt(frequency + 1)
    - 0.002 * monetary
    + 2 * (preferred_discount ** 1.5)
    - 1.5 * loyalty_score
    + np.random.normal(0, 1.5, size=n_customers)  # noise (last RNG draw in this block)
)

# Convert to probabilities using sigmoid
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Works element-wise on scalars and array-likes that NumPy ufuncs accept.

    For very negative ``x`` the intermediate ``exp(-x)`` overflows to ``inf``.
    That overflow is harmless here because ``1 / (1 + inf)`` evaluates to
    ``0.0``, which is the correct limiting value. The overflow is therefore
    silenced locally, so callers do not get a spurious ``RuntimeWarning`` (or a
    ``FloatingPointError`` when overflow is configured to raise).
    """
    # Scope the setting to this expression only; other floating-point
    # diagnostics elsewhere in the program are left untouched.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-x))

# Squash the latent score through the sigmoid to get per-customer dormancy probabilities
p_dormant = sigmoid(z)

# Binary target: one Bernoulli draw per customer (1 with probability p_dormant),
# implemented by comparing a uniform [0, 1) draw against that probability
uniform_draws = np.random.rand(n_customers)
is_dormant = (uniform_draws < p_dormant).astype(int)

# Column mapping; key order here is the column order of the frame and the CSV
columns = {
    "customer_id": customer_ids,
    "last_purchase_date": last_purchase_dates,
    "total_orders": frequency,
    "total_spent": monetary,
    "region": region_code,
    "channel": channel,
    "preferred_discount": preferred_discount,
    "loyalty_score": loyalty_score,
    "recency_days": recency_days,
    "is_dormant": is_dormant,
}
df = pd.DataFrame(columns)

# Write the dataset without the positional index, then preview the first rows
df.to_csv("customer_data.csv", index=False)
df.head()


Unnamed: 0,customer_id,last_purchase_date,total_orders,total_spent,region,channel,preferred_discount,loyalty_score,recency_days,is_dormant
0,CUST0000,2025-06-12,4,318.04,North,Mobile,0.14566,0.03772,46,0
1,CUST0001,2024-09-30,6,181.54,East,InStore,0.311224,0.544539,301,1
2,CUST0002,2025-03-19,3,314.99,South,Web,0.166893,0.537934,131,1
3,CUST0003,2025-04-28,1,871.96,North,Mobile,0.34971,0.571506,91,0
4,CUST0004,2025-07-12,2,180.3,South,Web,0.176844,0.637859,16,0
