In [6]:
import pandas as pd
import numpy as np
from faker import Faker
# --- Synthetic customer table ---

fake = Faker()
np.random.seed(42)  # fixed seed so the draws below are repeatable

n_customers = 10_000

# The three draws below all consume the same seeded global NumPy stream,
# so they are made in a fixed order: signup date, country, channel.

# Signup dates: sampled with replacement from every calendar day
# between 2022-01-01 and 2024-01-01 (both endpoints included).
signup_window = pd.date_range("2022-01-01", "2024-01-01")
signup_dates = pd.to_datetime(np.random.choice(signup_window, n_customers))

# Countries: the five options are equally likely (no weights given).
countries = np.random.choice(
    ["USA", "UK", "Germany", "France", "Canada"],
    n_customers,
)

# Acquisition channels: Google Ads is weighted 40%, the others 20% each.
channels = np.random.choice(
    ["Google Ads", "Referral", "LinkedIn", "Organic"],
    n_customers,
    p=[0.4, 0.2, 0.2, 0.2],
)

# Assemble the frame; customer ids run 1..n_customers.
customers = pd.DataFrame({
    "customer_id": range(1, n_customers + 1),
    "signup_date": signup_dates,
    "country": countries,
    "acquisition_channel": channels,
})

# Persist to CSV (without the index column) and preview the first rows.
customers.to_csv("churn-retention-project/data/customers.csv", index=False)
customers.head()


Unnamed: 0,customer_id,signup_date,country,acquisition_channel
0,1,2022-04-13,Canada,Google Ads
1,2,2023-03-12,France,Organic
2,3,2022-09-28,USA,LinkedIn
3,4,2022-04-17,France,Organic
4,5,2022-03-13,Canada,Google Ads


In [None]:
# Subscription plans: plan name -> monthly fee.
plans = {
    "Basic": 29.99,
    "Pro": 59.99,
    "Premium": 99.99
}

# Plan names and their sampling weights, listed in the dict's insertion order
# (Basic 50%, Pro 30%, Premium 20%). Built once instead of once per customer.
plan_names = list(plans.keys())
plan_weights = [0.5, 0.3, 0.2]

# Generate one subscription record per customer.
#
# NOTE: no seed is set in this cell; the draws continue from the global NumPy
# random stream, so the result depends on that generator's state when the
# cell runs.
#
# The id and signup-date columns are walked together so each customer's start
# date is read directly from its own row. A per-customer boolean-mask lookup
# would rescan the whole customers frame on every iteration (quadratic in the
# number of customers). The random draws happen in the same order per customer
# (plan, churn flag, then churn offset only if churned).
subscription_rows = []
for cid, start_date in zip(customers["customer_id"], customers["signup_date"]):
    plan = np.random.choice(plan_names, p=plan_weights)

    # Simulate churn: each customer has a 25% chance of churning. A churned
    # customer leaves 90 to 599 days (roughly 3 to 20 months) after the start
    # date; np.random.randint's upper bound is exclusive.
    churned = np.random.rand() < 0.25
    churn_date = (
        start_date + pd.to_timedelta(np.random.randint(90, 600), unit="D")
        if churned else pd.NaT  # NaT marks customers who have not churned
    )

    subscription_rows.append([
        cid, plan, plans[plan], start_date, churn_date, churned
    ])

# Build the subscriptions DataFrame from the collected rows in one step.
subscriptions = pd.DataFrame(
    subscription_rows,
    columns=[
        "customer_id", "plan_type", "monthly_fee",
        "start_date", "churn_date", "churned"
    ]
)

# Persist to CSV (without the index column) and preview the first rows.
subscriptions.to_csv("churn-retention-project/data/subscriptions.csv", index=False)
subscriptions.head()


Unnamed: 0,customer_id,plan_type,monthly_fee,start_date,churn_date,churned
0,1,Basic,29.99,2022-04-13,NaT,False
1,2,Basic,29.99,2023-03-12,NaT,False
2,3,Pro,59.99,2022-09-28,NaT,False
3,4,Basic,29.99,2022-04-17,NaT,False
4,5,Premium,99.99,2022-03-13,NaT,False
