In [3]:
import pandas as pd
import numpy as np
from faker import Faker
import random
import sqlite3
from datetime import datetime, timedelta

In [4]:
# Seed every random source used in this notebook so that Restart & Run All
# regenerates the same synthetic data. Dates are still generated relative to the
# day the notebook is run (e.g. '-2y', 'today'), so they shift with the run date.
SEED = 42

fake = Faker()
Faker.seed(SEED)      # Faker has its own generator; seeding numpy/random does not affect it
np.random.seed(SEED)  # drives the np.random.choice draws
random.seed(SEED)     # drives the random.randint / random.choice draws

In [5]:
# --- Customers Table ---
# One row per synthetic customer; IDs run from 1 to n_customers inclusive.
n_customers = 500
customer_ids = list(range(1, n_customers + 1))

# Draw each column up front, in the same order the columns appear in the frame,
# so the random streams are consumed in a fixed sequence.
customer_names = [fake.name() for _ in range(n_customers)]
signup_dates = [
    fake.date_between(start_date='-2y', end_date='-1d')  # between two years ago and yesterday
    for _ in range(n_customers)
]
plan_types = np.random.choice(
    ['Free', 'Basic', 'Premium'],
    size=n_customers,
    p=[0.2, 0.5, 0.3],  # 20% Free, 50% Basic, 30% Premium
)
countries = np.random.choice(['US', 'DE', 'IN', 'BR', 'UK'], size=n_customers)  # uniform

customers = pd.DataFrame({
    'customer_id': customer_ids,
    'name': customer_names,
    'signup_date': signup_dates,
    'plan_type': plan_types,
    'country': countries,
})

In [6]:
# --- Activity Logs ---
# Every customer gets between 5 and 30 events. Events fall within roughly the
# past year, but never before the customer's own signup date: signups can be as
# recent as yesterday, so an unclamped one-year window would produce activity
# for customers who did not exist yet.
activity_window_start = datetime.today().date() - timedelta(days=365)
signup_by_customer = dict(zip(customers['customer_id'], customers['signup_date']))

# Accumulate plain dicts and build the DataFrame once at the end.
activity_records = []
for cid in customer_ids:
    earliest_event = max(signup_by_customer[cid], activity_window_start)
    for _ in range(random.randint(5, 30)):
        activity_records.append({
            'log_id': len(activity_records) + 1,  # sequential, 1-based across all customers
            'customer_id': cid,
            'event_type': random.choice(['login', 'stream', 'browse', 'cancel']),
            'event_date': fake.date_between(start_date=earliest_event, end_date='today')
        })
activity_logs = pd.DataFrame(activity_records)

In [7]:
# --- Payments ---
# Every customer gets between 1 and 6 payments. Payment dates fall within
# roughly the past year, but never before the customer's signup date (signups
# can be as recent as yesterday, so an unclamped window would bill customers
# before they joined).
# NOTE(review): payments are generated for every plan_type, including 'Free',
# and amounts are not tied to the plan — confirm this is intended.
payment_window_start = datetime.today().date() - timedelta(days=365)
signup_by_customer = dict(zip(customers['customer_id'], customers['signup_date']))

# Accumulate plain dicts and build the DataFrame once at the end.
payment_records = []
for cid in customer_ids:
    earliest_payment = max(signup_by_customer[cid], payment_window_start)
    for _ in range(random.randint(1, 6)):
        success = random.choice([True]*9 + [False])  # 9 in 10 payments succeed
        payment_records.append({
            'payment_id': len(payment_records) + 1,  # sequential, 1-based across all customers
            'customer_id': cid,
            'payment_date': fake.date_between(start_date=earliest_payment, end_date='today'),
            'amount': random.choice([10, 15, 20, 25]),
            'success': success
        })
payments = pd.DataFrame(payment_records)


In [8]:
# --- Churned Customers ---
# A fixed 30% sample of customers is marked as churned, each with a churn date
# in roughly the last six months and never earlier than their signup date.
#
# The window start is an explicit date rather than the string '-6m': in Faker's
# relative-offset syntax lowercase 'm' means minutes ('M' is months), so '-6m'
# resolved to yesterday and every churn date landed on yesterday or today.
churn_window_start = datetime.today().date() - timedelta(days=182)

churned_sample = customers.sample(frac=0.3, random_state=42)

# .copy() makes this an independent frame, so adding a column below does not
# raise SettingWithCopyWarning. The sampled index is kept as-is.
churned_customers = churned_sample[['customer_id']].copy()
churned_customers['churn_date'] = [
    fake.date_between(start_date=max(signup, churn_window_start), end_date='today')
    for signup in churned_sample['signup_date']
]

In [9]:
# Save to SQLite for SQL querying
# Each frame is written to a table of the same name, replacing any table left
# over from an earlier run, so this cell can be re-run safely.
tables = {
    'customers': customers,
    'activity_logs': activity_logs,
    'payments': payments,
    'churned_customers': churned_customers,
}

conn = sqlite3.connect('customer_churn.db')
try:
    for table_name, frame in tables.items():
        frame.to_sql(table_name, conn, index=False, if_exists='replace')
    conn.commit()
finally:
    # Always release the database file handle, even if a write above fails.
    conn.close()

print("✅ Synthetic data generated and saved to 'customer_churn.db'")

✅ Synthetic data generated and saved to 'customer_churn.db'
