# 01 — Synthetic Data Generation

This notebook generates a synthetic **subscription churn** dataset and saves it as a CSV file under `data/synthetic/`.

In [1]:
# Notebook setup
import os, sys
from pathlib import Path

# Add project root to PYTHONPATH
def find_project_root(start, marker="src"):
    """Locate the project root by walking upward from ``start``.

    The later cells import from the ``src`` package, so the project root is
    taken to be the nearest directory (``start`` itself or one of its
    ancestors) that contains a ``marker`` sub-directory.

    Args:
        start: Directory to begin the search from (str or Path).
        marker: Name of the sub-directory that identifies the project root.

    Returns:
        The first matching directory as a ``Path``. If no directory matches,
        ``start.parent`` is returned, which is what this cell used
        unconditionally before.
    """
    start = Path(start)
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    # Nothing matched: keep the historical "one level above the cwd" behavior.
    return start.parent


# Resolving via the marker keeps the notebook working whether the kernel was
# started in the notebook's own folder or directly in the repository root.
PROJECT_ROOT = find_project_root(Path.cwd())
if str(PROJECT_ROOT) not in sys.path:
    # Prepend so the project's own modules win over same-named installed packages.
    sys.path.insert(0, str(PROJECT_ROOT))

print('Project root:', PROJECT_ROOT)


Project root: /mnt/data/churn_fix/churn-reduction-end-to-end


In [2]:
import pandas as pd
from src.config import Config
from src.data_generation import generate_synthetic_subscription_churn

# Instantiate the project configuration with its defaults and hand it to the
# generator, which returns the synthetic dataset as a DataFrame.
cfg = Config()
df = generate_synthetic_subscription_churn(cfg)

# Preview the first five rows (five is also the pandas default for head()).
df.head(n=5)

Unnamed: 0,customer_id,signup_date,age,country,device,acquisition_channel,plan,billing_cycle,payment_method,base_price,...,content_diversity,engagement_trend,support_tickets_6m,avg_ticket_resolution_hours,had_streaming_issues,refund_requests_12m,tenure_months,churn_probability_true,churned,churned_month
0,1,2021-06-12,19,CA,Mobile,Affiliate,Standard,Annual,Debit Card,13.99,...,3,1.06,0,10.9,1,0,55,0.7345,1,5.0
1,2,2024-11-14,55,SE,Mobile,Referral,Basic,Annual,PayPal,9.99,...,11,-1.11,0,29.4,0,0,14,0.7376,1,7.0
2,3,2024-04-10,29,IT,Mobile,Affiliate,Standard,Monthly,Debit Card,13.99,...,9,-0.92,1,28.3,0,0,21,0.8265,1,12.0
3,4,2023-03-13,54,UK,Mobile,Email,Standard,Monthly,Credit Card,13.99,...,9,0.76,1,5.2,0,0,34,0.619,1,2.0
4,5,2023-03-02,47,US,SmartTV,Social,Standard,Monthly,Debit Card,13.99,...,5,0.06,0,17.2,0,1,34,0.814,1,6.0


In [3]:
# Dataset dimensions together with the mean of the `churned` column.
# NOTE(review): the saved output reports a mean of ~0.72 — confirm that such a
# high churn rate is what the generator is meant to produce.
churn_rate = df['churned'].mean()
(df.shape, churn_rate)

((20000, 25), 0.71775)

In [4]:
# Save
# The target directory and file name both come from the config object; the
# directory is created on demand so a fresh checkout can run this cell.
out_dir = PROJECT_ROOT.joinpath(cfg.SYNTHETIC_DIR)
out_dir.mkdir(parents=True, exist_ok=True)

out_path = out_dir.joinpath(cfg.SYNTHETIC_FILENAME)
# index=False keeps the DataFrame index out of the CSV.
df.to_csv(out_path, index=False)
print('Saved:', out_path)

  values = values.astype(str)


Saved: /mnt/data/churn_fix/churn-reduction-end-to-end/data/synthetic/subscription_churn_synthetic.csv


In [5]:
# Quick sanity checks
# 1) No customer_id value may appear more than once.
assert not df['customer_id'].duplicated().any()

# 2) The churn flag may only take the values 0 and 1.
observed_flags = set(df['churned'].unique())
assert observed_flags <= {0, 1}

print('OK: basic checks passed')

OK: basic checks passed
