In [6]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# ------------------------
# General settings
# ------------------------
# Seed both random sources used by this notebook (the stdlib `random` module
# and NumPy's global generator) so that Restart & Run All regenerates exactly
# the same synthetic dataset instead of a new one on every run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

n_customers = 1000
start_date = datetime(2024, 1, 1)
end_date = datetime(2025, 6, 30)  # one and a half years

# Per-plan pricing: recurring monthly fee and customer acquisition cost ("cac").
plan_options = {
    "Basic": {"monthly_fee": 50, "cac": 30},
    "Pro": {"monthly_fee": 200, "cac": 100},
    "Enterprise": {"monthly_fee": 500, "cac": 200}
}

# ------------------------
# Build the customers table
# ------------------------
# Each customer gets a signup date drawn uniformly (by day) from the data
# window and a plan picked at random from the configured plans.
window_days = (end_date - start_date).days
signup_dates = []
for _ in range(n_customers):
    signup_dates.append(start_date + timedelta(days=random.randint(0, window_days)))

plan_names = list(plan_options.keys())
customers = pd.DataFrame({
    "customer_id": range(1001, 1001 + n_customers),
    "signup_date": signup_dates,
    "plan_type": np.random.choice(plan_names, n_customers)
})

# Attach the monthly fee and acquisition cost that belong to each plan
customers["monthly_fee"] = [plan_options[plan]["monthly_fee"] for plan in customers["plan_type"]]
customers["acquisition_cost"] = [plan_options[plan]["cac"] for plan in customers["plan_type"]]

# Random churn: each customer is independently flagged as churned with
# probability 0.15.
churn_flags = np.random.rand(n_customers) < 0.15
churn_dates = []
for signup, churned in zip(customers["signup_date"], churn_flags):
    if not churned:
        # Still active at the end of the window: no churn date.
        churn_dates.append(None)
        continue
    days_to_end = (end_date - signup).days
    if days_to_end >= 30:
        # Churn no earlier than 30 days after signup and no later than end_date.
        churn_dates.append(signup + timedelta(days=random.randint(30, days_to_end)))
    else:
        # Signed up less than 30 days before the window closes: churn at the end.
        churn_dates.append(end_date)
customers["churn_date"] = churn_dates

# ------------------------
# Build the monthly subscriptions table
# ------------------------
# One row per customer per billing month: start at the signup date and step one
# calendar month at a time until the end of the data window or the customer's
# churn date, whichever comes first.
window_end = pd.Timestamp(end_date)
subscriptions = []
for _, row in customers.iterrows():
    churn_date = row["churn_date"]
    # A missing churn date normally arrives here as NaT, not None, because
    # pandas stores the column as datetime64. `NaT is None` is False and every
    # comparison against NaT is False, so an `is None` test would skip all
    # still-active customers. pd.isna() covers both None and NaT.
    if pd.isna(churn_date):
        last_billable = window_end
    else:
        last_billable = min(pd.Timestamp(churn_date), window_end)

    current_date = row["signup_date"]
    while current_date <= last_billable:
        subscriptions.append({
            "subscription_id": f"S-{row['customer_id']}-{current_date.strftime('%Y%m')}",
            "customer_id": row["customer_id"],
            "month": current_date.strftime("%Y-%m"),
            "monthly_fee": row["monthly_fee"]
        })
        # Move on to the next month
        current_date = current_date + pd.DateOffset(months=1)


# Name the columns explicitly so the frame keeps its schema even when no rows
# were generated.
subscriptions_df = pd.DataFrame(
    subscriptions,
    columns=["subscription_id", "customer_id", "month", "monthly_fee"],
)

# ------------------------
# Build the revenue table (MRR)
# ------------------------
# Revenue mirrors the subscriptions table: every subscription row is tagged as
# "MRR" and its amount is that row's monthly fee. `assign` returns a new frame,
# so subscriptions_df itself is left unchanged.
revenue = subscriptions_df.assign(
    revenue_type="MRR",
    amount=subscriptions_df["monthly_fee"],
)

# ------------------------
# Save the data as CSV
# ------------------------
# Write each table to its own CSV file in the working directory, without the
# DataFrame index.
for frame, csv_name in (
    (customers, "customers_fake.csv"),
    (subscriptions_df, "subscriptions_fake.csv"),
    (revenue, "revenue_fake.csv"),
):
    frame.to_csv(csv_name, index=False)

print(
    "تم توليد البيانات وحفظها بنجاح! ✅",
    "ملفات CSV: customers_fake.csv | subscriptions_fake.csv | revenue_fake.csv",
    sep="\n",
)

تم توليد البيانات وحفظها بنجاح! ✅
ملفات CSV: customers_fake.csv | subscriptions_fake.csv | revenue_fake.csv


In [7]:
# Reload the customers table from disk. CSV stores dates as plain text, so ask
# read_csv to parse the two date columns; otherwise they load as strings and
# date arithmetic or comparisons on them will not work as expected.
customers_df = pd.read_csv(
    'customers_fake.csv',
    parse_dates=["signup_date", "churn_date"],
)
display(customers_df.head())

Unnamed: 0,customer_id,signup_date,plan_type,monthly_fee,acquisition_cost,churn_date
0,1001,2024-11-07,Basic,50,30,
1,1002,2024-06-06,Basic,50,30,
2,1003,2024-12-31,Basic,50,30,
3,1004,2024-11-21,Pro,200,100,
4,1005,2024-08-16,Pro,200,100,
