In [6]:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text

# The connection URL is read from the CHURN_RETENTION_DB_URL environment
# variable when it is set, so credentials do not have to live in source control.
# NOTE(review): the fallback below still embeds a plaintext password. Rotate it
# and remove the fallback once the environment variable is configured everywhere.
engine = create_engine(
    os.environ.get(
        "CHURN_RETENTION_DB_URL",
        "postgresql+psycopg2://postgres:Fizuli050@localhost:5432/churn_retention",
    )
)

# Rebuild the customers and subscriptions tables from scratch.
#
# The reset and both loads run inside ONE transaction: if an insert or the
# id read-back fails, the TRUNCATE is rolled back as well, so a failed run
# never leaves the database empty or half-populated.

plans = {"Basic": 29.99, "Pro": 59.99, "Premium": 99.99}
plan_names = list(plans)
# Sampling probabilities, in the same order as the keys of `plans`.
plan_weights = [0.5, 0.3, 0.2]

# Read the CSV before touching the database so a bad path fails fast,
# before anything has been truncated.
customers = pd.read_csv("churn-retention-project/data/customers.csv")

with engine.begin() as conn:
    # 1) reset tables (safe during dev)
    conn.execute(text("""
        TRUNCATE TABLE
        payments, usage_metrics, subscriptions, customers
        RESTART IDENTITY CASCADE;
    """))

    # 2) load customers (no customer_id column)
    customers.to_sql("customers", conn, if_exists="append", index=False)

    # 3) fetch real ids back
    db_customers = pd.read_sql(
        text("SELECT customer_id, signup_date FROM customers ORDER BY customer_id"),
        conn,
    )

    # 4) synthesize one subscription per customer.
    # Fixed seed keeps the generated data reproducible. The order of the
    # random draws (plan, churn flag, churn offset) must not change, or the
    # dataset produced for a given seed changes with it.
    np.random.seed(42)

    subs_rows = []
    for cust in db_customers.itertuples(index=False):
        cid = int(cust.customer_id)
        start_date = pd.to_datetime(cust.signup_date)
        plan = str(np.random.choice(plan_names, p=plan_weights))
        churned = bool(np.random.rand() < 0.25)

        # Only churned customers get a churn date: 90 to 599 days after the
        # subscription start. The offset is drawn only when needed so the
        # random stream stays aligned with the original sequence.
        if churned:
            offset_days = int(np.random.randint(90, 600))
            churn_date = (start_date + pd.Timedelta(days=offset_days)).date()
        else:
            churn_date = None

        subs_rows.append([cid, plan, plans[plan], start_date.date(), churn_date, churned])

    subscriptions = pd.DataFrame(
        subs_rows,
        columns=["customer_id", "plan_type", "monthly_fee", "start_date", "churn_date", "churned"],
    )
    subscriptions.to_sql("subscriptions", conn, if_exists="append", index=False)

# Reported only after the transaction has committed successfully.
print("Loaded customers:", len(customers))
print("Loaded subscriptions:", len(subscriptions))


Loaded customers: 10000
Loaded subscriptions: 10000
