In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# ----------------------------
# 1. Simulated Data Pipeline
# ----------------------------
# Load raw data (could be from SQL, CSV, API, etc.)
# Date columns are written as ISO strings first; a None entry marks a stage
# the user never reached and becomes NaT once parsed by pd.to_datetime.
date_strings = {
    'signup_date': [
        '2024-05-01', '2024-05-02', '2024-05-02', '2024-05-03',
        '2024-05-05', '2024-05-06', '2024-05-07',
    ],
    'first_action_date': [
        '2024-05-02', '2024-05-03', None, '2024-05-05',
        '2024-05-06', None, '2024-05-08',
    ],
    'purchase_date': [
        '2024-05-10', None, None, '2024-05-06',
        '2024-05-10', None, '2024-05-12',
    ],
}

raw_data = {
    'user_id': list(range(1, 8)),
    **{name: pd.to_datetime(dates) for name, dates in date_strings.items()},
}

df = pd.DataFrame(raw_data)

# Feature engineering: a stage counts as reached when its date is present.
df = df.assign(
    activated=df['first_action_date'].notna(),
    purchased=df['purchase_date'].notna(),
)

# ----------------------------
# 2. KPI Dashboard Automation
# ----------------------------
# Headline metrics over the whole user table. Both rates are the mean of a
# boolean column times 100, i.e. a percentage of all rows in df.
kpis = {
    'total_users': df['user_id'].nunique(),
    'activation_rate': df['activated'].mean() * 100,
    'conversion_rate': df['purchased'].mean() * 100
}

# Format each KPI exactly once so the console and the chart cannot disagree:
# rates get two decimals and a percent sign, counts are shown as whole
# numbers (previously the console printed the user count as e.g. "7.00").
kpi_labels = {
    name: f"{value:.2f}%" if 'rate' in name else f"{int(value)}"
    for name, value in kpis.items()
}

print("\nKPI Dashboard:")
for name, label in kpi_labels.items():
    print(f"{name}: {label}")

fig = px.bar(
    x=list(kpis), y=list(kpis.values()),
    title="Key Performance Indicators", text=list(kpi_labels.values()),
)
fig.show()

# ----------------------------
# 3. Funnel Analysis
# ----------------------------
# Number of users at each stage, listed in funnel order (top to bottom).
# NOTE: every stage is counted from its own flag, so a purchaser without a
# first_action_date would still be counted under 'Purchased'.
stage_counts = {
    'Signed Up': len(df),
    'Activated': df['activated'].sum(),
    'Purchased': df['purchased'].sum(),
}

funnel = pd.DataFrame({
    'stage': list(stage_counts),
    'users': list(stage_counts.values()),
})

fig2 = px.funnel(
    funnel,
    x='users',
    y='stage',
    title="User Funnel Analysis",
)
fig2.show()

# ----------------------------
# 4. Cohort Analysis
# ----------------------------
# Bucket users by the calendar month of their signup, then express each stage
# as the percentage of that month's signups that reached it.
df['signup_month'] = df['signup_date'].dt.to_period('M')
cohort = df.groupby('signup_month')[['activated', 'purchased']].mean() * 100

print("\nCohort Analysis (Rates by Signup Month):")
print(cohort)

# One bar group per signup month, one bar per rate column.
ax = cohort.plot.bar(figsize=(6, 4))
ax.set_title("Cohort Analysis: Activation & Conversion Rates")
ax.set_ylabel("%")
plt.show()
