# SQL Lab: 02 window functions

In [None]:
import duckdb, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
# Apply the 'seaborn-v0_8-darkgrid' Matplotlib style sheet to every figure
# created after this point.
# NOTE(review): this style name presumably needs Matplotlib >= 3.6 — confirm
# against the environment this lab targets.
plt.style.use('seaborn-v0_8-darkgrid')


In [None]:
# Create an in-memory DuckDB database and run the schema script followed by
# the seed script, so that later cells can query through `conn`.
conn = duckdb.connect(database=':memory:')
for script_path in ('sql/schema.sql', 'sql/seed.sql'):
    # The context manager closes each file promptly instead of leaving the
    # handle to the garbage collector; the explicit encoding makes the load
    # independent of the platform's default locale.
    with open(script_path, encoding='utf-8') as script:
        conn.execute(script.read())
# List the tables that now exist as a quick confirmation of the load.
print('Tables loaded:', conn.execute('SHOW TABLES').fetchall())


In [None]:
    # Show the first five rows of every table as a quick check that the load
    # populated them.
    tables = ['customers','products','orders','order_items','events','marketing_experiments']
    for t in tables:
        # Use the "\n" escape rather than a literal line break inside the
        # quotes: a raw newline in a single-quoted f-string is a SyntaxError.
        print(f"\nPreview of {t}:")
        # Table names come from the fixed list above, so interpolating them
        # into the SQL text is safe here.
        display(conn.execute(f'SELECT * FROM {t} LIMIT 5').df())


In [None]:
# Revenue with moving averages
#
# The `daily` CTE sums `revenue_usd` per calendar day of `order_ts`; the
# `metrics` CTE adds two trailing averages computed with window frames:
#   * ma_7d  - average of the current row and the 6 rows before it
#   * ma_28d - average of the current row and the 27 rows before it
#
# The frames are ROWS-based, so they count result rows rather than calendar
# days. Days with no orders produce no row in `daily`, so the "7d"/"28d"
# labels only hold if every day has at least one order.
# NOTE(review): confirm the seed data has no gap days, or switch to a
# RANGE frame / a generated calendar.
# The earliest rows have fewer preceding rows available, so their averages
# are taken over a shorter window.
query = '''
    WITH daily AS (
        SELECT date_trunc('day', order_ts) AS day,
               SUM(revenue_usd) AS revenue
        FROM orders
        GROUP BY 1
    ), metrics AS (
        SELECT day, revenue,
               AVG(revenue) OVER (ORDER BY day ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS ma_7d,
               AVG(revenue) OVER (ORDER BY day ROWS BETWEEN 27 PRECEDING AND CURRENT ROW) AS ma_28d
        FROM daily
    )
    SELECT * FROM metrics
    ORDER BY day
'''
# Materialise the result as a pandas DataFrame; the next cell plots it.
moving_avg = conn.execute(query).df()
# Last expression in the cell, so the notebook renders the first rows.
moving_avg.head()


In [None]:
# Line chart: raw daily revenue drawn faintly in gray, with the two
# moving-average series overlaid in the default colour cycle.
fig, ax = plt.subplots(figsize=(10, 5))
days = moving_avg['day']
ax.plot(days, moving_avg['revenue'], color='gray', alpha=0.4, label='Daily')
# Both averages share the same x values and differ only in column and label.
for column, label in (('ma_7d', '7d MA'), ('ma_28d', '28d MA')):
    ax.plot(days, moving_avg[column], label=label)
ax.set_title('Revenue with Moving Averages')
ax.legend()
plt.tight_layout()


In [None]:
# Cohort analysis: first purchase month and retention over 3 months
#
#   first_purchase - each customer's cohort = month of their earliest order
#   activity       - every order tagged with its customer's cohort and the
#                    month in which the order happened
#   cohorts        - distinct active customers per (cohort, active month)
#
# The final SELECT numbers each cohort's active months with ROW_NUMBER() and
# keeps the first three. The filter is written with QUALIFY because a window
# function (or an alias of one) cannot be used in WHERE: WHERE is evaluated
# before window functions are computed.
#
# Note: ROW_NUMBER() numbers the months that actually appear for a cohort
# consecutively. If a cohort has no activity in some month, the following
# month takes its number, so month_number is "n-th active month" rather than
# "months since first purchase".
query = '''
    WITH first_purchase AS (
        SELECT customer_id, date_trunc('month', MIN(order_ts)) AS cohort_month
        FROM orders
        GROUP BY 1
    ), activity AS (
        SELECT o.customer_id, cohort_month,
               date_trunc('month', o.order_ts) AS active_month
        FROM orders o
        JOIN first_purchase f USING (customer_id)
        WHERE o.order_ts >= cohort_month
    ), cohorts AS (
        SELECT cohort_month, active_month,
               COUNT(DISTINCT customer_id) AS active_users
        FROM activity
        GROUP BY 1,2
    )
    SELECT cohort_month, active_month, active_users,
           ROW_NUMBER() OVER (PARTITION BY cohort_month ORDER BY active_month) AS month_number
    FROM cohorts
    QUALIFY month_number <= 3
    ORDER BY cohort_month, active_month
'''
# Materialise the result as a pandas DataFrame; the heatmap cell pivots it.
cohorts = conn.execute(query).df()
# Last expression in the cell, so the notebook renders the first rows.
cohorts.head()


In [None]:
# Retention heatmap: one row per cohort month, one column per month number;
# cell colour encodes the number of active users.
heatmap = cohorts.pivot(columns='month_number', index='cohort_month', values='active_users')
plt.figure(figsize=(8, 6))
# sns.heatmap returns the Axes it drew on, so the labels are set on it directly.
heat_ax = sns.heatmap(heatmap, annot=False, cmap='magma')
heat_ax.set_title('3-Month Retention by Cohort')
heat_ax.set_xlabel('Month Number')
heat_ax.set_ylabel('Cohort Month')
plt.tight_layout()
