# SQL Lab: 04 A/B Test Marketing

In [None]:
import duckdb, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
# Apply the 'seaborn-v0_8-darkgrid' matplotlib style sheet to every figure created after this point.
plt.style.use('seaborn-v0_8-darkgrid')


In [None]:
# Initialize DuckDB in-memory database and load schema + seed data.
conn = duckdb.connect(database=':memory:')
# The schema script must run before the seed script. Each file is read inside
# a `with` block so its handle is closed as soon as the SQL text is in memory,
# and the encoding is pinned so the scripts load the same way on every platform.
for script_path in ('sql/schema.sql', 'sql/seed.sql'):
    with open(script_path, encoding='utf-8') as script_file:
        conn.execute(script_file.read())
print('Tables loaded:', conn.execute('SHOW TABLES').fetchall())


In [None]:
    # Peek at each table to validate load
    tables = ['customers','products','orders','order_items','events','marketing_experiments']
    for t in tables:
        # The "\n" escape prints a blank line before each preview header; it
        # must stay an escape sequence, since a literal line break inside a
        # single-quoted f-string is a syntax error.
        print(f"\nPreview of {t}:")
        # Table names come from the hard-coded list above, so interpolating
        # them into the SQL text does not expose the query to outside input.
        display(conn.execute(f'SELECT * FROM {t} LIMIT 5').df())


In [None]:
# Summary stats by group: user count, conversion count and conversion rate.
# "group" is double-quoted because GROUP is a reserved SQL keyword and cannot
# be used as a bare column reference. ORDER BY 1 fixes the row order, which
# GROUP BY alone does not guarantee, so later cells see the groups in a
# stable order.
query = '''
    SELECT "group", COUNT(*) AS users, SUM(converted) AS conversions, AVG(converted)::DOUBLE AS conversion_rate
    FROM marketing_experiments
    GROUP BY 1
    ORDER BY 1
'''
ab_summary = conn.execute(query).df()
ab_summary


In [None]:
# Two-proportion z-test using statsmodels
import statsmodels.api as sm
import numpy as np
# Per-group conversion counts and sample sizes, in the row order of ab_summary.
count = ab_summary['conversions'].values
nobs = ab_summary['users'].values
stat, pval = sm.stats.proportions_ztest(count, nobs)
# Normal-approximation interval for each group's rate; proportion_confint's
# default alpha of 0.05 yields the 95% interval plotted below.
lower, upper = sm.stats.proportion_confint(count, nobs, method='normal')
result = pd.DataFrame({
    # Take the labels from the query result instead of hard-coding them, so
    # each label always sits next to its own rate and interval regardless of
    # the order in which the rows came back.
    'group': ab_summary['group'],
    'conversion_rate': ab_summary['conversion_rate'],
    'ci_lower': lower,
    'ci_upper': upper,
})
print('z-statistic:', stat, 'p-value:', pval)
result


In [None]:
# Plot each group's conversion rate as a point with its confidence interval.
rates = result['conversion_rate']
# errorbar takes the distances from the point to each end of the interval,
# not the interval bounds themselves.
err_below = rates - result['ci_lower']
err_above = result['ci_upper'] - rates

fig, ax = plt.subplots(figsize=(6, 4))
ax.errorbar(
    result['group'],
    rates,
    yerr=[err_below, err_above],
    fmt='o',
    capsize=6,
    color='#5DA5DA',
)
# Start the axis at zero and leave a little headroom above the tallest bar.
y_top = result['ci_upper'].max() + 0.05
ax.set_ylim(0, y_top)
ax.set_ylabel('Conversion Rate')
ax.set_title('Marketing Experiment Conversion Rates (95% CI)')
plt.tight_layout()


In [None]:
# Interpretation: report whether the p-value falls below the 0.05 threshold.
verdict = (
    'Result: statistically significant difference between groups.'
    if pval < 0.05
    else 'Result: no statistically significant difference detected at 95% confidence.'
)
print(verdict)
