# SQL Lab: 01 joins

In [None]:
# Query engine, DataFrame library and plotting stack used throughout the lab.
import duckdb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply matplotlib's 'seaborn-v0_8-darkgrid' style sheet to the figures below.
plt.style.use('seaborn-v0_8-darkgrid')


In [None]:
# Create a throwaway in-memory DuckDB database and populate it by running the
# project's SQL scripts in order: the schema script first, then the seed script.
conn = duckdb.connect(database=':memory:')
for script_path in ('sql/schema.sql', 'sql/seed.sql'):
    # A context manager closes each script's file handle as soon as it has
    # been read, instead of leaving it open until garbage collection.
    # NOTE(review): assumes the SQL scripts are UTF-8 encoded - confirm.
    with open(script_path, encoding='utf-8') as script_file:
        conn.execute(script_file.read())
# List the tables that now exist so a failed load is noticed immediately.
print('Tables loaded:', conn.execute('SHOW TABLES').fetchall())


In [None]:
    # Sanity-check the load by showing the first five rows of every table.
    tables = ['customers','products','orders','order_items','events','marketing_experiments']
    for t in tables:
        # The blank line before each heading must be written as the "\n"
        # escape: a raw line break inside a single-quoted f-string is a
        # SyntaxError.
        print(f"\nPreview of {t}:")
        # Table names come from the fixed list above, never from user input,
        # so interpolating them into the SQL text is safe here.
        # `display` is not imported in this file - presumably the builtin
        # provided by the IPython/Jupyter session.
        display(conn.execute(f'SELECT * FROM {t} LIMIT 5').df())


In [None]:
# Monthly revenue per country. Each order is joined to its customer (for the
# country) and to its line items (for qty * unit_price_usd); the result is
# grouped by country and order month, and sorted by month, then country.
query = '''
    SELECT c.country, date_trunc('month', o.order_ts) AS month,
           SUM(oi.qty * oi.unit_price_usd) AS revenue
    FROM orders o
    JOIN customers c ON o.customer_id = c.customer_id
    JOIN order_items oi ON o.order_id = oi.order_id
    GROUP BY 1,2
    ORDER BY 2,1
'''
# Run the aggregation and materialise the result as a pandas DataFrame.
revenue_country_month = conn.execute(query).fetchdf()
# Show the first five rows as the cell output.
revenue_country_month.head(5)


In [None]:
# Draw one revenue-over-time line per country on a single shared axis.
fig, ax = plt.subplots(figsize=(10, 5))
for country_name, country_rows in revenue_country_month.groupby('country'):
    # Slight transparency keeps overlapping lines distinguishable.
    ax.plot(
        country_rows['month'],
        country_rows['revenue'],
        label=country_name,
        alpha=0.6,
    )
ax.set(title='Revenue by Country and Month', ylabel='Revenue (USD)')
# Spread the legend entries over three columns.
ax.legend(ncol=3)
fig.tight_layout()


In [None]:
# Rank product categories by revenue within each country and keep ranks 1-3.
# RANK() gives tied categories the same rank, so a country can contribute
# more than three rows when revenues tie.
query = '''
    SELECT c.country, p.category,
           SUM(oi.qty * oi.unit_price_usd) AS revenue,
           RANK() OVER (PARTITION BY c.country ORDER BY SUM(oi.qty * oi.unit_price_usd) DESC) AS rnk
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.order_id
    JOIN customers c ON o.customer_id = c.customer_id
    JOIN products p ON oi.product_id = p.product_id
    GROUP BY 1,2
    QUALIFY rnk <= 3
    ORDER BY country, rnk
'''
# Run the ranking query and materialise the result as a pandas DataFrame.
top_categories = conn.execute(query).fetchdf()
# Show the full result as the cell output.
top_categories


In [None]:
# Grouped bar chart: one cluster of bars per country, one bar per category
# returned by the top-categories query, with bar height showing revenue.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(
    x='country',
    y='revenue',
    hue='category',
    data=top_categories,
    ax=ax,
)
ax.set(title='Top Categories by Country (Revenue)')
# Tilt the x tick labels by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
