# Temporary Tables and CTEs

This notebook demonstrates effective use of temporary tables and Common Table Expressions (CTEs):
* When to use temp tables vs CTEs
* Performance implications
* Optimization techniques
* Best practices

## 1. Basic Temporary Tables

In [None]:
-- Create temporary table for customer order summary.
-- One row per customer. The LEFT JOIN keeps customers that have no orders:
-- COUNT(o.order_id) counts only matched rows (0 for them) and COALESCE turns
-- their NULL sum into 0.
-- Drop any copy left over from an earlier run so the cell can be re-executed in
-- the same session; pg_temp restricts the drop to this session's temporary schema.
DROP TABLE IF EXISTS pg_temp.customer_order_summary;

CREATE TEMPORARY TABLE customer_order_summary AS
SELECT
    c.customer_id,
    c.first_name,
    c.last_name,
    c.country,
    c.segment,
    COUNT(o.order_id) AS total_orders,
    COALESCE(SUM(o.total_amount), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.first_name, c.last_name, c.country, c.segment;

-- Add indexes to temp table
CREATE INDEX idx_temp_customer_id ON customer_order_summary(customer_id);
CREATE INDEX idx_temp_country_segment ON customer_order_summary(country, segment);

-- Autovacuum does not process temporary tables, so collect planner statistics by hand
ANALYZE customer_order_summary;

-- Roll the temp table up to one row per (country, segment),
-- highest average spend first
SELECT
    cos.country,
    cos.segment,
    COUNT(*) AS num_customers,
    AVG(cos.total_orders) AS avg_orders,
    AVG(cos.total_spent) AS avg_spent
FROM customer_order_summary AS cos
GROUP BY
    cos.country,
    cos.segment
ORDER BY avg_spent DESC;

## 2. Basic CTEs

In [None]:
-- Simple CTE example: per-segment statistics over completed orders.
-- order_summary has one row per customer with at least one 'Completed' order.
-- The LEFT JOIN keeps customers without such orders, whose metrics come back
-- NULL. AVG() skips NULLs, so without COALESCE the averages would cover fewer
-- customers than num_customers counts. Treating the missing metrics as 0 keeps
-- all three aggregates on the same population.
WITH order_summary AS (
    SELECT
        customer_id,
        COUNT(*) AS num_orders,
        SUM(total_amount) AS total_spent
    FROM orders
    WHERE status = 'Completed'
    GROUP BY customer_id
)
SELECT
    c.segment,
    COUNT(*) AS num_customers,
    AVG(COALESCE(os.num_orders, 0)) AS avg_orders,
    AVG(COALESCE(os.total_spent, 0)) AS avg_spent
FROM customers c
LEFT JOIN order_summary os ON c.customer_id = os.customer_id
GROUP BY c.segment;

## 3. Performance Comparison

In [None]:
-- Approach 1: correlated scalar subqueries in the SELECT list.
-- Each subquery is tied to the outer row through p.product_id and restricted
-- to orders dated 2022-01-01 or later.
EXPLAIN ANALYZE
SELECT
    p.category,
    p.product_name,
    (
        SELECT COUNT(DISTINCT ord.order_id)
        FROM order_items AS itm
        INNER JOIN orders AS ord
            ON ord.order_id = itm.order_id
        WHERE itm.product_id = p.product_id
          AND ord.order_date >= '2022-01-01'
    ) AS num_orders,
    (
        -- SUM over no rows is NULL; report 0 instead
        SELECT COALESCE(SUM(itm.quantity), 0)
        FROM order_items AS itm
        INNER JOIN orders AS ord
            ON ord.order_id = itm.order_id
        WHERE itm.product_id = p.product_id
          AND ord.order_date >= '2022-01-01'
    ) AS total_quantity
FROM products AS p;

-- Approach 2: aggregate once per product inside a CTE, then join the result.
-- Products with no qualifying order items have no CTE row; the LEFT JOIN keeps
-- them and COALESCE reports their metrics as 0.
EXPLAIN ANALYZE
WITH product_metrics AS (
    SELECT
        itm.product_id,
        COUNT(DISTINCT ord.order_id) AS num_orders,
        SUM(itm.quantity) AS total_quantity
    FROM orders AS ord
    INNER JOIN order_items AS itm
        ON itm.order_id = ord.order_id
    WHERE ord.order_date >= '2022-01-01'
    GROUP BY itm.product_id
)
SELECT
    p.category,
    p.product_name,
    COALESCE(pm.num_orders, 0) AS num_orders,
    COALESCE(pm.total_quantity, 0) AS total_quantity
FROM products AS p
LEFT JOIN product_metrics AS pm
    ON pm.product_id = p.product_id;

-- Approach 3: Using temporary table.
-- Drop any copy left over from an earlier run so the cell can be re-executed in
-- the same session; pg_temp restricts the drop to this session's temporary schema.
DROP TABLE IF EXISTS pg_temp.product_metrics;

-- EXPLAIN ANALYZE executes the statement it wraps, so this both builds the table
-- and reports the build cost. Approaches 1 and 2 pay for the aggregation inside
-- their measured query, so the build time must be counted here as well for the
-- comparison to be fair.
EXPLAIN ANALYZE
CREATE TEMPORARY TABLE product_metrics AS
SELECT
    oi.product_id,
    COUNT(DISTINCT o.order_id) AS num_orders,
    SUM(oi.quantity) AS total_quantity
FROM order_items oi
JOIN orders o ON oi.order_id = o.order_id
WHERE o.order_date >= '2022-01-01'
GROUP BY oi.product_id;

CREATE INDEX idx_temp_product ON product_metrics(product_id);

-- Autovacuum does not process temporary tables; without statistics the planner
-- has to guess the table's size when planning the join below
ANALYZE product_metrics;

-- Products without a metrics row are kept by the LEFT JOIN and reported as 0
EXPLAIN ANALYZE
SELECT
    p.category,
    p.product_name,
    COALESCE(pm.num_orders, 0) AS num_orders,
    COALESCE(pm.total_quantity, 0) AS total_quantity
FROM products p
LEFT JOIN product_metrics pm ON p.product_id = pm.product_id;

## 4. Complex CTEs

In [None]:
-- Complex analysis using multiple CTEs: list the months in which a category's
-- revenue lies more than two standard deviations from that category's mean
-- monthly revenue.
WITH monthly_sales AS (
    -- Revenue per calendar month and product category, completed orders only
    SELECT
        DATE_TRUNC('month', ord.order_date) AS sale_month,
        prod.category,
        SUM(itm.quantity * itm.unit_price) AS revenue
    FROM orders AS ord
    INNER JOIN order_items AS itm
        ON itm.order_id = ord.order_id
    INNER JOIN products AS prod
        ON prod.product_id = itm.product_id
    WHERE ord.status = 'Completed'
    GROUP BY
        DATE_TRUNC('month', ord.order_date),
        prod.category
),
category_stats AS (
    -- Mean and standard deviation of the monthly revenue, per category
    SELECT
        category,
        AVG(revenue) AS avg_monthly_revenue,
        STDDEV(revenue) AS revenue_stddev
    FROM monthly_sales
    GROUP BY category
),
monthly_analysis AS (
    -- NULLIF guards the division: a zero standard deviation gives a NULL z_score
    SELECT
        ms.sale_month,
        ms.category,
        ms.revenue,
        cs.avg_monthly_revenue,
        (ms.revenue - cs.avg_monthly_revenue) / NULLIF(cs.revenue_stddev, 0) AS z_score
    FROM monthly_sales AS ms
    INNER JOIN category_stats AS cs
        ON cs.category = ms.category
)
SELECT
    sale_month,
    category,
    revenue,
    avg_monthly_revenue,
    z_score
FROM monthly_analysis
WHERE ABS(z_score) > 2  -- rows with a NULL z_score are filtered out here
ORDER BY ABS(z_score) DESC;

## 5. Recursive CTEs

In [None]:
-- Create a sample hierarchical structure.
-- Each row's manager_id holds the employee_id of another row in this table;
-- the single row with manager_id NULL (the CEO) is the root of the tree.
-- Drop any copy left over from an earlier run so the cell can be re-executed in
-- the same session; pg_temp restricts the drop to this session's temporary schema.
DROP TABLE IF EXISTS pg_temp.employee_hierarchy;

CREATE TEMPORARY TABLE employee_hierarchy (
    employee_id INT PRIMARY KEY,
    name VARCHAR(100),
    manager_id INT
);

-- Explicit column list: the insert keeps working if columns are added or reordered
INSERT INTO employee_hierarchy (employee_id, name, manager_id) VALUES
(1, 'CEO', NULL),
(2, 'VP Sales', 1),
(3, 'VP Marketing', 1),
(4, 'Sales Manager 1', 2),
(5, 'Sales Manager 2', 2),
(6, 'Marketing Manager', 3),
(7, 'Sales Rep 1', 4),
(8, 'Sales Rep 2', 4),
(9, 'Sales Rep 3', 5),
(10, 'Marketing Specialist', 6);

-- Query organizational hierarchy using recursive CTE.
-- Output: one row per employee reachable from a root, with its depth (level,
-- 1 = root) and a readable 'A -> B -> C' path from the root. employee_path
-- collects the ids along that path and is used for ordering and cycle detection.
WITH RECURSIVE org_tree AS (
    -- Base case: top-level employees (no manager)
    SELECT
        employee_id,
        name,
        manager_id,
        1 AS level,
        -- name is VARCHAR(100) but the recursive term's concatenation yields TEXT.
        -- PostgreSQL requires both branches of a recursive CTE to agree on each
        -- column's type, so cast the anchor to TEXT.
        CAST(name AS TEXT) AS path,
        ARRAY[employee_id] AS employee_path
    FROM employee_hierarchy
    WHERE manager_id IS NULL

    UNION ALL

    -- Recursive case: employees whose manager is already in the tree
    SELECT
        e.employee_id,
        e.name,
        e.manager_id,
        t.level + 1,
        t.path || ' -> ' || e.name,
        t.employee_path || e.employee_id
    FROM employee_hierarchy e
    JOIN org_tree t ON t.employee_id = e.manager_id
    -- Cycle guard: never revisit an employee already on the current path, so the
    -- recursion terminates even if the manager links form a loop
    WHERE e.employee_id <> ALL (t.employee_path)
)
SELECT
    employee_id,
    name,
    level,
    path
FROM org_tree
-- Array comparison is element by element, so each manager sorts directly
-- before the employees reporting to them (depth-first order)
ORDER BY employee_path;

## Best Practices

1. **When to Use Temporary Tables**
   - Large intermediate results
   - Results needed multiple times
   - Need to create indexes
   - Complex aggregations

2. **When to Use CTEs**
   - Query readability
   - One-time use of results
   - Recursive queries
   - Simple transformations

3. **Performance Optimization**
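   - Index temporary tables, then run `ANALYZE` on them (autovacuum does not process temporary tables, so the planner has no statistics otherwise)
   - Clean up temp tables when done (`DROP TABLE IF EXISTS ...`) rather than waiting for the session to end
   - Consider materialized CTEs (in PostgreSQL 12+, `WITH x AS MATERIALIZED (...)` forces a CTE to be computed once instead of being inlined into the outer query)
   - Monitor memory usage (in PostgreSQL, `temp_buffers` for temp tables and `work_mem` for sorts and hashes)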

4. **Common Pitfalls**
   - Not cleaning up temp tables
   - Overusing CTEs for large datasets
   - Missing indexes on temp tables
   - Recursive CTEs without a termination condition or cycle guard (they can loop forever on cyclic data)