In [1]:
# %% Imports and Configuration
import pandas as pd
from sqlalchemy import create_engine, text
from IPython.display import display, Markdown

# PostgreSQL Configuration
PG_CONFIG = {
    'host': 'localhost',
    'port': 5432,
    'dbname': 'olap',
    'user': 'postgres',
    'password': 'aa'
}

# Create SQLAlchemy engine
conn_str = f"postgresql://{PG_CONFIG['user']}:{PG_CONFIG['password']}@{PG_CONFIG['host']}:{PG_CONFIG['port']}/{PG_CONFIG['dbname']}"
engine = create_engine(conn_str)

print('‚úÖ Connected to PostgreSQL')
print(f"üóÑÔ∏è  Database: {PG_CONFIG['dbname']} @ {PG_CONFIG['host']}:{PG_CONFIG['port']}")

‚úÖ Connected to PostgreSQL
üóÑÔ∏è  Database: olap @ localhost:5432


## Task 5: Basic Aggregate Queries

Using SUM, AVG, COUNT, MIN, MAX functions.

In [2]:
# %% Task 5.1: Overall Sales Aggregates
print('=' * 80)
print('üìä TASK 5.1: Overall Sales Aggregates')
print('=' * 80)

query = """
SELECT 
    COUNT(*) AS total_transactions,
    COUNT(DISTINCT order_id) AS unique_orders,
    ROUND(SUM(sales)::numeric, 2) AS total_sales,
    ROUND(AVG(sales)::numeric, 2) AS avg_sales,
    ROUND(MIN(sales)::numeric, 2) AS min_sales,
    ROUND(MAX(sales)::numeric, 2) AS max_sales,
    SUM(quantity) AS total_quantity
FROM fact_sales;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 5.1 Complete')

üìä TASK 5.1: Overall Sales Aggregates


Unnamed: 0,total_transactions,unique_orders,total_sales,avg_sales,min_sales,max_sales,total_quantity
0,9800,4922,2261536.97,230.77,0.44,22638.48,9800



‚úÖ Task 5.1 Complete


In [3]:
# %% Task 5.2: Sales by Category
print('\n' + '=' * 80)
print('üìä TASK 5.2: Sales Aggregates by Category')
print('=' * 80)

query = """
SELECT 
    p.category,
    COUNT(*) AS transaction_count,
    COUNT(DISTINCT f.order_id) AS order_count,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
    ROUND(AVG(f.sales)::numeric, 2) AS avg_sales,
    ROUND(MIN(f.sales)::numeric, 2) AS min_sales,
    ROUND(MAX(f.sales)::numeric, 2) AS max_sales
FROM fact_sales f
LEFT JOIN dim_product p ON f.product_key = p.product_key
GROUP BY p.category
ORDER BY total_sales DESC NULLS LAST;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 5.2 Complete')


üìä TASK 5.2: Sales Aggregates by Category


Unnamed: 0,category,transaction_count,order_count,total_sales,avg_sales,min_sales,max_sales
0,Technology,1813,1519,827455.94,456.4,0.99,22638.48
1,Furniture,2078,1727,728658.75,350.65,1.89,4416.17
2,Office Supplies,5909,3676,705422.28,119.38,0.44,9892.74



‚úÖ Task 5.2 Complete


In [4]:
# %% Task 5.3: Sales by Region and Segment
print('\n' + '=' * 80)
print('üìä TASK 5.3: Sales Aggregates by Region and Segment')
print('=' * 80)

query = """
SELECT 
    g.region,
    c.segment,
    COUNT(*) AS transaction_count,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
    ROUND(AVG(f.sales)::numeric, 2) AS avg_sales
FROM fact_sales f
LEFT JOIN dim_geography g ON f.geo_key = g.geo_key
LEFT JOIN dim_customer c ON f.customer_key = c.customer_key
GROUP BY g.region, c.segment
ORDER BY g.region, total_sales DESC NULLS LAST;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 5.3 Complete')


üìä TASK 5.3: Sales Aggregates by Region and Segment


Unnamed: 0,region,segment,transaction_count,total_sales,avg_sales
0,Central,Consumer,1188,250210.52,210.61
1,Central,Corporate,661,152031.47,230.0
2,Central,Home Office,428,90404.91,211.23
3,East,Consumer,1444,347906.61,240.93
4,East,Corporate,850,195897.53,230.47
5,East,Home Office,491,125714.71,256.04
6,South,Consumer,831,194702.16,234.3
7,South,Corporate,500,120546.89,241.09
8,South,Home Office,267,73902.4,276.79
9,West,Consumer,1638,355241.22,216.87



‚úÖ Task 5.3 Complete


In [5]:
# %% Task 5.4: Sales by Year and Quarter
print('\n' + '=' * 80)
print('üìä TASK 5.4: Sales Aggregates by Year and Quarter')
print('=' * 80)

query = """
SELECT 
    t.year,
    t.quarter_name,
    COUNT(*) AS transaction_count,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
    ROUND(AVG(f.sales)::numeric, 2) AS avg_sales,
    ROUND(MAX(f.sales)::numeric, 2) AS max_sales
FROM fact_sales f
LEFT JOIN dim_time t ON f.time_key = t.time_key
GROUP BY t.year, t.quarter, t.quarter_name
ORDER BY t.year, t.quarter;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 5.4 Complete')


üìä TASK 5.4: Sales Aggregates by Year and Quarter


Unnamed: 0,year,quarter_name,transaction_count,total_sales,avg_sales,max_sales
0,2015,Q1,277,73931.46,266.9,22638.48
1,2015,Q2,382,85874.1,224.8,4164.05
2,2015,Q3,555,142522.57,256.8,9449.95
3,2015,Q4,739,177528.14,240.23,6999.96
4,2016,Q1,249,62357.68,250.43,6354.95
5,2016,Q2,431,87713.46,203.51,3812.97
6,2016,Q3,579,128560.14,222.04,4228.7
7,2016,Q4,796,180804.66,227.14,4899.93
8,2017,Q1,333,92686.38,278.34,8749.95
9,2017,Q2,585,135061.19,230.87,9099.93



‚úÖ Task 5.4 Complete


## Task 6: GROUP BY ROLLUP Query

ROLLUP creates subtotals for hierarchical groups.

In [6]:
# %% Task 6: ROLLUP - Sales by Category and Sub-Category with Subtotals
print('=' * 80)
print('üìä TASK 6: GROUP BY ROLLUP - Category and Sub-Category Sales')
print('=' * 80)

query = """
SELECT 
    COALESCE(p.category, 'GRAND TOTAL') AS category,
    COALESCE(p.sub_category, 'Category Subtotal') AS sub_category,
    COUNT(*) AS transaction_count,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
    ROUND(AVG(f.sales)::numeric, 2) AS avg_sales
FROM fact_sales f
LEFT JOIN dim_product p ON f.product_key = p.product_key
GROUP BY ROLLUP(p.category, p.sub_category)
ORDER BY 
    CASE WHEN p.category IS NULL THEN 1 ELSE 0 END,
    p.category,
    CASE WHEN p.sub_category IS NULL THEN 1 ELSE 0 END,
    p.sub_category;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° Explanation: ROLLUP creates subtotals for each category and a grand total.')
print('‚úÖ Task 6 Complete')

üìä TASK 6: GROUP BY ROLLUP - Category and Sub-Category Sales


Unnamed: 0,category,sub_category,transaction_count,total_sales,avg_sales
0,Furniture,Bookcases,226,113813.25,503.6
1,Furniture,Chairs,607,322822.75,531.83
2,Furniture,Furnishings,931,89211.98,95.82
3,Furniture,Tables,314,202810.77,645.89
4,Furniture,Category Subtotal,2078,728658.75,350.65
5,Office Supplies,Appliances,459,104618.38,227.93
6,Office Supplies,Art,785,26705.42,34.02
7,Office Supplies,Binders,1492,200028.82,134.07
8,Office Supplies,Envelopes,248,16128.02,65.03
9,Office Supplies,Fasteners,214,3001.93,14.03



üí° Explanation: ROLLUP creates subtotals for each category and a grand total.
‚úÖ Task 6 Complete


In [7]:
# %% Task 6 (Bonus): ROLLUP - Sales by Year, Quarter, Month
print('\n' + '=' * 80)
print('üìä TASK 6 (Bonus): GROUP BY ROLLUP - Year, Quarter, Month Hierarchy')
print('=' * 80)

query = """
SELECT 
    COALESCE(t.year::text, 'GRAND TOTAL') AS year,
    COALESCE(t.quarter_name, 'Year Subtotal') AS quarter,
    COALESCE(t.month_name, 'Quarter Subtotal') AS month,
    COUNT(*) AS transactions,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales
FROM fact_sales f
LEFT JOIN dim_time t ON f.time_key = t.time_key
GROUP BY ROLLUP(t.year, t.quarter_name, t.month_name)
ORDER BY 
    CASE WHEN t.year IS NULL THEN 1 ELSE 0 END,
    t.year,
    CASE WHEN t.quarter_name IS NULL THEN 1 ELSE 0 END,
    t.quarter_name,
    CASE WHEN t.month_name IS NULL THEN 1 ELSE 0 END,
    t.month_name
LIMIT 50;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 6 Bonus Complete')


üìä TASK 6 (Bonus): GROUP BY ROLLUP - Year, Quarter, Month Hierarchy


Unnamed: 0,year,quarter,month,transactions,total_sales
0,2015,Q1,February,46,4519.92
1,2015,Q1,January,77,14205.71
2,2015,Q1,March,154,55205.83
3,2015,Q1,Quarter Subtotal,277,73931.46
4,2015,Q2,April,130,27906.86
5,2015,Q2,June,131,34322.94
6,2015,Q2,May,121,23644.3
7,2015,Q2,Quarter Subtotal,382,85874.1
8,2015,Q3,August,146,27117.53
9,2015,Q3,July,142,33781.52



‚úÖ Task 6 Bonus Complete


## Task 7: GROUP BY CUBE Query

CUBE creates subtotals for all possible combinations of dimensions.

In [8]:
# %% Task 7: CUBE - Sales by Region and Segment
print('=' * 80)
print('üìä TASK 7: GROUP BY CUBE - Region and Segment Sales')
print('=' * 80)

query = """
SELECT 
    COALESCE(g.region, 'ALL REGIONS') AS region,
    COALESCE(c.segment, 'ALL SEGMENTS') AS segment,
    COUNT(*) AS transaction_count,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
    ROUND(AVG(f.sales)::numeric, 2) AS avg_sales
FROM fact_sales f
LEFT JOIN dim_geography g ON f.geo_key = g.geo_key
LEFT JOIN dim_customer c ON f.customer_key = c.customer_key
GROUP BY CUBE(g.region, c.segment)
ORDER BY 
    CASE WHEN g.region IS NULL THEN 1 ELSE 0 END,
    g.region,
    CASE WHEN c.segment IS NULL THEN 1 ELSE 0 END,
    c.segment;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° Explanation: CUBE creates aggregates for:')
print('   - Each region + segment combination')
print('   - Each region (all segments)')
print('   - Each segment (all regions)')
print('   - Grand total (all regions and segments)')
print('‚úÖ Task 7 Complete')

üìä TASK 7: GROUP BY CUBE - Region and Segment Sales


Unnamed: 0,region,segment,transaction_count,total_sales,avg_sales
0,Central,Consumer,1188,250210.52,210.61
1,Central,Corporate,661,152031.47,230.0
2,Central,Home Office,428,90404.91,211.23
3,Central,ALL SEGMENTS,2277,492646.9,216.36
4,East,Consumer,1444,347906.61,240.93
5,East,Corporate,850,195897.53,230.47
6,East,Home Office,491,125714.71,256.04
7,East,ALL SEGMENTS,2785,669518.85,240.4
8,South,Consumer,831,194702.16,234.3
9,South,Corporate,500,120546.89,241.09



üí° Explanation: CUBE creates aggregates for:
   - Each region + segment combination
   - Each region (all segments)
   - Each segment (all regions)
   - Grand total (all regions and segments)
‚úÖ Task 7 Complete


In [9]:
# %% Task 7 (Bonus): CUBE - Sales by Category and Year
print('\n' + '=' * 80)
print('üìä TASK 7 (Bonus): GROUP BY CUBE - Category and Year')
print('=' * 80)

query = """
SELECT 
    COALESCE(p.category, 'ALL CATEGORIES') AS category,
    COALESCE(t.year::text, 'ALL YEARS') AS year,
    COUNT(*) AS transactions,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales
FROM fact_sales f
LEFT JOIN dim_product p ON f.product_key = p.product_key
LEFT JOIN dim_time t ON f.time_key = t.time_key
GROUP BY CUBE(p.category, t.year)
ORDER BY 
    CASE WHEN p.category IS NULL THEN 1 ELSE 0 END,
    p.category,
    CASE WHEN t.year IS NULL THEN 1 ELSE 0 END,
    t.year;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 7 Bonus Complete')


üìä TASK 7 (Bonus): GROUP BY CUBE - Category and Year


Unnamed: 0,category,year,transactions,total_sales
0,Furniture,2015,414,156477.92
1,Furniture,2016,440,164053.88
2,Furniture,2017,547,195813.15
3,Furniture,2018,677,212313.8
4,Furniture,ALL YEARS,2078,728658.75
5,Office Supplies,2015,1192,149512.81
6,Office Supplies,2016,1210,133124.36
7,Office Supplies,2017,1537,182417.65
8,Office Supplies,2018,1970,240367.46
9,Office Supplies,ALL YEARS,5909,705422.28



‚úÖ Task 7 Bonus Complete


## Task 8: GROUPING SETS Query

GROUPING SETS allows you to specify exactly which groupings you want.

In [10]:
# %% Task 8: GROUPING SETS - Custom Aggregations
print('=' * 80)
print('üìä TASK 8: GROUPING SETS - Custom Sales Aggregations')
print('=' * 80)

query = """
SELECT 
    COALESCE(p.category, 'N/A') AS category,
    COALESCE(g.region, 'N/A') AS region,
    COALESCE(t.year::text, 'N/A') AS year,
    COUNT(*) AS transaction_count,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
    CASE 
        WHEN p.category IS NOT NULL AND g.region IS NOT NULL AND t.year IS NOT NULL THEN 'Category + Region + Year'
        WHEN p.category IS NOT NULL AND g.region IS NOT NULL THEN 'Category + Region'
        WHEN p.category IS NOT NULL AND t.year IS NOT NULL THEN 'Category + Year'
        WHEN g.region IS NOT NULL AND t.year IS NOT NULL THEN 'Region + Year'
        ELSE 'Grand Total'
    END AS grouping_level
FROM fact_sales f
LEFT JOIN dim_product p ON f.product_key = p.product_key
LEFT JOIN dim_geography g ON f.geo_key = g.geo_key
LEFT JOIN dim_time t ON f.time_key = t.time_key
GROUP BY GROUPING SETS (
    (p.category, g.region, t.year),  -- Three dimensions
    (p.category, g.region),          -- Category and Region
    (p.category, t.year),            -- Category and Year
    (g.region, t.year),              -- Region and Year
    ()                               -- Grand Total
)
ORDER BY 
    grouping_level,
    p.category,
    g.region,
    t.year
LIMIT 50;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° Explanation: GROUPING SETS allows custom grouping combinations.')
print('‚úÖ Task 8 Complete')

üìä TASK 8: GROUPING SETS - Custom Sales Aggregations


Unnamed: 0,category,region,year,transaction_count,total_sales,grouping_level
0,Furniture,Central,,470,160317.56,Category + Region
1,Furniture,East,,591,206461.36,Category + Region
2,Furniture,South,,326,116531.47,Category + Region
3,Furniture,West,,691,245348.36,Category + Region
4,Office Supplies,Central,,1399,163590.15,Category + Region
5,Office Supplies,East,,1667,199940.9,Category + Region
6,Office Supplies,South,,983,124424.77,Category + Region
7,Office Supplies,West,,1860,217466.46,Category + Region
8,Technology,Central,,408,168739.19,Category + Region
9,Technology,East,,527,263116.59,Category + Region



üí° Explanation: GROUPING SETS allows custom grouping combinations.
‚úÖ Task 8 Complete


## Task 9: RANK and DENSE_RANK Queries

Ranking functions assign ranks to rows based on ordering.

In [11]:
# %% Task 9.1: RANK - Top Products by Sales
print('=' * 80)
print('üìä TASK 9.1: RANK - Top 20 Products by Sales')
print('=' * 80)

query = """
SELECT 
    p.product_name,
    p.category,
    p.sub_category,
    ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
    COUNT(*) AS transactions,
    RANK() OVER (ORDER BY SUM(f.sales) DESC) AS sales_rank,
    DENSE_RANK() OVER (ORDER BY SUM(f.sales) DESC) AS dense_sales_rank
FROM fact_sales f
LEFT JOIN dim_product p ON f.product_key = p.product_key
GROUP BY p.product_name, p.category, p.sub_category
ORDER BY sales_rank
LIMIT 20;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° RANK skips numbers after ties, DENSE_RANK does not.')
print('‚úÖ Task 9.1 Complete')

üìä TASK 9.1: RANK - Top 20 Products by Sales


Unnamed: 0,product_name,category,sub_category,total_sales,transactions,sales_rank,dense_sales_rank
0,Canon imageCLASS 2200 Advanced Copier,Technology,Copiers,61599.83,5,1,1
1,Fellowes PB500 Electric Punch Plastic Comb Bin...,Office Supplies,Binders,27453.38,10,2,2
2,Cisco TelePresence System EX90 Videoconferenci...,Technology,Machines,22638.48,1,3,3
3,HON 5400 Series Task Chairs for Big and Tall,Furniture,Chairs,21870.57,8,4,4
4,GBC DocuBind TL300 Electric Binding System,Office Supplies,Binders,19823.48,11,5,5
5,GBC Ibimaster 500 Manual ProClick Binding System,Office Supplies,Binders,19024.5,9,6,6
6,Hewlett Packard LaserJet 3310 Copier,Technology,Copiers,18839.68,8,7,7
7,HP Designjet T520 Inkjet Large Format Printer ...,Technology,Machines,18374.9,3,8,8
8,GBC DocuBind P400 Electric Binding System,Office Supplies,Binders,17965.07,6,9,9
9,High Speed Automatic Electric Letter Opener,Office Supplies,Supplies,17030.31,3,10,10



üí° RANK skips numbers after ties, DENSE_RANK does not.
‚úÖ Task 9.1 Complete


In [12]:
# %% Task 9.2: RANK - Top Customers by Region
print('\n' + '=' * 80)
print('üìä TASK 9.2: RANK - Top 3 Customers per Region')
print('=' * 80)

query = """
WITH customer_sales AS (
    SELECT 
        g.region,
        c.customer_name,
        c.segment,
        ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
        COUNT(*) AS transactions,
        RANK() OVER (PARTITION BY g.region ORDER BY SUM(f.sales) DESC) AS rank_in_region
    FROM fact_sales f
    LEFT JOIN dim_customer c ON f.customer_key = c.customer_key
    LEFT JOIN dim_geography g ON f.geo_key = g.geo_key
    GROUP BY g.region, c.customer_name, c.segment
)
SELECT 
    region,
    customer_name,
    segment,
    total_sales,
    transactions,
    rank_in_region
FROM customer_sales
WHERE rank_in_region <= 3
ORDER BY region, rank_in_region;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° PARTITION BY creates separate rankings for each region.')
print('‚úÖ Task 9.2 Complete')


üìä TASK 9.2: RANK - Top 3 Customers per Region


Unnamed: 0,region,customer_name,segment,total_sales,transactions,rank_in_region
0,Central,Tamara Chand,Corporate,18437.14,7,1
1,Central,Adrian Barton,Consumer,12181.6,11,2
2,Central,Becky Martin,Consumer,10539.9,7,3
3,East,Tom Ashbrook,Home Office,13723.5,5,1
4,East,Hunter Lopez,Consumer,10522.55,2,2
5,East,Bill Shonely,Corporate,10022.29,4,3
6,South,Sean Miller,Home Office,23669.21,8,1
7,South,Sanjit Engle,Consumer,8805.04,3,2
8,South,Grant Thornton,Corporate,8167.42,2,3
9,West,Raymond Buch,Consumer,14345.28,6,1



üí° PARTITION BY creates separate rankings for each region.
‚úÖ Task 9.2 Complete


In [13]:
# %% Task 9.3: RANK - Top States by Sales with Category Breakdown
print('\n' + '=' * 80)
print('üìä TASK 9.3: RANK - Top 5 States per Category')
print('=' * 80)

query = """
WITH state_category_sales AS (
    SELECT 
        p.category,
        g.state,
        g.region,
        ROUND(SUM(f.sales)::numeric, 2) AS total_sales,
        COUNT(*) AS transactions,
        DENSE_RANK() OVER (PARTITION BY p.category ORDER BY SUM(f.sales) DESC) AS state_rank
    FROM fact_sales f
    LEFT JOIN dim_product p ON f.product_key = p.product_key
    LEFT JOIN dim_geography g ON f.geo_key = g.geo_key
    GROUP BY p.category, g.state, g.region
)
SELECT 
    category,
    state,
    region,
    total_sales,
    transactions,
    state_rank
FROM state_category_sales
WHERE state_rank <= 5
ORDER BY category, state_rank;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 9.3 Complete')


üìä TASK 9.3: RANK - Top 5 States per Category


Unnamed: 0,category,state,region,total_sales,transactions,state_rank
0,Furniture,California,West,152216.59,430,1
1,Furniture,New York,East,92504.53,231,2
2,Furniture,Texas,Central,59633.69,197,3
3,Furniture,Washington,West,44626.48,113,4
4,Furniture,Pennsylvania,East,39354.94,125,5
5,Office Supplies,California,West,139405.69,1167,1
6,Office Supplies,New York,East,86953.87,662,2
7,Office Supplies,Texas,Central,44282.52,599,3
8,Office Supplies,Washington,West,40043.68,291,4
9,Office Supplies,Michigan,Central,37688.2,158,5



‚úÖ Task 9.3 Complete


## Task 10: PIVOT Query

Pivot tables transform rows into columns.

In [14]:
# %% Task 10.1: PIVOT - Sales by Category and Year
print('=' * 80)
print('üìä TASK 10.1: PIVOT - Sales by Category across Years')
print('=' * 80)

query = """
SELECT 
    category,
    ROUND(SUM(CASE WHEN year = 2014 THEN sales ELSE 0 END)::numeric, 2) AS "2014",
    ROUND(SUM(CASE WHEN year = 2015 THEN sales ELSE 0 END)::numeric, 2) AS "2015",
    ROUND(SUM(CASE WHEN year = 2016 THEN sales ELSE 0 END)::numeric, 2) AS "2016",
    ROUND(SUM(CASE WHEN year = 2017 THEN sales ELSE 0 END)::numeric, 2) AS "2017",
    ROUND(SUM(sales)::numeric, 2) AS total
FROM (
    SELECT 
        p.category,
        t.year,
        f.sales
    FROM fact_sales f
    LEFT JOIN dim_product p ON f.product_key = p.product_key
    LEFT JOIN dim_time t ON f.time_key = t.time_key
) AS sales_data
GROUP BY category
ORDER BY total DESC NULLS LAST;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° PostgreSQL uses CASE WHEN for pivoting.')
print('‚úÖ Task 10.1 Complete')

üìä TASK 10.1: PIVOT - Sales by Category across Years


Unnamed: 0,category,2014,2015,2016,2017,total
0,Technology,0.0,173865.54,162257.7,221962.0,827455.94
1,Furniture,0.0,156477.92,164053.88,195813.15,728658.75
2,Office Supplies,0.0,149512.81,133124.36,182417.65,705422.28



üí° PostgreSQL uses CASE WHEN for pivoting.
‚úÖ Task 10.1 Complete


In [15]:
# %% Task 10.2: PIVOT - Sales by Region and Segment
print('\n' + '=' * 80)
print('üìä TASK 10.2: PIVOT - Sales by Region across Segments')
print('=' * 80)

query = """
SELECT 
    region,
    ROUND(SUM(CASE WHEN segment = 'Consumer' THEN sales ELSE 0 END)::numeric, 2) AS "Consumer",
    ROUND(SUM(CASE WHEN segment = 'Corporate' THEN sales ELSE 0 END)::numeric, 2) AS "Corporate",
    ROUND(SUM(CASE WHEN segment = 'Home Office' THEN sales ELSE 0 END)::numeric, 2) AS "Home Office",
    ROUND(SUM(sales)::numeric, 2) AS total
FROM (
    SELECT 
        g.region,
        c.segment,
        f.sales
    FROM fact_sales f
    LEFT JOIN dim_geography g ON f.geo_key = g.geo_key
    LEFT JOIN dim_customer c ON f.customer_key = c.customer_key
) AS sales_data
GROUP BY region
ORDER BY total DESC NULLS LAST;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 10.2 Complete')


üìä TASK 10.2: PIVOT - Sales by Region across Segments


Unnamed: 0,region,Consumer,Corporate,Home Office,total
0,West,355241.22,220018.25,134960.3,710219.77
1,East,347906.61,195897.53,125714.71,669518.85
2,Central,250210.52,152031.47,90404.91,492646.9
3,South,194702.16,120546.89,73902.4,389151.45



‚úÖ Task 10.2 Complete


In [16]:
# %% Task 10.3: PIVOT - Quarterly Sales by Category
print('\n' + '=' * 80)
print('üìä TASK 10.3: PIVOT - Quarterly Sales by Category')
print('=' * 80)

query = """
SELECT 
    category,
    ROUND(SUM(CASE WHEN quarter_name = 'Q1' THEN sales ELSE 0 END)::numeric, 2) AS "Q1",
    ROUND(SUM(CASE WHEN quarter_name = 'Q2' THEN sales ELSE 0 END)::numeric, 2) AS "Q2",
    ROUND(SUM(CASE WHEN quarter_name = 'Q3' THEN sales ELSE 0 END)::numeric, 2) AS "Q3",
    ROUND(SUM(CASE WHEN quarter_name = 'Q4' THEN sales ELSE 0 END)::numeric, 2) AS "Q4",
    ROUND(SUM(sales)::numeric, 2) AS total
FROM (
    SELECT 
        p.category,
        t.quarter_name,
        f.sales
    FROM fact_sales f
    LEFT JOIN dim_product p ON f.product_key = p.product_key
    LEFT JOIN dim_time t ON f.time_key = t.time_key
) AS sales_data
GROUP BY category
ORDER BY total DESC NULLS LAST;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 10.3 Complete')


üìä TASK 10.3: PIVOT - Quarterly Sales by Category


Unnamed: 0,category,Q1,Q2,Q3,Q4,total
0,Technology,150360.15,158916.56,200658.67,317520.56,827455.94
1,Furniture,93316.22,139004.96,196700.79,299636.78,728658.75
2,Office Supplies,107560.01,138285.81,205595.51,253980.95,705422.28



‚úÖ Task 10.3 Complete


## Task 11: Window Functions

Window functions for moving averages and cumulative sums.

In [17]:
# %% Task 11.1: Cumulative Sales by Date
print('=' * 80)
print('üìä TASK 11.1: Cumulative Sales Over Time')
print('=' * 80)

query = """
SELECT 
    t.date_full,
    COUNT(*) AS daily_transactions,
    ROUND(SUM(f.sales)::numeric, 2) AS daily_sales,
    ROUND(SUM(SUM(f.sales)) OVER (ORDER BY t.date_full ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)::numeric, 2) AS cumulative_sales,
    ROUND(AVG(SUM(f.sales)) OVER (ORDER BY t.date_full ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)::numeric, 2) AS moving_avg_7_days
FROM fact_sales f
LEFT JOIN dim_time t ON f.time_key = t.time_key
GROUP BY t.date_full
ORDER BY t.date_full
LIMIT 50;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° Cumulative sales sum all previous days.')
print('üí° Moving average uses last 7 days (including current).')
print('‚úÖ Task 11.1 Complete')

üìä TASK 11.1: Cumulative Sales Over Time


Unnamed: 0,date_full,daily_transactions,daily_sales,cumulative_sales,moving_avg_7_days
0,2015-01-03,1,16.45,16.45,16.45
1,2015-01-04,3,288.06,304.51,152.26
2,2015-01-05,1,19.54,324.05,108.02
3,2015-01-06,9,4407.1,4731.15,1182.79
4,2015-01-07,2,87.16,4818.31,963.66
5,2015-01-09,2,40.54,4858.85,809.81
6,2015-01-10,2,54.83,4913.68,701.95
7,2015-01-11,1,9.94,4923.62,701.02
8,2015-01-13,11,3553.8,8477.42,1167.56
9,2015-01-14,1,61.96,8539.38,1173.62



üí° Cumulative sales sum all previous days.
üí° Moving average uses last 7 days (including current).
‚úÖ Task 11.1 Complete


In [18]:
# %% Task 11.2: Monthly Sales with Moving Average
print('\n' + '=' * 80)
print('üìä TASK 11.2: Monthly Sales with 3-Month Moving Average')
print('=' * 80)

query = """
SELECT 
    t.year,
    t.month,
    t.month_name,
    COUNT(*) AS transactions,
    ROUND(SUM(f.sales)::numeric, 2) AS monthly_sales,
    ROUND(AVG(SUM(f.sales)) OVER (
        ORDER BY t.year, t.month 
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    )::numeric, 2) AS moving_avg_3_months,
    ROUND(SUM(SUM(f.sales)) OVER (
        ORDER BY t.year, t.month 
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )::numeric, 2) AS cumulative_sales
FROM fact_sales f
LEFT JOIN dim_time t ON f.time_key = t.time_key
GROUP BY t.year, t.month, t.month_name
ORDER BY t.year, t.month;
"""

result = pd.read_sql(query, engine)
display(result)
print('\n‚úÖ Task 11.2 Complete')


üìä TASK 11.2: Monthly Sales with 3-Month Moving Average


Unnamed: 0,year,month,month_name,transactions,monthly_sales,moving_avg_3_months,cumulative_sales
0,2015,1,January,77,14205.71,14205.71,14205.71
1,2015,2,February,46,4519.92,9362.82,18725.63
2,2015,3,March,154,55205.83,24643.82,73931.46
3,2015,4,April,130,27906.86,29210.87,101838.32
4,2015,5,May,121,23644.3,35585.66,125482.62
5,2015,6,June,131,34322.94,28624.7,159805.56
6,2015,7,July,142,33781.52,30582.92,193587.08
7,2015,8,August,146,27117.53,31740.66,220704.61
8,2015,9,September,267,81623.52,47507.52,302328.13
9,2015,10,October,159,31453.37,46731.47,333781.5



‚úÖ Task 11.2 Complete


In [19]:
# %% Task 11.3: Category Sales with Running Total and Percentage
print('\n' + '=' * 80)
print('üìä TASK 11.3: Category Sales with Running Total and Contribution %')
print('=' * 80)

query = """
WITH category_sales AS (
    SELECT 
        p.category,
        ROUND(SUM(f.sales)::numeric, 2) AS total_sales
    FROM fact_sales f
    LEFT JOIN dim_product p ON f.product_key = p.product_key
    GROUP BY p.category
)
SELECT 
    category,
    total_sales,
    ROUND(SUM(total_sales) OVER (ORDER BY total_sales DESC)::numeric, 2) AS running_total,
    ROUND(
        100.0 * total_sales / SUM(total_sales) OVER (),
        2
    ) AS pct_of_total,
    ROUND(
        100.0 * SUM(total_sales) OVER (ORDER BY total_sales DESC) / SUM(total_sales) OVER (),
        2
    ) AS cumulative_pct
FROM category_sales
ORDER BY total_sales DESC;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° Shows which categories contribute most to total sales.')
print('‚úÖ Task 11.3 Complete')


üìä TASK 11.3: Category Sales with Running Total and Contribution %


Unnamed: 0,category,total_sales,running_total,pct_of_total,cumulative_pct
0,Technology,827455.94,827455.94,36.59,36.59
1,Furniture,728658.75,1556114.69,32.22,68.81
2,Office Supplies,705422.28,2261536.97,31.19,100.0



üí° Shows which categories contribute most to total sales.
‚úÖ Task 11.3 Complete


In [20]:
# %% Task 11.4: Customer Sales with LAG and LEAD
print('\n' + '=' * 80)
print('üìä TASK 11.4: Customer Sales with Previous/Next Month Comparison')
print('=' * 80)

query = """
WITH monthly_category_sales AS (
    SELECT 
        p.category,
        t.year,
        t.month,
        t.month_name,
        ROUND(SUM(f.sales)::numeric, 2) AS monthly_sales
    FROM fact_sales f
    LEFT JOIN dim_product p ON f.product_key = p.product_key
    LEFT JOIN dim_time t ON f.time_key = t.time_key
    GROUP BY p.category, t.year, t.month, t.month_name
)
SELECT 
    category,
    year,
    month_name,
    monthly_sales,
    LAG(monthly_sales) OVER (PARTITION BY category ORDER BY year, month) AS prev_month_sales,
    LEAD(monthly_sales) OVER (PARTITION BY category ORDER BY year, month) AS next_month_sales,
    ROUND(
        monthly_sales - LAG(monthly_sales) OVER (PARTITION BY category ORDER BY year, month),
        2
    ) AS mom_change
FROM monthly_category_sales
ORDER BY category, year, month
LIMIT 50;
"""

result = pd.read_sql(query, engine)
display(result)
print('\nüí° LAG gets previous row, LEAD gets next row.')
print('üí° mom_change = Month-over-Month change.')
print('‚úÖ Task 11.4 Complete')


üìä TASK 11.4: Customer Sales with Previous/Next Month Comparison


Unnamed: 0,category,year,month_name,monthly_sales,prev_month_sales,next_month_sales,mom_change
0,Furniture,2015,January,6217.28,,1839.66,
1,Furniture,2015,February,1839.66,6217.28,14243.39,-4377.62
2,Furniture,2015,March,14243.39,1839.66,7944.83,12403.73
3,Furniture,2015,April,7944.83,14243.39,6912.8,-6298.56
4,Furniture,2015,May,6912.8,7944.83,13144.58,-1032.03
5,Furniture,2015,June,13144.58,6912.8,10821.07,6231.78
6,Furniture,2015,July,10821.07,13144.58,7125.21,-2323.51
7,Furniture,2015,August,7125.21,10821.07,23816.48,-3695.86
8,Furniture,2015,September,23816.48,7125.21,12304.24,16691.27
9,Furniture,2015,October,12304.24,23816.48,21471.03,-11512.24



üí° LAG gets previous row, LEAD gets next row.
üí° mom_change = Month-over-Month change.
‚úÖ Task 11.4 Complete


## Summary

All OLAP queries completed successfully!

In [21]:
# %% Summary
print('=' * 80)
print('üéâ ALL TASKS COMPLETE')
print('=' * 80)

print('\n‚úÖ Task 5: Basic Aggregates (SUM, AVG, COUNT, MIN, MAX)')
print('‚úÖ Task 6: GROUP BY ROLLUP')
print('‚úÖ Task 7: GROUP BY CUBE')
print('‚úÖ Task 8: GROUPING SETS')
print('‚úÖ Task 9: RANK and DENSE_RANK')
print('‚úÖ Task 10: PIVOT Queries')
print('‚úÖ Task 11: Window Functions (Moving Averages, Cumulative Sums)')

print('\nüåü OLAP Analysis Complete!')

üéâ ALL TASKS COMPLETE

‚úÖ Task 5: Basic Aggregates (SUM, AVG, COUNT, MIN, MAX)
‚úÖ Task 6: GROUP BY ROLLUP
‚úÖ Task 7: GROUP BY CUBE
‚úÖ Task 8: GROUPING SETS
‚úÖ Task 9: RANK and DENSE_RANK
‚úÖ Task 10: PIVOT Queries
‚úÖ Task 11: Window Functions (Moving Averages, Cumulative Sums)

üåü OLAP Analysis Complete!
