In [2]:
# Load the PyDough Jupyter extension; the cells below rely on the `%%pydough` cell magic.
%load_ext pydough.jupyter_extensions

In [3]:
import pydough
import datetime

import pandas as pd
from pandas.testing import assert_frame_equal, assert_series_equal
import re
import dfcompare

import collections
import numpy as np
import sqlite3 as sql
import os

# Setup demo metadata
# Single definition of the SQLite database location. It is reused for both the
# PyDough session and the raw SQLite connection, and can be passed to Python helpers.
db_path = "../../tpch.db"

pydough.active_session.load_metadata_graph("../metadata/tpch_demo_graph.json", "TPCH")
pydough.active_session.connect_database("sqlite", database=db_path)
# NOTE(review): a bare expression in the middle of a cell is not displayed; kept in
# case the attribute access is meant as a sanity check that the graph is loaded.
pydough.active_session.metadata

# Raw SQLite connection used to run the reference SQL queries with pandas.
connection = sql.connect(db_path)

# Avoid scientific notation
pd.options.display.float_format = '{:.6f}'.format

# TPCH Testing New Queries

This notebook proposes new business questions in the TPC-H context. For each question we first write the corresponding SQL query and then try to reproduce it in PyDough. 

## Business Context 1

This query aims to identify which three regions generate the highest total sales and which suppliers are the main contributors to those sales. The objective is to assess sales performance by geographic region and to understand which suppliers dominate each region.

Total sales is defined as the sum of `extended_price * (1 - discount)`.

SQL:

In [11]:
# Reference SQL for context 1: total sales per (region, supplier) pair, where
# total_sales = SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)).
# The GROUP BY is on (R_NAME, S_NAME) and LIMIT 3 keeps the three pairs with the
# highest total_sales, i.e. the limit applies to pairs, not to regions.
query = '''
SELECT
    R_NAME AS region_name,
    S_NAME AS supplier_name,
    SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS total_sales
FROM
    REGION
JOIN
    NATION ON R_REGIONKEY = N_REGIONKEY
JOIN
    SUPPLIER ON N_NATIONKEY = S_NATIONKEY
JOIN
    PARTSUPP ON S_SUPPKEY = PS_SUPPKEY
JOIN
    LINEITEM ON PS_PARTKEY = L_PARTKEY AND PS_SUPPKEY = L_SUPPKEY
GROUP BY
    R_NAME, S_NAME
ORDER BY
    total_sales DESC
LIMIT 3;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,region_name,supplier_name,total_sales
0,AFRICA,Supplier#000005994,28698575.6313
1,EUROPE,Supplier#000006490,28579287.0004
2,MIDDLE EAST,Supplier#000005991,28542342.9984


PyDough solution:

In [3]:
%%pydough

supplier = suppliers(region_name=nation.region.name, supplier_name=name)

output = PARTITION(supplier, name="part", by=(region_name, supplier_name))(
    region_name,
    supplier_name,
    total_sales=SUM(part.lines.extended_price * (1 - part.lines.discount))
).TOP_K(3, by=total_sales.DESC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,region_name,supplier_name,total_sales
0,AFRICA,Supplier#000005994,28698575.6313
1,EUROPE,Supplier#000006490,28579287.0004
2,MIDDLE EAST,Supplier#000005991,28542342.9984


Compare results in SQL and PyDough:

In [18]:
# Compare the PyDough DataFrame against the SQL reference DataFrame
# (dfcompare is a project helper; no category/question labels are supplied).
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

True

PyDough to SQL:

In [19]:
%%pydough

# Show the SQL text that PyDough generates for the context 1 `output` expression.
pydough.to_sql(output)

'SELECT region_name, supplier_name, total_sales FROM (SELECT supplier_name, region_name, total_sales, ordering_1 FROM (SELECT supplier_name, region_name, COALESCE(agg_0, 0) AS total_sales, COALESCE(agg_0, 0) AS ordering_1 FROM (SELECT _table_alias_6.supplier_name AS supplier_name, _table_alias_6.region_name AS region_name, agg_0 FROM (SELECT supplier_name, region_name FROM (SELECT name_3 AS region_name, name AS supplier_name FROM (SELECT s_name AS name, s_nationkey AS nation_key FROM main.SUPPLIER) LEFT JOIN (SELECT _table_alias_0.key AS key, name AS name_3 FROM (SELECT n_nationkey AS key, n_regionkey AS region_key FROM main.NATION) AS _table_alias_0 INNER JOIN (SELECT r_name AS name, r_regionkey AS key FROM main.REGION) AS _table_alias_1 ON region_key = _table_alias_1.key) ON nation_key = key) GROUP BY supplier_name, region_name) AS _table_alias_6 LEFT JOIN (SELECT region_name, supplier_name, SUM(extended_price * (1 - discount)) AS agg_0 FROM (SELECT region_name, supplier_name, discou

## Business Context 2

This query finds the five customers with the most returned items. For each one it reports the customer name, the market segment it belongs to, and the number of returned line items (line items whose return flag is `R`).

SQL:

In [None]:
# Reference SQL for context 2: number of returned line items (L_RETURNFLAG = 'R')
# per customer, keeping the 5 customers with the highest count.
# Several customers share the same return_count, so the ORDER BY includes the
# customer name as a tie-breaker; without it, LIMIT 5 may return the tied rows in
# any order (or pick different rows among ties).
query = '''
SELECT
    C_NAME AS customer_name,
    C_MKTSEGMENT AS market_segment,
    COUNT(*) AS return_count
FROM
    CUSTOMER
JOIN
    ORDERS ON C_CUSTKEY = O_CUSTKEY
JOIN
    LINEITEM ON O_ORDERKEY = L_ORDERKEY
WHERE
    L_RETURNFLAG = 'R'  -- 'R' indicates a returned/cancelled item
GROUP BY
    C_NAME, C_MKTSEGMENT
ORDER BY
    return_count DESC,
    customer_name ASC
LIMIT 5;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,customer_name,market_segment,return_count
0,Customer#000050605,HOUSEHOLD,60
1,Customer#000121909,AUTOMOBILE,60
2,Customer#000021016,MACHINERY,57
3,Customer#000042619,MACHINERY,57
4,Customer#000075160,HOUSEHOLD,57


PyDough solution:

In [28]:
%%pydough

output = PARTITION(customers, name="cust", by=(name, mktsegment))(
    customer_name=name,
    market_segment=mktsegment,
    return_count=COUNT(cust.orders.lines.WHERE(return_flag == "R"))
).TOP_K(5, by=return_count.DESC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,customer_name,market_segment,return_count
0,Customer#000121909,AUTOMOBILE,60
1,Customer#000050605,HOUSEHOLD,60
2,Customer#000075160,HOUSEHOLD,57
3,Customer#000021016,MACHINERY,57
4,Customer#000042619,MACHINERY,57


Compare results in SQL and PyDough:

In [29]:
# Compare the PyDough DataFrame against the SQL reference DataFrame
# (dfcompare is a project helper; no category/question labels are supplied).
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

np.True_

PyDough to SQL:

In [30]:
%%pydough

# Show the SQL text that PyDough generates for the context 2 `output` expression.
pydough.to_sql(output)

"SELECT customer_name, market_segment, return_count FROM (SELECT customer_name, market_segment, return_count, ordering_1 FROM (SELECT name AS customer_name, mktsegment AS market_segment, COALESCE(agg_0, 0) AS return_count, COALESCE(agg_0, 0) AS ordering_1 FROM (SELECT _table_alias_2.mktsegment AS mktsegment, _table_alias_2.name AS name, agg_0 FROM (SELECT mktsegment, name FROM (SELECT c_name AS name, c_mktsegment AS mktsegment FROM main.CUSTOMER) GROUP BY mktsegment, name) AS _table_alias_2 LEFT JOIN (SELECT name, mktsegment, COUNT() AS agg_0 FROM (SELECT name, mktsegment FROM (SELECT name, mktsegment, return_flag FROM (SELECT name, mktsegment, _table_alias_1.key AS key_2 FROM (SELECT c_name AS name, c_custkey AS key, c_mktsegment AS mktsegment FROM main.CUSTOMER) AS _table_alias_0 INNER JOIN (SELECT o_orderkey AS key, o_custkey AS customer_key FROM main.ORDERS) AS _table_alias_1 ON _table_alias_0.key = customer_key) INNER JOIN (SELECT l_returnflag AS return_flag, l_orderkey AS order_k

## Business Context 3

The idea is to select the 20 customers who have spent the most money. Each customer is identified by ID and name; we also want to know the nation and region the customer belongs to, the total number of orders placed, and the total amount spent on them. 

SQL:

In [50]:
# Reference SQL for context 3.
#   CustomerPurchases: per customer key, the number of distinct orders and
#                      SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) over their line items.
#   CustomerInfo:      customer key and name with the nation and region names.
# The final SELECT joins both and keeps the 20 customers with the highest TotalSpent.
query = '''
WITH CustomerPurchases AS (
    SELECT 
        O_CUSTKEY, 
        COUNT(DISTINCT O_ORDERKEY) AS TotalOrders, 
        SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS TotalSpent
    FROM ORDERS
    JOIN LINEITEM ON O_ORDERKEY = L_ORDERKEY
    GROUP BY O_CUSTKEY
), CustomerInfo AS (
    SELECT 
        C_CUSTKEY, 
        C_NAME,
        N_NAME AS Nation,
        R_NAME AS Region
    FROM CUSTOMER
    JOIN NATION ON C_NATIONKEY = N_NATIONKEY
    JOIN REGION ON N_REGIONKEY = R_REGIONKEY
)
SELECT 
    C.C_CUSTKEY AS customer_id, 
    C.C_NAME AS customer_name, 
    C.Nation, 
    C.Region, 
    P.TotalOrders,
    P.TotalSpent
FROM CustomerPurchases P
JOIN CustomerInfo C ON P.O_CUSTKEY = C.C_CUSTKEY
ORDER BY P.TotalSpent DESC
LIMIT 20;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,customer_id,customer_name,Nation,Region,TotalOrders,TotalSpent
0,143500,Customer#000143500,IRAN,MIDDLE EAST,39,6757566.0218
1,95257,Customer#000095257,BRAZIL,AMERICA,36,6294115.334
2,87115,Customer#000087115,KENYA,AFRICA,34,6184649.5176
3,131113,Customer#000131113,ETHIOPIA,AFRICA,37,6080943.8305
4,134380,Customer#000134380,ALGERIA,AFRICA,37,6075141.9635
5,103834,Customer#000103834,IRAQ,MIDDLE EAST,31,6059770.3232
6,69682,Customer#000069682,MOZAMBIQUE,AFRICA,39,6057779.0348
7,102022,Customer#000102022,INDONESIA,ASIA,41,6039653.6335
8,98587,Customer#000098587,CHINA,ASIA,37,6027021.5855
9,64660,Customer#000064660,MOZAMBIQUE,AFRICA,31,5905659.6159


PyDough solution:

In [51]:
%%pydough
output = customers(
    customer_id=key,
    customer_name=name,
    Nation=nation.name,
    Region=nation.region.name,
    TotalOrders=COUNT(orders),
    TotalSpent=SUM(orders.lines.extended_price * (1 - orders.lines.discount))
).TOP_K(20, TotalSpent.DESC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,customer_id,customer_name,Nation,Region,TotalOrders,TotalSpent
0,143500,Customer#000143500,IRAN,MIDDLE EAST,39,6757566.0218
1,95257,Customer#000095257,BRAZIL,AMERICA,36,6294115.334
2,87115,Customer#000087115,KENYA,AFRICA,34,6184649.5176
3,131113,Customer#000131113,ETHIOPIA,AFRICA,37,6080943.8305
4,134380,Customer#000134380,ALGERIA,AFRICA,37,6075141.9635
5,103834,Customer#000103834,IRAQ,MIDDLE EAST,31,6059770.3232
6,69682,Customer#000069682,MOZAMBIQUE,AFRICA,39,6057779.0348
7,102022,Customer#000102022,INDONESIA,ASIA,41,6039653.6335
8,98587,Customer#000098587,CHINA,ASIA,37,6027021.5855
9,64660,Customer#000064660,MOZAMBIQUE,AFRICA,31,5905659.6159


Compare results in SQL and PyDough:

In [52]:
# Compare the PyDough DataFrame against the SQL reference DataFrame
# (dfcompare is a project helper; no category/question labels are supplied).
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

True

PyDough to SQL:

In [53]:
%%pydough

# Show the SQL text that PyDough generates for the context 3 `output` expression.
pydough.to_sql(output)

'SELECT customer_id, customer_name, Nation, Region, TotalOrders, TotalSpent FROM (SELECT Nation, Region, TotalOrders, TotalSpent, customer_id, customer_name, ordering_2 FROM (SELECT name_3 AS Nation, name_6 AS Region, COALESCE(agg_0, 0) AS TotalOrders, COALESCE(agg_1, 0) AS TotalSpent, key AS customer_id, name AS customer_name, COALESCE(agg_1, 0) AS ordering_2 FROM (SELECT name, key, name_3, name_6, agg_0, agg_1 FROM (SELECT name, key, name_3, name_6, agg_0 FROM (SELECT name, _table_alias_4.key AS key, name_3, name_6 FROM (SELECT _table_alias_0.name AS name, _table_alias_0.key AS key, nation_key, _table_alias_1.name AS name_3 FROM (SELECT c_name AS name, c_custkey AS key, c_nationkey AS nation_key FROM main.CUSTOMER) AS _table_alias_0 LEFT JOIN (SELECT n_name AS name, n_nationkey AS key FROM main.NATION) AS _table_alias_1 ON nation_key = _table_alias_1.key) AS _table_alias_4 LEFT JOIN (SELECT _table_alias_2.key AS key, name AS name_6 FROM (SELECT n_nationkey AS key, n_regionkey AS regi

## Business Context 4

Average discount applied on orders for each market segment. 
This query allows you to analyze which market segments receive higher discounts on their orders. This can help assess whether discount strategies are aligned with business objectives and whether certain segments are getting too high or too low discounts compared to others.

SQL:

In [6]:
# Reference SQL for context 4: average line-item discount per customer market
# segment, expressed as a percentage (AVG(L_DISCOUNT) * 100), highest first.
query = '''
SELECT 
    C.C_MKTSEGMENT AS MarketSegment,
    AVG(L.L_DISCOUNT) * 100 AS AvgDiscountPercentage
FROM CUSTOMER C
JOIN ORDERS O ON C.C_CUSTKEY = O.O_CUSTKEY
JOIN LINEITEM L ON O.O_ORDERKEY = L.L_ORDERKEY
GROUP BY C.C_MKTSEGMENT
ORDER BY AvgDiscountPercentage DESC;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,MarketSegment,AvgDiscountPercentage
0,MACHINERY,5.006444
1,HOUSEHOLD,5.000242
2,FURNITURE,4.999733
3,AUTOMOBILE,4.998568
4,BUILDING,4.994801


PyDough solution:

In [12]:
%%pydough

output = PARTITION(customers, name="part", by=mktsegment)(
    MarketSegment=mktsegment,
    AvgDiscountPercentage=AVG(part.orders.lines.discount)*100
).ORDER_BY(AvgDiscountPercentage.DESC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,MarketSegment,AvgDiscountPercentage
0,MACHINERY,5.006444
1,HOUSEHOLD,5.000242
2,FURNITURE,4.999733
3,AUTOMOBILE,4.998568
4,BUILDING,4.994801


Compare results in SQL and PyDough:

In [13]:
# Compare the PyDough DataFrame against the SQL reference DataFrame
# (dfcompare is a project helper; no category/question labels are supplied).
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

True

PyDough to SQL:

In [14]:
%%pydough

# Show the SQL text that PyDough generates for the context 4 `output` expression.
pydough.to_sql(output)

'SELECT MarketSegment, AvgDiscountPercentage FROM (SELECT agg_0 * 100 AS AvgDiscountPercentage, mktsegment AS MarketSegment, agg_0 * 100 AS ordering_1 FROM (SELECT _table_alias_2.mktsegment AS mktsegment, agg_0 FROM (SELECT mktsegment FROM (SELECT c_mktsegment AS mktsegment FROM main.CUSTOMER) GROUP BY mktsegment) AS _table_alias_2 LEFT JOIN (SELECT mktsegment, AVG(discount) AS agg_0 FROM (SELECT mktsegment, discount FROM (SELECT mktsegment, _table_alias_1.key AS key_2 FROM (SELECT c_custkey AS key, c_mktsegment AS mktsegment FROM main.CUSTOMER) AS _table_alias_0 INNER JOIN (SELECT o_orderkey AS key, o_custkey AS customer_key FROM main.ORDERS) AS _table_alias_1 ON _table_alias_0.key = customer_key) INNER JOIN (SELECT l_discount AS discount, l_orderkey AS order_key FROM main.LINEITEM) ON key_2 = order_key) GROUP BY mktsegment) AS _table_alias_3 ON _table_alias_2.mktsegment = _table_alias_3.mktsegment)) ORDER BY ordering_1 DESC'

## Business Context 5

This query finds the cheapest supplier for each of the 10 best-selling products. It returns the product name and total units sold, the supplier's name and nation, and the price (supply cost) at which the product is supplied. 

SQL:

In [None]:
# Reference SQL for context 5 (the inline SQL comments are in Spanish; English gloss here).
#   TopProducts:     the 10 parts with the highest total quantity sold (SUM(L_QUANTITY)).
#   RankedSuppliers: for each of those parts, every PARTSUPP row with the supplier
#                    name and nation, ranked by supply cost ascending within the part.
# The final SELECT keeps the rows with CostRank = 1 (the cheapest supplier per part;
# RANK() can return several rows for a part when supply costs tie), ordered by
# TotalSold descending.
query = '''
WITH TopProducts AS (
    -- Obtener los 10 productos más vendidos en cantidad total
    SELECT 
        L_PARTKEY AS PartKey,
        P_PARTKEY AS ProductId,
        P_NAME AS ProductName,
        SUM(L_QUANTITY) AS TotalSold
    FROM LINEITEM
    JOIN PART ON L_PARTKEY = P_PARTKEY
    GROUP BY L_PARTKEY, P_NAME, P_PARTKEY
    ORDER BY TotalSold DESC
    LIMIT 10
), RankedSuppliers AS (
    -- Encontrar el proveedor más económico para cada uno de los 10 productos más vendidos
    SELECT 
        TP.PartKey,
        TP.ProductName,
        TP.ProductId,
        TP.TotalSold,  -- Se mantiene la cantidad total vendida
        S.S_NAME AS SupplierName,
        N.N_NAME AS SupplierNation,
        PS.PS_SUPPLYCOST AS SupplyCost,
        RANK() OVER (PARTITION BY PS.PS_PARTKEY ORDER BY PS.PS_SUPPLYCOST ASC) AS CostRank
    FROM PARTSUPP PS
    JOIN SUPPLIER S ON PS.PS_SUPPKEY = S.S_SUPPKEY
    JOIN NATION N ON S.S_NATIONKEY = N.N_NATIONKEY
    JOIN TopProducts TP ON PS.PS_PARTKEY = TP.PartKey  -- Unimos con los productos más vendidos
)
SELECT 
    ProductId,
    ProductName,
    TotalSold,  -- Agregamos la cantidad total vendida en el resultado final
    SupplierName,
    SupplierNation,
    SupplyCost
FROM RankedSuppliers
WHERE CostRank = 1
ORDER BY TotalSold DESC;  -- Ordenamos por cantidad vendida para mayor claridad
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

## Business Context 6

Evaluate the performance of suppliers in terms of the quantity of products supplied in a specific period. This makes it possible to identify which suppliers are most consistent in supplying the company, which helps decision making about strategic commercial relationships. The selected year is 1998. Return the top 10 suppliers. 

SQL:

In [None]:
# Reference SQL for context 6 (the inline SQL comments are in Spanish; English gloss here).
#   SupplierPerformance: per supplier, SUM(PS_AVAILQTY) over the supplier's PARTSUPP
#                        rows whose part appears in at least one line item of an
#                        order dated in 1998 (the subquery does not restrict which
#                        supplier shipped that line item).
# The final SELECT keeps the 10 suppliers with the highest TotalSupplied.
query = '''
WITH SupplierPerformance AS (
    -- Calculamos la cantidad total de productos suministrados por cada proveedor en un período específico
    SELECT 
        S.S_SUPPKEY AS SupplierKey,
        S.S_NAME AS SupplierName,
        N.N_NAME AS SupplierNation,
        SUM(PS.PS_AVAILQTY) AS TotalSupplied
    FROM PARTSUPP PS
    JOIN SUPPLIER S ON PS.PS_SUPPKEY = S.S_SUPPKEY
    JOIN NATION N ON S.S_NATIONKEY = N.N_NATIONKEY
    WHERE PS.PS_PARTKEY IN (
        -- Filtramos los productos que han sido ordenados en 1998
        SELECT DISTINCT L.L_PARTKEY
        FROM LINEITEM L
        JOIN ORDERS O ON L.L_ORDERKEY = O.O_ORDERKEY
        WHERE O.O_ORDERDATE BETWEEN '1998-01-01' AND '1998-12-31'
    )
    GROUP BY S.S_SUPPKEY, S.S_NAME, N.N_NAME
)
SELECT 
    SupplierName,
    SupplierNation,
    TotalSupplied
FROM SupplierPerformance
ORDER BY TotalSupplied DESC
LIMIT 10;  -- Muestra los 10 proveedores más confiables en términos de volumen
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,SupplierName,SupplierNation,TotalSupplied
0,Supplier#000008099,INDIA,491764
1,Supplier#000004527,CANADA,463634
2,Supplier#000007747,KENYA,463045
3,Supplier#000002621,GERMANY,459166
4,Supplier#000005322,FRANCE,458062
5,Supplier#000003467,GERMANY,457830
6,Supplier#000007181,EGYPT,453131
7,Supplier#000006638,ALGERIA,452991
8,Supplier#000004592,FRANCE,452843
9,Supplier#000006048,IRAQ,452323


PyDough solution:

In [37]:
%%pydough

selected_lines = lines(
    SupplierName=supplier.name,
    SupplierNation=supplier.nation.name
)

output = PARTITION(selected_lines, name="p", by=(SupplierName, SupplierNation))(
    SupplierName,
    SupplierNation,
    TotalSupplied=COUNT(p.part_and_supplier.availqty)
).TOP_K(10, TotalSupplied.ASC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,SupplierName,SupplierNation,TotalSupplied
0,Supplier#000007905,ETHIOPIA,517
1,Supplier#000001364,KENYA,523
2,Supplier#000001128,IRAN,524
3,Supplier#000004435,PERU,524
4,Supplier#000008137,KENYA,524
5,Supplier#000001084,ARGENTINA,526
6,Supplier#000007038,FRANCE,526
7,Supplier#000006700,VIETNAM,528
8,Supplier#000003516,PERU,529
9,Supplier#000003730,INDONESIA,529


## Business Context 7

This query allows you to identify the countries that generate the most sales and the total revenue generated in each. It is useful for assessing business performance in different markets and making strategic decisions on expansion, resource allocation and optimization of logistics and distribution.

Total revenue is defined as the sum of `extended_price * (1 - discount)`.

SQL:

In [4]:
# Reference SQL for context 7: per customer nation, the number of distinct orders
# and SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)), restricted to line items with
# L_LINESTATUS = 'O'. Keeps the 10 nations with the highest TotalSales.
query = '''
SELECT 
    N.N_NAME AS Nation,
    COUNT(DISTINCT O.O_ORDERKEY) AS TotalOrders,
    SUM(L.L_EXTENDEDPRICE * (1 - L.L_DISCOUNT)) AS TotalSales
FROM NATION N
JOIN CUSTOMER C ON N.N_NATIONKEY = C.C_NATIONKEY
JOIN ORDERS O ON C.C_CUSTKEY = O.O_CUSTKEY
JOIN LINEITEM L ON O.O_ORDERKEY = L.L_ORDERKEY
WHERE L.L_LINESTATUS = 'O'
GROUP BY N.N_NAME
ORDER BY TotalSales DESC
LIMIT 10;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,Nation,TotalOrders,TotalSales
0,FRANCE,31673,4508949477.7389
1,INDONESIA,31538,4480669042.4026
2,MOZAMBIQUE,31616,4465579958.6469
3,ROMANIA,31461,4451619022.2513
4,JORDAN,31244,4451490317.308
5,RUSSIA,31566,4440710591.4438
6,CHINA,31052,4398731625.1703
7,BRAZIL,31002,4393261825.3628
8,UNITED STATES,30900,4392317679.2143
9,VIETNAM,30955,4392013574.4788


PyDough solution:

In [5]:
%%pydough
selected_lines = lines(Nation=order.customer.nation.name).WHERE(LIKE(status,"O"))

output = PARTITION(selected_lines, name="part", by=Nation)(
    Nation=Nation,
    TotalOrders=NDISTINCT(part.order.key),
    TotalSales=SUM(part.extended_price * (1 - part.discount))
).TOP_K(10, by=TotalSales.DESC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,Nation,TotalOrders,TotalSales
0,FRANCE,31673,4508949477.7389
1,INDONESIA,31538,4480669042.4026
2,MOZAMBIQUE,31616,4465579958.6469
3,ROMANIA,31461,4451619022.2513
4,JORDAN,31244,4451490317.308
5,RUSSIA,31566,4440710591.4438
6,CHINA,31052,4398731625.1703
7,BRAZIL,31002,4393261825.3628
8,UNITED STATES,30900,4392317679.2143
9,VIETNAM,30955,4392013574.4788


Compare results in SQL and PyDough:

In [6]:
# Compare the PyDough DataFrame against the SQL reference DataFrame
# (dfcompare is a project helper; no category/question labels are supplied).
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

True

PyDough to SQL:

In [7]:
%%pydough

# Show the SQL text that PyDough generates for the context 7 `output` expression.
pydough.to_sql(output)

"SELECT Nation, TotalOrders, TotalSales FROM (SELECT Nation, TotalOrders, TotalSales, ordering_2 FROM (SELECT Nation, agg_0 AS TotalOrders, COALESCE(agg_1, 0) AS TotalSales, COALESCE(agg_1, 0) AS ordering_2 FROM (SELECT _table_alias_8.Nation AS Nation, agg_1, agg_0 FROM (SELECT Nation, SUM(extended_price * (1 - discount)) AS agg_1 FROM (SELECT extended_price, discount, Nation FROM (SELECT extended_price, status, discount, name_5 AS Nation FROM (SELECT l_extendedprice AS extended_price, l_linestatus AS status, l_discount AS discount, l_orderkey AS order_key FROM main.LINEITEM) LEFT JOIN (SELECT _table_alias_2.key AS key, name AS name_5 FROM (SELECT _table_alias_0.key AS key, nation_key FROM (SELECT o_orderkey AS key, o_custkey AS customer_key FROM main.ORDERS) AS _table_alias_0 INNER JOIN (SELECT c_custkey AS key, c_nationkey AS nation_key FROM main.CUSTOMER) AS _table_alias_1 ON customer_key = _table_alias_1.key) AS _table_alias_2 INNER JOIN (SELECT n_name AS name, n_nationkey AS key F

## Business Context 8

This query computes, for each customer, the total number of orders, the total amount spent and the average spend per order. It is useful for segmenting high-value customers, customizing loyalty strategies and optimizing sales campaigns focused on customers with higher purchasing power.

Get the top 10 customers with the most orders.

SQL:

In [37]:
# Reference SQL for context 8: per customer (name, market segment), the number of
# distinct orders, the total spent (SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT))) and the
# average order value (total spent / distinct orders), restricted to line items
# with L_LINESTATUS = 'O'. Keeps the 10 customers with the most orders.
# Many customers share the same TotalOrders, so the customer name is added as a
# tie-breaker; without it, LIMIT 10 may pick arbitrary rows among the ties.
query = '''
SELECT 
    C.C_NAME AS CustomerName,
    C.C_MKTSEGMENT AS MarketSegment,
    COUNT(DISTINCT O.O_ORDERKEY) AS TotalOrders,
    SUM(L.L_EXTENDEDPRICE * (1 - L.L_DISCOUNT)) AS TotalSpent,
    (SUM(L.L_EXTENDEDPRICE * (1 - L.L_DISCOUNT)) / COUNT(DISTINCT O.O_ORDERKEY)) AS AvgOrderValue
FROM CUSTOMER C
JOIN ORDERS O ON C.C_CUSTKEY = O.O_CUSTKEY
JOIN LINEITEM L ON O.O_ORDERKEY = L.L_ORDERKEY
WHERE L.L_LINESTATUS = 'O'
GROUP BY C.C_NAME, C.C_MKTSEGMENT
ORDER BY TotalOrders DESC, CustomerName ASC
LIMIT 10;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,CustomerName,MarketSegment,TotalOrders,TotalSpent,AvgOrderValue
0,Customer#000102004,MACHINERY,28,3377976.5139,120642.018354
1,Customer#000056278,HOUSEHOLD,26,3521077.7275,135426.066442
2,Customer#000078634,FURNITURE,26,3572392.6684,137399.718015
3,Customer#000014920,AUTOMOBILE,25,3263225.4956,130529.019824
4,Customer#000019516,HOUSEHOLD,25,2658246.6352,106329.865408
5,Customer#000019486,AUTOMOBILE,24,3414344.253,142264.343875
6,Customer#000089959,HOUSEHOLD,24,3435186.5812,143132.774217
7,Customer#000095257,HOUSEHOLD,24,4096702.0028,170695.916783
8,Customer#000101320,BUILDING,24,2884777.2898,120199.053742
9,Customer#000102022,AUTOMOBILE,24,3293046.9552,137210.2898


PyDough Solution:

In [36]:
%%pydough

selected_lines = lines(
    CustomerName=order.customer.name,
    MarketSegment=order.customer.mktsegment,
).WHERE(LIKE(status,"O"))

partition = PARTITION(selected_lines, name="part", by=(CustomerName, MarketSegment))(
    CustomerName,
    MarketSegment,
    TotalOrders=NDISTINCT(part.order.key),
    TotalSpent=SUM(part.extended_price * (1 - part.discount))
).TOP_K(10, by=TotalOrders.DESC())

output = partition(
    CustomerName,
    MarketSegment,
    TotalOrders,
    TotalSpent,
    AvgOrderValue=(TotalSpent / TotalOrders)
)

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,CustomerName,MarketSegment,TotalOrders,TotalSpent,AvgOrderValue
0,Customer#000102004,MACHINERY,28,3377976.5139,120642.018354
1,Customer#000078634,FURNITURE,26,3572392.6684,137399.718015
2,Customer#000056278,HOUSEHOLD,26,3521077.7275,135426.066442
3,Customer#000014920,AUTOMOBILE,25,3263225.4956,130529.019824
4,Customer#000019516,HOUSEHOLD,25,2658246.6352,106329.865408
5,Customer#000019486,AUTOMOBILE,24,3414344.253,142264.343875
6,Customer#000102022,AUTOMOBILE,24,3293046.9552,137210.2898
7,Customer#000101320,BUILDING,24,2884777.2898,120199.053742
8,Customer#000149035,BUILDING,24,2900707.5002,120862.812508
9,Customer#000089959,HOUSEHOLD,24,3435186.5812,143132.774217


**#FEATURE: We could have done the calculation in the Partition itself, but in terms of performance, it is better to take advantage of the calculations that have already been done.**

Compare results in SQL and PyDough:

In [38]:
# Compare the PyDough DataFrame against the SQL reference DataFrame
# (dfcompare is a project helper; no category/question labels are supplied).
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

np.False_

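The result is `False` because several customers are tied on `TotalOrders`: when the ordering has no tie-breaker, SQL and PyDough may pick different customers (and a different row order) among the ties, so the two top-10 results are not the same. 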

PyDough to SQL:

In [39]:
%%pydough

# Show the SQL text that PyDough generates for the context 8 `output` expression.
pydough.to_sql(output)

"SELECT CustomerName, MarketSegment, TotalOrders, TotalSpent, AvgOrderValue FROM (SELECT MarketSegment, CustomerName, TotalOrders, TotalSpent, ordering_2, CAST(TotalSpent AS REAL) / TotalOrders AS AvgOrderValue FROM (SELECT MarketSegment, CustomerName, TotalOrders, TotalSpent, ordering_2 FROM (SELECT MarketSegment, CustomerName, agg_0 AS TotalOrders, COALESCE(agg_1, 0) AS TotalSpent, agg_0 AS ordering_2 FROM (SELECT _table_alias_4.MarketSegment AS MarketSegment, _table_alias_4.CustomerName AS CustomerName, agg_1, agg_0 FROM (SELECT MarketSegment, CustomerName, SUM(extended_price * (1 - discount)) AS agg_1 FROM (SELECT extended_price, discount, CustomerName, MarketSegment FROM (SELECT extended_price, status, discount, name AS CustomerName, mktsegment AS MarketSegment FROM (SELECT l_extendedprice AS extended_price, l_linestatus AS status, l_discount AS discount, l_orderkey AS order_key FROM main.LINEITEM) LEFT JOIN (SELECT _table_alias_0.key AS key, mktsegment, name FROM (SELECT o_orderk

## Business Context 9

This query identifies the product categories that have generated the highest sales volume, allowing you to optimize inventories, adjust pricing strategies and focus marketing efforts on the most profitable products.

SQL:

In [48]:
# Reference SQL for context 9: per part type (P_TYPE), total units sold
# (SUM(L_QUANTITY)) and total revenue (SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT))),
# restricted to line items with L_LINESTATUS = 'O'.
# Keeps the 10 part types with the highest TotalUnitsSold.
query = '''
SELECT 
    P.P_TYPE AS ProductCategory,
    SUM(L.L_QUANTITY) AS TotalUnitsSold,
    SUM(L.L_EXTENDEDPRICE * (1 - L.L_DISCOUNT)) AS TotalRevenue
FROM PART P
JOIN LINEITEM L ON P.P_PARTKEY = L.L_PARTKEY
WHERE L.L_LINESTATUS = 'O'
GROUP BY P.P_TYPE
ORDER BY TotalUnitsSold DESC
LIMIT 10;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,ProductCategory,TotalUnitsSold,TotalRevenue
0,ECONOMY ANODIZED STEEL,560283,799177159.6352
1,LARGE PLATED STEEL,552677,790956730.0435
2,MEDIUM BURNISHED COPPER,545438,776692549.1232
3,LARGE POLISHED COPPER,545254,778502383.3763
4,LARGE PLATED BRASS,544191,777252791.8398
5,LARGE ANODIZED TIN,543764,769788435.7892
6,PROMO BRUSHED BRASS,542206,778048790.708
7,SMALL POLISHED NICKEL,540346,771060965.4619
8,SMALL BRUSHED TIN,540277,769926566.0819
9,MEDIUM ANODIZED TIN,537091,764450514.4678


PyDough solution:

In [53]:
%%pydough

selected_lines = lines(
    ProductCategory=part.part_type
).WHERE(LIKE(status,"O"))

output = PARTITION(selected_lines, name="part", by=ProductCategory)(
    ProductCategory,
    TotalUnitsSold=SUM(part.quantity),
    TotalRevenue=SUM(part.extended_price * (1 - part.discount))
).TOP_K(10, TotalUnitsSold.DESC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,ProductCategory,TotalUnitsSold,TotalRevenue
0,ECONOMY ANODIZED STEEL,560283,799177159.6352
1,LARGE PLATED STEEL,552677,790956730.0435
2,MEDIUM BURNISHED COPPER,545438,776692549.1232
3,LARGE POLISHED COPPER,545254,778502383.3763
4,LARGE PLATED BRASS,544191,777252791.8398
5,LARGE ANODIZED TIN,543764,769788435.7892
6,PROMO BRUSHED BRASS,542206,778048790.708
7,SMALL POLISHED NICKEL,540346,771060965.4619
8,SMALL BRUSHED TIN,540277,769926566.0819
9,MEDIUM ANODIZED TIN,537091,764450514.4678


Compare results in SQL and PyDough:

In [54]:
# Compare the PyDough DataFrame against the SQL reference DataFrame
# (dfcompare is a project helper; no category/question labels are supplied).
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

True

PyDough to SQL:

In [56]:
%%pydough

# Show the SQL text that PyDough generates for the context 9 `output` expression.
pydough.to_sql(output)

"SELECT ProductCategory, TotalUnitsSold, TotalRevenue FROM (SELECT ProductCategory, TotalRevenue, TotalUnitsSold, ordering_2 FROM (SELECT ProductCategory, COALESCE(agg_0, 0) AS TotalRevenue, COALESCE(agg_1, 0) AS TotalUnitsSold, COALESCE(agg_1, 0) AS ordering_2 FROM (SELECT ProductCategory, SUM(extended_price * (1 - discount)) AS agg_0, SUM(quantity) AS agg_1 FROM (SELECT quantity, extended_price, discount, ProductCategory FROM (SELECT quantity, extended_price, status, discount, part_type AS ProductCategory FROM (SELECT l_partkey AS part_key, l_quantity AS quantity, l_extendedprice AS extended_price, l_linestatus AS status, l_discount AS discount FROM main.LINEITEM) LEFT JOIN (SELECT p_type AS part_type, p_partkey AS key FROM main.PART) ON part_key = key) WHERE status LIKE 'O') GROUP BY ProductCategory)) ORDER BY ordering_2 DESC LIMIT 10) ORDER BY ordering_2 DESC"

## Business Context 10

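This query identifies the 20 best-selling (market segment, product) combinations by units sold, which helps customize sales and marketing strategies for each customer group. Note that the limit of 20 applies to the overall result, not to each market segment separately.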

SQL:

In [60]:
# Reference SQL for context 10: total units sold (SUM(L_QUANTITY)) per
# (customer market segment, part name) pair, restricted to line items with
# L_LINESTATUS = 'O'. Keeps the 20 pairs with the highest TotalUnitsSold
# (the limit is global, not per segment).
# Several pairs share the same TotalUnitsSold, so the grouping columns are added as
# tie-breakers; without them, LIMIT 20 may pick arbitrary rows among the ties.
query = '''
SELECT 
    C.C_MKTSEGMENT AS MarketSegment,
    P.P_NAME AS ProductName,
    SUM(L.L_QUANTITY) AS TotalUnitsSold
FROM CUSTOMER C
JOIN ORDERS O ON C.C_CUSTKEY = O.O_CUSTKEY
JOIN LINEITEM L ON O.O_ORDERKEY = L.L_ORDERKEY
JOIN PART P ON L.L_PARTKEY = P.P_PARTKEY
WHERE L.L_LINESTATUS = 'O'
GROUP BY C.C_MKTSEGMENT, P.P_NAME
ORDER BY TotalUnitsSold DESC, MarketSegment ASC, ProductName ASC
LIMIT 20;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,MarketSegment,ProductName,TotalUnitsSold
0,MACHINERY,peru slate tomato saddle honeydew,466
1,HOUSEHOLD,burlywood blanched beige dim grey,447
2,MACHINERY,slate seashell lawn royal plum,409
3,AUTOMOBILE,orange puff light turquoise misty,403
4,BUILDING,pink chartreuse lace midnight smoke,403
5,FURNITURE,lemon azure linen smoke aquamarine,403
6,BUILDING,lace antique medium orchid grey,392
7,BUILDING,blush antique puff cornflower lavender,391
8,BUILDING,burnished lime rosy burlywood olive,389
9,BUILDING,pink royal ivory lavender dim,389


PyDough solution:

In [63]:
%%pydough

selected_lines = lines(
    MarketSegment=order.customer.mktsegment,
    ProductName=part.name
).WHERE(LIKE(status,"O"))

output = PARTITION(selected_lines, name="part", by=(MarketSegment, ProductName))(
    MarketSegment,
    ProductName,
    TotalUnitsSold=SUM(part.quantity)
).TOP_K(20, TotalUnitsSold.DESC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,MarketSegment,ProductName,TotalUnitsSold
0,MACHINERY,peru slate tomato saddle honeydew,466
1,HOUSEHOLD,burlywood blanched beige dim grey,447
2,MACHINERY,slate seashell lawn royal plum,409
3,AUTOMOBILE,orange puff light turquoise misty,403
4,BUILDING,pink chartreuse lace midnight smoke,403
5,FURNITURE,lemon azure linen smoke aquamarine,403
6,BUILDING,lace antique medium orchid grey,392
7,BUILDING,blush antique puff cornflower lavender,391
8,BUILDING,burnished lime rosy burlywood olive,389
9,BUILDING,pink royal ivory lavender dim,389


Compare results in SQL and PyDough:

In [64]:
# Compare the PyDough DataFrame against the SQL reference DataFrame
# (dfcompare is a project helper; no category/question labels are supplied).
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

True

PyDough to SQL:

In [65]:
%%pydough

# Show the SQL text that PyDough generates for the context 10 `output` expression.
pydough.to_sql(output)

"SELECT MarketSegment, ProductName, TotalUnitsSold FROM (SELECT MarketSegment, ProductName, TotalUnitsSold, ordering_1 FROM (SELECT MarketSegment, ProductName, COALESCE(agg_0, 0) AS TotalUnitsSold, COALESCE(agg_0, 0) AS ordering_1 FROM (SELECT MarketSegment, ProductName, SUM(quantity) AS agg_0 FROM (SELECT quantity, MarketSegment, ProductName FROM (SELECT quantity, status, mktsegment AS MarketSegment, name AS ProductName FROM (SELECT part_key, quantity, status, mktsegment FROM (SELECT l_partkey AS part_key, l_quantity AS quantity, l_linestatus AS status, l_orderkey AS order_key FROM main.LINEITEM) LEFT JOIN (SELECT _table_alias_0.key AS key, mktsegment FROM (SELECT o_orderkey AS key, o_custkey AS customer_key FROM main.ORDERS) AS _table_alias_0 INNER JOIN (SELECT c_custkey AS key, c_mktsegment AS mktsegment FROM main.CUSTOMER) AS _table_alias_1 ON customer_key = _table_alias_1.key) ON order_key = key) LEFT JOIN (SELECT p_name AS name, p_partkey AS key FROM main.PART) ON part_key = key)

## Business Context 11

This query provides a detailed view of each sale, including which customer made the purchase, which product was purchased, who supplied it, and which country both the customer and the supplier come from. This is useful for analyzing the relationship between suppliers and customers at an international level. Get the first 10 results ordered by quantity sold.

SQL:

In [5]:
# Reference SQL for context 11: one row per line item with L_LINESTATUS = 'O',
# showing the order, the customer (name, segment, nation), the part (name, type),
# the supplier (name, nation), the quantity, the discounted sale value and the
# order/ship dates. Keeps the 10 rows with the highest quantity.
# L_QUANTITY is heavily tied, so (O_ORDERKEY, L_LINENUMBER) is added as a
# tie-breaker; without it, LIMIT 10 returns whichever tied rows the engine
# happens to produce first.
# NOTE(review): assumes LINEITEM has the standard TPC-H L_LINENUMBER column - confirm.
query = '''
SELECT 
    O.O_ORDERKEY AS OrderID,
    C.C_NAME AS CustomerName,
    C.C_MKTSEGMENT AS MarketSegment,
    N_C.N_NAME AS CustomerNation,
    P.P_NAME AS ProductName,
    P.P_TYPE AS ProductCategory,
    S.S_NAME AS SupplierName,
    N_S.N_NAME AS SupplierNation,
    L.L_QUANTITY AS QuantitySold,
    L.L_EXTENDEDPRICE * (1 - L.L_DISCOUNT) AS TotalSaleValue,
    O.O_ORDERDATE AS OrderDate,
    L.L_SHIPDATE AS ShipDate
FROM ORDERS O
JOIN CUSTOMER C ON O.O_CUSTKEY = C.C_CUSTKEY
JOIN NATION N_C ON C.C_NATIONKEY = N_C.N_NATIONKEY
JOIN LINEITEM L ON O.O_ORDERKEY = L.L_ORDERKEY
JOIN PART P ON L.L_PARTKEY = P.P_PARTKEY
JOIN SUPPLIER S ON L.L_SUPPKEY = S.S_SUPPKEY
JOIN NATION N_S ON S.S_NATIONKEY = N_S.N_NATIONKEY
WHERE L.L_LINESTATUS = 'O'
ORDER BY L.L_QUANTITY DESC, O.O_ORDERKEY ASC, L.L_LINENUMBER ASC
LIMIT 10;
'''

# Run the query on the SQLite connection and display the resulting DataFrame.
sql_output = pd.read_sql_query(query, connection)
sql_output

Unnamed: 0,OrderID,CustomerName,MarketSegment,CustomerNation,ProductName,ProductCategory,SupplierName,SupplierNation,QuantitySold,TotalSaleValue,OrderDate,ShipDate
0,199,Customer#000052970,MACHINERY,UNITED KINGDOM,ivory plum lavender saddle smoke,LARGE POLISHED STEEL,Supplier#000009612,BRAZIL,50,54099.43,1996-03-07,1996-06-12
1,260,Customer#000104728,FURNITURE,CHINA,misty steel navajo floral peach,STANDARD BURNISHED NICKEL,Supplier#000005888,EGYPT,50,90343.92,1996-12-10,1997-03-24
2,354,Customer#000138268,MACHINERY,INDONESIA,peru forest tan sienna turquoise,ECONOMY BRUSHED BRASS,Supplier#000008126,FRANCE,50,49823.52,1996-03-14,1996-03-21
3,484,Customer#000054244,HOUSEHOLD,IRAQ,dim blush chartreuse burnished seashell,STANDARD BURNISHED TIN,Supplier#000005870,ETHIOPIA,50,67414.45,1997-01-03,1997-01-24
4,485,Customer#000100561,FURNITURE,MOROCCO,beige green forest peru chocolate,ECONOMY BRUSHED TIN,Supplier#000009524,IRAQ,50,77839.74,1997-03-26,1997-03-28
5,739,Customer#000000307,FURNITURE,JORDAN,navajo burnished lawn orange thistle,LARGE BRUSHED NICKEL,Supplier#000006003,UNITED KINGDOM,50,65355.75,1998-05-31,1998-08-26
6,1061,Customer#000102679,FURNITURE,MOZAMBIQUE,azure hot navajo seashell red,STANDARD POLISHED TIN,Supplier#000000015,INDIA,50,50112.48,1998-05-15,1998-05-25
7,1121,Customer#000028006,MACHINERY,PERU,honeydew mint blue orchid peru,STANDARD ANODIZED STEEL,Supplier#000009423,PERU,50,71506.74,1997-01-13,1997-04-21
8,1153,Customer#000119551,BUILDING,INDIA,dodger midnight smoke moccasin seashell,STANDARD ANODIZED COPPER,Supplier#000001509,ARGENTINA,50,103049.5,1996-04-18,1996-06-27
9,1379,Customer#000064160,AUTOMOBILE,ALGERIA,cream almond peach violet snow,ECONOMY ANODIZED STEEL,Supplier#000009619,VIETNAM,50,52270.65,1998-05-25,1998-08-31


PyDough solution:

In [6]:
%%pydough

output = lines(
    OrderID=order_key,
    CustomerName=order.customer.name,
    MarketSegment=order.customer.mktsegment,
    CustomerNation=order.customer.nation.name,
    ProductName=part.name,
    ProductCategory=part.part_type,
    SupplierName=supplier.name,
    SupplierNation=supplier.nation.name,
    QuantitySold=quantity,
    TotalSaleValue=extended_price * (1 - discount),
    OrderDate=order.order_date,
    ShipDate=ship_date
).WHERE(LIKE(status,"O")).TOP_K(10, QuantitySold.DESC())

pydough_output=pydough.to_df(output)
pydough_output

Unnamed: 0,OrderID,CustomerName,MarketSegment,CustomerNation,ProductName,ProductCategory,SupplierName,SupplierNation,QuantitySold,TotalSaleValue,OrderDate,ShipDate
0,199,Customer#000052970,MACHINERY,UNITED KINGDOM,ivory plum lavender saddle smoke,LARGE POLISHED STEEL,Supplier#000009612,BRAZIL,50,54099.43,1996-03-07,1996-06-12
1,260,Customer#000104728,FURNITURE,CHINA,misty steel navajo floral peach,STANDARD BURNISHED NICKEL,Supplier#000005888,EGYPT,50,90343.92,1996-12-10,1997-03-24
2,354,Customer#000138268,MACHINERY,INDONESIA,peru forest tan sienna turquoise,ECONOMY BRUSHED BRASS,Supplier#000008126,FRANCE,50,49823.52,1996-03-14,1996-03-21
3,484,Customer#000054244,HOUSEHOLD,IRAQ,dim blush chartreuse burnished seashell,STANDARD BURNISHED TIN,Supplier#000005870,ETHIOPIA,50,67414.45,1997-01-03,1997-01-24
4,485,Customer#000100561,FURNITURE,MOROCCO,beige green forest peru chocolate,ECONOMY BRUSHED TIN,Supplier#000009524,IRAQ,50,77839.74,1997-03-26,1997-03-28
5,739,Customer#000000307,FURNITURE,JORDAN,navajo burnished lawn orange thistle,LARGE BRUSHED NICKEL,Supplier#000006003,UNITED KINGDOM,50,65355.75,1998-05-31,1998-08-26
6,1061,Customer#000102679,FURNITURE,MOZAMBIQUE,azure hot navajo seashell red,STANDARD POLISHED TIN,Supplier#000000015,INDIA,50,50112.48,1998-05-15,1998-05-25
7,1121,Customer#000028006,MACHINERY,PERU,honeydew mint blue orchid peru,STANDARD ANODIZED STEEL,Supplier#000009423,PERU,50,71506.74,1997-01-13,1997-04-21
8,1153,Customer#000119551,BUILDING,INDIA,dodger midnight smoke moccasin seashell,STANDARD ANODIZED COPPER,Supplier#000001509,ARGENTINA,50,103049.5,1996-04-18,1996-06-27
9,1379,Customer#000064160,AUTOMOBILE,ALGERIA,cream almond peach violet snow,ECONOMY ANODIZED STEEL,Supplier#000009619,VIETNAM,50,52270.65,1998-05-25,1998-08-31


Compare results in SQL and PyDough:

In [7]:
# Compare the PyDough result with the SQL reference result; the cell
# displays the value returned by the comparison helper.
dfcompare.compare_df(
    pydough_output,
    sql_output,
    query_category="",
    question="",
)

True

PyDough to SQL:

In [8]:
%%pydough

# Show the SQL text that PyDough generates for the `output` expression
# defined in the PyDough solution cell above.
pydough.to_sql(output)

"SELECT OrderID, CustomerName, MarketSegment, CustomerNation, ProductName, ProductCategory, SupplierName, SupplierNation, QuantitySold, TotalSaleValue, OrderDate, ShipDate FROM (SELECT CustomerName, CustomerNation, MarketSegment, OrderDate, OrderID, ProductCategory, ProductName, QuantitySold, ShipDate, SupplierName, SupplierNation, TotalSaleValue, ordering_0 FROM (SELECT CustomerName, CustomerNation, MarketSegment, OrderDate, OrderID, ProductCategory, ProductName, QuantitySold, ShipDate, SupplierName, SupplierNation, TotalSaleValue, QuantitySold AS ordering_0 FROM (SELECT status, name AS CustomerName, name_7 AS CustomerNation, mktsegment AS MarketSegment, order_date AS OrderDate, order_key AS OrderID, part_type AS ProductCategory, name_9 AS ProductName, quantity AS QuantitySold, ship_date AS ShipDate, name_13 AS SupplierName, name_18 AS SupplierNation, extended_price * (1 - discount) AS TotalSaleValue FROM (SELECT discount, status, quantity, ship_date, extended_price, order_key, mktseg

Structure of PyDough graph: TPCH

  customers
  ├── acctbal
  ├── address
  ├── comment
  ├── key
  ├── mktsegment
  ├── name
  ├── nation_key
  ├── phone
  ├── nation [one member of nations] (reverse of nations.customers)
  └── orders [multiple orders] (reverse of orders.customer)

  lines
  ├── comment
  ├── commit_date
  ├── discount
  ├── extended_price
  ├── line_number
  ├── order_key
  ├── part_key
  ├── quantity
  ├── receipt_date
  ├── return_flag
  ├── ship_date
  ├── ship_instruct
  ├── ship_mode
  ├── status
  ├── supplier_key
  ├── tax
  ├── order [one member of orders] (reverse of orders.lines)
  ├── part [one member of parts] (reverse of parts.lines)
  ├── part_and_supplier [one member of supply_records] (reverse of supply_records.lines)
  └── supplier [one member of suppliers] (reverse of suppliers.lines)

  nations
  ├── comment
  ├── key
  ├── name
  ├── region_key
  ├── customers [multiple customers] (reverse of customers.nation)
  ├── region [one member of regions] (reverse of regions.nations)
  └── suppliers [multiple suppliers] (reverse of suppliers.nation)

  orders
  ├── clerk
  ├── comment
  ├── customer_key
  ├── key
  ├── order_date
  ├── order_priority
  ├── order_status
  ├── ship_priority
  ├── total_price
  ├── customer [one member of customers] (reverse of customers.orders)
  └── lines [multiple lines] (reverse of lines.order)

  parts
  ├── brand
  ├── comment
  ├── container
  ├── key
  ├── manufacturer
  ├── name
  ├── part_type
  ├── retail_price
  ├── size
  ├── lines [multiple lines] (reverse of lines.part)
  └── supply_records [multiple supply_records] (reverse of supply_records.part)

  regions
  ├── comment
  ├── key
  ├── name
  └── nations [multiple nations] (reverse of nations.region)

  suppliers
  ├── account_balance
  ├── address
  ├── comment
  ├── key
  ├── name
  ├── nation_key
  ├── phone
  ├── lines [multiple lines] (reverse of lines.supplier)
  ├── nation [one member of nations] (reverse of nations.suppliers)
  └── supply_records [multiple supply_records] (reverse of supply_records.supplier)

  supply_records
  ├── availqty
  ├── comment
  ├── part_key
  ├── supplier_key
  ├── supplycost
  ├── lines [multiple lines] (reverse of lines.part_and_supplier)
  ├── part [one member of parts] (reverse of parts.supply_records)
  └── supplier [one member of suppliers] (reverse of suppliers.supply_records)