# PyDough DSL Examples

In [2]:
# Setup steps for loading in PyDough, the metadata, and the database context
# NOTE: both paths below are relative, so they presumably resolve against the
# directory the notebook kernel was started in -- confirm if run from elsewhere.
import pydough
# Load the metadata graph from the demo JSON file; "TPCH" is presumably the name
# of the graph to select inside that file (TODO confirm).
pydough.active_session.load_metadata_graph("../metadata/tpch_demo_graph.json", "TPCH")
# Attach the active session to the SQLite database file holding the TPC-H data.
pydough.active_session.connect_database("sqlite", database="../../tpch.db")
# Load the PyDough Jupyter extension; the %%pydough cell magic used by the query
# cells below is presumably registered by it.
%load_ext pydough.jupyter_extensions

The pydough.jupyter_extensions extension is already loaded. To reload it, use:
  %reload_ext pydough.jupyter_extensions


## Question: Top Nations

**Q:** What are the 3 nations whose customers placed the most orders in 1994? List each nation, the region it belongs to, and its number of orders.

In [12]:
%%pydough

selected_orders = customers.orders.WHERE(YEAR(order_date) == 1994)
top_nations = (
   nations
  .CALCULATE(
        nation_name=name,
        region_name=region.name,
        n_orders=COUNT(selected_orders)
  )
  .TOP_K(3, by=n_orders.DESC())
)
print(pydough.to_sql(top_nations))

WITH _s5 AS (
  SELECT
    COUNT() AS agg_0,
    customer.c_nationkey AS nation_key
  FROM main.customer AS customer
  JOIN main.orders AS orders
    ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1994
    AND customer.c_custkey = orders.o_custkey
  GROUP BY
    customer.c_nationkey
)
SELECT
  nation.n_name AS nation_name,
  region.r_name AS region_name,
  COALESCE(_s5.agg_0, 0) AS n_orders
FROM main.nation AS nation
LEFT JOIN main.region AS region
  ON nation.n_regionkey = region.r_regionkey
LEFT JOIN _s5 AS _s5
  ON _s5.nation_key = nation.n_nationkey
ORDER BY
  n_orders DESC
LIMIT 3


In [7]:
# Run the top_nations query and display the resulting table as the cell's output.
pydough.to_df(top_nations)

Unnamed: 0,nation_name,region_name,n_orders
0,FRANCE,EUROPE,61600
1,RUSSIA,EUROPE,61495
2,INDONESIA,ASIA,61377


## Question: TPC-H Query #5

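**Q:** For each nation in the Asia region, how much line-item revenue did customers in that nation generate in 1994 from suppliers located in the same nation? An order counts if it was placed in 1994, and a line item's revenue is its extended price multiplied by (1 − discount).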


In [14]:
%%pydough
selected_lines = (
    customers
    .orders
    .WHERE(YEAR(order_date) == 1994)
 	.lines
    .WHERE(supplier.nation.name == nation_name)
    .CALCULATE(value=extended_price * (1 - discount))
)
tpch_q5 = (
    nations
    .CALCULATE(nation_name=name)
    .WHERE(region.name == "ASIA")
    .WHERE(HAS(selected_lines)) # <-- minor optimization
    .CALCULATE(name, revenue=SUM(selected_lines.value))
)
print(pydough.to_sql(tpch_q5))

WITH _t0 AS (
  SELECT
    MAX(nation.n_name) AS agg_3,
    SUM(lineitem.l_extendedprice * (
      1 - lineitem.l_discount
    )) AS agg_0
  FROM main.nation AS nation
  JOIN main.region AS region
    ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA'
  JOIN main.customer AS customer
    ON customer.c_nationkey = nation.n_nationkey
  JOIN main.orders AS orders
    ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1994
    AND customer.c_custkey = orders.o_custkey
  JOIN main.lineitem AS lineitem
    ON lineitem.l_orderkey = orders.o_orderkey
  LEFT JOIN main.supplier AS supplier
    ON lineitem.l_suppkey = supplier.s_suppkey
  JOIN main.nation AS nation_2
    ON nation_2.n_nationkey = supplier.s_nationkey
  WHERE
    nation.n_name = nation_2.n_name
  GROUP BY
    nation.n_nationkey
)
SELECT
  agg_3 AS name,
  COALESCE(agg_0, 0) AS revenue
FROM _t0


In [15]:
# Run the tpch_q5 query and display the resulting table as the cell's output.
pydough.to_df(tpch_q5)

Unnamed: 0,name,revenue
0,INDIA,52035510.0
1,INDONESIA,55502040.0
2,JAPAN,45410180.0
3,CHINA,53724490.0
4,VIETNAM,55295090.0
