diff --git a/benchmarks/expected-plans/q1.txt b/benchmarks/expected-plans/q1.txt index 1a9d4b7c98cc..92864712a020 100644 --- a/benchmarks/expected-plans/q1.txt +++ b/benchmarks/expected-plans/q1.txt @@ -1,6 +1,6 @@ Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST Projection: lineitem.l_returnflag, lineitem.l_linestatus, SUM(lineitem.l_quantity) AS sum_qty, SUM(lineitem.l_extendedprice) AS sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, AVG(lineitem.l_quantity) AS avg_qty, AVG(lineitem.l_extendedprice) AS avg_price, AVG(lineitem.l_discount) AS avg_disc, COUNT(UInt8(1)) AS count_order - Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(CAST(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount AS Decimal128(38, 6)) * CAST(Decimal128(Some(100),23,2) + CAST(lineitem.l_tax AS Decimal128(23, 2)) AS Decimal128(38, 6))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))]] - Projection: CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus + Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)Float64(1) - lineitem.l_discountlineitem.l_discountFloat64(1)lineitem.l_extendedprice AS lineitem.l_extendedprice * Float64(1) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)Float64(1) - lineitem.l_discountlineitem.l_discountFloat64(1)lineitem.l_extendedprice AS lineitem.l_extendedprice * Float64(1) - lineitem.l_discount * (Float64(1) + lineitem.l_tax)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))]] + Projection: lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount) AS lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)Float64(1) - lineitem.l_discountlineitem.l_discountFloat64(1)lineitem.l_extendedprice, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus Filter: lineitem.l_shipdate <= Date32("10471") - TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate] \ No newline at end of file + TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate] diff --git a/benchmarks/expected-plans/q10.txt b/benchmarks/expected-plans/q10.txt index 25189feb61e1..8f83bfc95258 100644 --- a/benchmarks/expected-plans/q10.txt +++ b/benchmarks/expected-plans/q10.txt @@ -1,6 +1,6 @@ Sort: revenue DESC NULLS FIRST Projection: customer.c_custkey, customer.c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment - Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Inner Join: customer.c_nationkey = nation.n_nationkey Inner Join: orders.o_orderkey = lineitem.l_orderkey Inner Join: customer.c_custkey = orders.o_custkey @@ -9,4 +9,4 @@ Sort: revenue DESC NULLS FIRST TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate] Filter: lineitem.l_returnflag = Utf8("R") TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag] - TableScan: nation projection=[n_nationkey, n_name] \ No newline at end of file + TableScan: nation projection=[n_nationkey, n_name] diff --git a/benchmarks/expected-plans/q11.txt b/benchmarks/expected-plans/q11.txt index b408340a32a0..675bf63f639a 100644 --- a/benchmarks/expected-plans/q11.txt +++ b/benchmarks/expected-plans/q11.txt @@ -1,16 +1,16 @@ Sort: value DESC NULLS FIRST Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value - Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 17)) > __sq_1.__value + Filter: SUM(partsupp.ps_supplycost * partsupp.ps_availqty) > __sq_1.__value CrossJoin: - Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]] + Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Float64))]] Inner Join: supplier.s_nationkey = nation.n_nationkey Inner Join: partsupp.ps_suppkey = supplier.s_suppkey TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] TableScan: supplier projection=[s_suppkey, s_nationkey] Filter: nation.n_name = Utf8("GERMANY") TableScan: nation projection=[n_nationkey, n_name] - Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 17)) * Decimal128(Some(10000000000000),38,17) AS __value, alias=__sq_1 - Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]] + Projection: SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) AS __value, alias=__sq_1 + Aggregate: groupBy=[[]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Float64))]] Inner Join: supplier.s_nationkey = nation.n_nationkey Inner Join: partsupp.ps_suppkey = supplier.s_suppkey TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] diff --git a/benchmarks/expected-plans/q14.txt b/benchmarks/expected-plans/q14.txt index c410363a5821..87c02e27fb64 100644 --- a/benchmarks/expected-plans/q14.txt +++ b/benchmarks/expected-plans/q14.txt @@ -1,6 +1,6 @@ -Projection: CAST(Decimal128(Some(1000000000000000000000),38,19) * CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Decimal128(38, 19)) AS Decimal128(38, 38)) / CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Decimal128(38, 38)) AS promo_revenue - Aggregate: groupBy=[[]], aggr=[[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] - Projection: CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice, part.p_type +Projection: Float64(100) * SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) / SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS promo_revenue + Aggregate: groupBy=[[]], aggr=[[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)Float64(1) - lineitem.l_discountlineitem.l_discountFloat64(1)lineitem.l_extendedprice AS lineitem.l_extendedprice * Float64(1) - lineitem.l_discount ELSE Float64(0) END) AS SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)Float64(1) - lineitem.l_discountlineitem.l_discountFloat64(1)lineitem.l_extendedprice AS lineitem.l_extendedprice * Float64(1) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount) AS lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)Float64(1) - lineitem.l_discountlineitem.l_discountFloat64(1)lineitem.l_extendedprice, part.p_type Inner Join: lineitem.l_partkey = part.p_partkey Filter: lineitem.l_shipdate >= Date32("9374") AND lineitem.l_shipdate < Date32("9404") TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate] diff --git a/benchmarks/expected-plans/q15.txt b/benchmarks/expected-plans/q15.txt index e78f8e0d9887..cfb403e4fb8a 100644 --- a/benchmarks/expected-plans/q15.txt +++ b/benchmarks/expected-plans/q15.txt @@ -7,7 +7,7 @@ Sort: supplier.s_suppkey ASC NULLS LAST Projection: supplier_no, total_revenue, alias=revenue0 Projection: lineitem.l_suppkey AS supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue Projection: lineitem.l_suppkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) - Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] Projection: MAX(revenue0.total_revenue) AS __value, alias=__sq_1 @@ -15,7 +15,7 @@ Sort: supplier.s_suppkey ASC NULLS LAST Projection: total_revenue, alias=revenue0 Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) - Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] EmptyRelation \ No newline at end of file diff --git a/benchmarks/expected-plans/q18.txt b/benchmarks/expected-plans/q18.txt index ce0b20c201e3..0d412e3eadb2 100644 --- a/benchmarks/expected-plans/q18.txt +++ b/benchmarks/expected-plans/q18.txt @@ -8,6 +8,6 @@ Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] TableScan: lineitem projection=[l_orderkey, l_quantity] Projection: lineitem.l_orderkey AS l_orderkey, alias=__sq_1 - Filter: SUM(lineitem.l_quantity) > Decimal128(Some(30000),25,2) + Filter: SUM(lineitem.l_quantity) > Float64(300) Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[SUM(lineitem.l_quantity)]] - TableScan: lineitem projection=[l_orderkey, l_quantity] \ No newline at end of file + TableScan: lineitem projection=[l_orderkey, l_quantity] diff --git a/benchmarks/expected-plans/q19.txt b/benchmarks/expected-plans/q19.txt index 552d743917dc..0a1ddea368ff 100644 --- a/benchmarks/expected-plans/q19.txt +++ b/benchmarks/expected-plans/q19.txt @@ -1,9 +1,9 @@ Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue - Aggregate: groupBy=[[]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Projection: lineitem.l_extendedprice, lineitem.l_discount - Filter: part.p_brand = Utf8("Brand#12") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2) AND part.p_size <= Int32(15) + Filter: part.p_brand = Utf8("Brand#12") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Float64(1) AND lineitem.l_quantity <= Float64(11) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Float64(10) AND lineitem.l_quantity <= Float64(20) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Float64(20) AND lineitem.l_quantity <= Float64(30) AND part.p_size <= Int32(15) Inner Join: lineitem.l_partkey = part.p_partkey - Filter: (lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)) AND lineitem.l_shipmode IN ([Utf8("AIR"), Utf8("AIR REG")]) AND lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") + Filter: (lineitem.l_quantity >= Float64(1) AND lineitem.l_quantity <= Float64(11) OR lineitem.l_quantity >= Float64(10) AND lineitem.l_quantity <= Float64(20) OR lineitem.l_quantity >= Float64(20) AND lineitem.l_quantity <= Float64(30)) AND lineitem.l_shipmode IN ([Utf8("AIR"), Utf8("AIR REG")]) AND lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode] Filter: (part.p_brand = Utf8("Brand#12") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)) AND part.p_size >= Int32(1) TableScan: part projection=[p_partkey, p_brand, p_size, p_container] diff --git a/benchmarks/expected-plans/q20.txt b/benchmarks/expected-plans/q20.txt index e5398325e966..58649e7b851e 100644 --- a/benchmarks/expected-plans/q20.txt +++ b/benchmarks/expected-plans/q20.txt @@ -6,14 +6,14 @@ Sort: supplier.s_name ASC NULLS LAST Filter: nation.n_name = Utf8("CANADA") TableScan: nation projection=[n_nationkey, n_name] Projection: partsupp.ps_suppkey AS ps_suppkey, alias=__sq_2 - Filter: CAST(partsupp.ps_availqty AS Decimal128(38, 17)) > __sq_3.__value + Filter: CAST(partsupp.ps_availqty AS Float64) > __sq_3.__value Inner Join: partsupp.ps_partkey = __sq_3.l_partkey, partsupp.ps_suppkey = __sq_3.l_suppkey LeftSemi Join: partsupp.ps_partkey = __sq_1.p_partkey TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] Projection: part.p_partkey AS p_partkey, alias=__sq_1 Filter: part.p_name LIKE Utf8("forest%") TableScan: part projection=[p_partkey, p_name] - Projection: lineitem.l_partkey, lineitem.l_suppkey, Decimal128(Some(50000000000000000),38,17) * CAST(SUM(lineitem.l_quantity) AS Decimal128(38, 17)) AS __value, alias=__sq_3 + Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * SUM(lineitem.l_quantity) AS __value, alias=__sq_3 Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]] Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate] \ No newline at end of file diff --git a/benchmarks/expected-plans/q22.txt b/benchmarks/expected-plans/q22.txt index b56c8ff96fc7..2308db227806 100644 --- a/benchmarks/expected-plans/q22.txt +++ b/benchmarks/expected-plans/q22.txt @@ -3,7 +3,7 @@ Sort: custsale.cntrycode ASC NULLS LAST Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]] Projection: custsale.cntrycode, custsale.c_acctbal, alias=custsale Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal, alias=custsale - Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __sq_1.__value + Filter: customer.c_acctbal > __sq_1.__value CrossJoin: LeftAnti Join: customer.c_custkey = orders.o_custkey Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) @@ -11,5 +11,5 @@ Sort: custsale.cntrycode ASC NULLS LAST TableScan: orders projection=[o_custkey] Projection: AVG(customer.c_acctbal) AS __value, alias=__sq_1 Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]] - Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) - TableScan: customer projection=[c_phone, c_acctbal] \ No newline at end of file + Filter: customer.c_acctbal > Float64(0) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) + TableScan: customer projection=[c_phone, c_acctbal] diff --git a/benchmarks/expected-plans/q3.txt b/benchmarks/expected-plans/q3.txt index 7cd69b92a556..3748dc0bc755 100644 --- a/benchmarks/expected-plans/q3.txt +++ b/benchmarks/expected-plans/q3.txt @@ -1,6 +1,6 @@ Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST Projection: lineitem.l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority - Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Inner Join: orders.o_orderkey = lineitem.l_orderkey Inner Join: customer.c_custkey = orders.o_custkey Filter: customer.c_mktsegment = Utf8("BUILDING") @@ -8,4 +8,4 @@ Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST Filter: orders.o_orderdate < Date32("9204") TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority] Filter: lineitem.l_shipdate > Date32("9204") - TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate] \ No newline at end of file + TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate] diff --git a/benchmarks/expected-plans/q5.txt b/benchmarks/expected-plans/q5.txt index 0c2e0c131805..64804f8044c7 100644 --- a/benchmarks/expected-plans/q5.txt +++ b/benchmarks/expected-plans/q5.txt @@ -1,6 +1,6 @@ Sort: revenue DESC NULLS FIRST Projection: nation.n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue - Aggregate: groupBy=[[nation.n_name]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Aggregate: groupBy=[[nation.n_name]], aggr=[[SUM(lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Inner Join: nation.n_regionkey = region.r_regionkey Inner Join: supplier.s_nationkey = nation.n_nationkey Inner Join: lineitem.l_suppkey = supplier.s_suppkey, customer.c_nationkey = supplier.s_nationkey @@ -13,4 +13,4 @@ Sort: revenue DESC NULLS FIRST TableScan: supplier projection=[s_suppkey, s_nationkey] TableScan: nation projection=[n_nationkey, n_name, n_regionkey] Filter: region.r_name = Utf8("ASIA") - TableScan: region projection=[r_regionkey, r_name] \ No newline at end of file + TableScan: region projection=[r_regionkey, r_name] diff --git a/benchmarks/expected-plans/q6.txt b/benchmarks/expected-plans/q6.txt index efc17a2724b8..9e2705e0435c 100644 --- a/benchmarks/expected-plans/q6.txt +++ b/benchmarks/expected-plans/q6.txt @@ -1,6 +1,4 @@ Projection: SUM(lineitem.l_extendedprice * lineitem.l_discount) AS revenue Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice * lineitem.l_discount)]] - Projection: lineitem.l_extendedprice, lineitem.l_discount - Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") AND CAST(lineitem.l_discount AS Decimal128(30, 15))lineitem.l_discount >= Decimal128(Some(49999999999999),30,15) AND CAST(lineitem.l_discount AS Decimal128(30, 15))lineitem.l_discount <= Decimal128(Some(69999999999999),30,15) AND lineitem.l_quantity < Decimal128(Some(2400),15,2) - Projection: CAST(lineitem.l_discount AS Decimal128(30, 15)) AS CAST(lineitem.l_discount AS Decimal128(30, 15))lineitem.l_discount, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate - TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate] \ No newline at end of file + Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") AND lineitem.l_discount >= Float64(0.049999999999999996) AND lineitem.l_discount <= Float64(0.06999999999999999) AND lineitem.l_quantity < Float64(24) + TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate] diff --git a/benchmarks/expected-plans/q7.txt b/benchmarks/expected-plans/q7.txt index fad02c09881c..1a3e9289becf 100644 --- a/benchmarks/expected-plans/q7.txt +++ b/benchmarks/expected-plans/q7.txt @@ -2,7 +2,7 @@ Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, SUM(shipping.volume) AS revenue Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[SUM(shipping.volume)]] Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, shipping.volume, alias=shipping - Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, datepart(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS volume, alias=shipping + Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, datepart(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount) AS volume, alias=shipping Filter: (n1.n_name = Utf8("FRANCE") OR n2.n_name = Utf8("FRANCE")) AND (n2.n_name = Utf8("GERMANY") OR n1.n_name = Utf8("GERMANY")) Inner Join: customer.c_nationkey = n2.n_nationkey Inner Join: supplier.s_nationkey = n1.n_nationkey diff --git a/benchmarks/expected-plans/q8.txt b/benchmarks/expected-plans/q8.txt index 20452d4bde93..1d01113550ab 100644 --- a/benchmarks/expected-plans/q8.txt +++ b/benchmarks/expected-plans/q8.txt @@ -1,8 +1,8 @@ Sort: all_nations.o_year ASC NULLS LAST Projection: all_nations.o_year, SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END) / SUM(all_nations.volume) AS mkt_share - Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)]] + Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Float64(0) END) AS SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)]] Projection: all_nations.o_year, all_nations.volume, all_nations.nation, alias=all_nations - Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS volume, n2.n_name AS nation, alias=all_nations + Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount) AS volume, n2.n_name AS nation, alias=all_nations Inner Join: n1.n_regionkey = region.r_regionkey Inner Join: supplier.s_nationkey = n2.n_nationkey Inner Join: customer.c_nationkey = n1.n_nationkey @@ -22,4 +22,4 @@ Sort: all_nations.o_year ASC NULLS LAST SubqueryAlias: n2 TableScan: nation projection=[n_nationkey, n_name] Filter: region.r_name = Utf8("AMERICA") - TableScan: region projection=[r_regionkey, r_name] \ No newline at end of file + TableScan: region projection=[r_regionkey, r_name] diff --git a/benchmarks/expected-plans/q9.txt b/benchmarks/expected-plans/q9.txt index 954c28a35bb1..23ce1cee93ff 100644 --- a/benchmarks/expected-plans/q9.txt +++ b/benchmarks/expected-plans/q9.txt @@ -2,7 +2,7 @@ Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST Projection: profit.nation, profit.o_year, SUM(profit.amount) AS sum_profit Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[SUM(profit.amount)]] Projection: profit.nation, profit.o_year, profit.amount, alias=profit - Projection: nation.n_name AS nation, datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) - CAST(partsupp.ps_supplycost * lineitem.l_quantity AS Decimal128(38, 4)) AS amount, alias=profit + Projection: nation.n_name AS nation, datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Float64(1) - lineitem.l_discount) - partsupp.ps_supplycost * lineitem.l_quantity AS amount, alias=profit Inner Join: supplier.s_nationkey = nation.n_nationkey Inner Join: lineitem.l_orderkey = orders.o_orderkey Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey @@ -14,4 +14,4 @@ Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST TableScan: supplier projection=[s_suppkey, s_nationkey] TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] TableScan: orders projection=[o_orderkey, o_orderdate] - TableScan: nation projection=[n_nationkey, n_name] \ No newline at end of file + TableScan: nation projection=[n_nationkey, n_name] diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index b9afe4d6a17e..948c178169c8 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -668,7 +668,6 @@ mod tests { } #[cfg(feature = "ci")] - #[ignore] // TODO produces correct result but has rounding error #[tokio::test] async fn verify_q9() -> Result<()> { verify_query(9).await @@ -681,7 +680,6 @@ mod tests { } #[cfg(feature = "ci")] - #[ignore] // https://github.com/apache/arrow-datafusion/issues/4023 #[tokio::test] async fn verify_q11() -> Result<()> { verify_query(11).await @@ -700,7 +698,6 @@ mod tests { } #[cfg(feature = "ci")] - #[ignore] // https://github.com/apache/arrow-datafusion/issues/4025 #[tokio::test] async fn verify_q14() -> Result<()> { verify_query(14).await @@ -719,7 +716,6 @@ mod tests { } #[cfg(feature = "ci")] - #[ignore] // https://github.com/apache/arrow-datafusion/issues/4026 #[tokio::test] async fn verify_q17() -> Result<()> { verify_query(17).await @@ -896,6 +892,7 @@ mod tests { #[cfg(feature = "ci")] async fn verify_query(n: usize) -> Result<()> { use datafusion::arrow::datatypes::{DataType, Field}; + use datafusion::common::ScalarValue; use datafusion::logical_expr::expr::Cast; use datafusion::logical_expr::Expr; use std::env; @@ -990,7 +987,12 @@ mod tests { } data_type => data_type == e.data_type(), }); - assert!(schema_matches); + if !schema_matches { + panic!( + "expected_fields: {:?}\ntransformed_fields: {:?}", + expected_fields, transformed_fields + ) + } // convert both datasets to Vec> for simple comparison let expected_vec = result_vec(&expected); @@ -1000,8 +1002,26 @@ mod tests { assert_eq!(expected_vec.len(), actual_vec.len()); // compare each row. this works as all TPC-H queries have deterministically ordered results - for i in 0..actual_vec.len() { - assert_eq!(expected_vec[i], actual_vec[i]); + for i in 0..expected_vec.len() { + let expected_row = &expected_vec[i]; + let actual_row = &actual_vec[i]; + assert_eq!(expected_row.len(), actual_row.len()); + + for j in 0..expected.len() { + match (&expected_row[j], &actual_row[j]) { + (ScalarValue::Float64(Some(l)), ScalarValue::Float64(Some(r))) => { + // allow for rounding errors until we move to decimal types + let tolerance = 1.0; + if (l - r).abs() > tolerance { + panic!( + "Expected: {}; Actual: {}; Tolerance: {}", + l, r, tolerance + ) + } + } + (l, r) => assert_eq!(format!("{:?}", l), format!("{:?}", r)), + } + } } Ok(()) diff --git a/benchmarks/src/tpch.rs b/benchmarks/src/tpch.rs index 46c53edf120e..bd20647c42c0 100644 --- a/benchmarks/src/tpch.rs +++ b/benchmarks/src/tpch.rs @@ -15,7 +15,10 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::ArrayRef; +use arrow::array::{ + Array, ArrayRef, Date32Array, Decimal128Array, Float64Array, Int32Array, Int64Array, + StringArray, +}; use arrow::record_batch::RecordBatch; use std::fs; use std::ops::{Div, Mul}; @@ -23,7 +26,7 @@ use std::path::Path; use std::sync::Arc; use std::time::Instant; -use datafusion::arrow::util::display::array_value_to_string; +use datafusion::common::ScalarValue; use datafusion::logical_expr::Cast; use datafusion::prelude::*; use datafusion::{ @@ -38,12 +41,17 @@ pub const TPCH_TABLES: &[&str] = &[ "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region", ]; +fn decimal_type(_p: u8, _s: u8) -> DataType { + // TODO use decimal_type(p, s) once Decimal is fully supported + // https://github.com/apache/arrow-datafusion/issues/3523 + DataType::Float64 +} + /// Get the schema for the benchmarks derived from TPC-H pub fn get_tpch_table_schema(table: &str) -> Schema { // note that the schema intentionally uses signed integers so that any generated Parquet // files can also be used to benchmark tools that only support signed integers, such as // Apache Spark - match table { "part" => Schema::new(vec![ Field::new("p_partkey", DataType::Int64, false), @@ -53,7 +61,7 @@ pub fn get_tpch_table_schema(table: &str) -> Schema { Field::new("p_type", DataType::Utf8, false), Field::new("p_size", DataType::Int32, false), Field::new("p_container", DataType::Utf8, false), - Field::new("p_retailprice", DataType::Decimal128(15, 2), false), + Field::new("p_retailprice", decimal_type(15, 2), false), Field::new("p_comment", DataType::Utf8, false), ]), @@ -63,7 +71,7 @@ pub fn get_tpch_table_schema(table: &str) -> Schema { Field::new("s_address", DataType::Utf8, false), Field::new("s_nationkey", DataType::Int64, false), Field::new("s_phone", DataType::Utf8, false), - Field::new("s_acctbal", DataType::Decimal128(15, 2), false), + Field::new("s_acctbal", decimal_type(15, 2), false), Field::new("s_comment", DataType::Utf8, false), ]), @@ -71,7 +79,7 @@ pub fn get_tpch_table_schema(table: &str) -> Schema { Field::new("ps_partkey", DataType::Int64, false), Field::new("ps_suppkey", DataType::Int64, false), Field::new("ps_availqty", DataType::Int32, false), - Field::new("ps_supplycost", DataType::Decimal128(15, 2), false), + Field::new("ps_supplycost", decimal_type(15, 2), false), Field::new("ps_comment", DataType::Utf8, false), ]), @@ -81,7 +89,7 @@ pub fn get_tpch_table_schema(table: &str) -> Schema { Field::new("c_address", DataType::Utf8, false), Field::new("c_nationkey", DataType::Int64, false), Field::new("c_phone", DataType::Utf8, false), - Field::new("c_acctbal", DataType::Decimal128(15, 2), false), + Field::new("c_acctbal", decimal_type(15, 2), false), Field::new("c_mktsegment", DataType::Utf8, false), Field::new("c_comment", DataType::Utf8, false), ]), @@ -90,7 +98,7 @@ pub fn get_tpch_table_schema(table: &str) -> Schema { Field::new("o_orderkey", DataType::Int64, false), Field::new("o_custkey", DataType::Int64, false), Field::new("o_orderstatus", DataType::Utf8, false), - Field::new("o_totalprice", DataType::Decimal128(15, 2), false), + Field::new("o_totalprice", decimal_type(15, 2), false), Field::new("o_orderdate", DataType::Date32, false), Field::new("o_orderpriority", DataType::Utf8, false), Field::new("o_clerk", DataType::Utf8, false), @@ -103,10 +111,10 @@ pub fn get_tpch_table_schema(table: &str) -> Schema { Field::new("l_partkey", DataType::Int64, false), Field::new("l_suppkey", DataType::Int64, false), Field::new("l_linenumber", DataType::Int32, false), - Field::new("l_quantity", DataType::Decimal128(15, 2), false), - Field::new("l_extendedprice", DataType::Decimal128(15, 2), false), - Field::new("l_discount", DataType::Decimal128(15, 2), false), - Field::new("l_tax", DataType::Decimal128(15, 2), false), + Field::new("l_quantity", decimal_type(15, 2), false), + Field::new("l_extendedprice", decimal_type(15, 2), false), + Field::new("l_discount", decimal_type(15, 2), false), + Field::new("l_tax", decimal_type(15, 2), false), Field::new("l_returnflag", DataType::Utf8, false), Field::new("l_linestatus", DataType::Utf8, false), Field::new("l_shipdate", DataType::Date32, false), @@ -140,18 +148,18 @@ pub fn get_answer_schema(n: usize) -> Schema { 1 => Schema::new(vec![ Field::new("l_returnflag", DataType::Utf8, true), Field::new("l_linestatus", DataType::Utf8, true), - Field::new("sum_qty", DataType::Decimal128(15, 2), true), - Field::new("sum_base_price", DataType::Decimal128(15, 2), true), - Field::new("sum_disc_price", DataType::Decimal128(15, 2), true), - Field::new("sum_charge", DataType::Decimal128(15, 2), true), - Field::new("avg_qty", DataType::Decimal128(15, 2), true), - Field::new("avg_price", DataType::Decimal128(15, 2), true), - Field::new("avg_disc", DataType::Decimal128(15, 2), true), + Field::new("sum_qty", decimal_type(15, 2), true), + Field::new("sum_base_price", decimal_type(15, 2), true), + Field::new("sum_disc_price", decimal_type(15, 2), true), + Field::new("sum_charge", decimal_type(15, 2), true), + Field::new("avg_qty", decimal_type(15, 2), true), + Field::new("avg_price", decimal_type(15, 2), true), + Field::new("avg_disc", decimal_type(15, 2), true), Field::new("count_order", DataType::Int64, true), ]), 2 => Schema::new(vec![ - Field::new("s_acctbal", DataType::Decimal128(15, 2), true), + Field::new("s_acctbal", decimal_type(15, 2), true), Field::new("s_name", DataType::Utf8, true), Field::new("n_name", DataType::Utf8, true), Field::new("p_partkey", DataType::Int64, true), @@ -163,7 +171,7 @@ pub fn get_answer_schema(n: usize) -> Schema { 3 => Schema::new(vec![ Field::new("l_orderkey", DataType::Int64, true), - Field::new("revenue", DataType::Decimal128(15, 2), true), + Field::new("revenue", decimal_type(15, 2), true), Field::new("o_orderdate", DataType::Date32, true), Field::new("o_shippriority", DataType::Int32, true), ]), @@ -175,38 +183,34 @@ pub fn get_answer_schema(n: usize) -> Schema { 5 => Schema::new(vec![ Field::new("n_name", DataType::Utf8, true), - Field::new("revenue", DataType::Decimal128(15, 2), true), + Field::new("revenue", decimal_type(15, 2), true), ]), - 6 => Schema::new(vec![Field::new( - "revenue", - DataType::Decimal128(15, 2), - true, - )]), + 6 => Schema::new(vec![Field::new("revenue", decimal_type(15, 2), true)]), 7 => Schema::new(vec![ Field::new("supp_nation", DataType::Utf8, true), Field::new("cust_nation", DataType::Utf8, true), Field::new("l_year", DataType::Int32, true), - Field::new("revenue", DataType::Decimal128(15, 2), true), + Field::new("revenue", decimal_type(15, 2), true), ]), 8 => Schema::new(vec![ Field::new("o_year", DataType::Int32, true), - Field::new("mkt_share", DataType::Decimal128(15, 2), true), + Field::new("mkt_share", decimal_type(15, 2), true), ]), 9 => Schema::new(vec![ Field::new("nation", DataType::Utf8, true), Field::new("o_year", DataType::Int32, true), - Field::new("sum_profit", DataType::Decimal128(15, 2), true), + Field::new("sum_profit", decimal_type(15, 2), true), ]), 10 => Schema::new(vec![ Field::new("c_custkey", DataType::Int64, true), Field::new("c_name", DataType::Utf8, true), - Field::new("revenue", DataType::Decimal128(15, 2), true), - Field::new("c_acctbal", DataType::Decimal128(15, 2), true), + Field::new("revenue", decimal_type(15, 2), true), + Field::new("c_acctbal", decimal_type(15, 2), true), Field::new("n_name", DataType::Utf8, true), Field::new("c_address", DataType::Utf8, true), Field::new("c_phone", DataType::Utf8, true), @@ -215,7 +219,7 @@ pub fn get_answer_schema(n: usize) -> Schema { 11 => Schema::new(vec![ Field::new("ps_partkey", DataType::Int64, true), - Field::new("value", DataType::Decimal128(15, 2), true), + Field::new("value", decimal_type(15, 2), true), ]), 12 => Schema::new(vec![ @@ -229,18 +233,14 @@ pub fn get_answer_schema(n: usize) -> Schema { Field::new("custdist", DataType::Int64, true), ]), - 14 => Schema::new(vec![Field::new( - "promo_revenue", - DataType::Decimal128(38, 2), - true, - )]), + 14 => Schema::new(vec![Field::new("promo_revenue", decimal_type(38, 2), true)]), 15 => Schema::new(vec![ Field::new("s_suppkey", DataType::Int64, true), Field::new("s_name", DataType::Utf8, true), Field::new("s_address", DataType::Utf8, true), Field::new("s_phone", DataType::Utf8, true), - Field::new("total_revenue", DataType::Decimal128(15, 2), true), + Field::new("total_revenue", decimal_type(15, 2), true), ]), 16 => Schema::new(vec![ @@ -250,26 +250,18 @@ pub fn get_answer_schema(n: usize) -> Schema { Field::new("supplier_cnt", DataType::Int64, true), ]), - 17 => Schema::new(vec![Field::new( - "avg_yearly", - DataType::Decimal128(38, 2), - true, - )]), + 17 => Schema::new(vec![Field::new("avg_yearly", decimal_type(38, 2), true)]), 18 => Schema::new(vec![ Field::new("c_name", DataType::Utf8, true), Field::new("c_custkey", DataType::Int64, true), Field::new("o_orderkey", DataType::Int64, true), Field::new("o_orderdate", DataType::Date32, true), - Field::new("o_totalprice", DataType::Decimal128(15, 2), true), - Field::new("sum_l_quantity", DataType::Decimal128(15, 2), true), + Field::new("o_totalprice", decimal_type(15, 2), true), + Field::new("sum_l_quantity", decimal_type(15, 2), true), ]), - 19 => Schema::new(vec![Field::new( - "revenue", - DataType::Decimal128(15, 2), - true, - )]), + 19 => Schema::new(vec![Field::new("revenue", decimal_type(15, 2), true)]), 20 => Schema::new(vec![ Field::new("s_name", DataType::Utf8, true), @@ -284,7 +276,7 @@ pub fn get_answer_schema(n: usize) -> Schema { 22 => Schema::new(vec![ Field::new("cntrycode", DataType::Utf8, true), Field::new("numcust", DataType::Int64, true), - Field::new("totacctbal", DataType::Decimal128(15, 2), true), + Field::new("totacctbal", decimal_type(15, 2), true), ]), _ => unimplemented!(), @@ -389,14 +381,14 @@ pub async fn convert_tbl( /// Converts the results into a 2d array of strings, `result[row][column]` /// Special cases nulls to NULL for testing -pub fn result_vec(results: &[RecordBatch]) -> Vec> { +pub fn result_vec(results: &[RecordBatch]) -> Vec> { let mut result = vec![]; for batch in results { for row_index in 0..batch.num_rows() { let row_vec = batch .columns() .iter() - .map(|column| col_str(column, row_index)) + .map(|column| col_to_scalar(column, row_index)) .collect(); result.push(row_vec); } @@ -422,13 +414,37 @@ pub fn string_schema(schema: Schema) -> Schema { ) } -/// Specialised String representation -fn col_str(column: &ArrayRef, row_index: usize) -> String { +fn col_to_scalar(column: &ArrayRef, row_index: usize) -> ScalarValue { if column.is_null(row_index) { - return "NULL".to_string(); + return ScalarValue::Null; + } + match column.data_type() { + DataType::Int32 => { + let array = column.as_any().downcast_ref::().unwrap(); + ScalarValue::Int32(Some(array.value(row_index))) + } + DataType::Int64 => { + let array = column.as_any().downcast_ref::().unwrap(); + ScalarValue::Int64(Some(array.value(row_index))) + } + DataType::Float64 => { + let array = column.as_any().downcast_ref::().unwrap(); + ScalarValue::Float64(Some(array.value(row_index))) + } + DataType::Decimal128(p, s) => { + let array = column.as_any().downcast_ref::().unwrap(); + ScalarValue::Decimal128(Some(array.value(row_index)), *p, *s) + } + DataType::Date32 => { + let array = column.as_any().downcast_ref::().unwrap(); + ScalarValue::Date32(Some(array.value(row_index))) + } + DataType::Utf8 => { + let array = column.as_any().downcast_ref::().unwrap(); + ScalarValue::Utf8(Some(array.value(row_index).to_string())) + } + other => panic!("unexpected data type in benchmark: {}", other), } - - array_value_to_string(column, row_index).unwrap() } pub async fn transform_actual_result(