Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ Different queries are included to test nested loop joins under various workloads

## Hash Join

This benchmark focuses on the performance of queries with nested hash joins, minimizing other overheads such as scanning data sources or evaluating predicates.
This benchmark focuses on the performance of queries with hash joins, minimizing other overheads such as scanning data sources or evaluating predicates.

Several queries are included to test hash joins under various workloads.

Expand All @@ -774,6 +774,19 @@ Several queries are included to test hash joins under various workloads.
./bench.sh run hj
```

## Sort Merge Join

This benchmark focuses on the performance of queries with sort merge joins, minimizing other overheads such as scanning data sources or evaluating predicates.

Several queries are included to test sort merge joins under various workloads.

### Example Run

```bash
# No need to generate data: this benchmark uses table function `range()` as the data source

./bench.sh run smj
```

## Cancellation

Test performance of cancelling queries.
Expand Down
17 changes: 17 additions & 0 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ imdb: Join Order Benchmark (JOB) using the IMDB dataset conver
cancellation: How long cancelling a query takes
nlj: Benchmark for simple nested loop joins, testing various join scenarios
hj: Benchmark for simple hash joins, testing various join scenarios
smj: Benchmark for simple sort merge joins, testing various join scenarios
compile_profile: Compile and execute TPC-H across selected Cargo profiles, reporting timing and binary size


Expand Down Expand Up @@ -311,6 +312,10 @@ main() {
# hj uses range() function, no data generation needed
echo "HJ benchmark does not require data generation"
;;
smj)
# smj uses range() function, no data generation needed
echo "SMJ benchmark does not require data generation"
;;
compile_profile)
data_tpch "1"
;;
Expand Down Expand Up @@ -384,6 +389,7 @@ main() {
run_external_aggr
run_nlj
run_hj
run_smj
;;
tpch)
run_tpch "1" "parquet"
Expand Down Expand Up @@ -494,6 +500,9 @@ main() {
hj)
run_hj
;;
smj)
run_smj
;;
compile_profile)
run_compile_profile "${PROFILE_ARGS[@]}"
;;
Expand Down Expand Up @@ -1154,6 +1163,14 @@ run_hj() {
debug_run $CARGO_COMMAND --bin dfbench -- hj --iterations 5 -o "${RESULTS_FILE}" ${QUERY_ARG}
}

# Runs the smj (sort merge join) benchmark.
#
# Sets RESULTS_FILE to "${RESULTS_DIR}/smj.json", prints that path, and then
# invokes the `smj` subcommand of the `dfbench` binary through debug_run with
# `--iterations 5`, passing RESULTS_FILE as the `-o` argument.
#
# Reads:  RESULTS_DIR, CARGO_COMMAND, QUERY_ARG
# Writes: RESULTS_FILE (assigned without `local`, so it is not function-scoped)
run_smj() {
RESULTS_FILE="${RESULTS_DIR}/smj.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running smj benchmark..."
# NOTE(review): $CARGO_COMMAND and ${QUERY_ARG} are expanded unquoted, presumably
# so that they word-split into separate arguments and an empty QUERY_ARG
# disappears entirely — confirm before adding quotes.
debug_run $CARGO_COMMAND --bin dfbench -- smj --iterations 5 -o "${RESULTS_FILE}" ${QUERY_ARG}
}


compare_benchmarks() {
BASE_RESULTS_DIR="${SCRIPT_DIR}/results"
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/src/bin/dfbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

use datafusion_benchmarks::{
cancellation, clickbench, h2o, hj, imdb, nlj, sort_tpch, tpch,
cancellation, clickbench, h2o, hj, imdb, nlj, smj, sort_tpch, tpch,
};

#[derive(Debug, StructOpt)]
Expand All @@ -46,6 +46,7 @@ enum Options {
HJ(hj::RunOpt),
Imdb(imdb::RunOpt),
Nlj(nlj::RunOpt),
Smj(smj::RunOpt),
SortTpch(sort_tpch::RunOpt),
Tpch(tpch::RunOpt),
TpchConvert(tpch::ConvertOpt),
Expand All @@ -63,6 +64,7 @@ pub async fn main() -> Result<()> {
Options::HJ(opt) => opt.run().await,
Options::Imdb(opt) => Box::pin(opt.run()).await,
Options::Nlj(opt) => opt.run().await,
Options::Smj(opt) => opt.run().await,
Options::SortTpch(opt) => opt.run().await,
Options::Tpch(opt) => Box::pin(opt.run()).await,
Options::TpchConvert(opt) => opt.run().await,
Expand Down
1 change: 1 addition & 0 deletions benchmarks/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub mod h2o;
pub mod hj;
pub mod imdb;
pub mod nlj;
pub mod smj;
pub mod sort_tpch;
pub mod tpch;
pub mod util;
Loading
Loading