New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: add merge joiner benchmark and bytes usage optimization #31216

Merged
merged 2 commits into from Oct 11, 2018
Jump to file or symbol
Failed to load files and symbols.
+68 −3
Diff settings

Always

Just for now

Viewing a subset of changes. View all
Prev

sql: add merge joiner benchmark and memory account shrinking optimization

Merge joiner benchmark comparison against `release-2.1`

```
name                           old time/op    new time/op    delta
MergeJoiner/InputSize=0-8        4.31µs ±10%    4.33µs ±15%      ~     (p=0.968 n=10+9)
MergeJoiner/InputSize=4-8        8.34µs ± 3%    8.10µs ± 4%      ~     (p=0.074 n=8+9)
MergeJoiner/InputSize=16-8       16.3µs ± 2%    11.1µs ± 5%   -31.87%  (p=0.000 n=9+9)
MergeJoiner/InputSize=256-8       190µs ± 2%      85µs ± 2%   -55.04%  (p=0.000 n=10+8)
MergeJoiner/InputSize=4096-8     2.96ms ± 2%    1.28ms ± 2%   -56.86%  (p=0.000 n=10+10)
MergeJoiner/InputSize=65536-8    49.0ms ± 5%    20.6ms ± 1%   -57.88%  (p=0.000 n=10+10)

name                           old alloc/op   new alloc/op   delta
MergeJoiner/InputSize=0-8        6.42kB ± 0%    6.65kB ± 0%    +3.49%  (p=0.000 n=10+10)
MergeJoiner/InputSize=4-8        9.50kB ± 0%    9.72kB ± 0%    +2.36%  (p=0.000 n=10+10)
MergeJoiner/InputSize=16-8       9.50kB ± 0%    9.72kB ± 0%    +2.36%  (p=0.000 n=10+10)
MergeJoiner/InputSize=256-8      32.5kB ± 0%    32.8kB ± 0%    +0.69%  (p=0.000 n=10+10)
MergeJoiner/InputSize=4096-8      401kB ± 0%     401kB ± 0%    +0.06%  (p=0.000 n=9+10)
MergeJoiner/InputSize=65536-8    6.30MB ± 0%    6.30MB ± 0%    +0.00%  (p=0.000 n=9+10)

name                           old allocs/op  new allocs/op  delta
MergeJoiner/InputSize=0-8          14.0 ± 0%      15.0 ± 0%    +7.14%  (p=0.000 n=10+10)
MergeJoiner/InputSize=4-8          16.0 ± 0%      17.0 ± 0%    +6.25%  (p=0.000 n=10+10)
MergeJoiner/InputSize=16-8         16.0 ± 0%      17.0 ± 0%    +6.25%  (p=0.000 n=10+10)
MergeJoiner/InputSize=256-8        46.0 ± 0%      47.0 ± 0%    +2.17%  (p=0.000 n=10+10)
MergeJoiner/InputSize=4096-8        526 ± 0%       527 ± 0%    +0.19%  (p=0.000 n=10+10)
MergeJoiner/InputSize=65536-8     8.21k ± 0%     8.21k ± 0%    +0.01%  (p=0.000 n=10+10)

name                           old speed      new speed      delta
MergeJoiner/InputSize=4-8      7.67MB/s ± 3%  7.91MB/s ± 4%      ~     (p=0.070 n=8+9)
MergeJoiner/InputSize=16-8     15.7MB/s ± 2%  23.0MB/s ± 5%   +46.89%  (p=0.000 n=9+9)
MergeJoiner/InputSize=256-8    21.6MB/s ± 2%  48.0MB/s ± 2%  +122.41%  (p=0.000 n=10+8)
MergeJoiner/InputSize=4096-8   22.1MB/s ± 2%  51.3MB/s ± 2%  +131.81%  (p=0.000 n=10+10)
MergeJoiner/InputSize=65536-8  21.4MB/s ± 5%  50.8MB/s ± 1%  +137.16%  (p=0.000 n=10+10)
```

Release note: None
  • Loading branch information...
changangela committed Oct 10, 2018
commit 17d7dc27af4868ebecff2c15df85674aad2c2d9b
@@ -17,6 +17,7 @@ package distsqlrun
import (
"context"
"fmt"
"math"
"testing"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
@@ -871,4 +872,43 @@ func BenchmarkMergeJoiner(b *testing.B) {
}
})
}
for _, inputSize := range []int{0, 1 << 2, 1 << 4, 1 << 8, 1 << 12, 1 << 16} {
numRepeats := inputSize
b.Run(fmt.Sprintf("OneSideRepeatInputSize=%d", inputSize), func(b *testing.B) {
leftInput := NewRepeatableRowSource(oneIntCol, makeIntRows(inputSize, numCols))
rightInput := NewRepeatableRowSource(oneIntCol, makeRepeatedIntRows(numRepeats, inputSize, numCols))
b.SetBytes(int64(8 * inputSize * numCols * 2))
b.ResetTimer()
for i := 0; i < b.N; i++ {
m, err := newMergeJoiner(flowCtx, 0 /* processorID */, spec, leftInput, rightInput, post, disposer)
if err != nil {
b.Fatal(err)
}
m.Run(context.Background(), nil /* wg */)
leftInput.Reset()
rightInput.Reset()
}
})
}
for _, inputSize := range []int{0, 1 << 2, 1 << 4, 1 << 8, 1 << 12, 1 << 16} {
numRepeats := int(math.Sqrt(float64(inputSize)))
b.Run(fmt.Sprintf("BothSidesRepeatInputSize=%d", inputSize), func(b *testing.B) {
row := makeRepeatedIntRows(100, numRepeats, numCols)
leftInput := NewRepeatableRowSource(oneIntCol, row)
rightInput := NewRepeatableRowSource(oneIntCol, row)
b.SetBytes(int64(8 * inputSize * numCols * 2))
b.ResetTimer()
for i := 0; i < b.N; i++ {
m, err := newMergeJoiner(flowCtx, 0 /* processorID */, spec, leftInput, rightInput, post, disposer)
if err != nil {
b.Fatal(err)
}
m.Run(context.Background(), nil /* wg */)
leftInput.Reset()
rightInput.Reset()
}
})
}
}
@@ -117,7 +117,7 @@ func (s *streamGroupAccumulator) nextGroup(
n := len(s.curGroup)
ret := s.curGroup[:n:n]
s.curGroup = s.curGroup[:0]
s.memAcc.Clear(evalCtx.Ctx())
s.memAcc.Empty(evalCtx.Ctx())
s.leftoverRow = row
return ret, nil
}
@@ -311,6 +311,19 @@ func makeRandIntRows(rng *rand.Rand, numRows int, numCols int) sqlbase.EncDatumR
return rows
}
// makeRepeatedIntRows builds a table with numRows rows and numCols columns in
// which each run of n consecutive rows holds identical values: the datum at
// row i, column j is the integer i/n + j. n must be nonzero for any non-empty
// table, because every row index is divided by n.
func makeRepeatedIntRows(n int, numRows int, numCols int) sqlbase.EncDatumRows {
	table := make(sqlbase.EncDatumRows, numRows)
	for rowIdx := 0; rowIdx < numRows; rowIdx++ {
		row := make(sqlbase.EncDatumRow, numCols)
		for colIdx := range row {
			// Integer division maps rows [k*n, (k+1)*n) onto the same base value k.
			row[colIdx] = intEncDatum(rowIdx/n + colIdx)
		}
		table[rowIdx] = row
	}
	return table
}
// runProcessorTest instantiates a processor with the provided spec, runs it
// with the given inputs, and asserts that the outputted rows are as expected.
func runProcessorTest(
@@ -40,7 +40,7 @@ ALTER TABLE kw EXPERIMENTAL_RELOCATE SELECT ARRAY[i], i FROM generate_series(1,
query T
SELECT url FROM [EXPLAIN ANALYZE (DISTSQL) SELECT kv.k, avg(kw.k) FROM kv JOIN kw ON kv.k=kw.k GROUP BY kv.k]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzcmFFvo0YUhd_7K9A8pVoqMwN2HKRKbPuUdmNX2eShqqyIhFsHrW2sAXc3WuW_V4Bb2-DMZcJ4ZpI3GwP343Dn-Mz9TlZZApN4CTkJ_yKUuIQRl_jEJQFxyZDMXLLm2QPkecbLU-oLLpNvJPRckq7Wm6I8PHPJQ8aBhN9JkRYLICG5ie8XcA1xAnzgEZckUMTpoiqz5uky5k_Rl3-IS6abInSisjLPvuYOhzgJnfJrXsSLhVOkSwgdLyezZ5dkm2JbcFfn_sl5jPPHwwoRJbPnWXWPOZCQPruvAx-9AP5VEzh7EXx3n4wnwCFp3udDWbjTWUc0uAI-h9-ydAV8QBtvbwF_F2cR_fDjzzydP9Yfd3K4UdlA5TlOQ5bqWEMbl1T3aJ5aH2ydu4y_OUtYZvzJ2eSQhA7znN_TXxoK79TzZV77x_mcwzwuMj6gw9ZLccm0FnD7yj9O_rybTG_uJrefPp1FtBTg8-3VWcTKT79Obyc328_i3jjyTPTYM8l1TdCva8TiMK-fOJ9vr-4uS3n88ts1rBLgVeM4ERtEvjLJdnIMO8ixWR0T5KgWk-ynbD1gw8aZx2uPDmrT7s5DrbJMCfBzqyyTmrFM2t8yve6W6UlYpud09Uvkne_75egkful1WvztB-pjltL9gpgl1WiWr9XrJadk3Rc9s8qtJMDHVrkVM-NW7B24FfLO993q_N24lXS_IG7F3rBb-d0XvW-VW0mAX1jlVr4Zt_L7u1XQ3a0CCbdC9iG-zGvfN6zxSQwrMLMdle4axLN8jZ7VQ7KXbCvovvoDq2xLAnxolW0FZmwreAchC3nn-5518W5ClnS_IIYVvOGQhcxQryFfZ6scDp9QZmHvCnuljpDModY9zzb8Af7g2UNFUX-dVtdVu_YE8qL-ldVfLlf__ZQXcbEHkm0K2ErTVmJ7MEnzL_8fqhyge_2x6vouuX8qIHdyWBWv4KG-bUAdFaK6gJh-hahEC7MTtLC4_lh1_Z56NFrYAqCOCilsYaRj9CvEmkDePtAhj9e82Bc-TaDaIFrw4vrU024ICNDQMiBmm0JMv0KBcAE0eJoXD4UXs9Hh8jnBP8BIb4hC1RTzKA9VkvUNhCgESH-IQjpGv0LnekNUTx7loUqyvoEQhQDpD1FIx-hXaCz8G7gQh6gLFSFK4cOIedSHKlkA_RFBDGQgRCFA-hWira25KEXtAWH3ldlha3lQmS23BUDqE480gf6Ig3WNgUFRa5ttOOQgQAZGR2Ig9bFHmsDAsAjpGgPztNb2_cDnaSCOOrS12TacdRAgAxMkjEj5f7ssgYG4gxEZyDviwc_r845t8xwEyEDesW7CgxEpzz_SXWNAE9uGOgiQgbxj3ZgHI1Kef6S7xoAm4skORUY71LbZDgJkIu9YN-1BiLrmn6DHOrBuvsNONN9hSuY7CrdcCJD-vIMAGcg7GJF1GqnPP20CJfMdlX1s23wHATKQdzAi6zRSn3_aBOL5DkPmO8y2-Q4CZCDvYEQG_tutm_dgRKefgTFV853Z8w__BgAA__-qGmsT
https://cockroachdb.github.io/distsqlplan/decode.html#eJzcmFFvo0YUhd_7K9A8pVoqMwN2HKRKbPuUdmNX2eShqqyIhFsHrW2sAXc3WuW_V4Bb2-DMZcJ4ZpI3GwP343Dn-Mz9TlZZApN4CTkJ_yKUuIQRl_jEJQFxyZDMXLLm2QPkecbLU-oLLpNvJPRckq7Wm6I8PHPJQ8aBhN9JkRYLICG5ie8XcA1xAnzgEZckUMTpoiqz5uky5k_Rl3-IS6abInSisjLPvuYOhzgJnfJrXsSLhVOkSwgdLyezZ5dkm2JbcFfn_sl5jPPHwwoRJbPnWXWPOZCQPruvAx-9AP5VEzh7EXx3n4wnwCFp3udDWbjTWUc0uAI-h9-ydAV8QBtvbwF_F2cR_fDjzzydP9Yfd3K4UdlA5TlOQ5bqWEMbl1T3aJ5aH2ydu4y_OUtYZvzJ2eSQhA7znN_TXxoK79TzZV77x_mcwzwuMj6gw9ZLccm0FnD7yj9O_rybTG_uJrefPp1FtBTg8-3VWcTKT79Obyc328_i3jjyTPTYM8l1TdCva8TiMK-fOJ9vr-4uS3n88ts1rBLgVeM4ERtEvjLJdnIMO8ixWR0T5KgWk-ynbD1gw8aZx2uPDmrT7s5DrbJMCfBzqyyTmrFM2t8yve6W6UlYpud09Uvkne_75egkful1WvztB-pjltL9gpgl1WiWr9XrJadk3Rc9s8qtJMDHVrkVM-NW7B24FfLO993q_N24lXS_IG7F3rBb-d0XvW-VW0mAX1jlVr4Zt_L7u1XQ3a2CU21Hkde-b1jjkxhWYGY7Kt01iGf5Gj2rh2Qv2VbQffUHVtmWBPjQKtsKzNhW8A5CFvLO9z3r4t2ELOl-QQwreMMhC5mhXkO-zlY5HD6hzMLeFfZKHSGZQ617nm34A_zBs4eKov46ra6rdu0J5EX9K6u_XK7--ykv4mIPJNsUsJWmrcT2YJLmX_4_VDlA9_pj1fVdcv9UQO7ksCpewUN924A6KkR1ATH9ClGJFmYnaGFx_bHq-j31aLSwBUAdFVLYwkjH6FeINYG8faBDHq95sS98mkC1QbTgxfWpp90QEKChZUDMNoWYfoUC4QJo8DQvHgovZqPD5XOCf4CR3hCFqinmUR6qJOsbCFEIkP4QhXSMfoXO9YaonjzKQ5VkfQMhCgHSH6KQjtGv0Fj4N3AhDlEXKkKUwocR86gPVbIA-iOCGMhAiEKA9CtEW1tzUYraA8LuK7PD1vKgMltuC4DUJx5pAv0RB-saA4Oi1jbbcMhBgAyMjsRA6mOPNIGBYRHSNQbmaa3t-4HP00AcdWhrs2046yBABiZIGJHy_3ZZAgNxByMykHfEg5_X5x3b5jkIkIG8Y92EByNSnn-ku8aAJrYNdRAgA3nHujEPRqQ8_0h3jQFNxJMdiox2qG2zHQTIRN6xbtqDEHXNP0GPdWDdfIedaL7DlMx3FG65ECD9eQcBMpB3MCLrNFKff9oESuY7KvvYtvkOAmQg72BE1mmkPv-0CcTzHYbMd5ht8x0EyEDewYgM_LdbN-_BiE4_A2Oq5juz5x_-DQAA___eamsU
# This query verifies stats collection for the hashJoiner, distinct and sorter.
query T
@@ -477,6 +477,18 @@ func (mm *BytesMonitor) MakeBoundAccount() BoundAccount {
return BoundAccount{mon: mm}
}
// Empty resets the account's usage to 0 bytes. The bytes that were in use are
// first folded into the account's reserved buffer; if that buffer then exceeds
// the monitor's poolAllocationSize, the excess is released back to the monitor
// so that no more than poolAllocationSize remains reserved.
func (b *BoundAccount) Empty(ctx context.Context) {
	// Move everything currently in use into the reserved buffer.
	b.reserved, b.used = b.reserved+b.used, 0

	// Nothing to give back while the reservation is within the pool size.
	if b.reserved <= b.mon.poolAllocationSize {
		return
	}

	// Hand the surplus back to the monitor and cap the reservation.
	b.mon.releaseBytes(ctx, b.reserved-b.mon.poolAllocationSize)
	b.reserved = b.mon.poolAllocationSize
}
// Clear releases all the cumulated allocations of an account at once and
// primes it for reuse.
func (b *BoundAccount) Clear(ctx context.Context) {
@@ -554,7 +566,7 @@ func (b *BoundAccount) Shrink(ctx context.Context, delta int64) {
}
b.used -= delta
b.reserved += delta
if b.reserved >= b.mon.poolAllocationSize {
if b.reserved > b.mon.poolAllocationSize {
b.mon.releaseBytes(ctx, b.reserved-b.mon.poolAllocationSize)
b.reserved = b.mon.poolAllocationSize
}
ProTip! Use n and p to navigate between commits in a pull request.