Skip to content

Commit

Permalink
Merge pull request #7065 from dolthub/nicktobey/perf
Browse files Browse the repository at this point in the history
Cache the computation of default columns during three-way merges.
  • Loading branch information
nicktobey committed Dec 4, 2023
2 parents aceb28c + c8ba567 commit e723577
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 8 deletions.
18 changes: 10 additions & 8 deletions go/libraries/doltcore/merge/merge_prolly_rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,12 @@ func mergeProllyTableData(ctx *sql.Context, tm *TableMerger, finalSch schema.Sch

keyless := schema.IsKeyless(tm.leftSch)

pri, err := newPrimaryMerger(leftEditor, tm, valueMerger, finalSch, mergeInfo)
defaults, err := resolveDefaults(ctx, tm.name, finalSch, tm.leftSch)
if err != nil {
return nil, nil, err
}

pri, err := newPrimaryMerger(leftEditor, tm, valueMerger, finalSch, mergeInfo, defaults)
if err != nil {
return nil, nil, err
}
Expand Down Expand Up @@ -1045,15 +1050,17 @@ type primaryMerger struct {
tableMerger *TableMerger
finalSch schema.Schema
mergeInfo MergeInfo
defaults []sql.Expression
}

func newPrimaryMerger(leftEditor *prolly.MutableMap, tableMerger *TableMerger, valueMerger *valueMerger, finalSch schema.Schema, mergeInfo MergeInfo) (*primaryMerger, error) {
func newPrimaryMerger(leftEditor *prolly.MutableMap, tableMerger *TableMerger, valueMerger *valueMerger, finalSch schema.Schema, mergeInfo MergeInfo, defaults []sql.Expression) (*primaryMerger, error) {
return &primaryMerger{
mut: leftEditor,
valueMerger: valueMerger,
tableMerger: tableMerger,
finalSch: finalSch,
mergeInfo: mergeInfo,
defaults: defaults,
}, nil
}

Expand Down Expand Up @@ -1150,13 +1157,8 @@ func (m *primaryMerger) merge(ctx *sql.Context, diff tree.ThreeWayDiff, sourceSc
return fmt.Errorf("cannot merge keyless tables with reordered columns")
}
} else {
defaults, err := resolveDefaults(ctx, m.tableMerger.name, m.finalSch, m.tableMerger.leftSch)
if err != nil {
return err
}

tempTupleValue, err := remapTupleWithColumnDefaults(ctx, diff.Key, newTupleValue, sourceSch.GetValueDescriptor(),
m.valueMerger.leftMapping, m.tableMerger, m.tableMerger.leftSch, m.finalSch, defaults, m.valueMerger.syncPool, false)
m.valueMerger.leftMapping, m.tableMerger, m.tableMerger.leftSch, m.finalSch, m.defaults, m.valueMerger.syncPool, false)
if err != nil {
return err
}
Expand Down
1 change: 1 addition & 0 deletions integration-tests/bats/performance-repo/.dolt/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5:__DOLT__:4t8rtld9ul7l137jac93phj1o6qinq6o:8ep2ilbq0lb461mqibkqp80nd0skdend:00000000000000000000000000000000:vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv:3722
Binary file not shown.
6 changes: 6 additions & 0 deletions integration-tests/bats/performance-repo/.dolt/repo_state.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"head": "refs/heads/full",
"remotes": {},
"backups": {},
"branches": {}
}
153 changes: 153 additions & 0 deletions integration-tests/bats/performance.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash

# This BATS test attempts to detect performance regressions when using standard workflows on large datasets.
# Please note that this is a rough approach that is not designed to detect all performance issues, merely an extra
# safeguard against bugs that cause large (order-of-magnitude+) regressions.

# BATS_TEST_TIMEOUT is measured in seconds and is chosen to be high enough that all tests in this suite pass
# when running on GitHub's CI, but low enough that an order-of magnitude regression will cause them to fail.
BATS_TEST_TIMEOUT=50

# This function was used to create the dolt repo used for this test. It is not run during testing.
create_repo() {
dolt init
dolt checkout -b full

dolt sql -q 'create table t (pk int primary key, c0 text default "1", c3 text default "2", c4 text default "3", c5 text default "4", c6 text default "5", c7 text default "6");'
dolt commit -Am "new table t"

echo "insert into t(pk) values" > import.sql
for i in {1..100000}
do
echo " ($i)," >> import.sql
done
echo " (104857);" >> import.sql

dolt sql < import.sql

dolt add .
dolt commit -m "Add all rows"
}

setup() {
cp -r $BATS_TEST_DIRNAME/performance-repo/ $BATS_TMPDIR/dolt-repo-$$
cd $BATS_TMPDIR/dolt-repo-$$
}

@test "performance: merge with no schema change and no conflict" {
dolt checkout full
dolt checkout -b mod2
dolt reset --soft HEAD^

dolt sql -q "delete from t where pk % 2 = 0"

dolt add .
dolt commit -m "Add mod2 rows"

dolt checkout full
dolt checkout -b mod3
dolt reset --soft HEAD^

dolt sql -q "delete from t where pk % 3 = 0"

dolt add .
dolt commit -m "Add mod3 rows"

run dolt merge mod2
log_status_eq 0
}

@test "performance: merge with no schema change and conflict" {
dolt checkout full
dolt checkout -b mod2
dolt reset --soft HEAD^

dolt sql -q "delete from t where pk % 2 = 0"

dolt add .
dolt commit -m "Add mod2 rows"

dolt checkout full
dolt checkout -b mod3
dolt reset --soft HEAD^

dolt sql -q "delete from t where pk % 3 = 0"
dolt sql -q 'update t set c0 = "conflict" where pk = 91'

dolt add .
dolt commit -m "Add mod3 rows"

run dolt merge mod2
log_status_eq 1
[[ "$output" =~ "Merge conflict in t" ]] || false

dolt conflicts resolve --ours t
BATS_TEST_TIMEOUT=1
}


@test "performance: merge with schema change and no conflict" {
dolt checkout full
dolt checkout -b mod2
dolt reset --soft HEAD^

dolt sql -q "delete from t where pk % 2 = 0"

dolt add .
dolt commit -m "Add mod2 rows"

dolt sql -q "alter table t add column c1 int default 1"
dolt add .
dolt commit -m "Add column c1"

dolt checkout full
dolt checkout -b mod3
dolt reset --soft HEAD^

dolt sql -q "delete from t where pk % 3 = 0"

dolt add .
dolt commit -m "Add mod3 rows"

dolt sql -q "alter table t add column c2 int default 2"
dolt add .
dolt commit -m "Add column c2"

run dolt merge mod2
log_status_eq 0
}

@test "performance: merge with schema change and conflict" {
dolt checkout full
dolt checkout -b mod2
dolt reset --soft HEAD^

dolt sql -q "delete from t where pk % 2 = 0"

dolt add .
dolt commit -m "Add mod2 rows"

dolt sql -q "alter table t add column c1 int default 1"
dolt add .
dolt commit -m "Add column c1"

dolt checkout full
dolt checkout -b mod3
dolt reset --soft HEAD^

dolt sql -q "delete from t where pk % 3 = 0"
dolt sql -q 'update t set c0 = "conflict" where pk = 91'

dolt add .
dolt commit -m "Add mod3 rows"

dolt sql -q "alter table t add column c2 int default 2"
dolt add .
dolt commit -m "Add column c2"

run dolt merge mod2
log_status_eq 1

dolt conflicts resolve --ours t
}

0 comments on commit e723577

Please sign in to comment.