Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed bug in alias resolution for aliases defined in group by and used in order by #501

Merged
merged 6 commits into from
Jul 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 25 additions & 48 deletions enginetest/memory_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,18 +93,18 @@ func TestQueriesSimple(t *testing.T) {

// Convenience test for debugging a single query. Unskip and set to the desired query.
func TestSingleQuery(t *testing.T) {
// t.Skip()
t.Skip()

var test enginetest.QueryTest
test = enginetest.QueryTest{
Query: `SELECT i FROM mytable mt
WHERE (SELECT i FROM mytable where i = mt.i and i > 2) IS NOT NULL
AND (SELECT i2 FROM othertable where i2 = i) IS NOT NULL
ORDER BY i`,
Query: "SELECT floor(i), avg(char_length(s)) FROM mytable mt group by 1 ORDER BY floor(i) DESC",
Expected: []sql.Row{
{3},
{3, 9.0},
{2, 10.0},
{1, 9.0},
},
}

fmt.Sprintf("%v", test)
harness := enginetest.NewMemoryHarness("", 2, testNumPartitions, false, nil)
engine := enginetest.NewEngine(t, harness)
Expand All @@ -120,52 +120,29 @@ func TestSingleScript(t *testing.T) {

var scripts = []enginetest.ScriptTest{
{
Name: "Nested Subquery projections (NTC)",
Name: "Issue #499",
SetUpScript: []string{
`CREATE TABLE dcim_site (id char(32) NOT NULL,created date,last_updated datetime,_custom_field_data json NOT NULL,name varchar(100) NOT NULL,_name varchar(100) NOT NULL,slug varchar(100) NOT NULL,facility varchar(50) NOT NULL,asn bigint,time_zone varchar(63) NOT NULL,description varchar(200) NOT NULL,physical_address varchar(200) NOT NULL,shipping_address varchar(200) NOT NULL,latitude decimal(8,6),longitude decimal(9,6),contact_name varchar(50) NOT NULL,contact_phone varchar(20) NOT NULL,contact_email varchar(254) NOT NULL,comments longtext NOT NULL,region_id char(32),status_id char(32),tenant_id char(32),PRIMARY KEY (id),KEY dcim_site_region_id_45210932 (region_id),KEY dcim_site_status_id_e6a50f56 (status_id),KEY dcim_site_tenant_id_15e7df63 (tenant_id),UNIQUE KEY name (name),UNIQUE KEY slug (slug)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;`,
`CREATE TABLE dcim_rackgroup (id char(32) NOT NULL,created date,last_updated datetime,_custom_field_data json NOT NULL,name varchar(100) NOT NULL,slug varchar(100) NOT NULL,description varchar(200) NOT NULL,lft int unsigned NOT NULL,rght int unsigned NOT NULL,tree_id int unsigned NOT NULL,level int unsigned NOT NULL,parent_id char(32),site_id char(32) NOT NULL,PRIMARY KEY (id),KEY dcim_rackgroup_parent_id_cc315105 (parent_id),KEY dcim_rackgroup_site_id_13520e89 (site_id),KEY dcim_rackgroup_slug_3f4582a7 (slug),KEY dcim_rackgroup_tree_id_9c2ad6f4 (tree_id),UNIQUE KEY site_idname (site_id,name),UNIQUE KEY site_idslug (site_id,slug),CONSTRAINT dcim_rackgroup_parent_id_cc315105_fk_dcim_rackgroup_id FOREIGN KEY (parent_id) REFERENCES dcim_rackgroup (id),CONSTRAINT dcim_rackgroup_site_id_13520e89_fk_dcim_site_id FOREIGN KEY (site_id) REFERENCES dcim_site (id)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;`,
`CREATE TABLE dcim_rack (id char(32) NOT NULL,created date,last_updated datetime,_custom_field_data json NOT NULL,name varchar(100) NOT NULL,_name varchar(100) NOT NULL,facility_id varchar(50),serial varchar(50) NOT NULL,asset_tag varchar(50),type varchar(50) NOT NULL,width smallint unsigned NOT NULL,u_height smallint unsigned NOT NULL,desc_units tinyint NOT NULL,outer_width smallint unsigned,outer_depth smallint unsigned,outer_unit varchar(50) NOT NULL,comments longtext NOT NULL,group_id char(32),role_id char(32),site_id char(32) NOT NULL,status_id char(32),tenant_id char(32),PRIMARY KEY (id),UNIQUE KEY asset_tag (asset_tag),KEY dcim_rack_group_id_44e90ea9 (group_id),KEY dcim_rack_role_id_62d6919e (role_id),KEY dcim_rack_site_id_403c7b3a (site_id),KEY dcim_rack_status_id_ee3dee3e (status_id),KEY dcim_rack_tenant_id_7cdf3725 (tenant_id),UNIQUE KEY group_idfacility_id (group_id,facility_id),UNIQUE KEY group_idname (group_id,name),CONSTRAINT dcim_rack_group_id_44e90ea9_fk_dcim_rackgroup_id FOREIGN KEY (group_id) REFERENCES dcim_rackgroup (id),CONSTRAINT dcim_rack_site_id_403c7b3a_fk_dcim_site_id FOREIGN KEY (site_id) REFERENCES dcim_site (id)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;`,
`INSERT INTO dcim_site (id, created, last_updated, _custom_field_data, status_id, name, _name, slug, region_id, tenant_id, facility, asn, time_zone, description, physical_address, shipping_address, latitude, longitude, contact_name, contact_phone, contact_email, comments) VALUES ('f0471f313b694d388c8ec39d9590e396', '2021-05-20', '2021-05-20 18:51:46.416695', '{}', NULL, 'Site 1', 'Site 00000001', 'site-1', NULL, NULL, '', NULL, '', '', '', '', NULL, NULL, '', '', '', '')`,
`INSERT INTO dcim_site (id, created, last_updated, _custom_field_data, status_id, name, _name, slug, region_id, tenant_id, facility, asn, time_zone, description, physical_address, shipping_address, latitude, longitude, contact_name, contact_phone, contact_email, comments) VALUES ('442bab8b517149ab87207e8fb5ba1569', '2021-05-20', '2021-05-20 18:51:47.333720', '{}', NULL, 'Site 2', 'Site 00000002', 'site-2', NULL, NULL, '', NULL, '', '', '', '', NULL, NULL, '', '', '', '')`,
`INSERT INTO dcim_rack (id,created,last_updated,_custom_field_data,name,_name,facility_id,serial,asset_tag,type,width,u_height,desc_units,outer_width,outer_depth,outer_unit,comments,group_id,role_id,site_id,status_id,tenant_id) VALUES ('abc123', '2021-05-20', '2021-05-20 18:51:48.150116', '{}', "name", "name", "facility", "serial", "assettag", "type", 1, 1, 1, 1, 1, "outer units", "comment", "6bc0d9b1affe46918b09911359241db6", "role", "site", "status", "tenant")`,
`INSERT INTO dcim_rackgroup (id, created, last_updated, _custom_field_data, name, slug, site_id, parent_id, description, lft, rght, tree_id, level) VALUES ('5c107f979f434bf7a7820622f18a5211', '2021-05-20', '2021-05-20 18:51:48.150116', '{}', 'Parent Rack Group 1', 'parent-rack-group-1', 'f0471f313b694d388c8ec39d9590e396', NULL, '', 1, 2, 1, 0)`,
`INSERT INTO dcim_rackgroup (id, created, last_updated, _custom_field_data, name, slug, site_id, parent_id, description, lft, rght, tree_id, level) VALUES ('6707c20336a2406da6a9d394477f7e8c', '2021-05-20', '2021-05-20 18:51:48.969713', '{}', 'Parent Rack Group 2', 'parent-rack-group-2', '442bab8b517149ab87207e8fb5ba1569', NULL, '', 1, 2, 2, 0)`,
`INSERT INTO dcim_rackgroup (id, created, last_updated, _custom_field_data, name, slug, site_id, parent_id, description, lft, rght, tree_id, level) VALUES ('6bc0d9b1affe46918b09911359241db6', '2021-05-20', '2021-05-20 18:51:50.566160', '{}', 'Rack Group 1', 'rack-group-1', 'f0471f313b694d388c8ec39d9590e396', '5c107f979f434bf7a7820622f18a5211', '', 2, 3, 1, 1)`,
`INSERT INTO dcim_rackgroup (id, created, last_updated, _custom_field_data, name, slug, site_id, parent_id, description, lft, rght, tree_id, level) VALUES ('a773cac9dc9842228cdfd8c97a67136e', '2021-05-20', '2021-05-20 18:51:52.126952', '{}', 'Rack Group 2', 'rack-group-2', 'f0471f313b694d388c8ec39d9590e396', '5c107f979f434bf7a7820622f18a5211', '', 4, 5, 1, 1)`,
`INSERT INTO dcim_rackgroup (id, created, last_updated, _custom_field_data, name, slug, site_id, parent_id, description, lft, rght, tree_id, level) VALUES ('a35a843eb181404bb9da2126c6580977', '2021-05-20', '2021-05-20 18:51:53.706000', '{}', 'Rack Group 3', 'rack-group-3', 'f0471f313b694d388c8ec39d9590e396', '5c107f979f434bf7a7820622f18a5211', '', 6, 7, 1, 1)`,
`INSERT INTO dcim_rackgroup (id, created, last_updated, _custom_field_data, name, slug, site_id, parent_id, description, lft, rght, tree_id, level) VALUES ('f09a02c95b064533b823e25374f5962a', '2021-05-20', '2021-05-20 18:52:03.037056', '{}', 'Test Rack Group 4', 'test-rack-group-4', '442bab8b517149ab87207e8fb5ba1569', '6707c20336a2406da6a9d394477f7e8c', '', 2, 3, 2, 1)`,
`INSERT INTO dcim_rackgroup (id, created, last_updated, _custom_field_data, name, slug, site_id, parent_id, description, lft, rght, tree_id, level) VALUES ('ecff5b528c5140d4a58f1b24a1c80ebc', '2021-05-20', '2021-05-20 18:52:05.390373', '{}', 'Test Rack Group 5', 'test-rack-group-5', '442bab8b517149ab87207e8fb5ba1569', '6707c20336a2406da6a9d394477f7e8c', '', 4, 5, 2, 1)`,
`INSERT INTO dcim_rackgroup (id, created, last_updated, _custom_field_data, name, slug, site_id, parent_id, description, lft, rght, tree_id, level) VALUES ('d31b3772910e4418bdd5725d905e2699', '2021-05-20', '2021-05-20 18:52:07.758547', '{}', 'Test Rack Group 6', 'test-rack-group-6', '442bab8b517149ab87207e8fb5ba1569', '6707c20336a2406da6a9d394477f7e8c', '', 6, 7, 2, 1)`,
"CREATE TABLE test (time TIMESTAMP, value DOUBLE);",
`INSERT INTO test VALUES
("2021-07-04 10:00:00", 1.0),
("2021-07-03 10:00:00", 2.0),
("2021-07-02 10:00:00", 3.0),
("2021-07-01 10:00:00", 4.0);`,
},
Assertions: []enginetest.ScriptTestAssertion{
{
Query: `SELECT
((
SELECT COUNT(*)
FROM dcim_rack
WHERE group_id
IN (
SELECT m2.id
FROM dcim_rackgroup m2
WHERE m2.tree_id = dcim_rackgroup.tree_id
AND m2.lft BETWEEN dcim_rackgroup.lft
AND dcim_rackgroup.rght
)
)) AS rack_count,
dcim_rackgroup.id,
dcim_rackgroup._custom_field_data,
dcim_rackgroup.name,
dcim_rackgroup.slug,
dcim_rackgroup.site_id,
dcim_rackgroup.parent_id,
dcim_rackgroup.description,
dcim_rackgroup.lft,
dcim_rackgroup.rght,
dcim_rackgroup.tree_id,
dcim_rackgroup.level
FROM dcim_rackgroup
order by 1 limit 1`,
Expected: []sql.Row{{0, "6707c20336a2406da6a9d394477f7e8c", sql.JSONDocument{Val: map[string]interface{}{}}, "Parent Rack Group 2", "parent-rack-group-2", "442bab8b517149ab87207e8fb5ba1569", interface{}(nil), "", uint64(1), uint64(2), uint64(2), uint64(0)}},
Query: `SELECT
UNIX_TIMESTAMP(time) DIV 60 * 60 AS "time",
avg(value) AS "value"
FROM test
GROUP BY 1
ORDER BY UNIX_TIMESTAMP(time) DIV 60 * 60`,
Expected: []sql.Row{
{1625133600, 4.0},
{1625220000, 3.0},
{1625306400, 2.0},
{1625392800, 1.0},
},
},
},
},
Expand Down
23 changes: 23 additions & 0 deletions enginetest/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,22 @@ var QueryTests = []QueryTest{
{1, "first row"},
},
},
{
Query: "SELECT floor(i), s FROM mytable mt ORDER BY floor(i) DESC",
Expected: []sql.Row{
{3, "third row"},
{2, "second row"},
{1, "first row"},
},
},
{
Query: "SELECT floor(i), avg(char_length(s)) FROM mytable mt group by 1 ORDER BY floor(i) DESC",
Expected: []sql.Row{
{3, 9.0},
{2, 10.0},
{1, 9.0},
},
},
{
Query: "SELECT i AS x FROM mytable ORDER BY x DESC",
Expected: []sql.Row{{3}, {2}, {1}},
Expand Down Expand Up @@ -1452,6 +1468,13 @@ var QueryTests = []QueryTest{
Query: "SELECT dt1.i FROM datetime_table dt1 join datetime_table dt2 on dt1.date_col = date(date_sub(dt2.timestamp_col, interval 2 day)) order by 1",
Expected: []sql.Row{{1}, {2}},
},
{
Query: "SELECT unix_timestamp(timestamp_col) div 60 * 60 as timestamp_col, avg(i) from datetime_table group by 1 order by unix_timestamp(timestamp_col) div 60 * 60",
Expected: []sql.Row{
{1577966400, 1.0},
{1578225600, 2.0},
{1578398400, 3.0}},
},
{
Query: "SELECT COUNT(*) FROM mytable;",
Expected: []sql.Row{{int64(3)}},
Expand Down
27 changes: 27 additions & 0 deletions enginetest/script_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,33 @@ var ScriptTests = []ScriptTest{
},
},
},
{
Name: "Issue #499",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This tripped me up at first as I was getting different results when trying this locally on MySQL. I forgot that it interprets the given time on insertion as your local timezone, so setting my instance to UTC fixed it lol.

SetUpScript: []string{
"CREATE TABLE test (time TIMESTAMP, value DOUBLE);",
`INSERT INTO test VALUES
("2021-07-04 10:00:00", 1.0),
("2021-07-03 10:00:00", 2.0),
("2021-07-02 10:00:00", 3.0),
("2021-07-01 10:00:00", 4.0);`,
},
Assertions: []ScriptTestAssertion{
{
Query: `SELECT
UNIX_TIMESTAMP(time) DIV 60 * 60 AS "time",
avg(value) AS "value"
FROM test
GROUP BY 1
ORDER BY UNIX_TIMESTAMP(time) DIV 60 * 60`,
Expected: []sql.Row{
{1625133600, 4.0},
{1625220000, 3.0},
{1625306400, 2.0},
{1625392800, 1.0},
},
},
},
},
}

var CreateCheckConstraintsScripts = []ScriptTest{
Expand Down
39 changes: 29 additions & 10 deletions sql/analyzer/resolve_columns.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,18 @@ func pushdownGroupByAliases(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Sc
return n, nil
}

// replacedAliases is a map of original expression string to alias that has been pushed down below the GroupBy in
// the new projection node.
replacedAliases := make(map[string]string)
return plan.TransformUp(n, func(n sql.Node) (sql.Node, error) {
// For any Expressioner node above the GroupBy, we need to apply the same alias replacement as we did in the
// GroupBy itself.
ex, ok := n.(sql.Expressioner)
if ok && len(replacedAliases) > 0 {
newExprs := replaceExpressionsWithAliases(ex.Expressions(), replacedAliases)
return ex.WithExpressions(newExprs...)
}

g, ok := n.(*plan.GroupBy)
if n.Resolved() || !ok || len(g.GroupByExprs) == 0 {
return n, nil
Expand Down Expand Up @@ -683,7 +694,6 @@ func pushdownGroupByAliases(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Sc
}

var newSelectedExprs []sql.Expression
replacements := make(map[string]string)
var projection []sql.Expression
// Aliases will keep the aliases that have been pushed down and their
// index in the new aggregate.
Expand All @@ -710,7 +720,7 @@ func pushdownGroupByAliases(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Sc
delete(groupingColumns, name)

projection = append(projection, expr)
replacements[alias.Child.String()] = alias.Name()
replacedAliases[alias.Child.String()] = alias.Name()
newSelectedExprs = append(newSelectedExprs, expression.NewUnresolvedColumn(alias.Name()))
} else {
newSelectedExprs = append(newSelectedExprs, expr)
Expand All @@ -725,14 +735,8 @@ func pushdownGroupByAliases(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Sc
// expressions with a reference to that alias. This is so that we can directly compare the group by an select
// expressions for validation, which requires us to know that (table.column as col) and (table.column) are the
// same expressions. So if we replace one, replace both.
var newGroupBys []sql.Expression
for _, expr := range g.GroupByExprs {
if alias, ok := replacements[expr.String()]; ok {
newGroupBys = append(newGroupBys, expression.NewUnresolvedColumn(alias))
} else {
newGroupBys = append(newGroupBys, expr)
}
}
// TODO: this is pretty fragile and relies on string matching, need a better solution
newGroupBys := replaceExpressionsWithAliases(g.GroupByExprs, replacedAliases)

// Instead of iterating columns directly, we want them sorted so the
// executions of the rule are consistent.
Expand Down Expand Up @@ -801,6 +805,21 @@ func pushdownGroupByAliases(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Sc
})
}

// replaceExpressionsWithAliases replaces any expressions in the slice given that match the map of aliases given with
// their alias expression. This is necessary when pushing aliases down the tree, since we introduce a projection node
// that effectively erases the original columns of a table.
func replaceExpressionsWithAliases(exprs []sql.Expression, replacedAliases map[string]string) []sql.Expression {
var newExprs []sql.Expression
for _, expr := range exprs {
if alias, ok := replacedAliases[expr.String()]; ok {
newExprs = append(newExprs, expression.NewUnresolvedColumn(alias))
} else {
newExprs = append(newExprs, expr)
}
}
return newExprs
}

func findAllColumns(e sql.Expression) []string {
var cols []string
sql.Inspect(e, func(e sql.Expression) bool {
Expand Down