Skip to content

Commit

Permalink
Change IN/NOT IN handling of empty arrays (Fixes #427)
Browse files Browse the repository at this point in the history
Previously, Sequel was inconsistent in it's handling of empty arrays
used with IN/NOT IN.  It would try to handle NULLs correctly in the
IN case, but not in the NOT IN case.  This makes correct NULL
handling the default in both cases.

The actual behavior difference is that now in the NOT IN case,
if the left hand side is NULL, the result of the expression is
NULL instead of true.

However, the correct NULL handling behavior can cause poor
performance on databases with a lousy optimizer, such as MySQL.
So there is a Sequel.empty_array_handle_nulls setting to change
the behavior so it performs well, but may not handle NULLs
correctly.

Before:

  {:b => []}
  # (b != b)
  ~{:b => []}
  # (1 = 1)

New Default:

  {:b => []}
  # (b != b)
  ~{:b => []}
  # (b = b)

Sequel.empty_array_handle_nulls = false:

  {:b => []}
  # (1 = 0)
  ~{:b => []}
  # (1 = 1)

"Correct" here is actually an opinion, not a fact.  You could
certainly argue that the static true/false behavior makes more
logic sense.  However, the default empty array behavior is
certainly more consistent with the behavior in the non-empty
array case.
  • Loading branch information
jeremyevans committed Jan 27, 2012
1 parent 527f5ad commit 4dec148
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 34 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
@@ -1,5 +1,9 @@
=== HEAD

* Add Sequel.empty_array_handle_nulls setting, can be set to false for possible better performance on some databases (jeremyevans)

* Change exclude(:b=>[]) to not return rows where b is NULL (jeremyevans) (#427)

* Support ActiveModel 3.2 in the active_model plugin, by adding support for to_partial_path (jeremyevans)

* Fix metadata methods (e.g. tables) on Oracle when custom identifier input methods are used (jeremyevans)
Expand Down
22 changes: 22 additions & 0 deletions lib/sequel/core.rb
Expand Up @@ -25,6 +25,7 @@
module Sequel
@convert_two_digit_years = true
@datetime_class = Time
@empty_array_handle_nulls = true
@virtual_row_instance_eval = true
@require_thread = nil

Expand Down Expand Up @@ -54,6 +55,27 @@ class << self
# days on +DateTime+).
attr_accessor :datetime_class

# Sets whether or not to attempt to handle NULL values correctly when given
# an empty array. By default:
#
# DB[:a].filter(:b=>[])
# # SELECT * FROM a WHERE (b != b)
# DB[:a].exclude(:b=>[])
# # SELECT * FROM a WHERE (b = b)
#
# However, some databases (e.g. MySQL) will perform very poorly
# with this type of query. You can set this to false to get the
# following behavior:
#
# DB[:a].filter(:b=>[])
# # SELECT * FROM a WHERE 1 = 0
# DB[:a].exclude(:b=>[])
# # SELECT * FROM a WHERE 1 = 1
#
# This may not handle NULLs correctly, but can be much faster on
# some databases.
attr_accessor :empty_array_handle_nulls

# For backwards compatibility, has no effect.
attr_accessor :virtual_row_instance_eval

Expand Down
45 changes: 21 additions & 24 deletions lib/sequel/dataset/sql.rb
Expand Up @@ -419,14 +419,10 @@ def complex_expression_sql_append(sql, op, args)
val_array = true
empty_val_array = vals == []
end
if col_array
if empty_val_array
if op == :IN
literal_append(sql, SQL::BooleanExpression.from_value_pairs(cols.to_a.map{|x| [x, x]}, :AND, true))
else
literal_append(sql, 1=>1)
end
elsif !supports_multiple_column_in?
if empty_val_array
literal_append(sql, empty_array_value(op, cols))
elsif col_array
if !supports_multiple_column_in?
if val_array
expr = SQL::BooleanExpression.new(:OR, *vals.to_a.map{|vs| SQL::BooleanExpression.from_value_pairs(cols.to_a.zip(vs).map{|c, v| [c, v]})})
literal_append(sql, op == :IN ? expr : ~expr)
Expand All @@ -452,19 +448,11 @@ def complex_expression_sql_append(sql, op, args)
sql << PAREN_CLOSE
end
else
if empty_val_array
if op == :IN
literal_append(sql, SQL::BooleanExpression.from_value_pairs([[cols, cols]], :AND, true))
else
literal_append(sql, 1=>1)
end
else
sql << PAREN_OPEN
literal_append(sql, cols)
sql << SPACE << op.to_s << SPACE
literal_append(sql, vals)
sql << PAREN_CLOSE
end
sql << PAREN_OPEN
literal_append(sql, cols)
sql << SPACE << op.to_s << SPACE
literal_append(sql, vals)
sql << PAREN_CLOSE
end
when *TWO_ARITY_OPERATORS
sql << PAREN_OPEN
Expand Down Expand Up @@ -856,6 +844,11 @@ def dataset_alias(number)
:"#{DATASET_ALIAS_BASE_NAME}#{number}"
end

# The strftime format to use when literalizing the time.
def default_timestamp_format
requires_sql_standard_datetimes? ? STANDARD_TIMESTAMP_FORMAT : TIMESTAMP_FORMAT
end

# The order of methods to call to build the DELETE SQL statement
def delete_clause_methods
DELETE_CLAUSE_METHODS
Expand All @@ -877,9 +870,13 @@ def expression_list_append(sql, columns)
end
end

# The strftime format to use when literalizing the time.
def default_timestamp_format
requires_sql_standard_datetimes? ? STANDARD_TIMESTAMP_FORMAT : TIMESTAMP_FORMAT
def empty_array_value(op, cols)
if Sequel.empty_array_handle_nulls
c = Array(cols)
SQL::BooleanExpression.from_value_pairs(c.zip(c), :AND, op == :IN)
else
{1 => ((op == :IN) ? 0 : 1)}
end
end

# Format the timestamp based on the default_timestamp_format, with a couple
Expand Down
42 changes: 35 additions & 7 deletions spec/core/dataset_spec.rb
Expand Up @@ -495,29 +495,42 @@ def v.values; {}; end
"SELECT * FROM test WHERE (gdp > (SELECT avg(gdp) FROM test WHERE (region = 'Asia')))"
end

specify "should handle all types of IN/NOT IN queries with empty arrays" do
@dataset.filter(:id => []).sql.should == "SELECT * FROM test WHERE (id != id)"
@dataset.filter([:id1, :id2] => []).sql.should == "SELECT * FROM test WHERE ((id1 != id1) AND (id2 != id2))"
@dataset.exclude(:id => []).sql.should == "SELECT * FROM test WHERE (id = id)"
@dataset.exclude([:id1, :id2] => []).sql.should == "SELECT * FROM test WHERE ((id1 = id1) AND (id2 = id2))"
end

specify "should handle all types of IN/NOT IN queries with empty arrays" do
begin
Sequel.empty_array_handle_nulls = false
@dataset.filter(:id => []).sql.should == "SELECT * FROM test WHERE (1 = 0)"
@dataset.filter([:id1, :id2] => []).sql.should == "SELECT * FROM test WHERE (1 = 0)"
@dataset.exclude(:id => []).sql.should == "SELECT * FROM test WHERE (1 = 1)"
@dataset.exclude([:id1, :id2] => []).sql.should == "SELECT * FROM test WHERE (1 = 1)"
ensure
Sequel.empty_array_handle_nulls = true
end
end

specify "should handle all types of IN/NOT IN queries" do
@dataset.filter(:id => @d1.select(:id)).sql.should == "SELECT * FROM test WHERE (id IN (SELECT id FROM test WHERE (region = 'Asia')))"
@dataset.filter(:id => []).sql.should == "SELECT * FROM test WHERE (id != id)"
@dataset.filter(:id => [1, 2]).sql.should == "SELECT * FROM test WHERE (id IN (1, 2))"
@dataset.filter([:id1, :id2] => @d1.select(:id1, :id2)).sql.should == "SELECT * FROM test WHERE ((id1, id2) IN (SELECT id1, id2 FROM test WHERE (region = 'Asia')))"
@dataset.filter([:id1, :id2] => []).sql.should == "SELECT * FROM test WHERE ((id1 != id1) AND (id2 != id2))"
@dataset.filter([:id1, :id2] => [[1, 2], [3,4]].sql_array).sql.should == "SELECT * FROM test WHERE ((id1, id2) IN ((1, 2), (3, 4)))"
@dataset.filter([:id1, :id2] => [[1, 2], [3,4]]).sql.should == "SELECT * FROM test WHERE ((id1, id2) IN ((1, 2), (3, 4)))"

@dataset.exclude(:id => @d1.select(:id)).sql.should == "SELECT * FROM test WHERE (id NOT IN (SELECT id FROM test WHERE (region = 'Asia')))"
@dataset.exclude(:id => []).sql.should == "SELECT * FROM test WHERE (1 = 1)"
@dataset.exclude(:id => [1, 2]).sql.should == "SELECT * FROM test WHERE (id NOT IN (1, 2))"
@dataset.exclude([:id1, :id2] => @d1.select(:id1, :id2)).sql.should == "SELECT * FROM test WHERE ((id1, id2) NOT IN (SELECT id1, id2 FROM test WHERE (region = 'Asia')))"
@dataset.exclude([:id1, :id2] => []).sql.should == "SELECT * FROM test WHERE (1 = 1)"
@dataset.exclude([:id1, :id2] => [[1, 2], [3,4]].sql_array).sql.should == "SELECT * FROM test WHERE ((id1, id2) NOT IN ((1, 2), (3, 4)))"
@dataset.exclude([:id1, :id2] => [[1, 2], [3,4]]).sql.should == "SELECT * FROM test WHERE ((id1, id2) NOT IN ((1, 2), (3, 4)))"
end

specify "should handle IN/NOT IN queries with multiple columns and an array where the database doesn't support it" do
@dataset.meta_def(:supports_multiple_column_in?){false}
@dataset.filter([:id1, :id2] => []).sql.should == "SELECT * FROM test WHERE ((id1 != id1) AND (id2 != id2))"
@dataset.filter([:id1, :id2] => [[1, 2], [3,4]].sql_array).sql.should == "SELECT * FROM test WHERE (((id1 = 1) AND (id2 = 2)) OR ((id1 = 3) AND (id2 = 4)))"
@dataset.exclude([:id1, :id2] => []).sql.should == "SELECT * FROM test WHERE (1 = 1)"
@dataset.exclude([:id1, :id2] => [[1, 2], [3,4]].sql_array).sql.should == "SELECT * FROM test WHERE (((id1 != 1) OR (id2 != 2)) AND ((id1 != 3) OR (id2 != 4)))"
end

Expand All @@ -537,10 +550,25 @@ def v.values; {}; end
d1 = db[:test].select(:id1, :id2).filter(:region=>'Asia').columns(:id1, :id2)
@dataset.filter([:id1, :id2] => d1).sql.should == "SELECT * FROM test WHERE ((id1 != id1) AND (id2 != id2))"
db.sqls.should == ["SELECT id1, id2 FROM test WHERE (region = 'Asia')"]
@dataset.exclude([:id1, :id2] => d1).sql.should == "SELECT * FROM test WHERE (1 = 1)"
@dataset.exclude([:id1, :id2] => d1).sql.should == "SELECT * FROM test WHERE ((id1 = id1) AND (id2 = id2))"
db.sqls.should == ["SELECT id1, id2 FROM test WHERE (region = 'Asia')"]
end

specify "should handle IN/NOT IN queries with multiple columns and an empty dataset where the database doesn't support it with correct NULL handling" do
begin
Sequel.empty_array_handle_nulls = false
@dataset.meta_def(:supports_multiple_column_in?){false}
db = Sequel.mock
d1 = db[:test].select(:id1, :id2).filter(:region=>'Asia').columns(:id1, :id2)
@dataset.filter([:id1, :id2] => d1).sql.should == "SELECT * FROM test WHERE (1 = 0)"
db.sqls.should == ["SELECT id1, id2 FROM test WHERE (region = 'Asia')"]
@dataset.exclude([:id1, :id2] => d1).sql.should == "SELECT * FROM test WHERE (1 = 1)"
db.sqls.should == ["SELECT id1, id2 FROM test WHERE (region = 'Asia')"]
ensure
Sequel.empty_array_handle_nulls = true
end
end

specify "should handle IN/NOT IN queries for datasets with row_procs" do
@dataset.meta_def(:supports_multiple_column_in?){false}
db = Sequel.mock(:fetch=>[{:id1=>1, :id2=>2}, {:id1=>3, :id2=>4}])
Expand Down
35 changes: 33 additions & 2 deletions spec/integration/dataset_test.rb
Expand Up @@ -1184,11 +1184,42 @@
specify "should work empty arrays with nulls" do
@ds.insert(nil, nil)
@ds.filter(:a=>[]).all.should == []
@ds.exclude(:a=>[]).all.should == [{:a=>nil, :b=>nil}]
@ds.exclude(:a=>[]).all.should == []
@ds.filter([:a, :b]=>[]).all.should == []
@ds.exclude([:a, :b]=>[]).all.should == [{:a=>nil, :b=>nil}]
@ds.exclude([:a, :b]=>[]).all.should == []

unless Sequel.guarded?(:mssql, :oracle, :db2)
# MSSQL doesn't like boolean results in the select list
pr = proc{|r| r.is_a?(Integer) ? (r != 0) : r}
pr[@ds.get({:a=>[]}.sql_expr)].should == nil
pr[@ds.get(~({:a=>[]}).sql_expr)].should == nil
pr[@ds.get({[:a, :b]=>[]}.sql_expr)].should == nil
pr[@ds.get(~({[:a, :b]=>[]}).sql_expr)].should == nil
end
end

specify "should work empty arrays with nulls and Sequel.empty_array_null_handling = true" do
begin
Sequel.empty_array_handle_nulls = false
@ds.insert(nil, nil)
@ds.filter(:a=>[]).all.should == []
@ds.exclude(:a=>[]).all.should == [{:a=>nil, :b=>nil}]
@ds.filter([:a, :b]=>[]).all.should == []
@ds.exclude([:a, :b]=>[]).all.should == [{:a=>nil, :b=>nil}]

unless Sequel.guarded?(:mssql, :oracle, :db2)
# MSSQL doesn't like boolean results in the select list
pr = proc{|r| r.is_a?(Integer) ? (r != 0) : r}
pr[@ds.get({:a=>[]}.sql_expr)].should == false
pr[@ds.get(~({:a=>[]}).sql_expr)].should == true
pr[@ds.get({[:a, :b]=>[]}.sql_expr)].should == false
pr[@ds.get(~({[:a, :b]=>[]}).sql_expr)].should == true
end
ensure
Sequel.empty_array_handle_nulls = true
end
end

it "should work multiple conditions" do
@ds.insert(20, 10)
@ds.filter(:a=>20, :b=>10).all.should == [{:a=>20, :b=>10}]
Expand Down
2 changes: 1 addition & 1 deletion spec/model/associations_spec.rb
Expand Up @@ -2590,7 +2590,7 @@ def al(v)

it "should not affect non-association IN/NOT IN filtering with an empty array" do
@Album.filter(:tag_id=>[]).sql.should == 'SELECT * FROM albums WHERE (tag_id != tag_id)'
@Album.exclude(:tag_id=>[]).sql.should == 'SELECT * FROM albums WHERE (1 = 1)'
@Album.exclude(:tag_id=>[]).sql.should == 'SELECT * FROM albums WHERE (tag_id = tag_id)'
end

it "should work correctly in subclasses" do
Expand Down

0 comments on commit 4dec148

Please sign in to comment.