Skip to content

Commit

Permalink
Avoid allocating datasets in cases where the returned dataset would b…
Browse files Browse the repository at this point in the history
…e the same as the receiver

This avoids unnecessary dataset clones/allocations.  The most
important changes are probably ungraphed/naked, since single_value_ds
uses those, and various methods call single_value_ds where the
dataset is already naked and ungraphed.

Issue pointed out by using the recently introduced provenance
extension in a real application.
  • Loading branch information
jeremyevans committed Apr 4, 2024
1 parent 559fea3 commit c4e43e2
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
=== master

* Avoid allocating datasets in cases where the returned dataset would be the same as the receiver (jeremyevans)

* Add provenance dataset extension, which includes comments in queries showing how and where the dataset was built (jeremyevans)

=== 5.79.0 (2024-04-01)
Expand Down
1 change: 1 addition & 0 deletions lib/sequel/dataset/graph.rb
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ def set_graph_aliases(graph_aliases)
# Remove the splitting of results into subhashes, and all metadata
# related to the current graph (if any).
def ungraphed
return self unless opts[:graph]
clone(:graph=>nil)
end

Expand Down
13 changes: 13 additions & 0 deletions lib/sequel/dataset/query.rb
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def clone(opts = OPTS) # :nodoc:
def distinct(*args, &block)
virtual_row_columns(args, block)
if args.empty?
return self if opts[:distinct] == EMPTY_ARRAY
cached_dataset(:_distinct_ds){clone(:distinct => EMPTY_ARRAY)}
else
raise(InvalidOperation, "DISTINCT ON not supported") unless supports_distinct_on?
Expand Down Expand Up @@ -230,6 +231,7 @@ def filter(*cond, &block)
#
# DB[:table].for_update # SELECT * FROM table FOR UPDATE
def for_update
return self if opts[:lock] == :update
cached_dataset(:_for_update_ds){lock_style(:update)}
end

Expand Down Expand Up @@ -641,6 +643,7 @@ def #{jtype}_join(table, opts=Sequel::OPTS)
# DB.from(:a, DB[:b].where(Sequel[:a][:c]=>Sequel[:b][:d]).lateral)
# # SELECT * FROM a, LATERAL (SELECT * FROM b WHERE (a.c = b.d))
def lateral
return self if opts[:lateral]
cached_dataset(:_lateral_ds){clone(:lateral=>true)}
end

Expand Down Expand Up @@ -744,6 +747,7 @@ def merge_using(source, join_condition)
# ds.all # => [{2=>:id}]
# ds.naked.all # => [{:id=>2}]
def naked
return self unless opts[:row_proc]
cached_dataset(:_naked_ds){with_row_proc(nil)}
end

Expand All @@ -753,6 +757,7 @@ def naked
# DB[:items].for_update.nowait
# # SELECT * FROM items FOR UPDATE NOWAIT
def nowait
return self if opts[:nowait]
cached_dataset(:_nowait_ds) do
raise(Error, 'This dataset does not support raises errors instead of waiting for locked rows') unless supports_nowait?
clone(:nowait=>true)
Expand Down Expand Up @@ -878,6 +883,7 @@ def qualify(table=(cache=true; first_source))
# end
def returning(*values)
if values.empty?
return self if opts[:returning] == EMPTY_ARRAY
cached_dataset(:_returning_ds) do
raise Error, "RETURNING is not supported on #{db.database_type}" unless supports_returning?(:insert)
clone(:returning=>EMPTY_ARRAY)
Expand Down Expand Up @@ -930,6 +936,7 @@ def select(*columns, &block)
# DB[:items].select_all(:items, :foo) # SELECT items.*, foo.* FROM items
def select_all(*tables)
if tables.empty?
return self unless opts[:select]
cached_dataset(:_select_all_ds){clone(:select => nil)}
else
select(*tables.map{|t| i, a = split_alias(t); a || i}.map!{|t| SQL::ColumnAll.new(t)}.freeze)
Expand Down Expand Up @@ -1005,13 +1012,15 @@ def server?(server)

# Specify that the check for limits/offsets when updating/deleting be skipped for the dataset.
def skip_limit_check
return self if opts[:skip_limit_check]
cached_dataset(:_skip_limit_check_ds) do
clone(:skip_limit_check=>true)
end
end

# Skip locked rows when returning results from this dataset.
def skip_locked
return self if opts[:skip_locked]
cached_dataset(:_skip_locked_ds) do
raise(Error, 'This dataset does not support skipping locked rows') unless supports_skip_locked?
clone(:skip_locked=>true)
Expand All @@ -1023,6 +1032,7 @@ def skip_locked
# DB[:items].group(:a).having(a: 1).where(:b).unfiltered
# # SELECT * FROM items GROUP BY a
def unfiltered
return self unless opts[:where] || opts[:having]
cached_dataset(:_unfiltered_ds){clone(:where => nil, :having => nil)}
end

Expand All @@ -1031,6 +1041,7 @@ def unfiltered
# DB[:items].group(:a).having(a: 1).where(:b).ungrouped
# # SELECT * FROM items WHERE b
def ungrouped
return self unless opts[:group] || opts[:having]
cached_dataset(:_ungrouped_ds){clone(:group => nil, :having => nil)}
end

Expand Down Expand Up @@ -1058,13 +1069,15 @@ def union(dataset, opts=OPTS)
#
# DB[:items].limit(10, 20).unlimited # SELECT * FROM items
def unlimited
return self unless opts[:limit] || opts[:offset]
cached_dataset(:_unlimited_ds){clone(:limit=>nil, :offset=>nil)}
end

# Returns a copy of the dataset with no order.
#
# DB[:items].order(:a).unordered # SELECT * FROM items
def unordered
return self unless opts[:order]
cached_dataset(:_unordered_ds){clone(:order=>nil)}
end

Expand Down
81 changes: 80 additions & 1 deletion spec/core/dataset_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,11 @@ def columns; [:a, :b] end
@dataset.db.sqls.must_equal ['DELETE FROM a']
end

it "should return receiver if already skipping limit check" do
ds = @dataset.skip_limit_check
ds.skip_limit_check.must_be_same_as ds
end

it "should raise on #update" do
proc{@dataset.update(:a=>1)}.must_raise(Sequel::InvalidOperation)
end
Expand Down Expand Up @@ -1395,6 +1400,11 @@ def supports_cte_in_subselect?; false end
@d.select_all.sql.must_equal 'SELECT * FROM test'
end

it "should return receiver if called without arguments and it already uses SELECT *" do
d = @d.select_all
d.select_all.must_be_same_as d
end

it "should override the previous select option" do
@d.select(:a, :b, :c).select_all.sql.must_equal 'SELECT * FROM test'
end
Expand Down Expand Up @@ -1584,6 +1594,12 @@ def supports_cte_in_subselect?; false end
ds.unfiltered.sql.must_equal 'SELECT * FROM test'
end
end

it "should return receiver if it does not have a WHERE or HAVING clause" do
ds = Sequel.mock.dataset.from(:test).filter(:score=>1)
ds = ds.unfiltered
ds.unfiltered.must_be_same_as ds
end
end

describe "Dataset#unlimited" do
Expand All @@ -1593,6 +1609,12 @@ def supports_cte_in_subselect?; false end
ds.unlimited.sql.must_equal 'SELECT * FROM test'
end
end

it "should return receiver if it does not have a LIMIT or OFFSET" do
ds = Sequel.mock.dataset.from(:test).limit(1, 2)
ds = ds.unlimited
ds.unlimited.must_be_same_as ds
end
end

describe "Dataset#ungrouped" do
Expand All @@ -1602,6 +1624,12 @@ def supports_cte_in_subselect?; false end
ds.ungrouped.sql.must_equal 'SELECT * FROM test'
end
end

it "should return receiver if it does not have a WHERE or HAVING clause" do
ds = Sequel.mock.dataset.from(:test).group(:a).having(:b)
ds = ds.ungrouped
ds.ungrouped.must_be_same_as ds
end
end

describe "Dataset#unordered" do
Expand All @@ -1611,6 +1639,12 @@ def supports_cte_in_subselect?; false end
ds.unordered.sql.must_equal 'SELECT * FROM test'
end
end

it "should return receiver if it does not have an ORDER clause" do
ds = Sequel.mock.dataset.from(:test).order(:name)
ds = ds.unordered
ds.unordered.must_be_same_as ds
end
end

describe "Dataset#with_sql" do
Expand Down Expand Up @@ -2014,11 +2048,16 @@ def supports_cte_in_subselect?; false end
end

describe "Dataset#naked" do
it "should returned clone dataset without row_proc" do
it "should return clone dataset without row_proc" do
d = Sequel.mock.dataset.with_row_proc(proc{|r| r})
d.naked.row_proc.must_be_nil
d.row_proc.wont_be_nil
end

it "should return receiver if it doesn't have a row_proc" do
d = Sequel.mock.dataset
d.naked.must_be_same_as d
end
end

describe "Dataset#qualified_column_name" do
Expand Down Expand Up @@ -2220,6 +2259,11 @@ def ==(o) @h == o.h; end
proc{@dataset.distinct(:a)}.must_raise(Sequel::InvalidOperation)
end

it "should return receiver if already distinct" do
ds = @dataset.distinct
ds.distinct.must_be_same_as ds
end

it "should use DISTINCT ON if columns are given and DISTINCT ON is supported" do
@dataset = @dataset.with_extend{def supports_distinct_on?; true end}
@dataset.distinct(:a, :b).sql.must_equal 'SELECT DISTINCT ON (a, b) name FROM test'
Expand Down Expand Up @@ -2523,6 +2567,13 @@ def supports_cte_in_subselect?; false end
end
end

describe "Dataset#lateral" do
it "should return receiver if already using LATERAL" do
ds = Sequel.mock.dataset.lateral
ds.lateral.must_be_same_as ds
end
end

describe "Dataset#join_table" do
before do
@d = Sequel.mock.dataset.from(:items).with_quote_identifiers(true)
Expand Down Expand Up @@ -5206,6 +5257,11 @@ def default_timestamp_format
end
end

it "#for_update should return receiver if already using FOR UPDATE" do
ds = @ds.for_update
ds.for_update.must_be_same_as ds
end

it "#lock_style should accept symbols" do
@ds.lock_style(:update).sql.must_equal "SELECT * FROM t FOR UPDATE"
end
Expand Down Expand Up @@ -5234,6 +5290,15 @@ def select_lock_sql(sql) super; sql << " NOWAIT" if @opts[:nowait] end
@ds.nowait.sql.must_equal "SELECT * FROM t FOR UPDATE NOWAIT"
end
end

it "should return receiver if it already uses NOWAIT" do
@ds = @ds.with_extend do
def supports_nowait?; true end
def select_lock_sql(sql) super; sql << " NOWAIT" if @opts[:nowait] end
end
ds = @ds.nowait
ds.nowait.must_be_same_as ds
end
end

describe "Dataset#skip_locked" do
Expand All @@ -5255,6 +5320,15 @@ def select_lock_sql(sql) super; sql << " SKIP LOCKED" if @opts[:skip_locked] end
@ds.skip_locked.sql.must_equal "SELECT * FROM t FOR UPDATE SKIP LOCKED"
end
end

it "should return receiver if it already uses SKIP LOCKED" do
@ds = @ds.with_extend do
def supports_skip_locked?; true end
def select_lock_sql(sql) super; sql << " SKIP LOCKED" if @opts[:skip_locked] end
end
ds = @ds.skip_locked
ds.skip_locked.must_be_same_as ds
end
end

describe "Custom ASTTransformer" do
Expand Down Expand Up @@ -5316,6 +5390,11 @@ def obj.to_s_append(ds, sql) sql << 'a' end
@ds.update_sql(:foo=>1).must_equal "UPDATE t SET foo = 1"
end

it "should return receiver if called without arguments and it already uses RETURNING *" do
ds = @ds.returning
ds.returning.must_be_same_as ds
end

it "should have insert, update, and delete yield to blocks if RETURNING is used" do
@pr.call
h = {}
Expand Down
4 changes: 4 additions & 0 deletions spec/core/object_graph_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -338,4 +338,8 @@
@db.fetch = {:id=>1,:x=>2,:y=>3,:lines_id=>4,:lines_x=>5,:lines_y=>6,:graph_id=>7}
@ds1.graph(@ds2, :x=>:id).ungraphed.all.must_equal [{:id=>1,:x=>2,:y=>3,:lines_id=>4,:lines_x=>5,:lines_y=>6,:graph_id=>7}]
end

it "#ungraphed should return the receiver if not graphed" do
@ds1.ungraphed.must_be_same_as @ds1
end
end

0 comments on commit c4e43e2

Please sign in to comment.