Skip to content

Commit

Permalink
Change :regexp_function_cache_class to :regexp_function_cache
Browse files Browse the repository at this point in the history
Allow the :regexp_function_cache option to accept a proc in addition to a class.

Keep the setup_regexp_function method API backwards compatible.

Switch to Regexp#match? on Ruby 2.4+, which should provide significant
regexp speedups because it does not allocate a MatchData object.

Refactor specs to always test the regexp support when running on
the sqlite adapter.
  • Loading branch information
jeremyevans committed Jan 5, 2024
1 parent a6cc908 commit 433b937
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 41 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
=== master

* Speed up regexp matches in sqlite adapter on Ruby 2.4+ (jeremyevans)

* Add sqlite adapter :regexp_function_cache option for specifying the cache object to use (paddor, jeremyevans) (#2116)

* Respect list plugin :top option when inserting the first row into the model's table (johanmagnusson) (#2115)

* Switch default connection pool to timed_queue on Ruby 3.4+ (jeremyevans)
Expand Down
35 changes: 21 additions & 14 deletions lib/sequel/adapters/sqlite.rb
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,11 @@ def initialize(opts = OPTS)
# it will be called with a string for the regexp and a string
# for the value to compare, and should return whether the regexp
# matches.
# :regexp_function_cache_class :: Defaults to Hash. To avoid a potential
# memory leak, pass ObjectSpace::WeakKeyMap.
# :regexp_function_cache :: If the +:setup_regexp_function+ option is set to +:cached+, this
# determines the cache to use. It should be either a proc (which is called to
# return the cache object) or a class (which is instantiated with +new+), and it
# defaults to +Hash+. You can use +ObjectSpace::WeakKeyMap+ on Ruby 3.3+ to
# have the VM automatically remove regexps from the cache after they
# are no longer used.
def connect(server)
opts = server_opts(server)
opts[:database] = ':memory:' if blank_object?(opts[:database])
Expand All @@ -135,7 +138,7 @@ def connect(server)
connection_pragmas.each{|s| log_connection_yield(s, db){db.execute_batch(s)}}

if typecast_value_boolean(opts[:setup_regexp_function])
setup_regexp_function(db, opts[:setup_regexp_function], opts[:regexp_function_cache_class])
setup_regexp_function(db, opts[:setup_regexp_function])
end

class << db
Expand Down Expand Up @@ -210,24 +213,28 @@ def adapter_initialize
set_integer_booleans
end

def setup_regexp_function(db, how, cache_class)
def setup_regexp_function(db, how)
case how
when Proc
# nothing
when :cached, "cached"
cache_class ||= Hash
cache = cache_class.new
how = lambda do |regexp_str, str|
if regexp = cache[regexp_str]
regexp.match(str)
else
regexp = Regexp.new(regexp_str)
cache[regexp_str] = regexp
regexp.match(str)
cache = @opts[:regexp_function_cache] || Hash
cache = cache.is_a?(Proc) ? cache.call : cache.new
how = if RUBY_VERSION >= '2.4'
lambda do |regexp_str, str|
(cache[regexp_str] ||= Regexp.new(regexp_str)).match?(str)
end
else
lambda do |regexp_str, str|
(cache[regexp_str] ||= Regexp.new(regexp_str)).match(str)
end
end
else
how = lambda{|regexp_str, str| Regexp.new(regexp_str).match(str)}
how = if RUBY_VERSION >= '2.4'
lambda{|regexp_str, str| Regexp.new(regexp_str).match?(str)}
else
lambda{|regexp_str, str| Regexp.new(regexp_str).match(str)}
end
end

db.create_function("regexp", 2) do |func, regexp_str, str|
Expand Down
57 changes: 30 additions & 27 deletions spec/adapters/sqlite_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -879,43 +879,46 @@
end
end if DB.sqlite_version >= 33800


# Force a separate Database object for these tests, so SQLite regexp support is always
# tested if testing the sqlite adapter.
describe 'Regexp support' do
before do
@db = DB
def setup_db(opts)
db = Sequel.sqlite(opts)

@db.create_table(:names) do
db.create_table(:names) do
primary_key :id
String :name
end

@db[:names].insert(name: 'Adam')
@db[:names].insert(name: 'Jane')
@db[:names].insert(name: 'John')
@db[:names].insert(name: 'Leo')
@db[:names].insert(name: 'Tim')
@db[:names].insert(name: 'Tom')
db[:names].insert(name: 'Adam')
db[:names].insert(name: 'Jane')
db[:names].insert(name: 'John')
db
end
after do
@db.drop_table?(:names)

it "should support setup_regexp_function: true option" do
db = setup_db(:setup_regexp_function=>true, :keep_reference=>false)
db.must_be :allow_regexp?
db[:names].where(name: /^J/).select_order_map(:name).must_equal %w[Jane John]
end

it "should support regexp" do
@db.must_be :allow_regexp?
it "should support setup_regexp_function: :cached option" do
db = setup_db(:setup_regexp_function=>:cached, :keep_reference=>false)
db.must_be :allow_regexp?
db[:names].where(name: /^J/).select_order_map(:name).must_equal %w[Jane John]
end

it "should find by regexp" do
names = @db[:names].where(name: /^J/).map { |row| row[:name] }
names.must_include 'Jane'
names.must_include 'John'
names.wont_include 'Adam'
it "should support :regexp_function_cache option with setup_regexp_function: :cached option" do
cache = {}
db = setup_db(:setup_regexp_function=>:cached, :regexp_function_cache=>proc{cache}, :keep_reference=>false)
db.must_be :allow_regexp?
db[:names].where(name: /^J/).select_order_map(:name).must_equal %w[Jane John]
cache.size.must_equal 1
end

it "caches regexp" do
before = ObjectSpace.count_objects[:T_REGEXP]
@db[:names].where(name: /^J/)
after = ObjectSpace.count_objects[:T_REGEXP]
diff = after - before
diff.must_be :<=, 1
end if [:cached, "cached"].include? DB.opts[:setup_regexp_function]
end if DB.adapter_scheme == :sqlite && DB.opts[:setup_regexp_function]
it "should support :regexp_function_cache option with WeakKeyMap with setup_regexp_function: :cached option" do
db = setup_db(:setup_regexp_function=>:cached, :regexp_function_cache=>ObjectSpace::WeakKeyMap, :keep_reference=>false)
db.must_be :allow_regexp?
db[:names].where(name: /^J/).select_order_map(:name).must_equal %w[Jane John]
end if RUBY_VERSION >= '3.3'
end if DB.adapter_scheme == :sqlite

0 comments on commit 433b937

Please sign in to comment.