Permalink
Browse files

namespace

  • Loading branch information...
1 parent 36e5cba commit 09b10a820160623a426d7221274391ea3f938dd1 @evan evan committed Jul 20, 2007
View
4 init.rb
@@ -1,6 +1,2 @@
-require 'fields'
require 'ultrasphinx'
-require 'autoload'
-require 'is_indexed'
-require 'search'
View
353 lib/search.rb
@@ -1,353 +0,0 @@
-
-# Ultrasphinx search model
-
-require 'ultrasphinx'
-require 'timeout'
-require 'chronic'
-
-class Search
- include Reloadable if ENV['RAILS_ENV'] == "development" and ENV["USER"] == "eweaver"
-
- OPTIONS = {:command => {:search => 0, :excerpt => 1},
- # :status => {:ok => 0, :error => 1, :retry => 2},
- :search_mode => {:all => 0, :any => 1, :phrase => 2, :boolean => 3, :extended => 4},
- :sort_mode => {:relevance => 0, :desc => 1, :asc => 2, :time => 3},
- :attribute_type => {:integer => 1, :date => 2},
- :group_by => {:day => 0, :week => 1, :month => 2, :year => 3, :attribute => 4}}
-
- DEFAULTS = {:page => 1,
- :models => nil,
- :per_page => 20,
- :sort_by => 'created_at',
- :sort_mode => :relevance,
- :weights => nil,
- :search_mode => :extended,
- :belongs_to => nil,
- :raw_filters => {}}
-
- VIEW_OPTIONS = {
- :search_mode => {"all words" => "all", "some words" => "any", "exact phrase" => "phrase", "boolean" => "boolean", "extended" => "extended"}.sort,
- :sort_mode => [["newest first", "desc"], ["oldest first", "asc"], ["relevance", "relevance"]]
- } #, "Time" => :time }
-
- MAX_RETRIES = 4
-
- MODELS = begin
- Hash[*open(Ultrasphinx::CONF_PATH).readlines.select{|s| s =~ /^(source \w|sql_query )/}.in_groups_of(2).map{|model, _id| [model[/source ([\w\d_-]*)/, 1].classify, _id[/(\d*) AS class_id/, 1].to_i]}.flatten] # XXX blargh
- rescue
- puts "Ultrasphinx configuration file not found for #{ENV['RAILS_ENV'].inspect} environment"
- {}
- end
-
- MAX_MATCHES = Ultrasphinx::DAEMON_SETTINGS["max_matches"].to_i
-
- QUERY_TYPES = [:sphinx, :google]
-
- #INDEXES = YAML.load_file(Ultrasphinx::MODELS_HASH).keys.select{|x| !x.blank?}.map(&:tableize) + ["complete"]
-
- attr_reader :options
- attr_reader :query
- attr_reader :results
- attr_reader :response
- attr_reader :subtotals
-
- def self.find *args
- args.push({}) unless args.last.is_a? Hash
- args.unshift :sphinx if args.size == 2
- self.new(*args).run
- end
-
- def initialize style, query, opts={}
- opts = {} unless opts
- raise Sphinx::SphinxArgumentError, "Invalid query type: #{style.inspect}" unless QUERY_TYPES.include? style
- @query = (query || "")
- @parsed_query = style == :google ? parse_google(@query) : @query
-
- @results = []
- @subtotals = {}
- @response = {}
-
- @options = DEFAULTS.merge(Hash[*opts.map do |key, value|
- [key.to_sym,
- if value.respond_to?(:to_i) && value.to_i.to_s == value
- value.to_i
- elsif value == ""
- nil
- elsif value.is_a? String and key.to_s != "sort_by"
- value.to_sym
- else
- value
- end]
- end._flatten_once])
- @options[:models] = Array(@options[:models])
-
- raise Sphinx::SphinxArgumentError, "Invalid options: #{@extra * ', '}" if (@extra = (@options.keys - (OPTIONS.merge(DEFAULTS).keys))).size > 0
- @options[:belongs_to] = @options[:belongs_to].name if @options[:belongs_to].is_a? Class
- end
-
- def run(instantiate = true)
- # set all the options
- @request = Sphinx::Client.new
- @request.SetServer(Ultrasphinx::PLUGIN_SETTINGS['server_host'], Ultrasphinx::PLUGIN_SETTINGS['server_port'])
- offset, limit = options[:per_page] * (options[:page] - 1), options[:per_page]
- @request.SetLimits offset, limit, [offset + limit, MAX_MATCHES].min
- @request.SetMatchMode map_option(:search_mode)
- @request.SetSortMode map_option(:sort_mode), options[:sort_by]
-
- if weights = options[:weights]
- @request.SetWeights(Ultrasphinx::Fields.instance.select{|n,t| t == 'text'}.map(&:first).sort.inject([]) do |array, field|
- array << (weights[field] || 1.0)
- end)
- end
-
- #@request.SetIdRange # never useful
-
- unless options[:models].compact.empty?
- @request.SetFilter 'class_id', options[:models].map{|m| MODELS[m.to_s]}
- end
-
- if options[:belongs_to]
- # not sure if this actually works
- raise Sphinx::SphinxArgumentError, "You must specify a specific :model when using :belongs_to" unless options[:models] and options(:models).size == 1
- parent = options[:belongs_to]
- association = parent.class.reflect_on_all_associations.select{|a| options[:models] == a.klass.name}.first
- if MODELS.keys.inject(true) {|b, klass| b and klass.constantize.columns.map(&:name).include? association.options[:foreign_key]}
- key_name = "global_#{association.options[:foreign_key]}"
- else
- key_name = "#{options[:models].first.tableize}_#{association.options[:foreign_key]}"
- end
- @request.SetFilter key_name, [parent.id]
- end
-
- options[:raw_filters].each do |field, value|
- begin
- unless value.is_a? Range
- @request.SetFilter field, Array(value)
- else
- min, max = [value.first, value.last].map do |x|
- x._to_numeric if x.is_a? String
- end
- unless min.class != max.class
- min, max = max, min if min > max
- @request.SetFilterRange field, min, max
- end
- end
- rescue NoMethodError => e
- raise Sphinx::SphinxArgumentError, "filter: #{field.inspect}:#{value.inspect} is invalid"
- end
- end
- # @request.SetGroup # not useful
-
- tries = 0
- logger.info "Ultrasphinx: Searching for #{query.inspect} (parsed as #{@parsed_query.inspect}), options #{@options.inspect}"
- begin
- # run the search
- @response = @request.Query(@parsed_query)
- logger.info "Ultrasphinx: Search returned, error #{@request.GetLastError.inspect}, warning #{@request.GetLastWarning.inspect}, returned #{total}/#{response['total_found']} in #{time} seconds."
-
- # get all the subtotals, XXX should be configurable
- # andrew says there's a better way to do this
- filtered_request = @request.dup
- MODELS.each do |key, value|
- filtered_request.instance_eval { @filters.delete_if {|f| f['attr'] == 'class_id'} }
- filtered_request.SetFilter 'class_id', [value]
- @subtotals[key] = @request.Query(@parsed_query)['total_found']
-# logger.debug "Ultrasphinx: Found #{subtotals[key]} records for sub-query #{key} (filters: #{filtered_request.instance_variable_get('@filters').inspect})"
- end
-
- @results = instantiate ? reify_results(response['matches']) : response['matches']
- rescue Sphinx::SphinxResponseError, Sphinx::SphinxTemporaryError, Errno::EPIPE => e
- if (tries += 1) <= MAX_RETRIES
- logger.warn "Ultrasphinx: Restarting query (#{tries} attempts already) (#{e})"
- if tries == MAX_RETRIES
- logger.warn "Ultrasphinx: Sleeping..."
- sleep(3)
- end
- retry
- else
- logger.warn "Ultrasphinx: Query failed"
- raise e
- end
- end
- end
-
- def excerpt
- run unless run?
- return if results.empty?
-
- maps = results.map do |record|
- [record] <<
- [[:title, :name], [:body, :description, :content]].map do |methods|
- methods.detect{|x| record.respond_to? x}
- end
- end
-
- texts = maps.map do |record, methods|
- [record.send(methods[0]), record.send(methods[1])]
- end.flatten.map{|x| x.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ").gsub(/http.*?( |$)/, ' ')}
-
- responses = @request.BuildExcerpts(
- texts,
- "complete",
- @parsed_query.gsub(/AND|OR|NOT|\@\w+/, ""),
- :before_match => "<strong>", :after_match => "</strong>",
- :chunk_separator => "...",
- :limit => 200,
- :around => 1).in_groups_of(2)
-
- maps.each_with_index do |record_and_methods, i|
- record, methods = record_and_methods
- 2.times do |j|
- record._metaclass.send(:define_method, methods[j]) { responses[i][j] }
- end
- end
-
- @results = maps.map(&:first).map(&:freeze)
- end
-
-
- def total
- [response['total_found'], MAX_MATCHES].min
- end
-
- def found
- results.size
- end
-
- def time
- response['time']
- end
-
- def run?
- !response.blank?
- end
-
- def page
- options[:page]
- end
-
- def per_page
- options[:per_page]
- end
-
- def last_page
- (total / per_page) + (total % per_page == 0 ? 0 : 1)
- end
-
- private
-
- def parse_google query
- return unless query
- # alters google-style querystring into sphinx-style
- query = query.gsub(" AND ", " ").scan(/[^"() ]*["(][^")]*[")]|[^"() ]+/) # thanks chris2
- query.each_with_index do |token, index|
-
- if token =~ /^(.*?)\((.*)\)(.*?$)/
- token = query[index] = "#{$1}(#{parse_google $2})#{$3}" # recurse for parens
- end
-
- case token
- when "OR"
- query[index] = "|"
- when "NOT"
- query[index] = "-#{query[index+1]}"
- query[index+1] = ""
- when "AND"
- query[index] = ""
- when /:/
- query[query.size] = "@" + query[index].sub(":", " ")
- query[index] = ""
- end
-
- end
- query.join(" ").squeeze(" ")
- end
-
- def reify_results(sphinx_ids)
- sphinx_ids = sphinx_ids.sort_by{|k, v| v['index']}.map(&:first).reverse # sort and then toss the rest of the data
-
- # find associated record ids
- ids = Hash.new([])
- sphinx_ids.each do |_id|
- ids[MODELS.invert[_id % MODELS.size]] += [_id / MODELS.size] # yay math
- end
- raise Sphinx::SphinxResponseError, "impossible document id in query result" unless ids.values.flatten.size == sphinx_ids.size
-
- # fetch them for real
- results = []
- ids.each do |model, id_set|
- klass = model.constantize
- finder = klass.respond_to?(:get_cache) ? :get_cache : :find
- logger.debug "Ultrasphinx: using #{klass.name}\##{finder} as finder method"
-
- begin
- results += case instances = id_set.map {|id| klass.send(finder, id)} # XXX temporary until we update cache_fu
- when Hash
- instances.values
- when Array
- instances
- else
- Array(instances)
- end
- rescue ActiveRecord:: ActiveRecordError => e
- raise Sphinx::SphinxResponseError, e.inspect
- end
- end
-
- # put them back in order
- results.sort_by do |r|
- raise Sphinx::SphinxResponseError, "Bogus ActiveRecord id for #{r.class}:#{r.id}" unless r.id
- index = (sphinx_ids.index(sphinx_id = r.id * MODELS.size + MODELS[r.class.base_class.name]))
- raise Sphinx::SphinxResponseError, "Bogus reverse id for #{r.class}:#{r.id} (Sphinx:#{sphinx_id})" unless index
- index / sphinx_ids.size.to_f
- end
- end
-
- def map_option opt
- opt = opt.to_sym
- OPTIONS[opt][options[opt]] or raise Sphinx::SphinxArgumentError, "Invalid option value :#{opt} => #{options[opt]}"
- end
-
- def logger; ActiveRecord::Base.logger; end
-
-end
-
-class Array
- def _flatten_once
- self.inject([]){|r, el| r + Array(el)}
- end
-end
-
-class Object
- def _metaclass; (class << self; self; end); end
-end
-
-class String
- def _to_numeric
- zeroless = self.squeeze(" ").strip.sub(/^0+(\d)/, '\1')
- zeroless.sub!(/(\...*?)0+$/, '\1')
- if zeroless.to_i.to_s == zeroless
- zeroless.to_i
- elsif zeroless.to_f.to_s == zeroless
- zeroless.to_f
- elsif date = Chronic.parse(self)
- date.to_i
- else
- self
- end
- end
-end
-
-# leftovers
-
-# blargh
-# Array(options[:belongs_to)).each do |parent| # XXX really, only use one parent right now
-# associations = parent.class.reflect_on_all_associations.select{|a| MODELS.keys.include? a.klass.name}.select{|a| [:has_many, :has_one].include? a.macro}.select{|a| !a.options[:through]} # no has_many :through right now
-# names = associations.map(&:klass).map(&:name)
-# if names.size > 1 and !options[:models) and names.size < MODELS.size # XXX may return spurious results right now
-# associations.each {|a| SetFilter "#{a.klass.name.tableize}_#{a.options[:foreign_key]}", [parent.id, Ultrasphinx::MAX_INT]}
-# SetFilter 'class_id', MODELS.values_at(*names)
-# elsif options[:models) or names.size == 1
-#
-# else
-# associations.each {|a| SetFilter "#{a.klass.name.tableize}_#{a.options[:foreign_key]}", [parent.id, Ultrasphinx::MAX_INT]}
View
200 lib/ultrasphinx.rb
@@ -1,194 +1,8 @@
-require 'yaml'
-
-module Ultrasphinx
-
- class Exception < ::Exception
- end
- class ConfigurationError < Exception
- end
- class DaemonError < Exception
- end
-
- CONF_PATH = "#{RAILS_ROOT}/config/environments/sphinx.#{RAILS_ENV}.conf"
- ENV_BASE_PATH = "#{RAILS_ROOT}/config/environments/sphinx.#{RAILS_ENV}.base"
- GENERIC_BASE_PATH = "#{RAILS_ROOT}/config/sphinx.base"
- BASE_PATH = (File.exist?(ENV_BASE_PATH) ? ENV_BASE_PATH : GENERIC_BASE_PATH)
-
- raise ConfigurationError, "Please create a #{BASE_PATH} configuration file." unless File.exist? BASE_PATH
-
- def self.options_for(heading)
- section = open(BASE_PATH).read[/^#{heading}.*?\{(.*?)\}/m, 1]
- raise "missing heading #{heading} in #{BASE_PATH}" if section.nil?
- lines = section.split("\n").reject { |l| l.strip.empty? }
- options = lines.map do |c|
- c =~ /\s*(.*?)\s*=\s*([^\#]*)/
- $1 ? [$1, $2.strip] : []
- end
- Hash[*options.flatten]
- end
-
- SOURCE_DEFAULTS = %(
- strip_html = 0
- index_html_attrs =
- sql_query_pre = SET SESSION group_concat_max_len = 65535
- sql_query_pre = SET NAMES utf8
- sql_query_post =
- sql_range_step = 20000
- )
-
- MAX_INT = 2**32-1
- COLUMN_TYPES = {:string => 'text', :text => 'text', :integer => 'numeric', :date => 'date', :datetime => 'date' }
- CONFIG_MAP = {:username => 'sql_user',
- :password => 'sql_pass',
- :host => 'sql_host',
- :database => 'sql_db',
- :adapter => 'type',
- :port => 'sql_port',
- :socket => 'sql_sock'}
- OPTIONAL_SPHINX_KEYS = ['morphology', 'stopwords', 'min_word_len', 'charset_type', 'charset_table', 'docinfo']
- PLUGIN_SETTINGS = options_for('ultrasphinx')
- DAEMON_SETTINGS = options_for('searchd')
-
- MAX_WORDS = 2**16 # maximum number of stopwords built
- STOPWORDS_PATH = "#{Ultrasphinx::PLUGIN_SETTINGS['path']}/stopwords.txt}"
-
- #logger.debug "Ultrasphinx options are: #{PLUGIN_SETTINGS.inspect}"
-
- MODELS_HASH = {}
-
- class << self
- def load_constants
- Dir["#{RAILS_ROOT}/app/models/**/*.rb"].each do |filename|
- next if filename =~ /\/(\.svn|CVS|\.bzr)\//
- begin
- open(filename) {|file| load filename if file.grep(/is_indexed/).any?}
- rescue Object => e
- puts "Ultrasphinx: warning; autoload error on #{filename}"
- end
- end
- Fields.instance.configure(MODELS_HASH)
- end
-
- def configure
- load_constants
-
- puts "Rebuilding Ultrasphinx configurations for #{ENV['RAILS_ENV']} environment"
- puts "Available models are #{MODELS_HASH.keys.to_sentence}"
- File.open(CONF_PATH, "w") do |conf|
- conf.puts "\n# Auto-generated at #{Time.now}.\n# Hand modifications will be overwritten.\n"
-
- conf.puts "\n# #{BASE_PATH}"
- conf.puts open(BASE_PATH).read.sub(/^ultrasphinx.*?\{.*?\}/m, '') + "\n"
-
- index_list = {"complete" => []}
-
- conf.puts "\n# Source configuration\n\n"
-
- puts "Generating SQL"
- MODELS_HASH.each_with_index do |model_options, class_id|
- model, options = model_options
- klass, source = model.constantize, model.tableize
-
-# puts "SQL for #{model}"
-
- index_list[source] = [source]
- index_list["complete"] << source
-
- conf.puts "source #{source}\n{"
- conf.puts SOURCE_DEFAULTS
- klass.connection.instance_variable_get("@config").each do |key, value|
- conf.puts "#{CONFIG_MAP[key]} = #{value}" if CONFIG_MAP[key]
- end
-
- table, pkey = klass.table_name, klass.primary_key
- condition_strings, join_strings = Array(options[:conditions]).map{|condition| "(#{condition})"}, []
- column_strings = ["(#{table}.#{pkey} * #{MODELS_HASH.size} + #{class_id}) AS id",
- "#{class_id} AS class_id", "'#{klass.name}' AS class"]
- remaining_columns = Fields.instance.keys - ["class", "class_id"]
-
- conf.puts "\nsql_query_range = SELECT MIN(#{pkey}), MAX(#{pkey}) FROM #{table}"
-
- options[:fields].to_a.each do |f|
- column, as = f.is_a?(Hash) ? [f[:field], f[:as]] : [f, f]
- column_strings << Fields.instance.cast("#{table}.#{column}", as)
- remaining_columns.delete(as)
- end
-
- options[:includes].to_a.each do |join|
- join_klass = join[:model].constantize
- association = klass.reflect_on_association(join[:model].underscore.to_sym)
- join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON " +
- if (macro = association.macro) == :belongs_to
- "#{join_klass.table_name}.#{join_klass.primary_key} = #{table}.#{association.primary_key_name}"
- elsif macro == :has_one
- "#{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.instance_variable_get('@foreign_key_name')}"
- else
- raise ConfigurationError, "Unidentified association macro #{macro.inspect}"
- end
- column_strings << "#{join_klass.table_name}.#{join[:field]} AS #{join[:as] or join[:field]}"
- remaining_columns.delete(join[:as] || join[:field])
- end
-
- options[:concats].to_a.select{|concat| concat[:model] and concat[:field]}.each do |group|
- # only has_many's right now
- join_klass = group[:model].constantize
- association = klass.reflect_on_association(group[:association_name] ? group[:association_name].to_sym : group[:model].underscore.pluralize.to_sym)
- join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON #{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.primary_key_name}" + (" AND (#{group[:conditions]})" if group[:conditions]).to_s # XXX make sure foreign key is right for polymorphic relationships
- column_strings << Fields.instance.cast("GROUP_CONCAT(#{join_klass.table_name}.#{group[:field]} SEPARATOR ' ')", group[:as])
- remaining_columns.delete(group[:as])
- end
-
- options[:concats].to_a.select{|concat| concat[:fields]}.each do |concat|
- column_strings << Fields.instance.cast("CONCAT_WS(' ', #{concat[:fields].map{|field| "#{table}.#{field}"}.join(', ')})", concat[:as])
- remaining_columns.delete(concat[:as])
- end
-
-# puts "#{model} has #{remaining_columns.inspect} remaining"
- remaining_columns.each do |field|
- column_strings << Fields.instance.null(field)
- end
-
- query_strings = ["SELECT", column_strings.sort_by do |string|
- # sphinx wants them always in the same order, but "id" must be first
- (field = string[/.*AS (.*)/, 1]) == "id" ? "*" : field
- end.join(", ")]
- query_strings << "FROM #{table}"
- query_strings += join_strings.uniq
- query_strings << "WHERE #{table}.#{pkey} >= $start AND #{table}.#{pkey} <= $end"
- query_strings += condition_strings.uniq.map{|s| "AND #{s}"}
- query_strings << "GROUP BY id"
-
- conf.puts "sql_query = #{query_strings.join(" ")}"
-
- groups = []
- # group and date sorting params... this really only would have to be run once
- Fields.instance.each do |field, type|
- case type
- when 'numeric'
- groups << "sql_group_column = #{field}"
- when 'date'
- groups << "sql_date_column = #{field}"
- end
- end
- conf.puts "\n" + groups.sort_by{|s| s[/= (.*)/, 1]}.join("\n")
- conf.puts "\nsql_query_info = SELECT * FROM #{table} WHERE #{table}.#{pkey} = (($id - #{class_id}) / #{MODELS_HASH.size})"
- conf.puts "}\n\n"
- end
-
- conf.puts "\n# Index configuration\n\n"
- index_list.to_a.sort_by {|x| x.first == "complete" ? 1 : 0}.each do |name, source_list|
- conf.puts "index #{name}\n{"
- source_list.each {|source| conf.puts "source = #{source}"}
- OPTIONAL_SPHINX_KEYS.each do |key|
- conf.puts "#{key} = #{PLUGIN_SETTINGS[key]}" if PLUGIN_SETTINGS[key]
- end
- conf.puts "path = #{PLUGIN_SETTINGS["path"]}/sphinx_index_#{name}"
- conf.puts "}\n\n"
- end
- end
-
- end
-
- end
-end
+require 'ultrasphinx/core_extensions'
+require 'ultrasphinx/ultrasphinx'
+require 'ultrasphinx/autoload'
+require 'ultrasphinx/fields'
+require 'ultrasphinx/is_indexed'
+require 'ultrasphinx/search'
+require 'ultrasphinx/spell'
View
0 lib/autoload.rb → lib/ultrasphinx/autoload.rb
File renamed without changes.
View
29 lib/ultrasphinx/core_extensions.rb
@@ -0,0 +1,29 @@
+
+require 'chronic'
+
+class Array
+  # Flattens exactly one level: every element is coerced with Kernel#Array and
+  # the pieces are concatenated, so arrays nested deeper than one level survive.
+  def _flatten_once
+    inject([]) { |flat, element| flat.concat(Array(element)) }
+  end
+end
+
+class Object
+  # Returns the receiver's singleton class, i.e. the class that holds methods
+  # defined on this one object only.
+  def _metaclass
+    class << self
+      self
+    end
+  end
+end
+
+class String
+  # Best-effort conversion of a string to a number.
+  #
+  # Returns an Integer or Float when the cleaned-up text round-trips through
+  # to_i / to_f, the integer timestamp of Chronic.parse(self) when Chronic
+  # recognizes it, and otherwise the receiver itself.
+  def _to_numeric
+    # Collapse runs of spaces, trim, and drop leading zeros ("007" -> "7") ...
+    cleaned = squeeze(" ").strip.sub(/^0+(\d)/, '\1')
+    # ... and surplus trailing zeros of a fraction ("1.500" -> "1.5"), so the
+    # round-trip comparisons below can succeed.
+    cleaned.sub!(/(\...*?)0+$/, '\1')
+
+    return cleaned.to_i if cleaned.to_i.to_s == cleaned
+    return cleaned.to_f if cleaned.to_f.to_s == cleaned
+
+    parsed = Chronic.parse(self)
+    parsed ? parsed.to_i : self
+  end
+end
+
View
0 lib/fields.rb → lib/ultrasphinx/fields.rb
File renamed without changes.
View
0 lib/is_indexed.rb → lib/ultrasphinx/is_indexed.rb
File renamed without changes.
View
313 lib/ultrasphinx/search.rb
@@ -0,0 +1,313 @@
+
+# Ultrasphinx command-pattern search model
+
+module Ultrasphinx
+ class Search
+ unloadable if RAILS_ENV == "development"
+
+ OPTIONS = {:command => {:search => 0, :excerpt => 1},
+ # :status => {:ok => 0, :error => 1, :retry => 2},
+ :search_mode => {:all => 0, :any => 1, :phrase => 2, :boolean => 3, :extended => 4},
+ :sort_mode => {:relevance => 0, :desc => 1, :asc => 2, :time => 3},
+ :attribute_type => {:integer => 1, :date => 2},
+ :group_by => {:day => 0, :week => 1, :month => 2, :year => 3, :attribute => 4}}
+
+ DEFAULTS = {:page => 1,
+ :models => nil,
+ :per_page => 20,
+ :sort_by => 'created_at',
+ :sort_mode => :relevance,
+ :weights => nil,
+ :search_mode => :extended,
+ :belongs_to => nil,
+ :raw_filters => {}}
+
+ VIEW_OPTIONS = {
+ :search_mode => {"all words" => "all", "some words" => "any", "exact phrase" => "phrase", "boolean" => "boolean", "extended" => "extended"}.sort,
+ :sort_mode => [["newest first", "desc"], ["oldest first", "asc"], ["relevance", "relevance"]]
+ } #, "Time" => :time }
+
+ MAX_RETRIES = 4
+
+    # Maps model class name => Sphinx class_id, recovered from the generated
+    # configuration file: each "source <name>" line is paired with the
+    # "sql_query " line that follows it, which carries "<n> AS class_id".
+    # Falls back to an empty Hash (with a console message) if anything in the
+    # parse raises.
+    MODELS = begin
+      # File.readlines closes the file; open(...).readlines left the handle open.
+      relevant = File.readlines(CONF_PATH).select { |s| s =~ /^(source \w|sql_query )/ }
+      pairs = relevant.in_groups_of(2).map do |source_line, query_line|
+        [source_line[/source ([\w\d_-]*)/, 1].classify, query_line[/(\d*) AS class_id/, 1].to_i]
+      end
+      Hash[*pairs.flatten]
+    rescue
+      puts "Ultrasphinx configuration file not found for #{ENV['RAILS_ENV'].inspect} environment"
+      {}
+    end
+
+ MAX_MATCHES = DAEMON_SETTINGS["max_matches"].to_i
+
+ QUERY_TYPES = [:sphinx, :google]
+
+ #INDEXES = YAML.load_file(MODELS_HASH).keys.select{|x| !x.blank?}.map(&:tableize) + ["complete"]
+
+ attr_reader :options
+ attr_reader :query
+ attr_reader :results
+ attr_reader :response
+ attr_reader :subtotals
+
+    # Convenience entry point: builds a Search from +args+ and runs it at once.
+    # An options Hash is appended when the last argument is not one, and the
+    # query style defaults to :sphinx when only two arguments remain.
+    # Returns whatever #run returns.
+    def self.find(*args)
+      args << {} unless Hash === args.last
+      args.unshift(:sphinx) if args.length == 2
+      new(*args).run
+    end
+
+    # Builds a search without contacting the daemon.
+    #
+    # style - one of QUERY_TYPES; :google queries are rewritten by parse_google.
+    # query - the query string (nil is treated as "").
+    # opts  - overrides merged over DEFAULTS. Keys are symbolized; values that
+    #         are integer-looking strings become Integers, "" becomes nil, and
+    #         any other String becomes a Symbol (except the :sort_by value).
+    #
+    # Raises Sphinx::SphinxArgumentError for an unknown style or option key.
+    def initialize(style, query, opts = {})
+      opts ||= {}
+      unless QUERY_TYPES.include?(style)
+        raise Sphinx::SphinxArgumentError, "Invalid query type: #{style.inspect}"
+      end
+
+      @query = query || ""
+      @parsed_query = (style == :google) ? parse_google(@query) : @query
+
+      @results, @subtotals, @response = [], {}, {}
+
+      # Normalize the caller-supplied options one pair at a time.
+      normalized = {}
+      opts.each do |key, value|
+        normalized[key.to_sym] =
+          if value.respond_to?(:to_i) && value.to_i.to_s == value
+            value.to_i
+          elsif value == ""
+            nil
+          elsif value.is_a?(String) && key.to_s != "sort_by"
+            value.to_sym
+          else
+            value
+          end
+      end
+      @options = DEFAULTS.merge(normalized)
+      @options[:models] = Array(@options[:models])
+
+      # Any key that is neither an OPTIONS nor a DEFAULTS key is rejected.
+      @extra = @options.keys - OPTIONS.merge(DEFAULTS).keys
+      raise Sphinx::SphinxArgumentError, "Invalid options: #{@extra * ', '}" if @extra.size > 0
+      @options[:belongs_to] = @options[:belongs_to].name if @options[:belongs_to].is_a?(Class)
+    end
+
+    # Configures a Sphinx::Client from the current options, executes the query
+    # and collects a total_found subtotal for every entry in MODELS.
+    #
+    # instantiate - when true (the default) the matches are passed through
+    #               reify_results; otherwise the raw response['matches'] are
+    #               stored as the results.
+    #
+    # Returns the results. The query is retried up to MAX_RETRIES times on
+    # Sphinx::SphinxResponseError, Sphinx::SphinxTemporaryError or Errno::EPIPE
+    # before the error is re-raised. Unusable :belongs_to / :raw_filters
+    # settings raise Sphinx::SphinxArgumentError.
+    def run(instantiate = true)
+      # set all the options
+      @request = Sphinx::Client.new
+      @request.SetServer(PLUGIN_SETTINGS['server_host'], PLUGIN_SETTINGS['server_port'])
+      offset, limit = options[:per_page] * (options[:page] - 1), options[:per_page]
+      @request.SetLimits offset, limit, [offset + limit, MAX_MATCHES].min
+      @request.SetMatchMode map_option(:search_mode)
+      @request.SetSortMode map_option(:sort_mode), options[:sort_by]
+
+      if weights = options[:weights]
+        # one weight per 'text' field, in sorted field-name order; 1.0 when unspecified
+        text_fields = Fields.instance.select{|n,t| t == 'text'}.map(&:first).sort
+        @request.SetWeights(text_fields.map {|field| weights[field] || 1.0})
+      end
+
+      #@request.SetIdRange # never useful
+
+      unless options[:models].compact.empty?
+        @request.SetFilter 'class_id', options[:models].map{|m| MODELS[m.to_s]}
+      end
+
+      if options[:belongs_to]
+        # not sure if this actually works
+        # FIX: this used to call options(:models), i.e. the zero-argument
+        # reader with an argument, which always raised ArgumentError.
+        models = options[:models]
+        raise Sphinx::SphinxArgumentError, "You must specify a specific :model when using :belongs_to" if models.nil? || models.size != 1
+        parent = options[:belongs_to]
+        # FIX: compare the single model's name (the Array itself never equals
+        # a String) and fail clearly when no association matches. to_s is
+        # needed because initialize may have turned the name into a Symbol.
+        model_name = models.first.to_s
+        association = parent.class.reflect_on_all_associations.select{|a| model_name == a.klass.name}.first
+        raise Sphinx::SphinxArgumentError, "No association to #{model_name} found for :belongs_to" unless association
+        foreign_key = association.options[:foreign_key]
+        if MODELS.keys.all? {|klass| klass.constantize.columns.map(&:name).include? foreign_key}
+          key_name = "global_#{foreign_key}"
+        else
+          key_name = "#{model_name.tableize}_#{foreign_key}"
+        end
+        @request.SetFilter key_name, [parent.id]
+      end
+
+      options[:raw_filters].each do |field, value|
+        begin
+          if value.is_a? Range
+            # FIX: endpoints that are not Strings used to be mapped to nil, so
+            # every numeric range was rejected; they are now passed through.
+            # Range#begin / #end also avoid iterating the range.
+            min, max = [value.begin, value.end].map do |x|
+              x.is_a?(String) ? x._to_numeric : x
+            end
+            # ranges whose endpoints end up with different classes are skipped
+            if min.class == max.class
+              min, max = max, min if min > max
+              @request.SetFilterRange field, min, max
+            end
+          else
+            @request.SetFilter field, Array(value)
+          end
+        rescue NoMethodError => e
+          raise Sphinx::SphinxArgumentError, "filter: #{field.inspect}:#{value.inspect} is invalid"
+        end
+      end
+      # @request.SetGroup # not useful
+
+      tries = 0
+      logger.info "Ultrasphinx: Searching for #{query.inspect} (parsed as #{@parsed_query.inspect}), options #{@options.inspect}"
+      begin
+        # run the search
+        @response = @request.Query(@parsed_query)
+        logger.info "Ultrasphinx: Search returned, error #{@request.GetLastError.inspect}, warning #{@request.GetLastWarning.inspect}, returned #{total}/#{response['total_found']} in #{time} seconds."
+
+        # get all the subtotals, XXX should be configurable
+        # andrew says there's a better way to do this
+        # NOTE(review): dup is shallow, so @filters is presumably shared with
+        # @request and the edits below leak into it - confirm against the
+        # Sphinx client before relying on @request after this loop.
+        filtered_request = @request.dup
+        MODELS.each do |key, value|
+          filtered_request.instance_eval { @filters.delete_if {|f| f['attr'] == 'class_id'} }
+          filtered_request.SetFilter 'class_id', [value]
+          # FIX: query the request that was just filtered, not @request
+          @subtotals[key] = filtered_request.Query(@parsed_query)['total_found']
+          # logger.debug "Ultrasphinx: Found #{subtotals[key]} records for sub-query #{key} (filters: #{filtered_request.instance_variable_get('@filters').inspect})"
+        end
+
+        @results = instantiate ? reify_results(response['matches']) : response['matches']
+      rescue Sphinx::SphinxResponseError, Sphinx::SphinxTemporaryError, Errno::EPIPE => e
+        if (tries += 1) <= MAX_RETRIES
+          logger.warn "Ultrasphinx: Restarting query (#{tries} attempts already) (#{e})"
+          if tries == MAX_RETRIES
+            # back off once before the final attempt
+            logger.warn "Ultrasphinx: Sleeping..."
+            sleep(3)
+          end
+          retry
+        else
+          logger.warn "Ultrasphinx: Query failed"
+          raise e
+        end
+      end
+    end
+
+    # Replaces each result's title-like and body-like readers with excerpts
+    # built by the Sphinx client (query terms wrapped in <strong>), then
+    # freezes the records. Runs the search first if it has not run yet.
+    # Returns nil when there are no results, otherwise the frozen results.
+    def excerpt
+      run unless run?
+      return if results.empty?
+
+      # Pair every record with the first reader of each kind it responds to.
+      candidates = [[:title, :name], [:body, :description, :content]]
+      maps = results.map do |record|
+        readers = candidates.map { |names| names.detect { |x| record.respond_to? x } }
+        [record, readers]
+      end
+
+      # Two texts per record, flattened, with tags, ellipses, line breaks and
+      # URLs blanked out before they are sent for excerpting.
+      raw_texts = maps.map { |record, readers| [record.send(readers[0]), record.send(readers[1])] }
+      texts = raw_texts.flatten.map do |text|
+        text.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ").gsub(/http.*?( |$)/, ' ')
+      end
+
+      responses = @request.BuildExcerpts(
+        texts,
+        "complete",
+        @parsed_query.gsub(/AND|OR|NOT|\@\w+/, ""),
+        :before_match => "<strong>", :after_match => "</strong>",
+        :chunk_separator => "...",
+        :limit => 200,
+        :around => 1).in_groups_of(2)
+
+      # Shadow the two readers on each record's singleton class.
+      maps.each_with_index do |(record, readers), i|
+        2.times do |j|
+          record._metaclass.send(:define_method, readers[j]) { responses[i][j] }
+        end
+      end
+
+      @results = maps.map { |record, _readers| record.freeze }
+    end
+
+
+    # total_found from the response, capped at MAX_MATCHES.
+    def total
+      [response['total_found'], MAX_MATCHES].min
+    end
+
+    # Number of entries currently held in results.
+    def found
+      results.size
+    end
+
+    # The 'time' value reported in the response.
+    def time
+      response['time']
+    end
+
+    # True once a non-blank response has been stored by #run.
+    def run?
+      !response.blank?
+    end
+
+    # Requested page number.
+    def page
+      options[:page]
+    end
+
+    # Requested page size.
+    def per_page
+      options[:per_page]
+    end
+
+    # Number of pages needed to show +total+ matches, +per_page+ at a time.
+    def last_page
+      pages = total / per_page
+      pages += 1 unless total % per_page == 0
+      pages
+    end
+
+ private
+
+    # Rewrites a Google-style query string into Sphinx extended-query syntax:
+    # " AND " is dropped, OR becomes "|", NOT prefixes the following token with
+    # "-", and tokens containing ":" are moved to the end as "@field value".
+    # Parenthesized groups are rewritten recursively.
+    # Returns nil for a nil query.
+    def parse_google(query)
+      return unless query
+      # split into bare words, quoted phrases and parenthesized groups
+      tokens = query.gsub(" AND ", " ").scan(/[^"() ]*["(][^")]*[")]|[^"() ]+/) # thanks chris2
+      tokens.each_with_index do |token, i|
+
+        if token =~ /^(.*?)\((.*)\)(.*?$)/
+          token = tokens[i] = "#{$1}(#{parse_google $2})#{$3}" # recurse for parens
+        end
+
+        case token
+        when "OR"
+          tokens[i] = "|"
+        when "NOT"
+          # fold the next token into this slot and blank the next one
+          tokens[i] = "-#{tokens[i+1]}"
+          tokens[i+1] = ""
+        when "AND"
+          tokens[i] = ""
+        when /:/
+          # appended during iteration, so the moved token is itself visited later
+          tokens << ("@" + tokens[i].sub(":", " "))
+          tokens[i] = ""
+        end
+
+      end
+      tokens.join(" ").squeeze(" ")
+    end
+
+    # Turns the raw Sphinx matches into model records.
+    #
+    # sphinx_ids - the response's matches; iterated as [document_id, info]
+    #              pairs. The ids are ordered by descending info['index'] and
+    #              the returned records follow that order.
+    #
+    # A document id encodes both the model and the record:
+    #   document_id = record_id * MODELS.size + class_id
+    #
+    # Raises Sphinx::SphinxResponseError when a document id maps to no known
+    # model, when a finder raises ActiveRecord::ActiveRecordError, or when a
+    # loaded record cannot be matched back to a document id.
+    def reify_results(sphinx_ids)
+      sphinx_ids = sphinx_ids.sort_by{|k, v| v['index']}.map(&:first).reverse # sort and then toss the rest of the data
+
+      # find associated record ids
+      models_by_class_id = MODELS.invert # hoisted: was rebuilt for every id
+      ids = {}
+      sphinx_ids.each do |_id|
+        model = models_by_class_id[_id % MODELS.size] # yay math
+        # FIX: the old check compared two sizes that were always equal, so an
+        # unknown class id slipped through and failed later as nil.constantize.
+        raise Sphinx::SphinxResponseError, "impossible document id in query result" unless model
+        (ids[model] ||= []) << (_id / MODELS.size)
+      end
+
+      # fetch them for real
+      results = []
+      ids.each do |model, id_set|
+        klass = model.constantize
+        finder = klass.respond_to?(:get_cache) ? :get_cache : :find
+        logger.debug "Ultrasphinx: using #{klass.name}\##{finder} as finder method"
+
+        begin
+          results += case instances = id_set.map {|id| klass.send(finder, id)} # XXX temporary until we update cache_fu
+            when Hash
+              instances.values
+            when Array
+              instances
+            else
+              Array(instances)
+          end
+        rescue ActiveRecord::ActiveRecordError => e
+          raise Sphinx::SphinxResponseError, e.inspect
+        end
+      end
+
+      # put them back in order
+      results.sort_by do |r|
+        raise Sphinx::SphinxResponseError, "Bogus ActiveRecord id for #{r.class}:#{r.id}" unless r.id
+        index = (sphinx_ids.index(sphinx_id = r.id * MODELS.size + MODELS[r.class.base_class.name]))
+        raise Sphinx::SphinxResponseError, "Bogus reverse id for #{r.class}:#{r.id} (Sphinx:#{sphinx_id})" unless index
+        index / sphinx_ids.size.to_f
+      end
+    end
+
+    # Translates the current value of option +opt+ into the numeric code listed
+    # for it in OPTIONS.
+    # Raises Sphinx::SphinxArgumentError when the value has no code.
+    def map_option(opt)
+      opt = opt.to_sym
+      code = OPTIONS[opt][options[opt]]
+      raise Sphinx::SphinxArgumentError, "Invalid option value :#{opt} => #{options[opt]}" unless code
+      code
+    end
+
+    # Logger used for all search diagnostics: the Rails default logger.
+    def logger; RAILS_DEFAULT_LOGGER; end
+
+ end
+end
View
0 lib/spell.rb → lib/ultrasphinx/spell.rb
File renamed without changes.
View
194 lib/ultrasphinx/ultrasphinx.rb
@@ -0,0 +1,194 @@
+
+require 'yaml'
+
+module Ultrasphinx
+
+ class Exception < ::Exception
+ end
+ class ConfigurationError < Exception
+ end
+ class DaemonError < Exception
+ end
+
+ # Generated, per-environment Sphinx configuration file (written by configure).
+ CONF_PATH = "#{RAILS_ROOT}/config/environments/sphinx.#{RAILS_ENV}.conf"
+ # Hand-written base configuration: an environment-specific file is used when
+ # it exists, otherwise the generic one.
+ ENV_BASE_PATH = "#{RAILS_ROOT}/config/environments/sphinx.#{RAILS_ENV}.base"
+ GENERIC_BASE_PATH = "#{RAILS_ROOT}/config/sphinx.base"
+ BASE_PATH = (File.exist?(ENV_BASE_PATH) ? ENV_BASE_PATH : GENERIC_BASE_PATH)
+
+ # Fail at load time when no base configuration is present.
+ raise ConfigurationError, "Please create a #{BASE_PATH} configuration file." unless File.exist? BASE_PATH
+
+ def self.options_for(heading)
+ section = open(BASE_PATH).read[/^#{heading}.*?\{(.*?)\}/m, 1]
+ raise "missing heading #{heading} in #{BASE_PATH}" if section.nil?
+ lines = section.split("\n").reject { |l| l.strip.empty? }
+ options = lines.map do |c|
+ c =~ /\s*(.*?)\s*=\s*([^\#]*)/
+ $1 ? [$1, $2.strip] : []
+ end
+ Hash[*options.flatten]
+ end
+
+ SOURCE_DEFAULTS = %(
+ strip_html = 0
+ index_html_attrs =
+ sql_query_pre = SET SESSION group_concat_max_len = 65535
+ sql_query_pre = SET NAMES utf8
+ sql_query_post =
+ sql_range_step = 20000
+ )
+
+ MAX_INT = 2**32-1
+ # Rails column type => Sphinx field type
+ COLUMN_TYPES = {:string => 'text', :text => 'text', :integer => 'numeric', :date => 'date', :datetime => 'date' }
+ # database connection setting => Sphinx source setting
+ CONFIG_MAP = {:username => 'sql_user',
+   :password => 'sql_pass',
+   :host => 'sql_host',
+   :database => 'sql_db',
+   :adapter => 'type',
+   :port => 'sql_port',
+   :socket => 'sql_sock'}
+ # plugin settings that are copied into every generated index block when set
+ OPTIONAL_SPHINX_KEYS = ['morphology', 'stopwords', 'min_word_len', 'charset_type', 'charset_table', 'docinfo']
+ # settings parsed from the "ultrasphinx" and "searchd" sections of BASE_PATH
+ PLUGIN_SETTINGS = options_for('ultrasphinx')
+ DAEMON_SETTINGS = options_for('searchd')
+
+ MAX_WORDS = 2**16 # maximum number of stopwords built
+ # the original literal ended in a stray "}", giving a file named "stopwords.txt}"
+ STOPWORDS_PATH = "#{Ultrasphinx::PLUGIN_SETTINGS['path']}/stopwords.txt"
+
+ #logger.debug "Ultrasphinx options are: #{PLUGIN_SETTINGS.inspect}"
+
+ # model name => indexing options; consumed by load_constants and configure
+ MODELS_HASH = {}
+
+ class << self
+ def load_constants
+ Dir["#{RAILS_ROOT}/app/models/**/*.rb"].each do |filename|
+ next if filename =~ /\/(\.svn|CVS|\.bzr)\//
+ begin
+ open(filename) {|file| load filename if file.grep(/is_indexed/).any?}
+ rescue Object => e
+ puts "Ultrasphinx: warning; autoload error on #{filename}"
+ end
+ end
+ Fields.instance.configure(MODELS_HASH)
+ end
+
+ # Regenerates the Sphinx configuration file at CONF_PATH.
+ #
+ # Loads the indexed models, then writes, in order: a header, the contents of
+ # BASE_PATH minus its "ultrasphinx" section, one "source" block per entry of
+ # MODELS_HASH, and one "index" block per source plus a combined "complete"
+ # index. Progress messages go to stdout.
+ #
+ # Raises ConfigurationError for an :includes association that is neither
+ # belongs_to nor has_one.
+ def configure
+ load_constants
+
+ puts "Rebuilding Ultrasphinx configurations for #{ENV['RAILS_ENV']} environment"
+ puts "Available models are #{MODELS_HASH.keys.to_sentence}"
+ File.open(CONF_PATH, "w") do |conf|
+ conf.puts "\n# Auto-generated at #{Time.now}.\n# Hand modifications will be overwritten.\n"
+
+ # copy the base file, dropping its plugin-only "ultrasphinx { ... }" section
+ # NOTE(review): this open(BASE_PATH) handle is never closed explicitly
+ conf.puts "\n# #{BASE_PATH}"
+ conf.puts open(BASE_PATH).read.sub(/^ultrasphinx.*?\{.*?\}/m, '') + "\n"
+
+ # index name => names of the sources it is built from; "complete" gets all of them
+ index_list = {"complete" => []}
+
+ conf.puts "\n# Source configuration\n\n"
+
+ puts "Generating SQL"
+ # class_id is the position of the model in MODELS_HASH's iteration order
+ MODELS_HASH.each_with_index do |model_options, class_id|
+ model, options = model_options
+ klass, source = model.constantize, model.tableize
+
+# puts "SQL for #{model}"
+
+ index_list[source] = [source]
+ index_list["complete"] << source
+
+ conf.puts "source #{source}\n{"
+ conf.puts SOURCE_DEFAULTS
+ # connection settings, read from the connection's @config; only keys listed in CONFIG_MAP are written
+ klass.connection.instance_variable_get("@config").each do |key, value|
+ conf.puts "#{CONFIG_MAP[key]} = #{value}" if CONFIG_MAP[key]
+ end
+
+ table, pkey = klass.table_name, klass.primary_key
+ condition_strings, join_strings = Array(options[:conditions]).map{|condition| "(#{condition})"}, []
+ # document id = primary key * number of models + class_id
+ column_strings = ["(#{table}.#{pkey} * #{MODELS_HASH.size} + #{class_id}) AS id",
+ "#{class_id} AS class_id", "'#{klass.name}' AS class"]
+ # fields not yet selected for this model; whatever is left gets a Fields.instance.null column below
+ remaining_columns = Fields.instance.keys - ["class", "class_id"]
+
+ conf.puts "\nsql_query_range = SELECT MIN(#{pkey}), MAX(#{pkey}) FROM #{table}"
+
+ # columns of the model's own table; an entry is either a name or a {:field, :as} hash
+ options[:fields].to_a.each do |f|
+ column, as = f.is_a?(Hash) ? [f[:field], f[:as]] : [f, f]
+ column_strings << Fields.instance.cast("#{table}.#{column}", as)
+ remaining_columns.delete(as)
+ end
+
+ # single columns pulled in from belongs_to / has_one associations
+ options[:includes].to_a.each do |join|
+ join_klass = join[:model].constantize
+ association = klass.reflect_on_association(join[:model].underscore.to_sym)
+ join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON " +
+ if (macro = association.macro) == :belongs_to
+ "#{join_klass.table_name}.#{join_klass.primary_key} = #{table}.#{association.primary_key_name}"
+ elsif macro == :has_one
+ "#{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.instance_variable_get('@foreign_key_name')}"
+ else
+ raise ConfigurationError, "Unidentified association macro #{macro.inspect}"
+ end
+ column_strings << "#{join_klass.table_name}.#{join[:field]} AS #{join[:as] or join[:field]}"
+ remaining_columns.delete(join[:as] || join[:field])
+ end
+
+ # :concats entries with :model and :field: GROUP_CONCAT over a joined table
+ options[:concats].to_a.select{|concat| concat[:model] and concat[:field]}.each do |group|
+ # only has_many's right now
+ join_klass = group[:model].constantize
+ association = klass.reflect_on_association(group[:association_name] ? group[:association_name].to_sym : group[:model].underscore.pluralize.to_sym)
+ join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON #{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.primary_key_name}" + (" AND (#{group[:conditions]})" if group[:conditions]).to_s # XXX make sure foreign key is right for polymorphic relationships
+ column_strings << Fields.instance.cast("GROUP_CONCAT(#{join_klass.table_name}.#{group[:field]} SEPARATOR ' ')", group[:as])
+ remaining_columns.delete(group[:as])
+ end
+
+ # :concats entries with :fields: CONCAT_WS over columns of the model's own table
+ options[:concats].to_a.select{|concat| concat[:fields]}.each do |concat|
+ column_strings << Fields.instance.cast("CONCAT_WS(' ', #{concat[:fields].map{|field| "#{table}.#{field}"}.join(', ')})", concat[:as])
+ remaining_columns.delete(concat[:as])
+ end
+
+# puts "#{model} has #{remaining_columns.inspect} remaining"
+ remaining_columns.each do |field|
+ column_strings << Fields.instance.null(field)
+ end
+
+ # columns are ordered by their alias; "*" is the sort key that puts "id" first
+ query_strings = ["SELECT", column_strings.sort_by do |string|
+ # sphinx wants them always in the same order, but "id" must be first
+ (field = string[/.*AS (.*)/, 1]) == "id" ? "*" : field
+ end.join(", ")]
+ query_strings << "FROM #{table}"
+ query_strings += join_strings.uniq
+ query_strings << "WHERE #{table}.#{pkey} >= $start AND #{table}.#{pkey} <= $end"
+ query_strings += condition_strings.uniq.map{|s| "AND #{s}"}
+ query_strings << "GROUP BY id"
+
+ conf.puts "sql_query = #{query_strings.join(" ")}"
+
+ groups = []
+ # group and date sorting params... this really only would have to be run once
+ Fields.instance.each do |field, type|
+ case type
+ when 'numeric'
+ groups << "sql_group_column = #{field}"
+ when 'date'
+ groups << "sql_date_column = #{field}"
+ end
+ end
+ conf.puts "\n" + groups.sort_by{|s| s[/= (.*)/, 1]}.join("\n")
+ # reverses the document id encoding used in the "AS id" column above
+ conf.puts "\nsql_query_info = SELECT * FROM #{table} WHERE #{table}.#{pkey} = (($id - #{class_id}) / #{MODELS_HASH.size})"
+ conf.puts "}\n\n"
+ end
+
+ conf.puts "\n# Index configuration\n\n"
+ # sort key 1 for "complete", 0 for every other index, so "complete" comes after them
+ index_list.to_a.sort_by {|x| x.first == "complete" ? 1 : 0}.each do |name, source_list|
+ conf.puts "index #{name}\n{"
+ source_list.each {|source| conf.puts "source = #{source}"}
+ OPTIONAL_SPHINX_KEYS.each do |key|
+ conf.puts "#{key} = #{PLUGIN_SETTINGS[key]}" if PLUGIN_SETTINGS[key]
+ end
+ conf.puts "path = #{PLUGIN_SETTINGS["path"]}/sphinx_index_#{name}"
+ conf.puts "}\n\n"
+ end
+ end
+
+ end
+
+ end
+end
View
14 tasks/ultrasphinx.rake
@@ -14,7 +14,7 @@ namespace :ultrasphinx do
cmd << " --rotate" if daemon_running?
cmd << " complete"
puts cmd
- exec cmd
+ system cmd
end
namespace :daemon do
@@ -23,13 +23,19 @@ namespace :ultrasphinx do
raise Ultrasphinx::DaemonError, "Already running" if daemon_running?
# remove lockfiles
Dir[Ultrasphinx::PLUGIN_SETTINGS["path"] + "*spl"].each {|file| File.delete(file)}
- exec "searchd --config #{Ultrasphinx::CONF_PATH}"
+ system "searchd --config #{Ultrasphinx::CONF_PATH}"
+ if daemon_running?
+ puts "Started successfully"
+ else
+ puts "Failed to start"
+ end
end
desc "Stop the search daemon"
task :stop => [:environment] do
raise Ultrasphinx::DaemonError, "Doesn't seem to be running" unless daemon_running?
system "kill #{daemon_pid}"
+ puts "Stopped"
end
desc "Restart the search daemon"
@@ -55,9 +61,9 @@ namespace :ultrasphinx do
desc "Check if the search daemon is running"
task :status => :environment do
if daemon_running?
- puts "Running."
+ puts "Daemon is running"
else
- puts "Stopped."
+ puts "Daemon is stopped"
end
end
end

0 comments on commit 09b10a8

Please sign in to comment.