module Ultrasphinx
  class Configure
    class << self

      include Associations

      # Force all the indexed models to load and register in the MODEL_CONFIGURATION hash.
      def load_constants
        Dir.chdir "#{RAILS_ROOT}/app/models/" do
          Dir["**/*.rb"].each do |filename|
            open(filename) do |file|
              begin
                if file.grep(/^\s+is_indexed/).any?
                  filename = filename[0..-4]
                  begin
                    File.basename(filename).camelize.constantize
                  rescue NameError => e
                    filename.camelize.constantize
                  end
                end
              rescue Object => e
                say "warning: critical autoload error on #{filename}; try referencing \"#{filename.camelize}\" directly in the console"
                # say e.backtrace.join("\n") if RAILS_ENV == "development"
              end
            end
          end
        end

        # Build the field-to-type mappings.
        Fields.instance.configure(MODEL_CONFIGURATION)
      end
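
      # For reference, the scan above picks up any model file containing a
      # top-level is_indexed call. A minimal, hypothetical declaration (see the
      # is_indexed documentation for the full option set) looks something like:
      #
      #   class Post < ActiveRecord::Base
      #     is_indexed :fields => ['title', 'body']
      #   end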

      # Main SQL builder.
      def run
        load_constants

        say "rebuilding configurations for #{RAILS_ENV} environment"
        say "available models are #{MODEL_CONFIGURATION.keys.to_sentence}"

        File.open(CONF_PATH, "w") do |conf|
          conf.puts global_header
          say "generating SQL"

          INDEXES.each do |index|
            sources = []
            cached_groups = Fields.instance.groups.join("\n")

            MODEL_CONFIGURATION.each_with_index do |model_and_options, class_id|
              # This relies on hash sort order being deterministic per-machine
              model, options = model_and_options
              klass = model.constantize
              source = "#{model.tableize.gsub('/', '__')}_#{index}"

              # If we are building the delta, we only want to include the models that requested it
              if index != DELTA_INDEX or options['delta']
                conf.puts build_source(index, Fields.instance, model, options, class_id, klass, source, cached_groups)
                sources << source
              end
            end

            # Don't generate a delta index if there are no delta tables
            if sources.any?
              conf.puts build_index(index, sources)
            end
          end
        end
      end
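
      # For illustration, the generated file follows this overall shape (names
      # such as posts_main are hypothetical and depend on the actual models and
      # the INDEXES constant):
      #
      #   indexer { ... }                  # from global_header
      #   searchd { ... }
      #   source posts_main { ... }        # one source per model, per index
      #   source comments_main { ... }
      #   index main { source = ... }      # one index section per entry in INDEXES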

      ######

      private

      def global_header
        ["\n# Auto-generated at #{Time.now}.",
          "# Hand modifications will be overwritten.",
          "# #{BASE_PATH}\n",
          INDEXER_SETTINGS.except('delta')._to_conf_string('indexer'),
          "",
          DAEMON_SETTINGS._to_conf_string("searchd")]
      end
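
      # With typical settings this emits something like (illustrative values only;
      # the real keys come from INDEXER_SETTINGS and DAEMON_SETTINGS):
      #
      #   indexer {
      #     mem_limit = 256M
      #   }
      #   searchd {
      #     port = 3312
      #   }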

      def setup_source_database(klass)
        # Supporting Postgres now
        connection_settings = klass.connection.instance_variable_get("@config")

        adapter_defaults = DEFAULTS[ADAPTER]
        raise ConfigurationError, "Unsupported database adapter" unless adapter_defaults

        conf = [adapter_defaults]
        connection_settings.reverse_merge(CONNECTION_DEFAULTS).each do |key, value|
          conf << "#{CONFIG_MAP[key]} = #{value}" if CONFIG_MAP[key]
        end
        conf.sort.join("\n")
      end
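
      # The emitted block is a set of Sphinx sql_* directives, for example
      # (illustrative values; the key mapping lives in CONFIG_MAP):
      #
      #   sql_db = app_production
      #   sql_host = localhost
      #   sql_user = root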

      def build_delta_condition(index, klass, options)
        if index == DELTA_INDEX and options['delta']
          # Add delta condition if necessary
          table, field = klass.table_name, options['delta']['field']
          source_string = "#{table}.#{field}"
          delta_column = klass.columns_hash[field]

          if delta_column
            raise ConfigurationError, "#{source_string} is not a :datetime" unless delta_column.type == :datetime

            if (options['fields'].to_a + options['concatenate'].to_a + options['include'].to_a).detect { |entry| entry['sortable'] }
              # Warn about the sortable problem
              # XXX Kind of in an odd place, but I want it to happen at index time
              Ultrasphinx.say "warning: text sortable columns on #{klass.name} will return wrong results with partial delta indexing"
            end

            delta = INDEXER_SETTINGS['delta']
            if delta
              "#{source_string} > #{SQL_FUNCTIONS[ADAPTER]['delta']._interpolate(delta)}"
            else
              raise ConfigurationError, "No 'indexer { delta }' setting specified in '#{BASE_PATH}'"
            end
          else
            Ultrasphinx.say "warning: #{klass.name} will reindex the entire table during delta indexing"
          end
        end
      end
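
      # For illustration, on MySQL this typically yields a condition shaped like:
      #
      #   posts.updated_at > DATE_SUB(NOW(), INTERVAL 1 DAY)
      #
      # where the right-hand side comes from SQL_FUNCTIONS[ADAPTER]['delta'] and
      # the configured 'indexer { delta }' interval; the exact SQL is adapter-specific.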

      def setup_source_arrays(index, klass, fields, class_id, conditions, order)
        condition_strings = Array(conditions).map do |condition|
          "(#{condition})"
        end

        column_strings = [
          # Multiplex the class_id into the primary key so that every record of
          # every indexed model gets a unique Sphinx document id
          "(#{klass.table_name}.#{klass.primary_key} * #{MODEL_CONFIGURATION.size} + #{class_id}) AS id",
          "#{class_id} AS class_id",
          "'#{klass.name}' AS class"]
        remaining_columns = fields.types.keys - ["class", "class_id"]
        [column_strings, [], condition_strings, [], false, remaining_columns, order]
      end
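
      # Worked example (illustrative numbers): with 3 indexed models, a record
      # with primary key 42 and class_id 1 becomes Sphinx document 42 * 3 + 1 = 127.
      # Since each class_id is a distinct residue modulo the model count, document
      # ids never collide across models.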

      def range_select_string(klass, delta_condition)
        ["sql_query_range = SELECT",
          SQL_FUNCTIONS[ADAPTER]['range_cast']._interpolate("MIN(#{klass.primary_key})"),
          ",",
          SQL_FUNCTIONS[ADAPTER]['range_cast']._interpolate("MAX(#{klass.primary_key})"),
          "FROM #{klass.table_name}",
          ("WHERE #{delta_condition}" if delta_condition)
        ].join(" ")
      end
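
      # For illustration, this emits a line such as:
      #
      #   sql_query_range = SELECT MIN(id) , MAX(id) FROM posts
      #
      # which Sphinx uses to split the main query into $start..$end windows.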

      def query_info_string(klass, class_id)
        "sql_query_info = SELECT * FROM #{klass.table_name} WHERE #{klass.table_name}.#{klass.primary_key} = (($id - #{class_id}) / #{MODEL_CONFIGURATION.size})"
      end
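
      # This is the inverse of the id multiplexing above: continuing the worked
      # example, document 127 maps back to (127 - 1) / 3 = 42, the original
      # primary key. Sphinx's command-line search tool uses sql_query_info to
      # display the matching row.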

      def build_source(index, fields, model, options, class_id, klass, source, groups)

        column_strings, join_strings, condition_strings, group_bys, use_distinct, remaining_columns, order =
          setup_source_arrays(index, klass, fields, class_id, options['conditions'], options['order'])

        delta_condition = build_delta_condition(index, klass, options)
        condition_strings << delta_condition if delta_condition

        column_strings, join_strings, group_bys, remaining_columns =
          build_regular_fields(klass, fields, options['fields'], column_strings, join_strings, group_bys, remaining_columns)
        column_strings, join_strings, group_bys, remaining_columns =
          build_includes(klass, fields, options['include'], column_strings, join_strings, group_bys, remaining_columns)
        column_strings, join_strings, group_bys, use_distinct, remaining_columns =
          build_concatenations(klass, fields, options['concatenate'], column_strings, join_strings, group_bys, use_distinct, remaining_columns)

        column_strings = add_missing_columns(fields, remaining_columns, column_strings)

        ["\n# Source configuration\n\n",
          "source #{source}\n{",
          SOURCE_SETTINGS._to_conf_string,
          setup_source_database(klass),
          range_select_string(klass, delta_condition),
          build_query(klass, column_strings, join_strings, condition_strings, use_distinct, group_bys, order),
          "\n" + groups,
          query_info_string(klass, class_id),
          "}\n\n"]
      end

      # Note: use_distinct is accepted for interface compatibility but deliberately
      # unused; see the comment on DISTINCT below.
      def build_query(klass, column_strings, join_strings, condition_strings, use_distinct, group_bys, order)
        primary_key = "#{klass.table_name}.#{klass.primary_key}"

        group_bys = case ADAPTER
          when 'mysql'
            primary_key
          when 'postgresql'
            # Postgres is very fussy about GROUP BY
            ([primary_key] + group_bys.reject { |s| s == primary_key }.uniq.sort).join(', ')
        end

        ["sql_query =",
          "SELECT",
          # Avoid DISTINCT; it destroys performance
          column_strings.sort_by do |string|
            # Sphinx wants the columns always in the same order, but "id" must be first
            (field = string[/.*AS (.*)/, 1]) == "id" ? "*" : field
          end.join(", "),
          "FROM #{klass.table_name}",
          join_strings.uniq,
          "WHERE #{primary_key} >= $start AND #{primary_key} <= $end",
          condition_strings.uniq.map { |condition| "AND #{condition}" },
          "GROUP BY #{group_bys}",
          ("ORDER BY #{order}" if order)
        ].flatten.compact.join(" ")
      end
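
      # For illustration, a generated query might look like this (hypothetical
      # model, wrapped here for readability):
      #
      #   sql_query = SELECT (posts.id * 3 + 1) AS id, 'Post' AS class,
      #     1 AS class_id, posts.title AS title FROM posts
      #     WHERE posts.id >= $start AND posts.id <= $end GROUP BY posts.id
      #
      # $start and $end are substituted by Sphinx using the sql_query_range bounds.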

      # Sphinx requires every source in an index to return the same column set,
      # so pad out any fields this model doesn't provide with typed NULL columns.
      def add_missing_columns(fields, remaining_columns, column_strings)
        remaining_columns.each do |field|
          column_strings << fields.null(field)
        end
        column_strings
      end

      def build_regular_fields(klass, fields, entries, column_strings, join_strings, group_bys, remaining_columns)
        entries.to_a.each do |entry|
          source_string = if entry['sortable']
            entry['field'] # Use the alias
          else
            "#{entry['table_alias']}.#{entry['field']}" # Use the column
          end

          group_bys << source_string
          column_strings, remaining_columns = install_field(
            fields, source_string, entry['as'], entry['function_sql'], entry['facet'],
            column_strings, remaining_columns)
        end

        [column_strings, join_strings, group_bys, remaining_columns]
      end

      def build_includes(klass, fields, entries, column_strings, join_strings, group_bys, remaining_columns)
        entries.to_a.each do |entry|
          raise ConfigurationError, "You must identify your association with either class_name or association_name, but not both" if entry['class_name'] && entry['association_name']

          association = get_association(klass, entry)

          # You can use 'class_name' and 'association_sql' to associate to a model that
          # doesn't actually have an association.
          join_klass = association ? association.class_name.constantize : entry['class_name'].constantize

          raise ConfigurationError, "Unknown association from #{klass} to #{entry['class_name'] || entry['association_name']}" if !association && !entry['association_sql']

          join_strings = install_join_unless_association_sql(entry['association_sql'], nil, join_strings) do
            "LEFT OUTER JOIN #{join_klass.table_name} AS #{entry['table_alias']} ON " +
              if (macro = association.macro) == :belongs_to
                "#{entry['table_alias']}.#{join_klass.primary_key} = #{klass.table_name}.#{association.primary_key_name}"
              elsif macro == :has_one
                "#{klass.table_name}.#{klass.primary_key} = #{entry['table_alias']}.#{association.primary_key_name}"
              else
                raise ConfigurationError, "Unidentified association macro #{macro.inspect}. Please use the :association_sql key to manually specify the JOIN syntax."
              end
          end

          source_string = "#{entry['table_alias']}.#{entry['field']}"
          group_bys << source_string
          column_strings, remaining_columns = install_field(
            fields, source_string, entry['as'], entry['function_sql'], entry['facet'],
            column_strings, remaining_columns)
        end

        [column_strings, join_strings, group_bys, remaining_columns]
      end
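
      # For illustration, including a field through a belongs_to generates a join
      # shaped like (hypothetical tables; the alias comes from entry['table_alias']
      # and the foreign key from the association's primary_key_name):
      #
      #   LEFT OUTER JOIN users AS users ON users.id = posts.user_id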

      def build_concatenations(klass, fields, entries, column_strings, join_strings, group_bys, use_distinct, remaining_columns)
        entries.to_a.each do |entry|
          if entry['field']
            # Group concats
            # Only has_many's or explicit SQL right now.
            association = get_association(klass, entry)

            # You can use 'class_name' and 'association_sql' to associate to a model that
            # doesn't actually have an association. The table alias chosen automatically
            # might be kind of strange.
            join_klass = association ? association.class_name.constantize : entry['class_name'].constantize

            join_strings = install_join_unless_association_sql(entry['association_sql'], nil, join_strings) do
              # XXX The foreign key is not verified for polymorphic relationships.
              "LEFT OUTER JOIN #{join_klass.table_name} AS #{entry['table_alias']} ON #{klass.table_name}.#{klass.primary_key} = #{entry['table_alias']}.#{association.primary_key_name}" +
                # XXX Is this valid?
                (entry['conditions'] ? " AND (#{entry['conditions']})" : "")
            end

            source_string = "#{entry['table_alias']}.#{entry['field']}"
            order_string = ("ORDER BY #{entry['order']}" if entry['order'])
            # We are using the field in an aggregate, so we don't want to add it to group_bys
            source_string = SQL_FUNCTIONS[ADAPTER]['group_concat']._interpolate(source_string, order_string)
            use_distinct = true

            column_strings, remaining_columns = install_field(
              fields, source_string, entry['as'], entry['function_sql'], entry['facet'],
              column_strings, remaining_columns)

          elsif entry['fields']
            # Regular concats
            subsource_strings = entry['fields'].map { |subfield| "#{entry['table_alias']}.#{subfield}" }
            # These columns are not aggregated, so they do need to be grouped
            subsource_strings.each { |subsource_string| group_bys << subsource_string }
            source_string = "CONCAT_WS(' ', #{subsource_strings.join(', ')})"

            column_strings, remaining_columns = install_field(
              fields, source_string, entry['as'], entry['function_sql'], entry['facet'],
              column_strings, remaining_columns)

          else
            raise ConfigurationError, "Invalid concatenate parameters for #{klass.name}: #{entry.inspect}."
          end
        end

        [column_strings, join_strings, group_bys, use_distinct, remaining_columns]
      end
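
      # For illustration, on MySQL a group concatenation typically expands to
      # something like (hypothetical tables; the exact SQL comes from
      # SQL_FUNCTIONS[ADAPTER]['group_concat'] and is adapter-specific):
      #
      #   GROUP_CONCAT(comments.body ORDER BY comments.id SEPARATOR ' ')
      #
      # Note that CONCAT_WS in the non-aggregate branch is MySQL-flavored SQL.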

      def build_index(index, sources)
        ["\n# Index configuration\n\n",
          "index #{index}\n{",
          sources.sort.map do |source|
            "  source = #{source}"
          end.join("\n"),
          INDEX_SETTINGS.merge('path' => INDEX_SETTINGS['path'] + "/sphinx_index_#{index}")._to_conf_string,
          "}\n\n"]
      end
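
      # For illustration, the resulting section looks something like (hypothetical
      # sources and path; the real values come from INDEX_SETTINGS):
      #
      #   index main
      #   {
      #     source = comments_main
      #     source = posts_main
      #     path = /var/sphinx/sphinx_index_main
      #   }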

      def install_field(fields, source_string, as, function_sql, with_facet, column_strings, remaining_columns)
        source_string = function_sql._interpolate(source_string) if function_sql

        column_strings << fields.cast(source_string, as)
        remaining_columns.delete(as)

        # Generate hashed integer fields for text grouping
        if with_facet
          column_strings << "#{SQL_FUNCTIONS[ADAPTER]['hash']._interpolate(source_string)} AS #{as}_facet"
          remaining_columns.delete("#{as}_facet")
        end

        [column_strings, remaining_columns]
      end
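
      # For illustration, faceting on a text column hashes it to an integer
      # attribute, e.g. on MySQL something like (hypothetical column; the hash
      # function comes from SQL_FUNCTIONS[ADAPTER]['hash']):
      #
      #   CRC32(posts.title) AS title_facet
      #
      # Sphinx can then group on the integer attribute instead of the raw text.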

      def install_join_unless_association_sql(association_sql, join_string, join_strings)
        join_strings << (association_sql or join_string or yield)
      end

      def say(s)
        Ultrasphinx.say s
      end

    end
  end
end