/
weighted_query_strategy.rb
670 lines (630 loc) · 29.4 KB
/
weighted_query_strategy.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
module Articles
module Feeds
# @api private
#
# This is an experimental object that we're refining to be a
# competitor to the existing feed strategies.
#
# It works to implement conceptual parity with two methods of
# Articles::Feeds::LargeForemExperimental:
#
# - #default_home_feed
# - #more_comments_minimal_weight_randomized
#
# What do we mean by "conceptual parity"? Those two methods are
# used in the two feeds controllers: StoriesController and
# Stories::FeedsController. And while they use some of the
# internal tooling there's some notable subtle differences.
#
# Where this class differs is that it is aiming to build the feed
# based from the given user's perspective. Whereas the other Feed
# algorithm starts with a list of candidates that are global to
# the given Forem (e.g., starting the base query from the
# `articles.score`, a volatile and swingy value that favors global
# reactions over user desired content).
#
# This is not quite a chronological only feed but could be easily
# modified to favor that.
#
# @note One possible short-coming is that the query does not
# account for the Forem's administrators.
# @note For those considering extending this, be very mindful of
# SQL injection.
class WeightedQueryStrategy
# This constant defines the allowable relevance scoring methods.
#
# A scoring method should be a SQL fragment that produces a
# value between 0 and 1. The closer the value is to 1, the more
# relevant the article is for the given user. Note: the values
# are multiplicative. Make sure to consider if you want a 0
# multiplier for your score. Aspirationally, you may want to
# think of the relevance_score as the range (0,1]. That is
# greater than 0 and less than or equal to 1.
#
# In addition, as part of initialization, the caller can
# configure each of the scoring methods :cases and :fallback.
#
# Each scoring method has the following keys:
#
# - clause: [Required] The SQL clause statement; note: there
#           exists a coupling between the clause and the SQL
#           fragments that join the various tables. Also, under
#           no circumstances should you allow any user value for
#           this, as it is not something we can sanitize.
#
# - cases: [Required] An Array of Arrays; the first value is
#          what matches the clause, the second value is the
#          multiplicative factor.
#
# - fallback: [Required] When no case is matched, use this
#             factor.
#
# - requires_user: [Required] Does this scoring method require a
#                  given user? If so, skip it when we don't
#                  have a user.
#
# - group_by: [Optional] An SQL fragment that ensures a valid
#             postgres statement in older versions of postgres.
#             See
#             https://github.com/forem/forem/pull/15240#discussion_r750392321
#             for further sleuthing details. When you reference
#             a field in the clause, you likely need to include
#             a corresponding :group_by attribute.
#
# - joins: [Optional] An SQL fragment that defines the join
#          necessary to fulfill the clause of the scoring
#          method.
#
# - enabled: [Optional] When false, we won't include this. By
#            default a scoring method is enabled.
#
# @note The group by clause appears necessary for postgres
#       versions and Heroku configurations of current (as of
#       <2021-11-16 Tue>) DEV.to installations.
SCORING_METHOD_CONFIGURATIONS = {
  # Weight to give based on the age of the article: today's
  # articles get the full factor, decaying slightly per day.
  daily_decay_factor: {
    clause: "(current_date - articles.published_at::date)",
    cases: [
      [0, 1], [1, 0.99], [2, 0.985],
      [3, 0.98], [4, 0.975], [5, 0.97],
      [6, 0.965], [7, 0.96], [8, 0.955],
      [9, 0.95], [10, 0.945], [11, 0.94],
      [12, 0.935], [13, 0.93], [14, 0.925]
    ],
    fallback: 0.9,
    requires_user: false,
    group_by: "articles.published_at"
  },
  # Weight to give for the number of comments on the article
  # from other users that the given user follows.
  comment_count_by_those_followed_factor: {
    clause: "COUNT(comments_by_followed.id)",
    cases: [[0, 0.95], [1, 0.98], [2, 0.99]],
    fallback: 0.93,
    requires_user: true,
    joins: ["LEFT OUTER JOIN follows AS followed_user
              ON articles.user_id = followed_user.followable_id
              AND followed_user.followable_type = 'User'
              AND followed_user.follower_id = :user_id
              AND followed_user.follower_type = 'User'",
            "LEFT OUTER JOIN comments AS comments_by_followed
              ON comments_by_followed.commentable_id = articles.id
              AND comments_by_followed.commentable_type = 'Article'
              AND followed_user.followable_id = comments_by_followed.user_id
              AND followed_user.followable_type = 'User'
              AND comments_by_followed.deleted = false
              AND comments_by_followed.created_at > :oldest_published_at"]
  },
  # Weight to give for the total number of comments on the article.
  comments_count_factor: {
    clause: "articles.comments_count",
    # 0 comments => 0.8, scaling up by 0.02 per comment through 9.
    cases: (0..9).map { |n| [n, 0.8 + (0.02 * n)] },
    fallback: 1,
    requires_user: false,
    group_by: "articles.comments_count"
  },
  # Weight to give based on the difference between the experience
  # level of the article and the given user.
  experience_factor: {
    clause: "ROUND(ABS(articles.experience_level_rating - (SELECT
            (CASE
              WHEN experience_level IS NULL THEN :default_user_experience_level
              ELSE experience_level END ) AS user_experience_level
            FROM users_settings WHERE users_settings.user_id = :user_id
            )))",
    cases: [[0, 1], [1, 0.98], [2, 0.97], [3, 0.96], [4, 0.95], [5, 0.94]],
    fallback: 0.93,
    requires_user: true,
    group_by: "articles.experience_level_rating",
    enabled: false
  },
  # Weight to give for featured or unfeatured articles.
  featured_article_factor: {
    clause: "(CASE articles.featured WHEN true THEN 1 ELSE 0 END)",
    cases: [[1, 1]],
    fallback: 0.85,
    requires_user: false,
    group_by: "articles.featured",
    enabled: true
  },
  # Weight to give when the given user follows the article's
  # author.
  following_author_factor: {
    clause: "COUNT(followed_user.follower_id)",
    cases: [[0, 0.8], [1, 1]],
    fallback: 1,
    requires_user: true,
    joins: ["LEFT OUTER JOIN follows AS followed_user
              ON articles.user_id = followed_user.followable_id
              AND followed_user.followable_type = 'User'
              AND followed_user.follower_id = :user_id
              AND followed_user.follower_type = 'User'"]
  },
  # Weight to give when the given user follows the article's
  # organization.
  following_org_factor: {
    clause: "COUNT(followed_org.follower_id)",
    cases: [[0, 0.95], [1, 1]],
    fallback: 1,
    requires_user: true,
    joins: ["LEFT OUTER JOIN follows AS followed_org
              ON articles.organization_id = followed_org.followable_id
              AND followed_org.followable_type = 'Organization'
              AND followed_org.follower_id = :user_id
              AND followed_org.follower_type = 'User'"]
  },
  # Weight to give an article based on its most recent comment.
  latest_comment_factor: {
    clause: "(current_date - MAX(comments.created_at)::date)",
    cases: [[0, 1], [1, 0.9988]],
    fallback: 0.988,
    requires_user: false,
    joins: ["LEFT OUTER JOIN comments
              ON comments.commentable_id = articles.id
              AND comments.commentable_type = 'Article'
              AND comments.deleted = false
              AND comments.created_at > :oldest_published_at"]
  },
  # Weight to give for the number of intersecting tags the given
  # user follows and the article has.
  matching_tags_factor: {
    # Capped at 10 so the cases below can enumerate the range.
    clause: "LEAST(10.0, SUM(followed_tags.points))::integer",
    cases: (0..9).map { |n| [n, 0.70 + (0.0303 * n)] },
    fallback: 1,
    requires_user: true,
    joins: ["LEFT OUTER JOIN taggings
              ON taggings.taggable_id = articles.id
              AND taggable_type = 'Article'",
            "INNER JOIN tags
              ON taggings.tag_id = tags.id",
            "LEFT OUTER JOIN follows AS followed_tags
              ON tags.id = followed_tags.followable_id
              AND followed_tags.followable_type = 'ActsAsTaggableOn::Tag'
              AND followed_tags.follower_type = 'User'
              AND followed_tags.follower_id = :user_id
              AND followed_tags.explicit_points >= 0"]
  },
  # Weight privileged users' reactions: strongly demote articles
  # moderators reacted negatively to; promote positive ones.
  privileged_user_reaction_factor: {
    clause: "(CASE
             WHEN articles.privileged_users_reaction_points_sum < :negative_reaction_threshold THEN -1
             WHEN articles.privileged_users_reaction_points_sum > :positive_reaction_threshold THEN 1
             ELSE 0 END)",
    cases: [[-1, 0.2],
            [1, 1]],
    fallback: 0.95,
    requires_user: false,
    group_by: "articles.privileged_users_reaction_points_sum"
  },
  # Weight to give for the number of reactions on the article.
  reactions_factor: {
    clause: "articles.reactions_count",
    cases: [
      [0, 0.9988], [1, 0.9988], [2, 0.9988],
      [3, 0.9988]
    ],
    fallback: 1,
    requires_user: false,
    group_by: "articles.reactions_count"
  },
  # Weight to give based on spaminess of the article; any
  # non-zero spaminess zeroes the whole relevance score.
  spaminess_factor: {
    clause: "articles.spaminess_rating",
    cases: [[0, 1]],
    fallback: 0,
    requires_user: false,
    group_by: "articles.spaminess_rating"
  }
}.freeze

# Experience level assumed when a user has not set one.
DEFAULT_USER_EXPERIENCE_LEVEL = 5
# Privileged reaction point sums below this count as a negative signal.
DEFAULT_NEGATIVE_REACTION_THRESHOLD = -10
# Privileged reaction point sums above this count as a positive signal.
DEFAULT_POSITIVE_REACTION_THRESHOLD = 10
# @param user [User, nil] the user whose feed we are building; nil for
#        signed-out visitors.
# @param number_of_articles [Integer] how many articles we are returning.
# @param page [Integer, nil] the pagination page (treated as 1 when nil).
# @param tag [String, nil] not implemented in other feeds either; kept
#        for interface parity.
# @param strategy [String] the current a/b test variant.
# @param config [Hash<Symbol, Object>] additional configuration.
# @option config [Array<Symbol>] :scoring_configs which scoring methods
#         to use; most relevant when running A/B testing.
# @option config [Integer] :negative_reaction_threshold when the
#         `articles.privileged_users_reaction_points_sum` is less than
#         this amount, treat it as a negative reaction from moderators.
# @option config [Integer] :positive_reaction_threshold when the
#         `articles.privileged_users_reaction_points_sum` is greater than
#         this amount, treat it as a positive reaction from moderators.
#
# @todo We will likely keep tweaking the chosen factors; those tweaks
#       deserve some structured consideration.
#
# rubocop:disable Layout/LineLength
def initialize(user: nil, number_of_articles: 50, page: 1, tag: nil, strategy: AbExperiment::ORIGINAL_VARIANT, **config)
  @user = user
  @number_of_articles = number_of_articles.to_i
  # Guard against callers handing us a nil page.
  @page = (page || 1).to_i
  # TODO: the tag parameter is vestigial; no logic consumes this value.
  @tag = tag
  @strategy = strategy
  @default_user_experience_level = config.fetch(:default_user_experience_level, DEFAULT_USER_EXPERIENCE_LEVEL)
  @negative_reaction_threshold = config.fetch(:negative_reaction_threshold) { DEFAULT_NEGATIVE_REACTION_THRESHOLD }
  @positive_reaction_threshold = config.fetch(:positive_reaction_threshold) { DEFAULT_POSITIVE_REACTION_THRESHOLD }
  @scoring_configs = config.fetch(:scoring_configs) { default_scoring_configs }
  # configure! also derives @days_since_published, which the
  # publication window below depends on.
  configure!(scoring_configs: @scoring_configs)
  @oldest_published_at = Articles::Feeds.oldest_published_at_to_consider_for(
    user: @user,
    days_since_published: @days_since_published,
  )
end
# rubocop:enable Layout/LineLength
# Generate a list of articles relevant to the given user's interests.
#
# First we give a score to an article based on its publication
# date; the max possible score is 1. Then we multiply that score
# by factors between 0 and 1 — the closer a multiplier is to 1,
# the "more relevant" that factor is to the user.
#
# @param only_featured [Boolean] select only articles that are
#        "featured".
# @param must_have_main_image [Boolean] select only articles that
#        have a main image.
# @param limit [Integer] the number of records to return.
# @param offset [Integer] start the paging window at the given offset.
# @param omit_article_ids [Array] don't include these articles in
#        the search results.
#
# @return ActiveRecord::Relation for Article
#
# @note This creates a complicated SQL query — well, actually an
#       ActiveRecord::Relation object on which you can call
#       `to_sql`, which you might find helpful to see what's
#       really going on. A great place to do this is in the
#       corresponding spec file.
#
# @example
#
#    user = User.first
#    strategy = Articles::Feed::WeightedQueryStrategy.new(user: user)
#    puts strategy.call.to_sql
#
# rubocop:disable Layout/LineLength
def call(only_featured: false, must_have_main_image: false, limit: default_limit, offset: default_offset, omit_article_ids: [])
  # Bind parameters shared by both the nil-user and known-user
  # sub-queries.
  shared_bind_params = {
    negative_reaction_threshold: @negative_reaction_threshold,
    positive_reaction_threshold: @positive_reaction_threshold,
    oldest_published_at: @oldest_published_at,
    omit_article_ids: omit_article_ids,
    now: Time.current
  }
  query_options = {
    only_featured: only_featured,
    must_have_main_image: must_have_main_image,
    limit: limit,
    offset: offset,
    omit_article_ids: omit_article_ids
  }
  unsanitized_sub_sql =
    if @user.nil?
      [sql_sub_query_for_nil_user(**query_options), shared_bind_params]
    else
      [
        sql_sub_query_for_existing_user(**query_options),
        shared_bind_params.merge(
          user_id: @user.id,
          default_user_experience_level: @default_user_experience_level.to_i,
        ),
      ]
    end
  # Wrapping the hand-coded (and sanitized) SQL in a sub-query lets
  # us keep all of the ActiveRecord goodness of scopes (e.g.,
  # limited_column_select) and eager includes on the outer query.
  relation = Article.where(
    Article.arel_table[:id].in(
      Arel.sql(
        Article.sanitize_sql(unsanitized_sub_sql),
      ),
    ),
  ).limited_column_select.includes(top_comments: :user)
  final_order_logic(relation)
end
# rubocop:enable Layout/LineLength
# Provided as a means to align interfaces with existing feeds;
# simply delegates to {#call} with its defaults.
#
# @return ActiveRecord::Relation for Article
#
# @note I really dislike this method name as it is opaque on
#       its purpose.
# @note In the LargeForemExperimental implementation, the
#       default home feed omits the featured story. In this
#       case, I don't want to do that. Instead, I want to see
#       how this behaves.
def default_home_feed(**)
  call
end

# Interface parity with the other feed strategies.
alias more_comments_minimal_weight_randomized call
# The featured story should be the article that:
#
# - has the highest relevance score for the nil_user version
# - has a main image (see note below).
#
# The other articles should use the nil_user version and require
# the `featured = true` attribute. In my envisioned
# implementation, the pagination would omit the featured story.
#
# @return [Array<Article, Array<Article>] a featured story
#         Article and an array of Article objects.
#
# @note Per prior work, a featured story is the article that has
#       a main image, is marked as featured (e.g., featured =
#       true), and has the highest relevance score. In the
#       Articles::Feeds::LargeForemExperimental object we used
#       the hotness_score to determine which to use. The
#       hotness score is most analogous to how this class
#       calculates the relevance score when we don't have a
#       user.
#
# @note There are requests to allow for the featured article to
#       NOT require a main image. We're still talking through
#       what that means. This work relates to PR #15333.
#
# @note including the ** operator to mirror the method interface
#       of the other feed strategies.
#
# @todo In other implementations, when users aren't signed in we
#       favor featured stories. For non-signed-in users, we may
#       want to use a completely different set of scoring methods.
#
# @note The logic of Articles::Feeds::FindFeaturedStory does not
#       (at present) apply an `Article.featured` scope.
#       [@jeremyf] I have reported this in
#       https://github.com/forem/forem/issues/15613 to get clarity
#       from product.
def featured_story_and_default_home_feed(**)
  # NOTE: See the
  # https://github.com/forem/forem/blob/c1a3ba99ebec2e1ca220e9530c26cac7757c690b/app/services/articles/feeds/weighted_query_strategy.rb#L410-L426
  # state of the codebase for the implementation of first selecting
  # the feature story (using the same query logic) then selecting
  # the related articles. With the below implementation, we need to
  # do antics in the upstream javascript file to remove the featured
  # file. See the
  # https://github.com/forem/forem/blob/c1a3ba99ebec2e1ca220e9530c26cac7757c690b/app/javascript/articles/Feed.jsx#L42-L63
  # for that process.
  #
  # tl;dr - the below implementation creates additional downstream complexities.
  feed_articles = call
  [Articles::Feeds::FindFeaturedStory.call(feed_articles), feed_articles]
end
private
# Apply the final, randomized ordering to the relation: the
# exponent shrinks as `articles.score` grows (floored at 0.1 to
# avoid division by zero), so higher-scored articles tend to sort
# first while still shuffling between requests.
#
# @param articles [ActiveRecord::Relation]
# @return [ActiveRecord::Relation] the ordered relation.
def final_order_logic(articles)
  weighted_random_order = Arel.sql("RANDOM() ^ (1.0 / greatest(articles.score, 0.1)) DESC")
  articles.order(weighted_random_order)
end
# Concatenate the accumulated GROUP BY clauses (built up in
# {#configure!}) into a single comma-separated SQL fragment.
#
# @return [String]
def group_by_fields_as_sql
  @group_by_fields.to_a.join(", ")
end
# The sql statement for selecting based on relevance scores that
# are for nil users (i.e. signed-out visitors).
#
# @param limit [Integer] the LIMIT for the sub-query.
# @param offset [Integer] the OFFSET for the sub-query.
# @param omit_article_ids [Array] article ids to exclude.
# @param only_featured [Boolean] require `articles.featured = true`.
# @param must_have_main_image [Boolean] require a main image.
#
# @return [String] a SQL fragment with named bind placeholders
#         (e.g. :oldest_published_at) still to be sanitized by the caller.
#
# rubocop:disable Layout/LineLength
def sql_sub_query_for_nil_user(limit:, offset:, omit_article_ids:, only_featured: false, must_have_main_image: false)
  # rubocop:enable Layout/LineLength
  where_clause = build_sql_with_where_clauses(
    only_featured: only_featured,
    must_have_main_image: must_have_main_image,
    omit_article_ids: omit_article_ids,
  )
  # NOTE(review): unlike the signed-in variant, this groups by
  # articles.id only; presumably Postgres's functional dependency on
  # the primary key covers the other referenced columns — confirm on
  # the supported Postgres versions.
  <<~THE_SQL_STATEMENT
    SELECT articles.id
    FROM articles
    #{joins_clauses_as_sql}
    WHERE #{where_clause}
    GROUP BY articles.id
    ORDER BY (#{relevance_score_components_as_sql}) DESC,
      articles.published_at DESC
    #{offset_and_limit_clause(offset: offset, limit: limit)}
  THE_SQL_STATEMENT
end
# The sql statement for selecting based on relevance scores that
# require a signed-in user.
#
# @param only_featured [Boolean] require `articles.featured = true`.
# @param must_have_main_image [Boolean] require a main image.
# @param limit [Integer] the LIMIT for the sub-query.
# @param offset [Integer] the OFFSET for the sub-query.
# @param omit_article_ids [Array] article ids to exclude.
#
# @return [String] a SQL fragment with named bind placeholders
#         (e.g. :user_id) still to be sanitized by the caller.
def sql_sub_query_for_existing_user(only_featured:, must_have_main_image:, limit:, offset:, omit_article_ids:)
  where_clause = build_sql_with_where_clauses(
    only_featured: only_featured,
    must_have_main_image: must_have_main_image,
    omit_article_ids: omit_article_ids,
  )
  # Unlike the nil-user variant, this groups by every field the
  # enabled scoring methods registered (see #configure!).
  <<~THE_SQL_STATEMENT
    SELECT articles.id
    FROM articles
    #{joins_clauses_as_sql}
    WHERE #{where_clause}
    GROUP BY #{group_by_fields_as_sql}
    ORDER BY (#{relevance_score_components_as_sql}) DESC,
      articles.published_at DESC
    #{offset_and_limit_clause(offset: offset, limit: limit)}
  THE_SQL_STATEMENT
end
# Assemble the WHERE conditions for the feed sub-queries.
#
# @param only_featured [Boolean] add the `articles.featured` condition.
# @param must_have_main_image [Boolean] add the main-image condition.
# @param omit_article_ids [Array] ids to exclude; ignored when it holds
#        nothing but nils.
#
# @return [String] the conditions joined with " AND ".
#
# @todo Do we want to favor published at for scoping, or do we
#       want to consider `articles.last_comment_at`? If we do,
#       we must remember to add an index to that field.
def build_sql_with_where_clauses(only_featured:, must_have_main_image:, omit_article_ids:)
  conditions = [
    "articles.published = true",
    "articles.published_at > :oldest_published_at",
    # See Articles.published scope discussion regarding the query planner.
    "articles.published_at < :now",
  ]
  # Skip the exclusion when every id is nil: `articles.id NOT IN (NULL)`
  # would immediately omit EVERYTHING from the query.
  conditions << "articles.id NOT IN (:omit_article_ids)" unless omit_article_ids.all?(&:nil?)
  conditions << "articles.featured = true" if only_featured
  conditions << "articles.main_image IS NOT NULL" if must_have_main_image
  conditions.join(" AND ")
end
# Build the sanitized OFFSET/LIMIT tail of the sub-query; the
# OFFSET is dropped entirely when it is zero (or non-positive).
#
# @param offset [Integer]
# @param limit [Integer]
# @return [String] e.g. "OFFSET 50 LIMIT 50" or "LIMIT 50".
def offset_and_limit_clause(offset:, limit:)
  fragment =
    if offset.to_i.positive?
      ["OFFSET ? LIMIT ?", offset, limit]
    else
      ["LIMIT ?", limit]
    end
  Article.sanitize_sql_array(fragment)
end
# Concatenate the JOIN fragments accumulated in {#configure!}
# (a Set, so duplicate joins collapse) into one SQL fragment.
#
# @return [String]
def joins_clauses_as_sql
  @joins.to_a.join("\n")
end
# We multiply the relevance score components together, so join the
# per-factor CASE fragments with the SQL multiplication operator.
#
# @return [String]
def relevance_score_components_as_sql
  components = @relevance_score_components
  components.join(" * \n")
end
# @return [Integer] the page size used when the caller does not
#         pass an explicit limit to {#call}.
def default_limit
  @number_of_articles.to_i
end

# @return [Integer] the number of records to skip for the current
#         @page, used when the caller does not pass an explicit
#         offset to {#call}.
def default_offset
  return 0 if @page == 1

  # BUGFIX: this previously computed `@page.to_i - (1 * default_limit)`
  # — i.e. page minus limit — which yields a wrong (often negative)
  # offset for every page > 1. The intent is to skip the preceding
  # full pages: (page - 1) * page_size.
  (@page.to_i - 1) * default_limit
end
# By default, we use all of the possible scoring methods.
#
# @return [Hash] the frozen SCORING_METHOD_CONFIGURATIONS constant.
def default_scoring_configs
  SCORING_METHOD_CONFIGURATIONS
end
# This method converts the caller provided :scoring_configs into
# an array of SQL clause fragments, populating @relevance_score_components,
# @group_by_fields, @joins, and @days_since_published as a side effect.
#
# @param scoring_configs [Hash] the caller provided configurations.
# @return [void]
#
# @see SCORING_METHOD_CONFIGURATIONS
# @note Be mindful to guard against SQL injection!
def configure!(scoring_configs:)
  @days_since_published = Articles::Feeds::DEFAULT_DAYS_SINCE_PUBLISHED
  @relevance_score_components = []
  # By default we always need to group by the articles.id
  # column. And as we add scoring methods to the query, we need
  # to add additional group_by clauses based on the chosen
  # scoring method.
  @group_by_fields = ["articles.id"]
  # A Set so that scoring methods sharing a join (e.g. the
  # followed_user join) emit it only once.
  @joins = Set.new
  unless @user.nil?
    # NOTE(review): `blocked_id = articles.user_id AND blocked_id IS
    # NULL` looks contradictory for a join condition — verify this
    # actually filters blocked authors as intended.
    @joins << "LEFT OUTER JOIN user_blocks
              ON user_blocks.blocked_id = articles.user_id
              AND user_blocks.blocked_id IS NULL
              AND user_blocks.blocker_id = :user_id"
  end
  # We loop through the possible scoring method configurations;
  # only those are accepted as valid configurations.
  SCORING_METHOD_CONFIGURATIONS.each_pair do |valid_method_name, default_config|
    # Don't attempt to use this factor if we don't have a user.
    next if default_config.fetch(:requires_user) && @user.nil?
    # Don't proceed with this one if it's not enabled.
    next unless default_config.fetch(:enabled, true)
    # Ensure that we're only using a scoring configuration that
    # the caller provided.
    next unless scoring_configs.key?(valid_method_name)

    scoring_config = scoring_configs.fetch(valid_method_name)
    # If the caller didn't provide a hash for this scoring configuration,
    # then we'll use the default configuration.
    scoring_config = default_config unless scoring_config.is_a?(Hash)
    # Change an element of config via a/b test strategy.
    # NOTE(review): when scoring_config fell back to default_config
    # above, this hands a hash nested inside the shared constant to
    # inject_config_ab_test — confirm it is not mutated in place.
    scoring_config = inject_config_ab_test(valid_method_name, scoring_config)
    # This scoring method requires a group by clause.
    @group_by_fields << default_config[:group_by] if default_config.key?(:group_by)
    @joins += default_config[:joins] if default_config.key?(:joins)
    @relevance_score_components << build_score_element_from(
      # Under NO CIRCUMSTANCES should you trust the caller to
      # provide a valid :clause. Don't trust them to send a
      # valid clause. That's the path of SQL injection.
      clause: default_config.fetch(:clause),
      # We can trust the :cases and :fallback a bit more, as we
      # later cast them to integers and floats.
      cases: scoring_config.fetch(:cases),
      fallback: scoring_config.fetch(:fallback),
    )
    # Make sure that we consider all of the days for which we're
    # establishing cases and for which there is a fallback.
    if valid_method_name == :daily_decay_factor
      @days_since_published = scoring_config.fetch(:cases).count + 1
    end
  end
end
# Swap in experimental :cases for the daily decay factor when an
# a/b test variant other than the original is active.
#
# @param valid_method_name [Symbol] a key of SCORING_METHOD_CONFIGURATIONS.
# @param scoring_config [Hash] the configuration chosen in {#configure!}.
# @return [Hash] either the given config (untouched) or a copy with
#         experimental :cases.
def inject_config_ab_test(valid_method_name, scoring_config)
  return scoring_config unless valid_method_name == :daily_decay_factor # Only proceed on this one factor
  return scoring_config if @strategy == AbExperiment::ORIGINAL_VARIANT # Don't proceed if not testing new strategy

  # Testing two case weights, rewarding recency more than the default.
  replacement_cases = case @strategy
                      when "slightly_more_recent_articles"
                        [[0, 1], [1, 0.98], [2, 0.975],
                         [3, 0.97], [4, 0.965], [5, 0.96],
                         [6, 0.955], [7, 0.95], [8, 0.945],
                         [9, 0.94], [10, 0.935], [11, 0.93],
                         [12, 0.925], [13, 0.92], [14, 0.915]]
                      else # much_more_recent_articles
                        [[0, 1], [1, 0.975], [2, 0.965],
                         [3, 0.955], [4, 0.945], [5, 0.935],
                         [6, 0.925], [7, 0.915], [8, 0.905],
                         [9, 0.895], [10, 0.885], [11, 0.875],
                         [12, 0.865], [13, 0.855], [14, 0.845]]
                      end
  # BUGFIX: return a copy rather than assigning scoring_config[:cases]
  # in place. When configure! falls back to the default config, the
  # given hash is one nested inside the SCORING_METHOD_CONFIGURATIONS
  # constant (freeze is shallow), so an in-place write leaked the
  # experimental weights process-wide — including into later requests
  # running the original variant.
  scoring_config.merge(cases: replacement_cases)
end
# Responsible for transforming the :clause, :cases, and
# :fallback into a SQL fragment that we can use to multiply with
# the other SQL fragments.
#
# @param clause [String] trusted SQL — never caller-supplied (see
#        the warning in {#configure!}).
# @param cases [Array<Array<#to_i, #to_f>>] match value / factor pairs.
# @param fallback [#to_f] factor used when no case matches.
# @return [String] a sanitized "(CASE ... END)" fragment.
def build_score_element_from(clause:, cases:, fallback:)
  # I would love to sanitize the clause, but alas, we must trust it.
  sql = +"(CASE #{clause}"
  bind_values = []
  cases.each do |match_value, factor|
    sql << "\nWHEN ? THEN ?"
    # The casts below are the only sanitization the cases get.
    bind_values.push(match_value.to_i, factor.to_f)
  end
  sql << "\nELSE ? END)"
  bind_values << fallback.to_f
  Article.sanitize_sql_array([sql, *bind_values])
end
end
end
end