Skip to content

Commit

Permalink
PERF: remove avg_time calculations and regular jobs from posts and to…
Browse files Browse the repository at this point in the history
…pics

After careful analysis of large data-sets it became apparent that avg_time
had no impact whatsoever on "best of" topic scoring. Calculating avg_time
was a very costly operation especially on large databases.

We have some longer term plans of introducing other weighting that is read
time based into our scoring for "best of" and "top" topics, but in the
interim to stop a large amount of work that is not achieving any value we
are removing the jobs.

Column removal will follow once we decide on a new replacement metric.
  • Loading branch information
SamSaffron committed May 6, 2019
1 parent ba3cf71 commit f8eddd4
Show file tree
Hide file tree
Showing 12 changed files with 0 additions and 198 deletions.
14 changes: 0 additions & 14 deletions app/jobs/scheduled/calculate_avg_time.rb

This file was deleted.

2 changes: 0 additions & 2 deletions app/jobs/scheduled/weekly.rb
Expand Up @@ -8,8 +8,6 @@ class Weekly < Jobs::Scheduled
every 1.week

def execute(args)
Post.calculate_avg_time
Topic.calculate_avg_time
ScoreCalculator.new.calculate
MiniScheduler::Stat.purge_old
Draft.cleanup!
Expand Down
31 changes: 0 additions & 31 deletions app/models/post.rb
Expand Up @@ -674,37 +674,6 @@ def self.estimate_posts_per_day

end

# This calculates the geometric mean of the post timings and stores it along with
# each post.
def self.calculate_avg_time(min_topic_age = nil)
retry_lock_error do
builder = DB.build("UPDATE posts
SET avg_time = (x.gmean / 1000)
FROM (SELECT post_timings.topic_id,
post_timings.post_number,
round(exp(avg(CASE WHEN msecs > 0 THEN ln(msecs) ELSE 0 END))) AS gmean
FROM post_timings
INNER JOIN posts AS p2
ON p2.post_number = post_timings.post_number
AND p2.topic_id = post_timings.topic_id
AND p2.user_id <> post_timings.user_id
/*where2*/
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
/*where*/")

builder.where("x.topic_id = posts.topic_id
AND x.post_number = posts.post_number
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")

if min_topic_age
builder.where2("p2.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)",
bumped_at: min_topic_age)
end

builder.exec
end
end

before_save do
self.last_editor_id ||= user_id

Expand Down
25 changes: 0 additions & 25 deletions app/models/topic.rb
Expand Up @@ -671,31 +671,6 @@ def self.reset_highest(topic_id)
SQL
end

# This calculates the geometric mean of the posts and stores it with the topic
def self.calculate_avg_time(min_topic_age = nil)
builder = DB.build <<~SQL
UPDATE topics
SET avg_time = x.gmean
FROM (SELECT topic_id,
round(exp(avg(ln(avg_time)))) AS gmean
FROM posts
WHERE avg_time > 0 AND avg_time IS NOT NULL
GROUP BY topic_id) AS x
/*where*/
SQL

builder.where <<~SQL
x.topic_id = topics.id AND
(topics.avg_time <> x.gmean OR topics.avg_time IS NULL)
SQL

if min_topic_age
builder.where("topics.bumped_at > :bumped_at", bumped_at: min_topic_age)
end

builder.exec
end

def changed_to_category(new_category)
return true if new_category.blank? || Category.exists?(topic_id: id)
return false if new_category.id == SiteSetting.uncategorized_category_id && !SiteSetting.allow_uncategorized_topics
Expand Down
1 change: 0 additions & 1 deletion app/serializers/post_serializer.rb
Expand Up @@ -23,7 +23,6 @@ class PostSerializer < BasicPostSerializer
:reply_count,
:reply_to_post_number,
:quote_count,
:avg_time,
:incoming_link_count,
:reads,
:score,
Expand Down
1 change: 0 additions & 1 deletion lib/score_calculator.rb
Expand Up @@ -6,7 +6,6 @@ def self.default_score_weights
like_score: 15,
incoming_link_count: 5,
bookmark_count: 2,
avg_time: 0.05,
reads: 0.2
}
end
Expand Down
8 changes: 0 additions & 8 deletions spec/models/post_spec.rb
Expand Up @@ -1023,14 +1023,6 @@ def post_with_body(body, user = nil)
end
end

describe "calculate_avg_time" do

it "should not crash" do
Post.calculate_avg_time
Post.calculate_avg_time(1.day.ago)
end
end

describe "has_host_spam" do
let(:raw) { "hello from my site http://www.example.net http://#{GlobalSetting.hostname} http://#{RailsMultisite::ConnectionManagement.current_hostname}" }

Expand Down
63 changes: 0 additions & 63 deletions spec/models/post_timing_spec.rb
Expand Up @@ -162,69 +162,6 @@ def topic_user(user_id, last_read_post_number, highest_seen_post_number)

end

describe 'avg times' do

describe 'posts' do
it 'has no avg_time by default' do
expect(@post.avg_time).to be_blank
end

it "doesn't change when we calculate the avg time for the post because there's no timings" do
Post.calculate_avg_time
@post.reload
expect(@post.avg_time).to be_blank
end
end

describe 'topics' do
it 'has no avg_time by default' do
expect(@topic.avg_time).to be_blank
end

it "doesn't change when we calculate the avg time for the post because there's no timings" do
Topic.calculate_avg_time
@topic.reload
expect(@topic.avg_time).to be_blank
end
end

describe "it doesn't create an avg time for the same user" do
it 'something' do
PostTiming.record_timing(@timing_attrs.merge(user_id: @post.user_id))
Post.calculate_avg_time
@post.reload
expect(@post.avg_time).to be_blank
end

end

describe 'with a timing for another user' do
before do
PostTiming.record_timing(@timing_attrs)
Post.calculate_avg_time
@post.reload
end

it 'has a post avg_time from the timing' do
expect(@post.avg_time).to be_present
end

describe 'forum topics' do
before do
Topic.calculate_avg_time
@topic.reload
end

it 'has an avg_time from the timing' do
expect(@topic.avg_time).to be_present
end

end

end

end

end

end
7 changes: 0 additions & 7 deletions spec/models/topic_spec.rb
Expand Up @@ -2030,13 +2030,6 @@ def expect_the_right_notification_to_be_created(inviter, invitee)

end

describe "calculate_avg_time" do
it "does not explode" do
Topic.calculate_avg_time
Topic.calculate_avg_time(1.day.ago)
end
end

describe "expandable_first_post?" do

let(:topic) { Fabricate.build(:topic) }
Expand Down
3 changes: 0 additions & 3 deletions test/javascripts/fixtures/post.js.es6
Expand Up @@ -15,7 +15,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 25,
incoming_link_count: 314,
reads: 475,
score: 1702.25,
Expand Down Expand Up @@ -68,7 +67,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
Expand Down Expand Up @@ -118,7 +116,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
Expand Down
5 changes: 0 additions & 5 deletions test/javascripts/fixtures/search-fixtures.js.es6
Expand Up @@ -838,7 +838,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 24,
incoming_link_count: 4422,
reads: 327,
score: 21978.4,
Expand Down Expand Up @@ -928,7 +927,6 @@ export default {
reply_count: 2,
reply_to_post_number: null,
quote_count: 0,
avg_time: 36,
incoming_link_count: 4680,
reads: 491,
score: 23815.8,
Expand Down Expand Up @@ -1019,7 +1017,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 36,
incoming_link_count: 1483,
reads: 274,
score: 7985.4,
Expand Down Expand Up @@ -1110,7 +1107,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: 39,
incoming_link_count: 1241,
reads: 149,
score: 6161.35,
Expand Down Expand Up @@ -1200,7 +1196,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 22,
incoming_link_count: 386,
reads: 163,
score: 1953.7,
Expand Down

1 comment on commit f8eddd4

@discoursebot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on Discourse Meta. There might be relevant details there:

https://meta.discourse.org/t/post-calculate-avg-time-taking-up-a-long-time/49750/29

Please sign in to comment.