PERF: improve performance of unread queries
Figuring out which topics a user has unread becomes a very expensive
operation over time.

Users can easily accumulate tens of thousands of tracking state rows
(one for every topic they ever visit).

When figuring out what a user has unread, we need to join the tracking
state records to the topics table. This can very quickly lead to cases
where the entire topics table has to be scanned.

This commit optimises this by always keeping track of the "first" date
at which a user has unread topics. We can then easily filter all
earlier topics out of the join.

We use pg functions instead of nested queries here to assist the
planner.
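
To illustrate the idea (a sketch, not part of the commit): with the last_unread_at column, the first_topic_unread_at watermark, and the first_unread_topic_for function this change introduces, the unread query only has to consider topics newer than the user's watermark. Roughly, the generated filter looks like the query below; the user id 123 and the exact join shape are illustrative assumptions taken from TopicQuery's joined_topic_user.

-- sketch only: the unread join restricted by the per-user watermark
SELECT t.id
FROM topics t
JOIN topic_users tu ON tu.topic_id = t.id AND tu.user_id = 123
WHERE COALESCE(tu.notification_level, 1) >= 2             -- tracking or above
  AND tu.last_read_post_number < t.highest_post_number    -- topic has unread posts
  AND t.last_unread_at >= first_unread_topic_for(123)     -- skip everything older than the watermark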
SamSaffron committed May 25, 2017
1 parent 6eb6c25 commit 29fac1a
Showing 12 changed files with 227 additions and 39 deletions.
2 changes: 1 addition & 1 deletion app/controllers/topics_controller.rb
@@ -605,7 +605,7 @@ def bulk
topic_ids = params[:topic_ids].map {|t| t.to_i}
elsif params[:filter] == 'unread'
tq = TopicQuery.new(current_user)
topics = TopicQuery.unread_filter(tq.joined_topic_user, staff: guardian.is_staff?).listable_topics
topics = TopicQuery.unread_filter(tq.joined_topic_user, current_user.id, staff: guardian.is_staff?).listable_topics
topics = topics.where('category_id = ?', params[:category_id]) if params[:category_id]
topic_ids = topics.pluck(:id)
else
9 changes: 9 additions & 0 deletions app/jobs/scheduled/update_first_topic_unread_at.rb
@@ -0,0 +1,9 @@
module Jobs
class UpdateFirstTopicUnreadAt < Jobs::Scheduled
every 1.day

def execute(args)
UserStat.update_first_topic_unread_at!
end
end
end
4 changes: 4 additions & 0 deletions app/models/topic.rb
@@ -541,6 +541,10 @@ def self.reset_all_highest!
def self.reset_highest(topic_id)
result = exec_sql "UPDATE topics
SET
last_unread_at = (
SELECT MAX(created_at) FROM posts
WHERE topic_id = :topic_id
),
highest_staff_post_number = (
SELECT COALESCE(MAX(post_number), 0) FROM posts
WHERE topic_id = :topic_id AND
2 changes: 1 addition & 1 deletion app/models/topic_tracking_state.rb
@@ -186,7 +186,7 @@ def self.report_raw_sql(opts=nil)
if opts && opts[:skip_unread]
"1=0"
else
TopicQuery.unread_filter(Topic, staff: opts && opts[:staff]).where_values.join(" AND ")
TopicQuery.unread_filter(Topic, -999, staff: opts && opts[:staff]).where_values.join(" AND ").sub("-999", ":user_id")

@eviltrout (Contributor) commented on May 25, 2017:

This is a bit iffy. Is there no better way to do it? At the very least a constant would make this a lot more readable. USER_ID_PLACEHOLDER?
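
A hedged sketch of the constant eviltrout suggests, applied to the line above (hypothetical; the commit as shown keeps the bare -999):

# hypothetical USER_ID_PLACEHOLDER refactor, not part of this commit
USER_ID_PLACEHOLDER = -999

TopicQuery.unread_filter(Topic, USER_ID_PLACEHOLDER, staff: opts && opts[:staff])
  .where_values
  .join(" AND ")
  .sub(USER_ID_PLACEHOLDER.to_s, ":user_id")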

end

new =
47 changes: 29 additions & 18 deletions app/models/topic_user.rb
@@ -46,11 +46,6 @@ def auto_notification(user_id, topic_id, reason, notification_level)
notification_level: notification_level,
notifications_reason_id: reason
)

MessageBus.publish("/topic/#{topic_id}", {
notification_level_change: notification_level,
notifications_reason_id: reason
}, user_ids: [user_id])
end
end

@@ -139,23 +134,39 @@ def change(user_id, topic_id, attrs)
end

if attrs[:notification_level]
MessageBus.publish(
"/topic/#{topic_id}",
{ notification_level_change: attrs[:notification_level] },
user_ids: [user_id]
)

DiscourseEvent.trigger(:topic_notification_level_changed,
attrs[:notification_level],
user_id,
topic_id
)
notification_level_change(user_id, topic_id, attrs[:notification_level], attrs[:notifications_reason_id])
end

rescue ActiveRecord::RecordNotUnique
# In case of a race condition to insert, do nothing
end

def notification_level_change(user_id, topic_id, notification_level, reason_id)
message = { notification_level_change: notification_level }
message[:notifications_reason_id] = reason_id if reason_id
MessageBus.publish("/topic/#{topic_id}", message, user_ids: [user_id])

if notification_level && notification_level >= notification_levels[:tracking]
sql = <<SQL
UPDATE user_stats us
SET first_topic_unread_at = t.last_unread_at
FROM topics t
WHERE t.id = :topic_id AND
t.last_unread_at < us.first_topic_unread_at AND
us.user_id = :user_id
SQL

@tgxworld (Contributor) commented on May 26, 2017:

Hmm shouldn't this be indented?

exec_sql(sql, topic_id: topic_id, user_id: user_id)
end


DiscourseEvent.trigger(:topic_notification_level_changed,
notification_level,
user_id,
topic_id
)

end

def create_missing_record(user_id, topic_id, attrs)
now = DateTime.now

@@ -297,7 +308,7 @@ def update_last_read(user, topic_id, post_number, msecs, opts={})
end

if before != after
MessageBus.publish("/topic/#{topic_id}", { notification_level_change: after }, user_ids: [user.id])
notification_level_change(user.id, topic_id, after, nil)
end
end

@@ -328,7 +339,7 @@ def update_last_read(user, topic_id, post_number, msecs, opts={})
end
end

MessageBus.publish("/topic/#{topic_id}", { notification_level_change: args[:new_status] }, user_ids: [user.id])
notification_level_change(user.id, topic_id, args[:new_status], nil)
end
end

24 changes: 24 additions & 0 deletions app/models/user_stat.rb
@@ -81,6 +81,30 @@ def reset_bounce_score!
update_columns(reset_bounce_score_after: nil, bounce_score: 0)
end

def self.update_first_topic_unread_at!

exec_sql <<SQL
UPDATE user_stats us
SET first_topic_unread_at = COALESCE(X.first_unread_at, 'epoch')
FROM
(
SELECT u.id user_id, MIN(last_unread_at) first_unread_at
FROM users u
JOIN topic_users tu ON tu.user_id = u.id
JOIN topics t ON t.id = tu.topic_id
WHERE notification_level > 1 AND last_read_post_number < CASE WHEN moderator OR admin
THEN t.highest_staff_post_number
ELSE t.highest_post_number
END
AND t.deleted_at IS NULL AND t.archetype <> 'private_message'
GROUP BY u.id
) X
WHERE X.user_id = us.user_id AND X.first_unread_at <> first_topic_unread_at
SQL

nil
end

protected

def trigger_badges
48 changes: 48 additions & 0 deletions db/migrate/20170524182846_add_unread_tracking_columns.rb
@@ -0,0 +1,48 @@
class AddUnreadTrackingColumns < ActiveRecord::Migration
def up
add_column :user_stats, :first_topic_unread_at, :datetime, null: false, default: "epoch"
add_column :topics, :last_unread_at, :datetime, null: false, default: "epoch"

execute <<SQL
UPDATE topics SET last_unread_at = (
SELECT MAX(created_at)
FROM posts
WHERE topics.id = posts.topic_id
)
SQL

execute <<SQL
UPDATE user_stats SET first_topic_unread_at = COALESCE((
SELECT MIN(last_unread_at) FROM topics
JOIN users u ON u.id = user_stats.user_id
JOIN topic_users tu ON tu.user_id = user_stats.user_id AND topics.id = tu.topic_id
WHERE notification_level > 1 AND last_read_post_number < CASE WHEN moderator OR admin
THEN topics.highest_staff_post_number
ELSE topics.highest_post_number
END
AND topics.deleted_at IS NULL AND topics.archetype <> 'private_message'
), 'epoch')
SQL

add_index :topics, [:last_unread_at]

# we need this function for performance reasons
execute <<SQL
CREATE FUNCTION first_unread_topic_for(user_id int)
RETURNS timestamp AS
$$
SELECT COALESCE(first_topic_unread_at, 'epoch'::timestamp)
FROM users u
JOIN user_stats ON user_id = u.id
WHERE u.id = $1
$$
LANGUAGE SQL STABLE
SQL
end

def down
execute "DROP FUNCTION first_unread_topic_for(int)"
remove_column :user_stats, :first_topic_unread_at
remove_column :topics, :last_unread_at
end
end
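
Assuming the migration above has run, the new function can be exercised directly; the user id 1 below is arbitrary, and the COALESCE in the function body defaults an unset watermark to the 'epoch' timestamp:

-- illustrative ad-hoc check of the helper added by this migration
SELECT first_unread_topic_for(1);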
19 changes: 10 additions & 9 deletions lib/post_creator.rb
@@ -377,15 +377,16 @@ def create_topic
end

def update_topic_stats
return if @post.post_type == Post.types[:whisper]

attrs = {
last_posted_at: @post.created_at,
last_post_user_id: @post.user_id,
word_count: (@topic.word_count || 0) + @post.word_count,
}
attrs[:excerpt] = @post.excerpt(220, strip_links: true) if new_topic?
attrs[:bumped_at] = @post.created_at unless @post.no_bump
attrs = {last_unread_at: @post.created_at}

if @post.post_type != Post.types[:whisper]
attrs[:last_posted_at] = @post.created_at
attrs[:last_post_user_id] = @post.user_id
attrs[:word_count] = (@topic.word_count || 0) + @post.word_count
attrs[:excerpt] = @post.excerpt(220, strip_links: true) if new_topic?
attrs[:bumped_at] = @post.created_at unless @post.no_bump
end

@topic.update_attributes(attrs)
end

25 changes: 21 additions & 4 deletions lib/topic_query.rb
@@ -280,10 +280,21 @@ def self.new_filter(list, treat_as_new_topic_start_date)
.where("COALESCE(tu.notification_level, :tracking) >= :tracking", tracking: TopicUser.notification_levels[:tracking])
end

def self.unread_filter(list, opts)
def self.unread_filter(list, user_id, opts)
# PERF note
# We use the function first_unread_topic_for here instead of joining
# the table to assist the PostgreSQL query planner
#
# We want the query planner to have the actual value of the first_unread_topic so
# it can pick an appropriate plan. If it does not have this upfront it will just assume
# that the value will be 1/3 of the way through the topic table which makes it use terrible
# indexes for the plan.
#
col_name = opts[:staff] ? "highest_staff_post_number" : "highest_post_number"

list.where("tu.last_read_post_number < topics.#{col_name}")
list
.where("tu.last_read_post_number < topics.#{col_name}")
.where("topics.last_unread_at >= first_unread_topic_for(?)", user_id)
.where("COALESCE(tu.notification_level, :regular) >= :tracking",
regular: TopicUser.notification_levels[:regular], tracking: TopicUser.notification_levels[:tracking])
end
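
For context, a hedged usage sketch of the new signature, mirroring the controller change earlier in this commit (current_user is assumed to be present):

# sketch: build the unread topic list for a user, passing the new user_id argument
tq = TopicQuery.new(current_user)
unread_topics = TopicQuery
  .unread_filter(tq.joined_topic_user, current_user.id, staff: current_user.staff?)
  .listable_topics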
@@ -361,7 +372,10 @@ def latest_results(options={})
end

def unread_results(options={})
result = TopicQuery.unread_filter(default_results(options.reverse_merge(:unordered => true)), staff: @user.try(:staff?))
result = TopicQuery.unread_filter(
default_results(options.reverse_merge(:unordered => true)),
@user&.id,
staff: @user&.staff?)
.order('CASE WHEN topics.user_id = tu.user_id THEN 1 ELSE 2 END')

self.class.results_filter_callbacks.each do |filter_callback|
@@ -724,7 +738,10 @@ def new_messages(params)
end

def unread_messages(params)
TopicQuery.unread_filter(messages_for_groups_or_user(params[:my_group_ids]), staff: @user.try(:staff?))
TopicQuery.unread_filter(
messages_for_groups_or_user(params[:my_group_ids]),
@user&.id,
staff: @user&.staff?)
.limit(params[:count])
end

12 changes: 11 additions & 1 deletion spec/components/post_creator_spec.rb
@@ -395,6 +395,9 @@
expect(whisper_reply).to be_present
expect(whisper_reply.post_type).to eq(Post.types[:whisper])

# date is not precise enough in db
whisper_reply.reload


first.reload
# does not leak into the OP
@@ -408,7 +411,13 @@
expect(topic.posts_count).to eq(1)
expect(topic.highest_staff_post_number).to eq(3)

topic.update_columns(highest_staff_post_number:0, highest_post_number:0, posts_count: 0, last_posted_at: 1.year.ago)
expect(topic.last_unread_at).to eq(whisper_reply.created_at)

topic.update_columns(highest_staff_post_number:0,
highest_post_number:0,
posts_count: 0,
last_unread_at: 1.year.ago,
last_posted_at: 1.year.ago)

Topic.reset_highest(topic.id)

@@ -417,6 +426,7 @@
expect(topic.posts_count).to eq(1)
expect(topic.last_posted_at).to eq(first.created_at)
expect(topic.highest_staff_post_number).to eq(3)
expect(topic.last_unread_at).to eq(whisper_reply.created_at)
end
end

44 changes: 41 additions & 3 deletions spec/models/topic_user_spec.rb
@@ -1,6 +1,17 @@
require 'rails_helper'

describe TopicUser do
let :watching do
TopicUser.notification_levels[:watching]
end

let :regular do
TopicUser.notification_levels[:regular]
end

let :tracking do
TopicUser.notification_levels[:tracking]
end

describe "#unwatch_categories!" do
it "correctly unwatches categories" do
@@ -10,9 +21,6 @@
tracked_topic = Fabricate(:topic)

user = op_topic.user
watching = TopicUser.notification_levels[:watching]
regular = TopicUser.notification_levels[:regular]
tracking = TopicUser.notification_levels[:tracking]

TopicUser.change(user.id, op_topic, notification_level: watching)
TopicUser.change(user.id, another_topic, notification_level: watching)
@@ -30,6 +38,36 @@

end

describe "first unread" do
it "correctly update first unread as needed" do
time1 = 3.days.ago
time2 = 2.days.ago
time3 = 1.days.ago

topic1 = Fabricate(:topic, last_unread_at: time1)
topic2 = Fabricate(:topic, last_unread_at: time2)
topic3 = Fabricate(:topic, last_unread_at: time3)

user = Fabricate(:user)
user.user_stat.update_columns(first_topic_unread_at: Time.zone.now)

TopicUser.change(user.id, topic3, notification_level: tracking)

user.user_stat.reload
expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time3)

TopicUser.change(user.id, topic2, notification_level: watching)

user.user_stat.reload
expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time2)

TopicUser.change(user.id, topic1, notification_level: regular)

user.user_stat.reload
expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time2)
end
end

describe '#notification_levels' do
context "verify enum sequence" do
before do
