From 29fac1ac18acdc1f0d2c1650d33d2d4a1aab0a0b Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 25 May 2017 15:07:12 -0400 Subject: [PATCH] PERF: improve performance of unread queries Figuring out what unread topics a user has is a very expensive operation over time. Users can easily accumulate 10s of thousands of tracking state rows (1 for every topic they ever visit) When figuring out what a user has that is unread we need to join the tracking state records to the topic table. This can very quickly lead to cases where you need to scan through the entire topic table. This commit optimises it so we always keep track of the "first" date a user has unread topics. Then we can easily filter out all earlier topics from the join. We use pg functions, instead of nested queries here to assist the planner. --- app/controllers/topics_controller.rb | 2 +- .../scheduled/update_first_topic_unread_at.rb | 9 ++++ app/models/topic.rb | 4 ++ app/models/topic_tracking_state.rb | 2 +- app/models/topic_user.rb | 47 +++++++++++------- app/models/user_stat.rb | 24 ++++++++++ ...70524182846_add_unread_tracking_columns.rb | 48 +++++++++++++++++++ lib/post_creator.rb | 19 ++++---- lib/topic_query.rb | 25 ++++++++-- spec/components/post_creator_spec.rb | 12 ++++- spec/models/topic_user_spec.rb | 44 +++++++++++++++-- spec/models/user_stat_spec.rb | 30 +++++++++++- 12 files changed, 227 insertions(+), 39 deletions(-) create mode 100644 app/jobs/scheduled/update_first_topic_unread_at.rb create mode 100644 db/migrate/20170524182846_add_unread_tracking_columns.rb diff --git a/app/controllers/topics_controller.rb b/app/controllers/topics_controller.rb index bc4455de509b44..c5930606039f42 100644 --- a/app/controllers/topics_controller.rb +++ b/app/controllers/topics_controller.rb @@ -605,7 +605,7 @@ def bulk topic_ids = params[:topic_ids].map {|t| t.to_i} elsif params[:filter] == 'unread' tq = TopicQuery.new(current_user) - topics = TopicQuery.unread_filter(tq.joined_topic_user, staff: guardian.is_staff?).listable_topics + topics = TopicQuery.unread_filter(tq.joined_topic_user, current_user.id, staff: guardian.is_staff?).listable_topics topics = topics.where('category_id = ?', params[:category_id]) if params[:category_id] topic_ids = topics.pluck(:id) else diff --git a/app/jobs/scheduled/update_first_topic_unread_at.rb b/app/jobs/scheduled/update_first_topic_unread_at.rb new file mode 100644 index 00000000000000..c5b341563344d8 --- /dev/null +++ b/app/jobs/scheduled/update_first_topic_unread_at.rb @@ -0,0 +1,9 @@ +module Jobs + class UpdateFirstTopicUnreadAt < Jobs::Scheduled + every 1.day + + def execute(args) + UserStat.update_first_topic_unread_at! + end + end +end diff --git a/app/models/topic.rb b/app/models/topic.rb index 7f5b34b39887e5..c3fbbf98937383 100644 --- a/app/models/topic.rb +++ b/app/models/topic.rb @@ -541,6 +541,10 @@ def self.reset_all_highest! def self.reset_highest(topic_id) result = exec_sql "UPDATE topics SET + last_unread_at = ( + SELECT MAX(created_at) FROM posts + WHERE topic_id = :topic_id + ), highest_staff_post_number = ( SELECT COALESCE(MAX(post_number), 0) FROM posts WHERE topic_id = :topic_id AND diff --git a/app/models/topic_tracking_state.rb b/app/models/topic_tracking_state.rb index b8a30b830d76f0..ff30630353bb69 100644 --- a/app/models/topic_tracking_state.rb +++ b/app/models/topic_tracking_state.rb @@ -186,7 +186,7 @@ def self.report_raw_sql(opts=nil) if opts && opts[:skip_unread] "1=0" else - TopicQuery.unread_filter(Topic, staff: opts && opts[:staff]).where_values.join(" AND ") + TopicQuery.unread_filter(Topic, -999, staff: opts && opts[:staff]).where_values.join(" AND ").sub("-999", ":user_id") end new = diff --git a/app/models/topic_user.rb b/app/models/topic_user.rb index 7dd30be2df28b8..3272fa8f1897ea 100644 --- a/app/models/topic_user.rb +++ b/app/models/topic_user.rb @@ -46,11 +46,6 @@ def auto_notification(user_id, topic_id, reason, notification_level) notification_level: notification_level, notifications_reason_id: reason ) - - MessageBus.publish("/topic/#{topic_id}", { - notification_level_change: notification_level, - notifications_reason_id: reason - }, user_ids: [user_id]) end end @@ -139,23 +134,39 @@ def change(user_id, topic_id, attrs) end if attrs[:notification_level] - MessageBus.publish( - "/topic/#{topic_id}", - { notification_level_change: attrs[:notification_level] }, - user_ids: [user_id] - ) - - DiscourseEvent.trigger(:topic_notification_level_changed, - attrs[:notification_level], - user_id, - topic_id - ) + notification_level_change(user_id, topic_id, attrs[:notification_level], attrs[:notifications_reason_id]) end rescue ActiveRecord::RecordNotUnique # In case of a race condition to insert, do nothing end + def notification_level_change(user_id, topic_id, notification_level, reason_id) + message = { notification_level_change: notification_level } + message[:notifications_reason_id] = reason_id if reason_id + MessageBus.publish("/topic/#{topic_id}", message, user_ids: [user_id]) + + if notification_level && notification_level >= notification_levels[:tracking] + sql = < 1 AND last_read_post_number < CASE WHEN moderator OR admin + THEN t.highest_staff_post_number + ELSE t.highest_post_number + END + AND t.deleted_at IS NULL AND t.archetype <> 'private_message' + GROUP BY u.id + ) X + WHERE X.user_id = us.user_id AND X.first_unread_at <> first_topic_unread_at +SQL + + nil + end + protected def trigger_badges diff --git a/db/migrate/20170524182846_add_unread_tracking_columns.rb b/db/migrate/20170524182846_add_unread_tracking_columns.rb new file mode 100644 index 00000000000000..bc05dd0af2bd30 --- /dev/null +++ b/db/migrate/20170524182846_add_unread_tracking_columns.rb @@ -0,0 +1,48 @@ +class AddUnreadTrackingColumns < ActiveRecord::Migration + def up + add_column :user_stats, :first_topic_unread_at, :datetime, null: false, default: "epoch" + add_column :topics, :last_unread_at, :datetime, null: false, default: "epoch" + + execute < 1 AND last_read_post_number < CASE WHEN moderator OR admin + THEN topics.highest_staff_post_number + ELSE topics.highest_post_number + END + AND topics.deleted_at IS NULL AND topics.archetype <> 'private_message' + ), 'epoch') +SQL + + add_index :topics, [:last_unread_at] + + # we need this function for performance reasons + execute <= :tracking", tracking: TopicUser.notification_levels[:tracking]) end - def self.unread_filter(list, opts) + def self.unread_filter(list, user_id, opts) + # PERF note + # We use the function first_unread_topic_for here instead of joining + # the table to assist the PostgreSQL query planner + # + # We want the query planner to have the actual value of the first_unread_topic so + # it can pick an appropriate plan. If it does not have this upfront it will just assume + # that the value will be 1/3 of the way through the topic table which makes it use terrible + # indexes for the plan. + # col_name = opts[:staff] ? "highest_staff_post_number" : "highest_post_number" - list.where("tu.last_read_post_number < topics.#{col_name}") + list + .where("tu.last_read_post_number < topics.#{col_name}") + .where("topics.last_unread_at >= first_unread_topic_for(?)", user_id) .where("COALESCE(tu.notification_level, :regular) >= :tracking", regular: TopicUser.notification_levels[:regular], tracking: TopicUser.notification_levels[:tracking]) end @@ -361,7 +372,10 @@ def latest_results(options={}) end def unread_results(options={}) - result = TopicQuery.unread_filter(default_results(options.reverse_merge(:unordered => true)), staff: @user.try(:staff?)) + result = TopicQuery.unread_filter( + default_results(options.reverse_merge(:unordered => true)), + @user&.id, + staff: @user&.staff?) .order('CASE WHEN topics.user_id = tu.user_id THEN 1 ELSE 2 END') self.class.results_filter_callbacks.each do |filter_callback| @@ -724,7 +738,10 @@ def new_messages(params) end def unread_messages(params) - TopicQuery.unread_filter(messages_for_groups_or_user(params[:my_group_ids]), staff: @user.try(:staff?)) + TopicQuery.unread_filter( + messages_for_groups_or_user(params[:my_group_ids]), + @user&.id, + staff: @user&.staff?) .limit(params[:count]) end diff --git a/spec/components/post_creator_spec.rb b/spec/components/post_creator_spec.rb index ee20223391b775..45828924b8a728 100644 --- a/spec/components/post_creator_spec.rb +++ b/spec/components/post_creator_spec.rb @@ -395,6 +395,9 @@ expect(whisper_reply).to be_present expect(whisper_reply.post_type).to eq(Post.types[:whisper]) + # date is not precise enough in db + whisper_reply.reload + first.reload # does not leak into the OP @@ -408,7 +411,13 @@ expect(topic.posts_count).to eq(1) expect(topic.highest_staff_post_number).to eq(3) - topic.update_columns(highest_staff_post_number:0, highest_post_number:0, posts_count: 0, last_posted_at: 1.year.ago) + expect(topic.last_unread_at).to eq(whisper_reply.created_at) + + topic.update_columns(highest_staff_post_number:0, + highest_post_number:0, + posts_count: 0, + last_unread_at: 1.year.ago, + last_posted_at: 1.year.ago) Topic.reset_highest(topic.id) @@ -417,6 +426,7 @@ expect(topic.posts_count).to eq(1) expect(topic.last_posted_at).to eq(first.created_at) expect(topic.highest_staff_post_number).to eq(3) + expect(topic.last_unread_at).to eq(whisper_reply.created_at) end end diff --git a/spec/models/topic_user_spec.rb b/spec/models/topic_user_spec.rb index 56e7472ebe7ee1..97e41cff75e866 100644 --- a/spec/models/topic_user_spec.rb +++ b/spec/models/topic_user_spec.rb @@ -1,6 +1,17 @@ require 'rails_helper' describe TopicUser do + let :watching do + TopicUser.notification_levels[:watching] + end + + let :regular do + TopicUser.notification_levels[:regular] + end + + let :tracking do + TopicUser.notification_levels[:tracking] + end describe "#unwatch_categories!" do it "correctly unwatches categories" do @@ -10,9 +21,6 @@ tracked_topic = Fabricate(:topic) user = op_topic.user - watching = TopicUser.notification_levels[:watching] - regular = TopicUser.notification_levels[:regular] - tracking = TopicUser.notification_levels[:tracking] TopicUser.change(user.id, op_topic, notification_level: watching) TopicUser.change(user.id, another_topic, notification_level: watching) @@ -30,6 +38,36 @@ end + describe "first unread" do + it "correctly update first unread as needed" do + time1 = 3.days.ago + time2 = 2.days.ago + time3 = 1.days.ago + + topic1 = Fabricate(:topic, last_unread_at: time1) + topic2 = Fabricate(:topic, last_unread_at: time2) + topic3 = Fabricate(:topic, last_unread_at: time3) + + user = Fabricate(:user) + user.user_stat.update_columns(first_topic_unread_at: Time.zone.now) + + TopicUser.change(user.id, topic3, notification_level: tracking) + + user.user_stat.reload + expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time3) + + TopicUser.change(user.id, topic2, notification_level: watching) + + user.user_stat.reload + expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time2) + + TopicUser.change(user.id, topic1, notification_level: regular) + + user.user_stat.reload + expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time2) + end + end + describe '#notification_levels' do context "verify enum sequence" do before do diff --git a/spec/models/user_stat_spec.rb b/spec/models/user_stat_spec.rb index e977d3b08a343f..869682850184c5 100644 --- a/spec/models/user_stat_spec.rb +++ b/spec/models/user_stat_spec.rb @@ -2,8 +2,6 @@ describe UserStat do - it { is_expected.to belong_to :user } - it "is created automatically when a user is created" do user = Fabricate(:evil_trout) expect(user.user_stat).to be_present @@ -12,6 +10,34 @@ expect(user.user_stat.new_since).to be_present end + context "#update_first_topic_unread_at" do + it "updates date correctly for staff" do + now = Time.zone.now + + admin = Fabricate(:admin) + topic = Fabricate(:topic, + highest_staff_post_number: 7, + highest_post_number: 1, + last_unread_at: now + ) + + UserStat.update_first_topic_unread_at! + + admin.reload + + expect(admin.user_stat.first_topic_unread_at).to_not be_within(5.years).of(now) + + TopicUser.change(admin.id, topic.id, last_read_post_number: 1, + notification_level: NotificationLevels.all[:tracking]) + + UserStat.update_first_topic_unread_at! + + admin.reload + + expect(admin.user_stat.first_topic_unread_at).to be_within(1.second).of(now) + end + end + context '#update_view_counts' do let(:user) { Fabricate(:user) }