From bc46c60ec0aefe46c351d5d98dcfe0d3ebcfc02f Mon Sep 17 00:00:00 2001 From: fatkodima Date: Tue, 7 May 2024 15:02:55 +0300 Subject: [PATCH] Improve retrying of failed sharded background migrations --- CHANGELOG.md | 1 + docs/background_data_migrations.md | 11 ++++++++ docs/background_schema_migrations.md | 11 ++++++++ .../background_migrations/migration.rb | 26 +++++++++++------ .../background_migrations/migration_job.rb | 28 ++++++++++++------- .../background_schema_migrations/migration.rb | 11 ++++++-- .../migration_job_test.rb | 4 ++- test/background_migrations/migration_test.rb | 27 ++++++++++++++++-- .../migration_test.rb | 23 ++++++++++++++- 9 files changed, 116 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07d31b3..7fbe3ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## master (unreleased) +- Improve retrying of failed sharded background migrations - Fix a bug when retried background data migration can not start - Do not run multiple background schema migrations on the same table at the same time diff --git a/docs/background_data_migrations.md b/docs/background_data_migrations.md index d04266e..947a77a 100644 --- a/docs/background_data_migrations.md +++ b/docs/background_data_migrations.md @@ -247,6 +247,17 @@ migration.progress # value from 0 to 100.0 **Note**: It will be easier to work with background migrations through some kind of Web UI, but until it is implemented, we can work with them only manually. +## Retrying a failed migration + +To retry a failed migration, run: + +```ruby +migration = OnlineMigrations::BackgroundMigrations::Migration.find(id) +migration.retry # => `true` if scheduled to be retried, `false` - if not +``` + +The migration will be retried on the next Scheduler run. + ## Configuring There are a few configurable options for the Background Migrations. Custom configurations should be placed in a `online_migrations.rb` initializer. diff --git a/docs/background_schema_migrations.md b/docs/background_schema_migrations.md index 84f29fc..11759e8 100644 --- a/docs/background_schema_migrations.md +++ b/docs/background_schema_migrations.md @@ -66,6 +66,17 @@ end You shouldn't depend on the schema until the background schema migration is finished. If having the schema migrated is a requirement, then the `ensure_background_schema_migration_succeeded` helper can be used to guarantee that the migration succeeded and the schema change applied. +## Retrying a failed migration + +To retry a failed migration, run: + +```ruby +migration = OnlineMigrations::BackgroundSchemaMigrations::Migration.find(id) +migration.retry # => `true` if scheduled to be retried, `false` - if not +``` + +The migration will be retried on the next Scheduler run. + ## Instrumentation Background schema migrations use the [ActiveSupport::Notifications](http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html) API. diff --git a/lib/online_migrations/background_migrations/migration.rb b/lib/online_migrations/background_migrations/migration.rb index 1db97e3..ae09247 100644 --- a/lib/online_migrations/background_migrations/migration.rb +++ b/lib/online_migrations/background_migrations/migration.rb @@ -38,9 +38,9 @@ class Migration < ApplicationRecord enum status: STATUSES.index_with(&:to_s) end - belongs_to :parent, class_name: name, optional: true - has_many :children, class_name: name, foreign_key: :parent_id, dependent: :delete_all - has_many :migration_jobs, dependent: :delete_all + belongs_to :parent, class_name: name, optional: true, inverse_of: :children + has_many :children, class_name: name, foreign_key: :parent_id, dependent: :delete_all, inverse_of: :parent + has_many :migration_jobs, dependent: :delete_all, inverse_of: :migration validates :migration_name, :batch_column_name, presence: true @@ -156,20 +156,28 @@ def interval_elapsed? last_job.enqueued? || (last_job.updated_at + batch_pause <= Time.current) end - # Manually retry failed jobs. + # Mark this migration as ready to be processed again. # # This method marks failed jobs as ready to be processed again, and # they will be picked up on the next Scheduler run. # - def retry_failed_jobs - iterator = BatchIterator.new(migration_jobs.failed) - iterator.each_batch(of: 100) do |batch| - transaction do + def retry + if composite? && failed? + children.failed.each(&:retry) + enqueued! + true + elsif failed? + iterator = BatchIterator.new(migration_jobs.failed) + iterator.each_batch(of: 100) do |batch| batch.each(&:retry) - enqueued! end + enqueued! + true + else + false end end + alias retry_failed_jobs retry # Returns the time this migration started running. def started_at diff --git a/lib/online_migrations/background_migrations/migration_job.rb b/lib/online_migrations/background_migrations/migration_job.rb index e9ffa18..0d291a8 100644 --- a/lib/online_migrations/background_migrations/migration_job.rb +++ b/lib/online_migrations/background_migrations/migration_job.rb @@ -47,7 +47,7 @@ class MigrationJob < ApplicationRecord delegate :migration_name, :migration_class, :migration_object, :migration_relation, :batch_column_name, :arguments, :batch_pause, to: :migration - belongs_to :migration + belongs_to :migration, inverse_of: :migration_jobs validates :min_value, :max_value, presence: true, numericality: { greater_than: 0 } validate :values_in_migration_range, if: :min_value? @@ -69,15 +69,23 @@ def stuck? # This is used when retrying failed jobs. # def retry - update!( - status: self.class.statuses[:enqueued], - attempts: 0, - started_at: nil, - finished_at: nil, - error_class: nil, - error_message: nil, - backtrace: nil - ) + if failed? + transaction do + update!( + status: self.class.statuses[:enqueued], + attempts: 0, + started_at: nil, + finished_at: nil, + error_class: nil, + error_message: nil, + backtrace: nil + ) + migration.enqueued! if migration.failed? + end + true + else + false + end end private diff --git a/lib/online_migrations/background_schema_migrations/migration.rb b/lib/online_migrations/background_schema_migrations/migration.rb index 50543e8..21dfea6 100644 --- a/lib/online_migrations/background_schema_migrations/migration.rb +++ b/lib/online_migrations/background_schema_migrations/migration.rb @@ -55,8 +55,8 @@ class Migration < ApplicationRecord enum status: STATUSES.index_with(&:to_s) end - belongs_to :parent, class_name: name, optional: true - has_many :children, class_name: name, foreign_key: :parent_id + belongs_to :parent, class_name: name, optional: true, inverse_of: :children + has_many :children, class_name: name, foreign_key: :parent_id, inverse_of: :parent validates :table_name, presence: true, length: { maximum: MAX_IDENTIFIER_LENGTH } validates :definition, presence: true @@ -109,8 +109,10 @@ def progress # This is used to manually retrying failed migrations. # def retry - if composite? + if composite? && failed? children.failed.each(&:retry) + enqueued! + true elsif failed? update!( status: self.class.statuses[:enqueued], @@ -121,6 +123,9 @@ def retry error_message: nil, backtrace: nil ) + true + else + false end end diff --git a/test/background_migrations/migration_job_test.rb b/test/background_migrations/migration_job_test.rb index 1981b45..51b3b40 100644 --- a/test/background_migrations/migration_job_test.rb +++ b/test/background_migrations/migration_job_test.rb @@ -65,7 +65,9 @@ def test_retry assert_equal "Boom!", error.message j = m.last_job - j.retry + assert j.failed? + assert j.retry + assert m.running? # not updated to `enqueued`, because it was not `failed` assert j.enqueued? assert_equal 0, j.attempts assert_nil j.started_at diff --git a/test/background_migrations/migration_test.rb b/test/background_migrations/migration_test.rb index 01d26a4..40e7cec 100644 --- a/test/background_migrations/migration_test.rb +++ b/test/background_migrations/migration_test.rb @@ -221,20 +221,43 @@ def test_interval_elapsed_p end end - def test_retry_failed_jobs + def test_retry 2.times { User.create! } m = create_migration(batch_size: 1, sub_batch_size: 1) 3.times { run_migration_job(m) } assert m.succeeded? + assert_equal false, m.retry m.update_column(:status, "failed") m.migration_jobs.update_all(status: "failed") - m.retry_failed_jobs + assert m.retry assert m.migration_jobs.all?(&:enqueued?) assert m.enqueued? end + def test_retry_composite + m = create_migration(migration_name: "MakeAllDogsNice") + child1, child2, child3 = m.children.to_a + run_all_migration_jobs(m) + + migrations = [m, child1, child2, child3] + assert migrations.all?(&:succeeded?) + assert_equal false, m.retry + + m.update_column(:status, "failed") + child1.update_column(:status, "failed") + child1.migration_jobs.update_all(status: "failed") + + assert m.retry + + migrations.each(&:reload) + assert m.enqueued? + assert child1.enqueued? + assert child3.succeeded? + assert child1.migration_jobs.all?(&:enqueued?) + end + def test_started_at m = create_migration assert_equal m.created_at, m.started_at diff --git a/test/background_schema_migrations/migration_test.rb b/test/background_schema_migrations/migration_test.rb index 59ef66d..98227ec 100644 --- a/test/background_schema_migrations/migration_test.rb +++ b/test/background_schema_migrations/migration_test.rb @@ -159,7 +159,7 @@ def test_retry end assert m.failed? - m.retry + assert m.retry m.reload assert m.enqueued? @@ -171,6 +171,27 @@ def test_retry assert_nil m.backtrace end + def test_retry_composite + m = create_sharded_migration + run_migration(m) + assert m.succeeded? + + child1, child2, child3 = m.children.to_a + migrations = [m, child1, child2, child3] + assert migrations.all?(&:succeeded?) + assert_equal false, m.retry + + m.update_column(:status, "failed") + child1.update_column(:status, "failed") + + assert m.retry + + migrations.each(&:reload) + assert m.enqueued? + assert child1.enqueued? + assert child3.succeeded? + end + private def create_migration( name: "index_users_on_email",