Skip to content
Permalink
Browse files

REFACTOR: Restoring of backups and migration of uploads to S3

  • Loading branch information
gschlager committed Jan 12, 2020
1 parent f10078e commit e474cda3219d1bfd65ee7386cddece99a9e64374
Showing with 2,453 additions and 1,028 deletions.
  1. +10 −10 .github/workflows/ci.yml
  2. +1 −0 .gitignore
  3. +25 −10 lib/backup_restore.rb
  4. +96 −0 lib/backup_restore/backup_file_handler.rb
  5. +2 −2 lib/backup_restore/backup_store.rb
  6. +182 −0 lib/backup_restore/database_restorer.rb
  7. +34 −0 lib/backup_restore/factory.rb
  8. +37 −0 lib/backup_restore/logger.rb
  9. +60 −0 lib/backup_restore/meta_data_handler.rb
  10. +49 −567 lib/backup_restore/restorer.rb
  11. +102 −0 lib/backup_restore/system_interface.rb
  12. +136 −0 lib/backup_restore/uploads_restorer.rb
  13. +10 −0 lib/file_store/local_store.rb
  14. +26 −0 lib/file_store/s3_store.rb
  15. +346 −0 lib/file_store/to_s3_migration.rb
  16. +4 −0 lib/migration/safe_migrate.rb
  17. +8 −308 lib/tasks/uploads.rake
  18. +3 −2 script/discourse
  19. BIN spec/fixtures/backups/backup_since_v1.6.tar.gz
  20. BIN spec/fixtures/backups/backup_till_v1.5.tar.gz
  21. BIN spec/fixtures/backups/sql_only_backup.sql.gz
  22. +17 −0 spec/fixtures/db/post_migrate/drop_column/20990309014014_drop_post_columns.rb
  23. +2 −0 spec/fixtures/db/post_migrate/drop_table/20990309014013_drop_email_logs_table.rb
  24. +10 −0 spec/fixtures/db/restore/error.sql
  25. +31 −0 spec/fixtures/db/restore/postgresql_10.11.sql
  26. +49 −0 spec/fixtures/db/restore/postgresql_11.6.sql
  27. +49 −0 spec/fixtures/db/restore/postgresql_12.1.sql
  28. +29 −0 spec/fixtures/db/restore/postgresql_9.3.11.sql
  29. +31 −0 spec/fixtures/db/restore/postgresql_9.5.10.sql
  30. +31 −0 spec/fixtures/db/restore/postgresql_9.5.5.sql
  31. +77 −0 spec/lib/backup_restore/backup_file_handler_spec.rb
  32. +188 −0 spec/lib/backup_restore/database_restorer_spec.rb
  33. +81 −0 spec/lib/backup_restore/meta_data_handler_spec.rb
  34. +0 −129 spec/lib/backup_restore/restorer_spec.rb
  35. +9 −0 spec/lib/backup_restore/shared_context_for_backup_restore.rb
  36. +152 −0 spec/lib/backup_restore/system_interface_spec.rb
  37. +566 −0 spec/lib/backup_restore/uploads_restorer_spec.rb
@@ -1,13 +1,13 @@
name: CI

on:
push:
branches:
- master
pull_request:
branches-ignore:
- 'tests-passed'

jobs:
build:
name: "${{ matrix.target }}-${{ matrix.build_types }}"
@@ -38,7 +38,7 @@ jobs:
services:
postgres:
image: postgres:${{ matrix.postgres }}
ports:
ports:
- 5432:5432
env:
POSTGRES_USER: discourse
@@ -88,14 +88,14 @@ jobs:
key: ${{ runner.os }}-gem-${{ hashFiles('**/Gemfile.lock') }}
restore-keys: |
${{ runner.os }}-gem-
- name: Setup gems
run: bundle install --without development --deployment --jobs 4 --retry 3

- name: Get yarn cache directory
id: yarn-cache-dir
run: echo "::set-output name=dir::$(yarn cache dir)"

- name: Yarn cache
uses: actions/cache@v1
id: yarn-cache
@@ -113,7 +113,7 @@ jobs:
run: bin/rake plugin:install_all_official

- name: Create database
if: env.BUILD_TYPE != 'LINT'
if: env.BUILD_TYPE != 'LINT'
run: bin/rake db:create && bin/rake db:migrate

- name: Create parallel databases
@@ -123,7 +123,7 @@ jobs:
- name: Rubocop
if: env.BUILD_TYPE == 'LINT'
run: bundle exec rubocop .

- name: ESLint
if: env.BUILD_TYPE == 'LINT'
run: yarn eslint app/assets/javascripts test/javascripts && yarn eslint --ext .es6 app/assets/javascripts test/javascripts plugins
@@ -133,7 +133,7 @@ jobs:
run: |
yarn prettier -v
yarn prettier --list-different "app/assets/stylesheets/**/*.scss" "app/assets/javascripts/**/*.es6" "test/javascripts/**/*.es6" "plugins/**/*.scss" "plugins/**/*.es6"
- name: Core RSpec
if: env.BUILD_TYPE == 'BACKEND' && env.TARGET == 'CORE'
run: bin/turbo_rspec && bin/rake plugin:spec
@@ -146,12 +146,12 @@ jobs:
if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE'
run: bundle exec rake qunit:test['1200000']
timeout-minutes: 30

- name: Wizard QUnit
if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE'
run: bundle exec rake qunit:test['1200000','/wizard/qunit']
timeout-minutes: 30

- name: Plugin QUnit # Tests core plugins in TARGET=CORE, and all plugins in TARGET=PLUGINS
if: env.BUILD_TYPE == 'FRONTEND'
run: bundle exec rake plugin:qunit
@@ -32,6 +32,7 @@ config/discourse.conf
# Ignore the default SQLite database and db dumps
*.sql
*.sql.gz
!/spec/fixtures/**/*.sql
/db/*.sqlite3
/db/structure.sql
/db/schema.rb
@@ -4,10 +4,8 @@ module BackupRestore

class OperationRunningError < RuntimeError; end

VERSION_PREFIX = "v".freeze
DUMP_FILE = "dump.sql.gz".freeze
OLD_DUMP_FILE = "dump.sql".freeze
METADATA_FILE = "meta.json"
VERSION_PREFIX = "v"
DUMP_FILE = "dump.sql.gz"
LOGS_CHANNEL = "/admin/backups/logs"

def self.backup!(user_id, opts = {})
@@ -19,7 +17,16 @@ def self.backup!(user_id, opts = {})
end

def self.restore!(user_id, opts = {})
start! BackupRestore::Restorer.new(user_id, opts)
restorer = BackupRestore::Restorer.new(
user_id: user_id,
filename: opts[:filename],
factory: BackupRestore::Factory.new(
user_id: user_id,
client_id: opts[:client_id]
)
)

start! restorer
end

def self.rollback!
@@ -75,16 +82,18 @@ def self.current_version
end

def self.move_tables_between_schemas(source, destination)
DB.exec(move_tables_between_schemas_sql(source, destination))
ActiveRecord::Base.transaction do
DB.exec(move_tables_between_schemas_sql(source, destination))
end
end

def self.move_tables_between_schemas_sql(source, destination)
<<-SQL
<<~SQL
DO $$DECLARE row record;
BEGIN
-- create <destination> schema if it does not exists already
-- NOTE: DROP & CREATE SCHEMA is easier, but we don't want to drop the public schema
-- ortherwise extensions (like hstore & pg_trgm) won't work anymore...
-- otherwise extensions (like hstore & pg_trgm) won't work anymore...
CREATE SCHEMA IF NOT EXISTS #{destination};
-- move all <source> tables to <destination> schema
FOR row IN SELECT tablename FROM pg_tables WHERE schemaname = '#{source}'
@@ -108,11 +117,17 @@ def self.database_configuration
config = ActiveRecord::Base.connection_pool.spec.config
config = config.with_indifferent_access

# credentials for PostgreSQL in CI environment
if Rails.env.test?
username = ENV["PGUSER"]
password = ENV["PGPASSWORD"]
end

DatabaseConfiguration.new(
config["backup_host"] || config["host"],
config["backup_port"] || config["port"],
config["username"] || ENV["USER"] || "postgres",
config["password"],
config["username"] || username || ENV["USER"] || "postgres",
config["password"] || password,
config["database"]
)
end
@@ -0,0 +1,96 @@
# frozen_string_literal: true

module BackupRestore
class BackupFileHandler
OLD_DUMP_FILENAME = "dump.sql"

delegate :log, to: :@logger, private: true

# Prepares a handler for one backup file.
#
# @param logger [#log] object that receives progress messages (see the `delegate :log` declaration)
# @param filename [String] name of the backup file that should be restored
# @param current_db [String] used as a path segment when building the tmp directory
# @param root_tmp_directory [String, Pathname] base directory for tmp files, defaults to Rails.root
def initialize(logger, filename, current_db, root_tmp_directory = Rails.root)
  @logger = logger
  @filename = filename
  @current_db = current_db
  @root_tmp_directory = root_tmp_directory
  # Everything that does not end in ".sql.gz" is treated as an archive that needs unpacking.
  # `\z` anchors at the real end of the string; `$` would also match at the end of any line
  # and therefore accept names that merely contain ".sql.gz" before an embedded newline.
  @is_archive = !(@filename =~ /\.sql\.gz\z/)
end

# Copies the backup into a fresh tmp directory, unpacks it and locates the database dump.
#
# @return [Array] two elements: the tmp directory and the path of the database dump
def decompress
  create_tmp_directory
  @archive_path = File.join(@tmp_directory, @filename)

  # copy/download -> unpack archive -> locate (and gunzip) the dump, in exactly this order
  %i[copy_archive_to_tmp_directory decompress_archive extract_db_dump].each { |step| send(step) }

  [@tmp_directory, @db_dump_path]
end

# Removes the tmp directory that was created for this restore.
#
# Does nothing when no tmp directory has been created yet. Failures are logged
# instead of raised, so cleaning up never aborts the caller.
def clean_up
  return if @tmp_directory.blank?

  log "Removing tmp '#{@tmp_directory}' directory..."
  # Dir.exist? checks the literal path. Dir[] would treat the path as a glob pattern, so a
  # directory whose path contains characters like [, {, * or ? would never be removed.
  FileUtils.rm_rf(@tmp_directory) if Dir.exist?(@tmp_directory)
rescue => ex
  log "Something went wrong while removing the following tmp directory: #{@tmp_directory}", ex
end

protected

# Builds the path <root_tmp_directory>/tmp/restores/<current_db>/<timestamp>,
# stores it in @tmp_directory and makes sure the directory exists.
def create_tmp_directory

This comment has been minimized.

Copy link
@eviltrout

eviltrout Jan 15, 2020

Member

https://stackoverflow.com/a/17512070

Have you looked at this? It's nice to let the stdlib do this kind of thing instead of us.

This comment has been minimized.

Copy link
@gschlager

gschlager Jan 15, 2020

Author Member

Yes, I'm aware of this function, but I didn't want to change all the existing functionality at once. There's still room for improvement and with all the specs it's a lot easier to change things. I'll put it on my list of things to change.

# The timestamp has a resolution of one second and is the last path segment.
timestamp = Time.zone.now.strftime("%Y-%m-%d-%H%M%S")
@tmp_directory = File.join(@root_tmp_directory, "tmp", "restores", @current_db, timestamp)
ensure_directory_exists(@tmp_directory)
end

# Logs the intent and then creates the directory, including missing parents.
#
# @param directory [String] path of the directory that has to exist afterwards
def ensure_directory_exists(directory)
  message = "Making sure #{directory} exists..."
  log(message)
  FileUtils.mkdir_p(directory)
end

# Fetches the backup file from the backup store into @archive_path.
#
# The wording of the log and failure messages depends on whether the store
# reports itself as remote (download) or not (copy).
def copy_archive_to_tmp_directory
  store = BackupRestore::BackupStore.create
  remote = store.remote?

  log(remote ? "Downloading archive to tmp directory..." : "Copying archive to tmp directory...")

  failure_message =
    remote ? "Failed to download archive to tmp directory." : "Failed to copy archive to tmp directory."

  store.download_file(@filename, @archive_path, failure_message)
end

# Unpacks the archive at @archive_path into @tmp_directory.
# Skipped entirely when the backup file is not an archive.
def decompress_archive
  return unless @is_archive

  log("Unzipping archive, this may take a while...")

  strategies = [Compression::Tar.new, Compression::Gzip.new]
  pipeline = Compression::Pipeline.new(strategies)

  extracted_path = pipeline.decompress(@tmp_directory, @archive_path, available_size)
  pipeline.strip_directory(extracted_path, @tmp_directory)
end

# Determines where the database dump lives inside @tmp_directory and gunzips it if needed.
#
# For archives the old uncompressed dump name is preferred when such a file exists,
# otherwise BackupRestore::DUMP_FILE is used. For non-archives the backup file itself
# is the dump.
#
# @return [String] path of the dump, without a ".gz" suffix
def extract_db_dump
  @db_dump_path =
    if @is_archive
      # for compatibility with backups from Discourse v1.5 and below
      old_dump_path = File.join(@tmp_directory, OLD_DUMP_FILENAME)
      # File.exist? instead of File.exists?, which is deprecated and was removed in Ruby 3.2
      File.exist?(old_dump_path) ? old_dump_path : File.join(@tmp_directory, BackupRestore::DUMP_FILE)
    else
      File.join(@tmp_directory, @filename)
    end

  if File.extname(@db_dump_path) == '.gz'
    log "Extracting dump file..."
    Compression::Gzip.new.decompress(@tmp_directory, @db_dump_path, available_size)
    # the path now refers to the decompressed file
    @db_dump_path.delete_suffix!('.gz')
  end

  @db_dump_path
end

# Size limit handed to the decompressors as their third argument.
# Read from the `decompressed_backup_max_file_size_mb` site setting
# (presumably megabytes, going by the setting name — confirm).
def available_size
SiteSetting.decompressed_backup_max_file_size_mb
end
end
end
@@ -3,8 +3,8 @@
module BackupRestore
# @abstract
class BackupStore
class BackupFileExists < RuntimeError; end
class StorageError < RuntimeError; end
BackupFileExists = Class.new(RuntimeError)
StorageError = Class.new(RuntimeError)

# @return [BackupStore]
def self.create(opts = {})

1 comment on commit e474cda

@discoursereviewbot

This comment has been minimized.

Copy link

discoursereviewbot commented on e474cda Jan 15, 2020

Robin Ward posted:

This is a huge improvement. Thanks 👏

Please sign in to comment.
You can’t perform that action at this time.