From 8a6fb19aca3fee2b2a1877a7e3c0321a820a5616 Mon Sep 17 00:00:00 2001 From: "Brian D. Burns" Date: Sat, 20 Jul 2013 08:52:34 -0400 Subject: [PATCH] update Cloud Syncers --- Gemfile | 1 - Gemfile.lock | 2 - backup.gemspec | 1 - lib/backup.rb | 3 +- lib/backup/syncer/cloud/base.rb | 307 +++++------ lib/backup/syncer/cloud/cloud_files.rb | 82 ++- lib/backup/syncer/cloud/local_file.rb | 93 ++++ lib/backup/syncer/cloud/s3.rb | 106 +++- spec/support/shared_examples/syncer/cloud.rb | 532 +++++++++++++++++++ spec/syncer/cloud/base_spec.rb | 527 ------------------ spec/syncer/cloud/cloud_files_spec.rb | 303 ++++++----- spec/syncer/cloud/local_file_spec.rb | 60 +++ spec/syncer/cloud/s3_spec.rb | 316 ++++++----- templates/cli/syncer/cloud_files | 34 +- templates/cli/syncer/s3 | 30 +- vagrant/spec/live.yml.template | 19 + vagrant/spec/live/syncer/cloud_files_spec.rb | 149 ++++++ vagrant/spec/live/syncer/s3_spec.rb | 189 +++++++ vagrant/spec/support/example_helpers.rb | 54 ++ 19 files changed, 1693 insertions(+), 1115 deletions(-) create mode 100644 lib/backup/syncer/cloud/local_file.rb create mode 100644 spec/support/shared_examples/syncer/cloud.rb delete mode 100644 spec/syncer/cloud/base_spec.rb create mode 100644 spec/syncer/cloud/local_file_spec.rb create mode 100644 vagrant/spec/live/syncer/cloud_files_spec.rb create mode 100644 vagrant/spec/live/syncer/s3_spec.rb diff --git a/Gemfile b/Gemfile index c7fc458f3..ad5e3b6b9 100644 --- a/Gemfile +++ b/Gemfile @@ -27,7 +27,6 @@ group :production do gem 'net-ssh' gem 'net-scp' gem 'net-sftp' - gem 'parallel' gem 'mail' gem 'twitter' gem 'hipchat' diff --git a/Gemfile.lock b/Gemfile.lock index b2c8106e6..6ba4b93bc 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -64,7 +64,6 @@ GEM net-ssh (2.6.7) nokogiri (1.5.9) open4 (1.3.0) - parallel (0.6.5) polyglot (0.3.3) pry (0.9.12.2) coderay (~> 1.0.5) @@ -118,7 +117,6 @@ DEPENDENCIES net-sftp net-ssh open4 - parallel rb-fsevent rb-inotify redcarpet diff --git a/backup.gemspec b/backup.gemspec index 9c8a99d8a..c856352ff 100644 --- a/backup.gemspec +++ b/backup.gemspec @@ -43,7 +43,6 @@ Gem::Specification.new do |gem| gem.add_dependency 'net-ssh', '= 2.6.7' gem.add_dependency 'nokogiri', '= 1.5.9' gem.add_dependency 'open4', '= 1.3.0' - gem.add_dependency 'parallel', '= 0.6.5' gem.add_dependency 'polyglot', '= 0.3.3' gem.add_dependency 'ruby-hmac', '= 0.4.0' gem.add_dependency 'simple_oauth', '= 0.2.0' diff --git a/lib/backup.rb b/lib/backup.rb index 98d27156f..2cd91f71d 100644 --- a/lib/backup.rb +++ b/lib/backup.rb @@ -20,11 +20,11 @@ module Backup # Backup's internal paths LIBRARY_PATH = File.join(File.dirname(__FILE__), 'backup') STORAGE_PATH = File.join(LIBRARY_PATH, 'storage') + SYNCER_PATH = File.join(LIBRARY_PATH, 'syncer') DATABASE_PATH = File.join(LIBRARY_PATH, 'database') COMPRESSOR_PATH = File.join(LIBRARY_PATH, 'compressor') ENCRYPTOR_PATH = File.join(LIBRARY_PATH, 'encryptor') NOTIFIER_PATH = File.join(LIBRARY_PATH, 'notifier') - SYNCER_PATH = File.join(LIBRARY_PATH, 'syncer') TEMPLATE_PATH = File.expand_path('../../templates', __FILE__) ## @@ -49,6 +49,7 @@ module Syncer autoload :Base, File.join(SYNCER_PATH, 'base') module Cloud autoload :Base, File.join(SYNCER_PATH, 'cloud', 'base') + autoload :LocalFile, File.join(SYNCER_PATH, 'cloud', 'local_file') autoload :CloudFiles, File.join(SYNCER_PATH, 'cloud', 'cloud_files') autoload :S3, File.join(SYNCER_PATH, 'cloud', 's3') end diff --git a/lib/backup/syncer/cloud/base.rb b/lib/backup/syncer/cloud/base.rb index c373f1c11..883a154e2 100644 --- 
a/lib/backup/syncer/cloud/base.rb +++ b/lib/backup/syncer/cloud/base.rb @@ -1,229 +1,178 @@ # encoding: utf-8 -require 'fog' -require 'parallel' - module Backup module Syncer module Cloud + Error = Errors::Syncer::Cloud::Error + class Base < Syncer::Base + MUTEX = Mutex.new + + ## + # Number of threads to use for concurrency. + # + # Default: 0 (no concurrency) + attr_accessor :thread_count ## - # Concurrency setting - defaults to false, but can be set to: - # - :threads - # - :processes - attr_accessor :concurrency_type + # Number of times to retry failed operations. + # + # Default: 10 + attr_accessor :max_retries ## - # Concurrency level - the number of threads or processors to use. - # Defaults to 2. - attr_accessor :concurrency_level + # Time in seconds to pause before each retry. + # + # Default: 30 + attr_accessor :retry_waitsec def initialize(syncer_id = nil, &block) super instance_eval(&block) if block_given? - @concurrency_type ||= false - @concurrency_level ||= 2 + @thread_count ||= 0 + @max_retries ||= 10 + @retry_waitsec ||= 30 + @path ||= 'backups' @path = path.sub(/^\//, '') end - ## - # Performs the Sync operation def perform! log!(:started) - Logger.info( - "\s\sConcurrency: #{ @concurrency_type } Level: #{ @concurrency_level }" - ) - - @directories.each do |directory| - SyncContext.new( - File.expand_path(directory), repository_object, @path - ).sync! @mirror, @concurrency_type, @concurrency_level - end - + @transfer_count = 0 + @unchanged_count = 0 + @skipped_count = 0 + @orphans = thread_count > 0 ? Queue.new : [] + + directories.each {|dir| sync_directory(dir) } + orphans_result = process_orphans + + Logger.info "\nSummary:" + Logger.info "\s\sTransferred Files: #{ @transfer_count }" + Logger.info "\s\s#{ orphans_result }" + Logger.info "\s\sUnchanged Files: #{ @unchanged_count }" + Logger.warn "\s\sSkipped Files: #{ @skipped_count }" if @skipped_count > 0 log!(:finished) end private - class SyncContext - include Utilities::Helpers - - attr_reader :directory, :bucket, :path, :remote_base - - ## - # Creates a new SyncContext object which handles a single directory - # from the Syncer::Base @directories array. - def initialize(directory, bucket, path) - @directory, @bucket, @path = directory, bucket, path - @remote_base = File.join(path, File.basename(directory)) - end + def sync_directory(dir) + remote_base = File.join(path, File.basename(dir)) + Logger.info "Gathering remote data for '#{ remote_base }'..." + remote_files = get_remote_files(remote_base) + + Logger.info("Gathering local data for '#{ File.expand_path(dir) }'...") + local_files = LocalFile.find(dir) + + relative_paths = (local_files.keys | remote_files.keys).sort + if relative_paths.empty? + Logger.info 'No local or remote files found' + else + Logger.info 'Syncing...' + sync_block = Proc.new do |relative_path| + local_file = local_files[relative_path] + remote_md5 = remote_files[relative_path] + remote_path = File.join(remote_base, relative_path) + sync_file(local_file, remote_path, remote_md5) + end - ## - # Performs the sync operation using the provided techniques - # (mirroring/concurrency). 
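# A minimal configuration sketch for the new settings introduced above
# (#thread_count, #max_retries, #retry_waitsec). The `sync_with` DSL block and
# the values shown are illustrative assumptions, not part of this patch:
#
#   sync_with Cloud::S3 do |s3|
#     s3.thread_count  = 4    # 0 (the default) disables concurrency
#     s3.max_retries   = 10   # number of times to retry a failed operation
#     s3.retry_waitsec = 30   # seconds to pause before each retry
#   end
#
# These settings replace the removed #concurrency_type / #concurrency_level
# options; the attr_deprecate hooks added later in this patch map the old
# settings onto #thread_count.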
- def sync!(mirror = false, concurrency_type = false, concurrency_level = 2) - block = Proc.new { |relative_path| sync_file relative_path, mirror } - - case concurrency_type - when FalseClass - all_file_names.each(&block) - when :threads - Parallel.each all_file_names, - :in_threads => concurrency_level, &block - when :processes - Parallel.each all_file_names, - :in_processes => concurrency_level, &block + if thread_count > 0 + sync_in_threads(relative_paths, sync_block) else - raise Errors::Syncer::Cloud::ConfigurationError, - "Unknown concurrency_type setting: #{ concurrency_type.inspect }" + relative_paths.each(&sync_block) end end + end - private - - ## - # Gathers all the relative paths to the local files - # and merges them with the , removing - # duplicate keys if any, and sorts the in alphabetical order. - def all_file_names - @all_file_names ||= (local_files.keys | remote_files.keys).sort - end - - ## - # Returns a Hash of local files, validated to ensure the path - # does not contain invalid UTF-8 byte sequences. - # The keys are the filesystem paths, relative to @directory. - # The values are the LocalFile objects for that given file. - def local_files - @local_files ||= begin - hash = {} - local_hashes.lines.map do |line| - LocalFile.new(@directory, line) - end.compact.each do |file| - hash.merge!(file.relative_path => file) + def sync_in_threads(relative_paths, sync_block) + queue = Queue.new + queue << relative_paths.shift until relative_paths.empty? + num_threads = [thread_count, queue.size].min + Logger.info "\s\sUsing #{ num_threads } Threads" + threads = num_threads.times.map do + Thread.new do + loop do + path = queue.shift(true) rescue nil + path ? sync_block.call(path) : break end - hash end end - ## - # Returns a String of file paths and their md5 hashes. - # - # Utilities#run is not used here because this would produce too much - # log output, and Pipeline does not support capturing output. - def local_hashes - Logger.info("\s\sGenerating checksums for '#{ @directory }'") - cmd = "#{ utility(:find) } -L '#{ @directory }' -type f -print0 | " + - "#{ utility(:xargs) } -0 #{ utility(:openssl) } md5 2> /dev/null" - %x[#{ cmd }] - end - - ## - # Returns a Hash of remote files - # The keys are the remote paths, relative to @remote_base - # The values are the Fog file objects for that given file - def remote_files - @remote_files ||= begin - hash = {} - @bucket.files.all(:prefix => @remote_base).each do |file| - hash.merge!(file.key.sub("#{ @remote_base }/", '') => file) - end - hash + # abort if any thread raises an exception + while threads.any?(&:alive?) + if threads.any? {|thr| thr.status.nil? } + threads.each(&:kill) + Thread.pass while threads.any?(&:alive?) + break end + sleep num_threads * 0.1 end + threads.each(&:join) + end - ## - # Performs a sync operation on a file. When mirroring is enabled - # and a local file has been removed since the last sync, it will also - # remove it from the remote location. It will no upload files that - # have not changed since the last sync. Checks are done using an md5 - # hash. If a file has changed, or has been newly added, the file will - # be transferred/overwritten. 
- def sync_file(relative_path, mirror) - local_file = local_files[relative_path] - remote_file = remote_files[relative_path] - remote_path = File.join(@remote_base, relative_path) - - if local_file && File.exist?(local_file.path) - unless remote_file && remote_file.etag == local_file.md5 - Logger.info("\s\s[transferring] '#{ remote_path }'") - File.open(local_file.path, 'r') do |file| - @bucket.files.create( - :key => remote_path, - :body => file - ) - end - else - Logger.info("\s\s[skipping] '#{ remote_path }'") - end - elsif remote_file - if mirror - Logger.info("\s\s[removing] '#{ remote_path }'") - remote_file.destroy - else - Logger.info("\s\s[leaving] '#{ remote_path }'") + # If an exception is raised in multiple threads, only the exception + # raised in the first thread that Thread#join is called on will be + # handled. So all exceptions are logged first with their details, + # then a generic exception is raised. + def sync_file(local_file, remote_path, remote_md5) + if local_file && File.exist?(local_file.path) + if local_file.md5 == remote_md5 + MUTEX.synchronize { @unchanged_count += 1 } + else + Logger.info("\s\s[transferring] '#{ remote_path }'") + begin + cloud_io.upload(local_file.path, remote_path) + MUTEX.synchronize { @transfer_count += 1 } + rescue CloudIO::FileSizeError => err + MUTEX.synchronize { @skipped_count += 1 } + Logger.warn Error.wrap(err, "Skipping '#{ remote_path }'") + rescue => err + Logger.error(err) + raise Error, <<-EOS + Syncer Failed! + See the Retry [info] and [error] messages (if any) + for details on each failed operation. + EOS end end + elsif remote_md5 + @orphans << remote_path end - end # class SyncContext - - class LocalFile - attr_reader :path, :relative_path, :md5 - - ## - # Return a new LocalFile object if it's valid. - # Otherwise, log a warning and return nil. - def self.new(*args) - local_file = super(*args) - if local_file.invalid? - Logger.warn( - "\s\s[skipping] #{ local_file.path }\n" + - "\s\sPath Contains Invalid UTF-8 byte sequences" - ) - return nil - end - local_file - end + end - ## - # Creates a new LocalFile object using the given directory and line - # from the md5 hash checkup. This object figures out the path, - # relative_path and md5 hash for the file. - def initialize(directory, line) - @invalid = false - @directory = sanitize(directory) - line = sanitize(line).chomp - @path = line.slice(4..-36) - @md5 = line.slice(-32..-1) - @relative_path = @path.sub(@directory + '/', '') + def process_orphans + if @orphans.empty? + return mirror ? 'Deleted Files: 0' : 'Orphaned Files: 0' end - def invalid? - @invalid + if @orphans.is_a?(Queue) + @orphans = @orphans.size.times.map { @orphans.shift } end - private - - ## - # Sanitize string and replace any invalid UTF-8 characters. - # If replacements are made, flag the LocalFile object as invalid. 
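# An illustrative sketch of this sanitizing behavior (hypothetical value,
# modeled on the specs later in this patch): invalid bytes are replaced with
# "\uFFFD" and the object is flagged invalid, so the file can be logged and
# skipped with a warning rather than aborting the sync:
#
#   sanitize("sync\xFFfile")  # => "sync\xEF\xBF\xBDfile"
#   invalid?                  # => true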
- def sanitize(str) - str.each_char.map do |char| - begin - char if !!char.unpack('U') - rescue - @invalid = true - "\xEF\xBF\xBD" # => "\uFFFD" - end - end.join + if mirror + Logger.info @orphans.map {|path| + "\s\s[removing] '#{ path }'" + }.join("\n") + + begin + cloud_io.delete(@orphans) + "Deleted Files: #{ @orphans.count }" + rescue => err + Logger.warn Error.wrap(err, 'Delete Operation Failed') + "Attempted to Delete: #{ @orphans.count } " + + "(See log messages for actual results)" + end + else + Logger.info @orphans.map {|path| + "\s\s[orphaned] '#{ path }'" + }.join("\n") + "Orphaned Files: #{ @orphans.count }" end + end - end # class LocalFile - - end # class Base < Syncer::Base - end # module Cloud + end + end end end diff --git a/lib/backup/syncer/cloud/cloud_files.rb b/lib/backup/syncer/cloud/cloud_files.rb index c60cb8a1a..eefbb90c7 100644 --- a/lib/backup/syncer/cloud/cloud_files.rb +++ b/lib/backup/syncer/cloud/cloud_files.rb @@ -1,66 +1,92 @@ # encoding: utf-8 +require 'backup/cloud_io/cloud_files' module Backup module Syncer module Cloud class CloudFiles < Base + Error = Errors::Syncer::Cloud::CloudFiles::Error ## # Rackspace CloudFiles Credentials - attr_accessor :api_key, :username + attr_accessor :username, :api_key ## # Rackspace CloudFiles Container attr_accessor :container ## - # Rackspace AuthURL allows you to connect - # to a different Rackspace datacenter - # - https://auth.api.rackspacecloud.com (Default: US) - # - https://lon.auth.api.rackspacecloud.com (UK) + # Rackspace AuthURL (optional) attr_accessor :auth_url ## - # Improve performance and avoid data transfer costs - # by setting @servicenet to `true` - # This only works if Backup runs on a Rackspace server + # Rackspace Region (optional) + attr_accessor :region + + ## + # Rackspace Service Net + # (LAN-based transfers to avoid charges and improve performance) attr_accessor :servicenet def initialize(syncer_id = nil) super @servicenet ||= false + + check_configuration end private - ## - # Established and creates a new Fog storage object for CloudFiles. - def connection - @connection ||= Fog::Storage.new( - :provider => provider, - :rackspace_username => username, - :rackspace_api_key => api_key, - :rackspace_auth_url => auth_url, - :rackspace_servicenet => servicenet + def cloud_io + @cloud_io ||= CloudIO::CloudFiles.new( + :username => username, + :api_key => api_key, + :auth_url => auth_url, + :region => region, + :servicenet => servicenet, + :container => container, + :max_retries => max_retries, + :retry_waitsec => retry_waitsec, + # Syncer can not use SLOs. + :segments_container => nil, + :segment_size => 0 ) end - ## - # Creates a new @repository_object (container). - # Fetches it from Cloud Files if it already exists, - # otherwise it will create it first and fetch use that instead. - def repository_object - @repository_object ||= connection.directories.get(container) || - connection.directories.create(:key => container) + def get_remote_files(remote_base) + hash = {} + cloud_io.objects(remote_base).each do |object| + relative_path = object.name.sub(remote_base + '/', '') + hash[relative_path] = object.hash + end + hash end - ## - # This is the provider that Fog uses for the Cloud Files - def provider - "Rackspace" + def check_configuration + required = %w{ username api_key container } + raise Error, <<-EOS if required.map {|name| send(name) }.any?(&:nil?) 
+ Configuration Error + #{ required.map {|name| "##{ name }"}.join(', ') } are all required + EOS end + attr_deprecate :concurrency_type, :version => '3.7.0', + :message => 'Use #thread_count instead.', + :action => lambda {|klass, val| + if val == :threads + klass.thread_count = 2 unless klass.thread_count + else + klass.thread_count = 0 + end + } + + attr_deprecate :concurrency_level, :version => '3.7.0', + :message => 'Use #thread_count instead.', + :action => lambda {|klass, val| + klass.thread_count = val unless klass.thread_count == 0 + } + end # class Cloudfiles < Base end # module Cloud end diff --git a/lib/backup/syncer/cloud/local_file.rb b/lib/backup/syncer/cloud/local_file.rb new file mode 100644 index 000000000..9e1f416b7 --- /dev/null +++ b/lib/backup/syncer/cloud/local_file.rb @@ -0,0 +1,93 @@ +# encoding: utf-8 + +module Backup + module Syncer + module Cloud + class LocalFile + attr_reader :path, :md5 + + class << self + include Utilities::Helpers + + # Returns a Hash of LocalFile objects for each file within +dir+. + # Hash keys are the file's path relative to +dir+. + def find(dir) + dir = File.expand_path(dir) + hash = {} + find_md5(dir).each do |path, md5| + file = new(path, md5) + hash[path.sub(dir + '/', '')] = file if file + end + hash + end + + # Return a new LocalFile object if it's valid. + # Otherwise, log a warning and return nil. + def new(*args) + file = super + if file.invalid? + Logger.warn("\s\s[skipping] #{ file.path }\n" + + "\s\sPath Contains Invalid UTF-8 byte sequences") + file = nil + end + file + end + + private + + # Returns an Array of file paths and their md5 hashes. + # + # Lines output from `cmd` are formatted like: + # MD5(/dir/subdir/file)= 7eaabd1f53024270347800d0fdb34357 + # However, if +dir+ is empty, the following is returned: + # (stdin)= d41d8cd98f00b204e9800998ecf8427e + # Which extracts as: ['in', 'd41d8cd98f00b204e9800998ecf8427e'] + # I'm not sure I can rely on the fact this doesn't begin with 'MD5', + # so I'll reject entries with a path that doesn't start with +dir+. + # + # String#slice avoids `invalid byte sequence in UTF-8` errors + # that String#split would raise. + # + # Utilities#run is not used here because this would produce too much + # log output, and Pipeline does not support capturing output. + def find_md5(dir) + cmd = "#{ utility(:find) } -L '#{ dir }' -type f -print0 | " + + "#{ utility(:xargs) } -0 #{ utility(:openssl) } md5 2> /dev/null" + %x[#{ cmd }].lines.map do |line| + line.chomp! + entry = [line.slice(4..-36), line.slice(-32..-1)] + entry[0].to_s.start_with?(dir) ? entry : nil + end.compact + end + end + + # If +path+ contains invalid UTF-8, it will be sanitized + # and the LocalFile object will be flagged as invalid. + # This is done so @file.path may be logged. + def initialize(path, md5) + @path = sanitize(path) + @md5 = md5 + end + + def invalid? 
+ !!@invalid + end + + private + + def sanitize(str) + str.each_char.map do |char| + begin + char.unpack('U') + char + rescue + @invalid = true + "\xEF\xBF\xBD" # => "\uFFFD" + end + end.join + end + + end + end + end +end diff --git a/lib/backup/syncer/cloud/s3.rb b/lib/backup/syncer/cloud/s3.rb index 46d705558..626187cb7 100644 --- a/lib/backup/syncer/cloud/s3.rb +++ b/lib/backup/syncer/cloud/s3.rb @@ -1,50 +1,114 @@ # encoding: utf-8 +require 'backup/cloud_io/s3' module Backup module Syncer module Cloud class S3 < Base + Error = Errors::Syncer::Cloud::S3::Error ## # Amazon Simple Storage Service (S3) Credentials attr_accessor :access_key_id, :secret_access_key ## - # The S3 bucket to store files to + # Amazon S3 bucket name attr_accessor :bucket ## - # The AWS region of the specified S3 bucket + # Region of the specified S3 bucket attr_accessor :region - private + ## + # Encryption algorithm to use for Amazon Server-Side Encryption + # + # Supported values: + # + # - :aes256 + # + # Default: nil + attr_accessor :encryption ## - # Established and creates a new Fog storage object for S3. - def connection - @connection ||= Fog::Storage.new( - :provider => provider, - :aws_access_key_id => access_key_id, - :aws_secret_access_key => secret_access_key, - :region => region + # Storage class to use for the S3 objects uploaded + # + # Supported values: + # + # - :standard (default) + # - :reduced_redundancy + # + # Default: :standard + attr_accessor :storage_class + + def initialize(syncer_id = nil) + super + + @storage_class ||= :standard + + check_configuration + end + + private + + def cloud_io + @cloud_io ||= CloudIO::S3.new( + :access_key_id => access_key_id, + :secret_access_key => secret_access_key, + :bucket => bucket, + :region => region, + :encryption => encryption, + :storage_class => storage_class, + :max_retries => max_retries, + :retry_waitsec => retry_waitsec, + # Syncer can not use multipart upload. + :chunk_size => 0 ) end - ## - # Creates a new @repository_object (bucket). - # Fetches it from S3 if it already exists, - # otherwise it will create it first and fetch use that instead. - def repository_object - @repository_object ||= connection.directories.get(bucket) || - connection.directories.create(:key => bucket, :location => region) + def get_remote_files(remote_base) + hash = {} + cloud_io.objects(remote_base).each do |object| + relative_path = object.key.sub(remote_base + '/', '') + hash[relative_path] = object.etag + end + hash end - ## - # This is the provider that Fog uses for the Cloud Files - def provider - "AWS" + def check_configuration + required = %w{ access_key_id secret_access_key bucket } + raise Error, <<-EOS if required.map {|name| send(name) }.any?(&:nil?) 
+ Configuration Error + #{ required.map {|name| "##{ name }"}.join(', ') } are all required + EOS + + raise Error, <<-EOS if encryption && encryption.to_s.upcase != 'AES256' + Configuration Error + #encryption must be :aes256 or nil + EOS + + classes = ['STANDARD', 'REDUCED_REDUNDANCY'] + raise Error, <<-EOS unless classes.include?(storage_class.to_s.upcase) + Configuration Error + #storage_class must be :standard or :reduced_redundancy + EOS end + attr_deprecate :concurrency_type, :version => '3.7.0', + :message => 'Use #thread_count instead.', + :action => lambda {|klass, val| + if val == :threads + klass.thread_count = 2 unless klass.thread_count + else + klass.thread_count = 0 + end + } + + attr_deprecate :concurrency_level, :version => '3.7.0', + :message => 'Use #thread_count instead.', + :action => lambda {|klass, val| + klass.thread_count = val unless klass.thread_count == 0 + } + end # Class S3 < Base end # module Cloud end diff --git a/spec/support/shared_examples/syncer/cloud.rb b/spec/support/shared_examples/syncer/cloud.rb new file mode 100644 index 000000000..d3ae9271e --- /dev/null +++ b/spec/support/shared_examples/syncer/cloud.rb @@ -0,0 +1,532 @@ +# encoding: utf-8 + +module Backup +shared_examples 'a subclass of Syncer::Cloud::Base' do + let(:syncer_name) { described_class.name.sub('Backup::', '') } + let(:s) { sequence '' } + + describe '#initialize' do + + it 'strips leading path separator' do + pre_config = required_config + klass = described_class.new do |syncer| + pre_config.call(syncer) + syncer.path = '/this/path' + end + expect( klass.path ).to eq 'this/path' + end + + end # describe '#initialize' + + describe '#perform' do + let(:syncer) { described_class.new(&required_config) } + let(:cloud_io) { mock } + let(:find_md5_data) { + [['/local/path/sync_dir/unchanged_01', 'unchanged_01_md5'], + ['/local/path/sync_dir/sub_dir/unchanged_02', 'unchanged_02_md5'], + ['/local/path/sync_dir/changed_01', 'changed_01_md5'], + ['/local/path/sync_dir/sub_dir/changed_02', 'changed_02_md5'], + ['/local/path/sync_dir/missing_01', 'missing_01_md5']] + } + let(:remote_files_data) { + { 'unchanged_01' => 'unchanged_01_md5', + 'sub_dir/unchanged_02' => 'unchanged_02_md5', + 'changed_01' => 'changed_01_md5_old', + 'sub_dir/changed_02' => 'changed_02_md5_old', + 'orphan_01' => 'orphan_01_md5', + 'sub_dir/orphan_02' => 'orphan_02_md5' } + } + + before do + syncer.path = 'my_backups' + syncer.directories { add '/local/path/sync_dir' } + syncer.stubs(:cloud_io).returns(cloud_io) + cloud_io.stubs(:upload) + cloud_io.stubs(:delete) + File.stubs(:exist?).returns(true) + end + + context 'when no local or remote files are found' do + before do + syncer.stubs(:get_remote_files). + with('my_backups/sync_dir').returns({}) + Syncer::Cloud::LocalFile.expects(:find_md5). + with('/local/path/sync_dir').returns([]) + end + + it 'does not attempt to sync' do + expected_messages = <<-EOS.gsub(/^ +/, '').chomp + #{ syncer_name } Started... + Gathering remote data for 'my_backups/sync_dir'... + Gathering local data for '/local/path/sync_dir'... + No local or remote files found + + Summary: + Transferred Files: 0 + Orphaned Files: 0 + Unchanged Files: 0 + #{ syncer_name } Finished! + EOS + + syncer.perform! + + expect( Logger.has_warnings? ).to be(false) + expect( + Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + ).to eq expected_messages + end + end + + context 'without threads' do + before do + syncer.stubs(:get_remote_files). 
+ with('my_backups/sync_dir').returns(remote_files_data) + Syncer::Cloud::LocalFile.expects(:find_md5). + with('/local/path/sync_dir').returns(find_md5_data) + end + + context 'without mirror' do + + it 'leaves orphaned files' do + expected_messages = <<-EOS.gsub(/^ +/, '').chomp + #{ syncer_name } Started... + Gathering remote data for 'my_backups/sync_dir'... + Gathering local data for '/local/path/sync_dir'... + Syncing... + [transferring] 'my_backups/sync_dir/changed_01' + [transferring] 'my_backups/sync_dir/missing_01' + [transferring] 'my_backups/sync_dir/sub_dir/changed_02' + [orphaned] 'my_backups/sync_dir/orphan_01' + [orphaned] 'my_backups/sync_dir/sub_dir/orphan_02' + + Summary: + Transferred Files: 3 + Orphaned Files: 2 + Unchanged Files: 2 + #{ syncer_name } Finished! + EOS + + syncer.perform! + + expect( Logger.has_warnings? ).to be(false) + expect( + Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + ).to eq expected_messages + end + + end # context 'without mirror' + + context 'with mirror' do + before { syncer.mirror = true } + + it 'deletes orphaned files' do + expected_messages = <<-EOS.gsub(/^ +/, '').chomp + #{ syncer_name } Started... + Gathering remote data for 'my_backups/sync_dir'... + Gathering local data for '/local/path/sync_dir'... + Syncing... + [transferring] 'my_backups/sync_dir/changed_01' + [transferring] 'my_backups/sync_dir/missing_01' + [transferring] 'my_backups/sync_dir/sub_dir/changed_02' + [removing] 'my_backups/sync_dir/orphan_01' + [removing] 'my_backups/sync_dir/sub_dir/orphan_02' + + Summary: + Transferred Files: 3 + Deleted Files: 2 + Unchanged Files: 2 + #{ syncer_name } Finished! + EOS + + syncer.perform! + + expect( Logger.has_warnings? ).to be(false) + expect( + Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + ).to eq expected_messages + end + + it 'warns if delete fails' do + cloud_io.stubs(:delete).raises('Delete Error') + + expected_messages = <<-EOS.gsub(/^ +/, '').chomp + #{ syncer_name } Started... + Gathering remote data for 'my_backups/sync_dir'... + Gathering local data for '/local/path/sync_dir'... + Syncing... + [transferring] 'my_backups/sync_dir/changed_01' + [transferring] 'my_backups/sync_dir/missing_01' + [transferring] 'my_backups/sync_dir/sub_dir/changed_02' + [removing] 'my_backups/sync_dir/orphan_01' + [removing] 'my_backups/sync_dir/sub_dir/orphan_02' + Syncer::Cloud::Error: Delete Operation Failed + --- Wrapped Exception --- + RuntimeError: Delete Error + + Summary: + Transferred Files: 3 + Attempted to Delete: 2 (See log messages for actual results) + Unchanged Files: 2 + #{ syncer_name } Finished! + EOS + + syncer.perform! + + expect( Logger.has_warnings? ).to be(true) + expect( + Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + ).to eq expected_messages + end + + end # context 'with mirror' + + it 'skips files that are too large' do + cloud_io.stubs(:upload).with( + '/local/path/sync_dir/changed_01', 'my_backups/sync_dir/changed_01' + ).raises(CloudIO::FileSizeError) + + expected_messages = <<-EOS.gsub(/^ +/, '').chomp + #{ syncer_name } Started... + Gathering remote data for 'my_backups/sync_dir'... + Gathering local data for '/local/path/sync_dir'... + Syncing... 
+ [transferring] 'my_backups/sync_dir/changed_01' + Syncer::Cloud::Error: Skipping 'my_backups/sync_dir/changed_01' + --- Wrapped Exception --- + CloudIO::FileSizeError + [transferring] 'my_backups/sync_dir/missing_01' + [transferring] 'my_backups/sync_dir/sub_dir/changed_02' + [orphaned] 'my_backups/sync_dir/orphan_01' + [orphaned] 'my_backups/sync_dir/sub_dir/orphan_02' + + Summary: + Transferred Files: 2 + Orphaned Files: 2 + Unchanged Files: 2 + Skipped Files: 1 + #{ syncer_name } Finished! + EOS + + syncer.perform! + + expect( Logger.has_warnings? ).to be(true) + expect( + Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + ).to eq expected_messages + end + + it 'logs and raises error on upload failure' do + cloud_io.stubs(:upload).raises('upload failure') + Logger.expects(:error).with do |err| + expect( err.message ).to eq 'upload failure' + end + expect do + syncer.perform! + end.to raise_error(Syncer::Cloud::Error) + end + + end # context 'without threads' + + context 'with threads' do + before do + syncer.stubs(:get_remote_files). + with('my_backups/sync_dir').returns(remote_files_data) + Syncer::Cloud::LocalFile.expects(:find_md5). + with('/local/path/sync_dir').returns(find_md5_data) + + syncer.thread_count = 20 + syncer.stubs(:sleep) # quicker tests + end + + context 'without mirror' do + + it 'leaves orphaned files' do + expected_head = <<-EOS.gsub(/^ +/, '') + #{ syncer_name } Started... + Gathering remote data for 'my_backups/sync_dir'... + Gathering local data for '/local/path/sync_dir'... + Syncing... + Using 7 Threads + EOS + expected_tail = <<-EOS.gsub(/^ +/, '').chomp + [orphaned] 'my_backups/sync_dir/orphan_01' + [orphaned] 'my_backups/sync_dir/sub_dir/orphan_02' + + Summary: + Transferred Files: 3 + Orphaned Files: 2 + Unchanged Files: 2 + #{ syncer_name } Finished! + EOS + + syncer.mirror = false + syncer.perform! + + expect( Logger.has_warnings? ).to be(false) + messages = Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + expect( messages ).to start_with expected_head + expect( messages ).to end_with expected_tail + end + + end # context 'without mirror' + + context 'with mirror' do + before { syncer.mirror = true } + + it 'deletes orphaned files' do + expected_head = <<-EOS.gsub(/^ +/, '') + #{ syncer_name } Started... + Gathering remote data for 'my_backups/sync_dir'... + Gathering local data for '/local/path/sync_dir'... + Syncing... + Using 7 Threads + EOS + expected_tail = <<-EOS.gsub(/^ +/, '').chomp + [removing] 'my_backups/sync_dir/orphan_01' + [removing] 'my_backups/sync_dir/sub_dir/orphan_02' + + Summary: + Transferred Files: 3 + Deleted Files: 2 + Unchanged Files: 2 + #{ syncer_name } Finished! + EOS + + syncer.perform! + + expect( Logger.has_warnings? ).to be(false) + messages = Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + expect( messages ).to start_with expected_head + expect( messages ).to end_with expected_tail + end + + it 'warns if delete fails' do + cloud_io.stubs(:delete).raises('Delete Error') + + expected_tail = <<-EOS.gsub(/^ +/, '').chomp + Summary: + Transferred Files: 3 + Attempted to Delete: 2 (See log messages for actual results) + Unchanged Files: 2 + #{ syncer_name } Finished! + EOS + + syncer.perform! + + expect( Logger.has_warnings? 
).to be(true) + messages = Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + expect( messages ).to end_with expected_tail + expect( messages ).to include(<<-EOS.gsub(/^ +/, '')) + Syncer::Cloud::Error: Delete Operation Failed + --- Wrapped Exception --- + RuntimeError: Delete Error + EOS + end + + end # context 'with mirror' + + it 'skips files that are too large' do + cloud_io.stubs(:upload).with( + '/local/path/sync_dir/changed_01', 'my_backups/sync_dir/changed_01' + ).raises(CloudIO::FileSizeError) + + expected_tail = <<-EOS.gsub(/^ +/, '').chomp + Summary: + Transferred Files: 2 + Orphaned Files: 2 + Unchanged Files: 2 + Skipped Files: 1 + #{ syncer_name } Finished! + EOS + + syncer.perform! + + expect( Logger.has_warnings? ).to be(true) + messages = Logger.messages.map(&:lines).flatten.map(&:strip).join("\n") + expect( messages ).to end_with expected_tail + expect( messages ).to include(<<-EOS.gsub(/^ +/, '')) + Syncer::Cloud::Error: Skipping 'my_backups/sync_dir/changed_01' + --- Wrapped Exception --- + CloudIO::FileSizeError + EOS + end + + it 'logs and raises error on upload failure' do + cloud_io.stubs(:upload).raises('upload failure') + Logger.expects(:error).at_least_once.with do |err| + expect( err.message ).to eq 'upload failure' + end + expect do + syncer.perform! + end.to raise_error(Syncer::Cloud::Error) + end + + end # context 'with threads' + + end # describe '#perform' + +end # shared_examples 'a subclass of Syncer::Cloud::Base' + +shared_examples 'Deprecation: #concurrency_type and #concurrency_level' do + after { described_class.clear_defaults! } + + context 'when desired #concurrency_type is :threads' do + context 'when only #concurrency_type is set' do + before do + Logger.expects(:warn).with {|err| + expect( err ).to be_an_instance_of Errors::ConfigurationError + expect( err.message ).to match(/Use #thread_count instead/) + } + end + + specify 'set as a default' do + described_class.defaults do |klass| + klass.concurrency_type = :threads + end + syncer = described_class.new(&required_config) + expect( syncer.thread_count ).to be 2 + end + + specify 'set directly' do + pre_config = required_config + syncer = described_class.new do |klass| + pre_config.call(klass) + klass.concurrency_type = :threads + end + expect( syncer.thread_count ).to be 2 + end + end + + context 'when both #concurrency_type and #concurrency_level are set' do + before do + Logger.expects(:warn).twice.with {|err| + expect( err ).to be_an_instance_of Errors::ConfigurationError + expect( err.message ).to match(/Use #thread_count instead/) + } + end + + context 'when #concurrency_type is set before #concurrency_level' do + specify 'set as a default' do + described_class.defaults do |klass| + klass.concurrency_type = :threads + klass.concurrency_level = 5 + end + syncer = described_class.new(&required_config) + expect( syncer.thread_count ).to be 5 + end + + specify 'set directly' do + pre_config = required_config + syncer = described_class.new do |klass| + pre_config.call(klass) + klass.concurrency_type = :threads + klass.concurrency_level = 5 + end + expect( syncer.thread_count ).to be 5 + end + end + + context 'when #concurrency_level is set before #concurrency_type' do + specify 'set as a default' do + described_class.defaults do |klass| + klass.concurrency_level = 5 + klass.concurrency_type = :threads + end + syncer = described_class.new(&required_config) + expect( syncer.thread_count ).to be 5 + end + + specify 'set directly' do + pre_config = required_config + syncer = 
described_class.new do |klass| + pre_config.call(klass) + klass.concurrency_level = 5 + klass.concurrency_type = :threads + end + expect( syncer.thread_count ).to be 5 + end + end + end + end + + context 'when desired #concurrency_type is :processes' do + context 'when only #concurrency_type is set' do + before do + Logger.expects(:warn).with {|err| + expect( err ).to be_an_instance_of Errors::ConfigurationError + expect( err.message ).to match(/Use #thread_count instead/) + } + end + + specify 'set as a default' do + described_class.defaults do |klass| + klass.concurrency_type = :processes + end + syncer = described_class.new(&required_config) + expect( syncer.thread_count ).to be 0 + end + + specify 'set directly' do + pre_config = required_config + syncer = described_class.new do |klass| + pre_config.call(klass) + klass.concurrency_type = :processes + end + expect( syncer.thread_count ).to be 0 + end + end + + context 'when both #concurrency_type and #concurrency_level are set' do + before do + Logger.expects(:warn).twice.with {|err| + expect( err ).to be_an_instance_of Errors::ConfigurationError + expect( err.message ).to match(/Use #thread_count instead/) + } + end + + context 'when #concurrency_type is set before #concurrency_level' do + specify 'set as a default' do + described_class.defaults do |klass| + klass.concurrency_type = :processes + klass.concurrency_level = 5 + end + syncer = described_class.new(&required_config) + expect( syncer.thread_count ).to be 0 + end + + specify 'set directly' do + pre_config = required_config + syncer = described_class.new do |klass| + pre_config.call(klass) + klass.concurrency_type = :processes + klass.concurrency_level = 5 + end + expect( syncer.thread_count ).to be 0 + end + end + + context 'when #concurrency_level is set before #concurrency_type' do + specify 'set as a default' do + described_class.defaults do |klass| + klass.concurrency_level = 5 + klass.concurrency_type = :processes + end + syncer = described_class.new(&required_config) + expect( syncer.thread_count ).to be 0 + end + + specify 'set directly' do + pre_config = required_config + syncer = described_class.new do |klass| + pre_config.call(klass) + klass.concurrency_level = 5 + klass.concurrency_type = :processes + end + expect( syncer.thread_count ).to be 0 + end + end + end + end +end # shared_examples 'deprecation: #concurrency_type and #concurrency_level' + +end diff --git a/spec/syncer/cloud/base_spec.rb b/spec/syncer/cloud/base_spec.rb deleted file mode 100644 index 5d039415c..000000000 --- a/spec/syncer/cloud/base_spec.rb +++ /dev/null @@ -1,527 +0,0 @@ -# encoding: utf-8 -require File.expand_path('../../../spec_helper.rb', __FILE__) - -describe 'Backup::Syncer::Cloud::Base' do - let(:syncer) { Backup::Syncer::Cloud::Base.new } - let(:s) { sequence '' } - - it 'should be a subclass of Syncer::Base' do - Backup::Syncer::Cloud::Base. - superclass.should == Backup::Syncer::Base - end - - describe '#initialize' do - after { Backup::Syncer::Cloud::Base.clear_defaults! } - - it 'should load pre-configured defaults through Syncer::Base' do - Backup::Syncer::Cloud::Base.any_instance.expects(:load_defaults!) 
- syncer - end - - context 'when no pre-configured defaults have been set' do - it 'should use default values if none are given' do - syncer.path.should == 'backups' - syncer.mirror.should == false - syncer.concurrency_type.should == false - syncer.concurrency_level.should == 2 - end - end # context 'when no pre-configured defaults have been set' - - context 'when pre-configured defaults have been set' do - before do - Backup::Syncer::Cloud::Base.defaults do |cloud| - cloud.concurrency_type = 'default_concurrency_type' - cloud.concurrency_level = 'default_concurrency_level' - end - end - - it 'should use pre-configured defaults' do - syncer.path.should == 'backups' - syncer.mirror.should == false - syncer.concurrency_type.should == 'default_concurrency_type' - syncer.concurrency_level.should == 'default_concurrency_level' - end - end # context 'when pre-configured defaults have been set' - end # describe '#initialize' - - describe '#perform' do - let(:sync_context) { mock } - - before do - syncer.stubs(:repository_object).returns(:a_repository_object) - end - - it 'should sync each directory' do - syncer.directories do - add '/dir/one' - add '/dir/two' - end - - Backup::Syncer::Cloud::Base::SyncContext.expects(:new).in_sequence(s).with( - '/dir/one', :a_repository_object, 'backups' - ).returns(sync_context) - sync_context.expects(:sync!).in_sequence(s).with( - false, false, 2 - ) - Backup::Syncer::Cloud::Base::SyncContext.expects(:new).in_sequence(s).with( - '/dir/two', :a_repository_object, 'backups' - ).returns(sync_context) - sync_context.expects(:sync!).in_sequence(s).with( - false, false, 2 - ) - - syncer.perform! - end - - it 'should ensure each directory path is expanded with no trailing slash' do - syncer.directories do - add '/dir/one/' - add 'dir/two' - end - - Backup::Syncer::Cloud::Base::SyncContext.expects(:new).with( - '/dir/one', :a_repository_object, 'backups' - ).returns(sync_context) - - Backup::Syncer::Cloud::Base::SyncContext.expects(:new).with( - File.expand_path('dir/two'), :a_repository_object, 'backups' - ).returns(sync_context) - - sync_context.stubs(:sync!) - - syncer.perform! - end - - describe 'logging messages' do - it 'logs started/finished messages' do - Backup::Logger.expects(:info).with('Syncer::Cloud::Base Started...') - Backup::Logger.expects(:info).with("\s\sConcurrency: false Level: 2") - Backup::Logger.expects(:info).with('Syncer::Cloud::Base Finished!') - syncer.perform! - end - - it 'logs messages using optional syncer_id' do - syncer = Backup::Syncer::Cloud::Base.new('My Syncer') - - Backup::Logger.expects(:info).with('Syncer::Cloud::Base (My Syncer) Started...') - Backup::Logger.expects(:info).with("\s\sConcurrency: false Level: 2") - Backup::Logger.expects(:info).with('Syncer::Cloud::Base (My Syncer) Finished!') - syncer.perform! - end - end - end # describe '#perform' - - describe 'Cloud::Base::SyncContext' do - let(:bucket) { mock } - let(:sync_context) do - Backup::Syncer::Cloud::Base::SyncContext.new( - '/dir/to/sync', bucket, 'backups' - ) - end - - it 'should include Utilities::Helpers' do - Backup::Syncer::Cloud::Base. - include?(Backup::Utilities::Helpers).should be_true - end - - describe '#initialize' do - it 'should set variables' do - sync_context.directory.should == '/dir/to/sync' - sync_context.bucket.should == bucket - sync_context.path.should == 'backups' - sync_context.remote_base.should == 'backups/sync' - end - end - - describe '#sync!' 
do - let(:all_files_array) { mock } - - before do - sync_context.stubs(:all_file_names).returns(all_files_array) - end - - context 'when concurrency_type is set to `false`' do - it 'syncs files without concurrency' do - all_files_array.expects(:each).in_sequence(s). - multiple_yields('foo.file', 'foo_dir/foo.file') - - sync_context.expects(:sync_file).in_sequence(s). - with('foo.file', :mirror) - sync_context.expects(:sync_file).in_sequence(s). - with('foo_dir/foo.file', :mirror) - - sync_context.sync!(:mirror, false, :foo) - end - end - - context 'when concurrency_type is set to `:threads`' do - it 'uses `concurrency_level` number of threads for concurrency' do - Parallel.expects(:each).in_sequence(s).with( - all_files_array, :in_threads => :num_of_threads - ).multiple_yields('foo.file', 'foo_dir/foo.file') - - sync_context.expects(:sync_file).in_sequence(s). - with('foo.file', :mirror) - sync_context.expects(:sync_file).in_sequence(s). - with('foo_dir/foo.file', :mirror) - - sync_context.sync!(:mirror, :threads, :num_of_threads) - end - end - - context 'when concurrency_type is set to `:processes`' do - it 'uses `concurrency_level` number of processes for concurrency' do - Parallel.expects(:each).in_sequence(s).with( - all_files_array, :in_processes => :num_of_processes - ).multiple_yields('foo.file', 'foo_dir/foo.file') - - sync_context.expects(:sync_file).in_sequence(s). - with('foo.file', :mirror) - sync_context.expects(:sync_file).in_sequence(s). - with('foo_dir/foo.file', :mirror) - - sync_context.sync!(:mirror, :processes, :num_of_processes) - end - end - - context 'when concurrency_type setting is invalid' do - it 'should raise an error' do - expect do - sync_context.sync!(:foo, 'unknown type', :foo) - end.to raise_error( - Backup::Errors::Syncer::Cloud::ConfigurationError, - 'Syncer::Cloud::ConfigurationError: ' + - "Unknown concurrency_type setting: \"unknown type\"" - ) - end - end - end # describe '#sync!' - - describe '#all_file_names' do - let(:local_files_hash) do - { 'file_b' => :foo, 'file_a' => :foo, 'dir_a/file_b' => :foo } - end - let(:remote_files_hash) do - { 'file_c' => :foo, 'file_a' => :foo, 'dir_a/file_a' => :foo } - end - let(:local_remote_union_array) do - ['dir_a/file_a', 'dir_a/file_b', 'file_a', 'file_b', 'file_c'] - end - - it 'returns and caches a sorted union of local and remote file names' do - sync_context.expects(:local_files).once.returns(local_files_hash) - sync_context.expects(:remote_files).once.returns(remote_files_hash) - - sync_context.send(:all_file_names).should == local_remote_union_array - sync_context.instance_variable_get(:@all_file_names). - should == local_remote_union_array - sync_context.send(:all_file_names).should == local_remote_union_array - end - end # describe '#all_file_names' - - describe '#local_files' do - let(:local_file_class) { Backup::Syncer::Cloud::Base::LocalFile } - let(:local_hashes_data) { "line1\nline2\nbad\xFFline\nline3" } - - let(:local_file_a) { stub(:relative_path => 'file_a') } - let(:local_file_b) { stub(:relative_path => 'file_b') } - let(:local_file_c) { stub(:relative_path => 'file_c') } - let(:local_files_hash) do - { 'file_a' => local_file_a, - 'file_b' => local_file_b, - 'file_c' => local_file_c } - end - - it 'should return and caches a hash of LocalFile objects' do - sync_context.expects(:local_hashes).once.returns(local_hashes_data) - - local_file_class.expects(:new).once.with('/dir/to/sync', "line1\n"). - returns(local_file_a) - local_file_class.expects(:new).once.with('/dir/to/sync', "line2\n"). 
- returns(local_file_b) - local_file_class.expects(:new).once.with('/dir/to/sync', "bad\xFFline\n"). - returns(nil) - local_file_class.expects(:new).once.with('/dir/to/sync', "line3"). - returns(local_file_c) - - sync_context.send(:local_files).should == local_files_hash - sync_context.instance_variable_get(:@local_files). - should == local_files_hash - sync_context.send(:local_files).should == local_files_hash - end - - # Note: don't use methods that validate encoding - it 'will raise an Exception if String#split is used', - :if => RUBY_VERSION >= '1.9' do - expect do - "line1\nbad\xFFline\nline3".split("\n") - end.to raise_error(ArgumentError, 'invalid byte sequence in UTF-8') - end - end # describe '#local_files' - - describe '#local_hashes' do - before do - sync_context.expects(:utility).with(:find).returns('find') - sync_context.expects(:utility).with(:xargs).returns('xargs') - sync_context.expects(:utility).with(:openssl).returns('openssl') - end - - it 'should collect file paths and MD5 checksums for @directory' do - Backup::Logger.expects(:info).with( - "\s\sGenerating checksums for '/dir/to/sync'" - ) - sync_context.expects(:`).with( - "find -L '/dir/to/sync' -type f -print0 | xargs -0 openssl md5 2> /dev/null" - ).returns('MD5(tmp/foo)= 0123456789abcdefghijklmnopqrstuv') - - sync_context.send(:local_hashes).should == - 'MD5(tmp/foo)= 0123456789abcdefghijklmnopqrstuv' - end - end - - describe '#remote_files' do - let(:repository_object) { mock } - let(:repository_files) { mock } - let(:file_objects) { mock } - let(:file_obj_a) { stub(:key => 'file_a') } - let(:file_obj_b) { stub(:key => 'file_b') } - let(:file_obj_c) { stub(:key => 'dir/file_c') } - let(:remote_files_hash) do - { 'file_a' => file_obj_a, - 'file_b' => file_obj_b, - 'dir/file_c' => file_obj_c } - end - - before do - sync_context.instance_variable_set(:@bucket, repository_object) - - repository_object.expects(:files).once.returns(repository_files) - repository_files.expects(:all).once.with(:prefix => 'backups/sync'). - returns(file_objects) - file_objects.expects(:each).once.multiple_yields( - file_obj_a, file_obj_b, file_obj_c - ) - - # this is to avoid: unexpected invocation: #.to_a() - # only 1.9.2 seems affected by this - if RUBY_VERSION == '1.9.2' - file_obj_a.stubs(:to_a) - file_obj_b.stubs(:to_a) - file_obj_c.stubs(:to_a) - end - end - - context 'when it returns and caches a hash of repository file objects' do - it 'should remove the @remote_base from the path for the hash key' do - sync_context.send(:remote_files).should == remote_files_hash - sync_context.instance_variable_get(:@remote_files). 
- should == remote_files_hash - sync_context.send(:remote_files).should == remote_files_hash - end - end - end # describe '#remote_files' - - describe '#sync_file' do - let(:local_file) do - stub( - :path => '/dir/to/sync/sync.file', - :md5 => '0123456789abcdefghijklmnopqrstuv') - end - let(:remote_file) do - stub(:path => 'backups/sync/sync.file') - end - let(:file) { mock } - let(:repository_object) { mock } - let(:repository_files) { mock } - - before do - sync_context.instance_variable_set(:@bucket, repository_object) - repository_object.stubs(:files).returns(repository_files) - end - - context 'when the requested file to sync exists locally' do - before do - sync_context.stubs(:local_files).returns( - { 'sync.file' => local_file } - ) - File.expects(:exist?).with('/dir/to/sync/sync.file').returns(true) - end - - context 'when the MD5 checksum matches the remote file' do - before do - remote_file.stubs(:etag).returns('0123456789abcdefghijklmnopqrstuv') - sync_context.stubs(:remote_files).returns( - { 'sync.file' => remote_file } - ) - end - - it 'should skip the file' do - File.expects(:open).never - Backup::Logger.expects(:info).with( - "\s\s[skipping] 'backups/sync/sync.file'" - ) - - sync_context.send(:sync_file, 'sync.file', :foo) - end - end - - context 'when the MD5 checksum does not match the remote file' do - before do - remote_file.stubs(:etag).returns('vutsrqponmlkjihgfedcba9876543210') - sync_context.stubs(:remote_files).returns( - { 'sync.file' => remote_file } - ) - end - - it 'should upload the file' do - Backup::Logger.expects(:info).with( - "\s\s[transferring] 'backups/sync/sync.file'" - ) - - File.expects(:open).with('/dir/to/sync/sync.file', 'r').yields(file) - repository_files.expects(:create).with( - :key => 'backups/sync/sync.file', - :body => file - ) - - sync_context.send(:sync_file, 'sync.file', :foo) - end - end - - context 'when the requested file does not exist on the remote' do - before do - sync_context.stubs(:remote_files).returns({}) - end - - it 'should upload the file' do - Backup::Logger.expects(:info).with( - "\s\s[transferring] 'backups/sync/sync.file'" - ) - - File.expects(:open).with('/dir/to/sync/sync.file', 'r').yields(file) - repository_files.expects(:create).with( - :key => 'backups/sync/sync.file', - :body => file - ) - - sync_context.send(:sync_file, 'sync.file', :foo) - end - end - end - - context 'when the requested file does not exist locally' do - before do - sync_context.stubs(:remote_files).returns( - { 'sync.file' => remote_file } - ) - sync_context.stubs(:local_files).returns({}) - end - - context 'when the `mirror` option is set to true' do - it 'should remove the file from the remote' do - Backup::Logger.expects(:info).with( - "\s\s[removing] 'backups/sync/sync.file'" - ) - - remote_file.expects(:destroy) - - sync_context.send(:sync_file, 'sync.file', true) - end - end - - context 'when the `mirror` option is set to false' do - it 'should leave the file on the remote' do - Backup::Logger.expects(:info).with( - "\s\s[leaving] 'backups/sync/sync.file'" - ) - - remote_file.expects(:destroy).never - - sync_context.send(:sync_file, 'sync.file', false) - end - end - end - end # describe '#sync_file' - end # describe 'Cloud::Base::SyncContext' - - describe 'Cloud::Base::LocalFile' do - let(:local_file_class) { Backup::Syncer::Cloud::Base::LocalFile } - - describe '#new' do - describe 'wrapping #initialize and using #sanitize to validate objects' do - context 'when the path is valid UTF-8' do - let(:local_file) do - local_file_class.new( - 
'foo', - 'MD5(foo)= 0123456789abcdefghijklmnopqrstuv' - ) - end - - it 'should return the new object' do - Backup::Logger.expects(:warn).never - - local_file.should be_an_instance_of local_file_class - end - end - - context 'when the path contains invalid UTF-8' do - let(:local_file) do - local_file_class.new( - "/bad/pa\xFFth", - "MD5(/bad/pa\xFFth/to/file)= 0123456789abcdefghijklmnopqrstuv" - ) - end - it 'should return nil and log a warning' do - Backup::Logger.expects(:warn).with( - "\s\s[skipping] /bad/pa\xEF\xBF\xBDth/to/file\n" + - "\s\sPath Contains Invalid UTF-8 byte sequences" - ) - - local_file.should be_nil - end - end - end - end # describe '#new' - - describe '#initialize' do - let(:local_file) do - local_file_class.new(:directory, :line) - end - - before do - local_file_class.any_instance.expects(:sanitize).with(:directory). - returns('/dir/to/sync') - local_file_class.any_instance.expects(:sanitize).with(:line). - returns("MD5(/dir/to/sync/subdir/sync.file)= 0123456789abcdefghijklmnopqrstuv\n") - end - - it 'should determine @path, @relative_path and @md5' do - local_file.path.should == '/dir/to/sync/subdir/sync.file' - local_file.relative_path.should == 'subdir/sync.file' - local_file.md5.should == '0123456789abcdefghijklmnopqrstuv' - end - - it 'should return nil if the object is invalid' do - local_file_class.any_instance.expects(:invalid?).returns(true) - Backup::Logger.expects(:warn) - local_file.should be_nil - end - end # describe '#initialize' - - describe '#sanitize' do - let(:local_file) do - local_file_class.new('foo', 'MD5(foo)= 0123456789abcdefghijklmnopqrstuv') - end - - it 'should replace any invalid UTF-8 characters' do - local_file.send(:sanitize, "/path/to/d\xFFir/subdir/sync\xFFfile"). - should == "/path/to/d\xEF\xBF\xBDir/subdir/sync\xEF\xBF\xBDfile" - end - - it 'should flag the LocalFile object as invalid' do - local_file.send(:sanitize, "/path/to/d\xFFir/subdir/sync\xFFfile") - local_file.invalid?.should be_true - end - end # describe '#sanitize' - end # describe 'Cloud::Base::LocalFile' -end diff --git a/spec/syncer/cloud/cloud_files_spec.rb b/spec/syncer/cloud/cloud_files_spec.rb index c047ac7b8..753530769 100644 --- a/spec/syncer/cloud/cloud_files_spec.rb +++ b/spec/syncer/cloud/cloud_files_spec.rb @@ -1,181 +1,174 @@ # encoding: utf-8 require File.expand_path('../../../spec_helper.rb', __FILE__) -describe 'Backup::Syncer::Cloud::CloudFiles' do - let(:syncer) do - Backup::Syncer::Cloud::CloudFiles.new do |cf| - cf.api_key = 'my_api_key' - cf.username = 'my_username' - cf.container = 'my_container' - cf.auth_url = 'my_auth_url' - cf.servicenet = true +module Backup +describe Syncer::Cloud::CloudFiles do + let(:required_config) { + Proc.new do |cf| + cf.username = 'my_username' + cf.api_key = 'my_api_key' + cf.container = 'my_container' end - end + } + let(:syncer) { Syncer::Cloud::CloudFiles.new(&required_config) } - it 'should be a subclass of Syncer::Cloud::Base' do - Backup::Syncer::Cloud::CloudFiles. - superclass.should == Backup::Syncer::Cloud::Base - end + it_behaves_like 'a class that includes Configuration::Helpers' - describe '#initialize' do - after { Backup::Syncer::Cloud::CloudFiles.clear_defaults! } - - it 'should load pre-configured defaults through Syncer::Cloud::Base' do - Backup::Syncer::Cloud::CloudFiles.any_instance.expects(:load_defaults!) 
- syncer - end + it_behaves_like 'a subclass of Syncer::Cloud::Base' - it 'should strip any leading slash in path' do - syncer = Backup::Syncer::Cloud::CloudFiles.new do |cloud| - cloud.path = '/cleaned/path' - end - syncer.path.should == 'cleaned/path' + describe '#initialize' do + it 'provides default values' do + # required + expect( syncer.username ).to eq 'my_username' + expect( syncer.api_key ).to eq 'my_api_key' + expect( syncer.container ).to eq 'my_container' + + # defaults + expect( syncer.auth_url ).to be_nil + expect( syncer.region ).to be_nil + expect( syncer.servicenet ).to be(false) + + # from Syncer::Cloud::Base + expect( syncer.thread_count ).to be 0 + expect( syncer.max_retries ).to be 10 + expect( syncer.retry_waitsec ).to be 30 + expect( syncer.path ).to eq 'backups' + + # from Syncer::Base + expect( syncer.syncer_id ).to be_nil + expect( syncer.mirror ).to be(false) + expect( syncer.directories ).to eq [] end - context 'when no pre-configured defaults have been set' do - it 'should use the values given' do - syncer.api_key.should == 'my_api_key' - syncer.username.should == 'my_username' - syncer.container.should == 'my_container' - syncer.auth_url.should == 'my_auth_url' - syncer.servicenet.should == true - end - - it 'should use default values if none are given' do - syncer = Backup::Syncer::Cloud::CloudFiles.new - - # from Syncer::Base - syncer.path.should == 'backups' - syncer.mirror.should == false - syncer.directories.should == [] - - # from Syncer::Cloud::Base - syncer.concurrency_type.should == false - syncer.concurrency_level.should == 2 - - syncer.api_key.should == nil - syncer.username.should == nil - syncer.container.should == nil - syncer.auth_url.should == nil - syncer.servicenet.should == false - end - end # context 'when no pre-configured defaults have been set' - - context 'when pre-configured defaults have been set' do - before do - Backup::Syncer::Cloud::CloudFiles.defaults do |cloud| - cloud.api_key = 'default_api_key' - cloud.username = 'default_username' - cloud.container = 'default_container' - cloud.auth_url = 'default_auth_url' - cloud.servicenet = 'default_servicenet' + it 'configures the syncer' do + syncer = Syncer::Cloud::CloudFiles.new(:my_id) do |cf| + cf.username = 'my_username' + cf.api_key = 'my_api_key' + cf.container = 'my_container' + cf.auth_url = 'my_auth_url' + cf.region = 'my_region' + cf.servicenet = true + cf.thread_count = 5 + cf.max_retries = 15 + cf.retry_waitsec = 45 + cf.path = 'my_backups' + cf.mirror = true + + cf.directories do + add '/this/path' + add 'that/path' end end - it 'should use pre-configured defaults' do - syncer = Backup::Syncer::Cloud::CloudFiles.new - - # from Syncer::Base - syncer.path.should == 'backups' - syncer.mirror.should == false - syncer.directories.should == [] - - # from Syncer::Cloud::Base - syncer.concurrency_type.should == false - syncer.concurrency_level.should == 2 - - syncer.api_key.should == 'default_api_key' - syncer.username.should == 'default_username' - syncer.container.should == 'default_container' - syncer.auth_url.should == 'default_auth_url' - syncer.servicenet.should == 'default_servicenet' - end + expect( syncer.username ).to eq 'my_username' + expect( syncer.api_key ).to eq 'my_api_key' + expect( syncer.container ).to eq 'my_container' + expect( syncer.auth_url ).to eq 'my_auth_url' + expect( syncer.region ).to eq 'my_region' + expect( syncer.servicenet ).to be(true) + expect( syncer.thread_count ).to be 5 + expect( syncer.max_retries ).to be 15 + expect( syncer.retry_waitsec 
).to be 45 + expect( syncer.path ).to eq 'my_backups' + expect( syncer.syncer_id ).to eq :my_id + expect( syncer.mirror ).to be(true) + expect( syncer.directories ).to eq ['/this/path', 'that/path'] + end - it 'should override pre-configured defaults' do - syncer = Backup::Syncer::Cloud::CloudFiles.new do |cloud| - cloud.path = 'new_path' - cloud.mirror = 'new_mirror' - cloud.concurrency_type = 'new_concurrency_type' - cloud.concurrency_level = 'new_concurrency_level' - - cloud.api_key = 'new_api_key' - cloud.username = 'new_username' - cloud.container = 'new_container' - cloud.auth_url = 'new_auth_url' - cloud.servicenet = 'new_servicenet' + it 'requires username' do + pre_config = required_config + expect do + Syncer::Cloud::CloudFiles.new do |cf| + pre_config.call(cf) + cf.username = nil end - - syncer.path.should == 'new_path' - syncer.mirror.should == 'new_mirror' - syncer.directories.should == [] - syncer.concurrency_type.should == 'new_concurrency_type' - syncer.concurrency_level.should == 'new_concurrency_level' - - syncer.api_key.should == 'new_api_key' - syncer.username.should == 'new_username' - syncer.container.should == 'new_container' - syncer.auth_url.should == 'new_auth_url' - syncer.servicenet.should == 'new_servicenet' - end - end # context 'when pre-configured defaults have been set' - end # describe '#initialize' - - describe '#connection' do - let(:connection) { mock } - - before do - Fog::Storage.expects(:new).once.with( - :provider => 'Rackspace', - :rackspace_username => 'my_username', - :rackspace_api_key => 'my_api_key', - :rackspace_auth_url => 'my_auth_url', - :rackspace_servicenet => true - ).returns(connection) + end.to raise_error {|err| + expect( err.message ).to match(/are all required/) + } end - it 'should establish and re-use the connection' do - syncer.send(:connection).should == connection - syncer.instance_variable_get(:@connection).should == connection - syncer.send(:connection).should == connection + it 'requires api_key' do + pre_config = required_config + expect do + Syncer::Cloud::CloudFiles.new do |cf| + pre_config.call(cf) + cf.api_key = nil + end + end.to raise_error {|err| + expect( err.message ).to match(/are all required/) + } end - end - - describe '#repository_object' do - let(:connection) { mock } - let(:directories) { mock } - let(:container) { mock } - before do - syncer.stubs(:connection).returns(connection) - connection.stubs(:directories).returns(directories) + it 'requires container' do + pre_config = required_config + expect do + Syncer::Cloud::CloudFiles.new do |cf| + pre_config.call(cf) + cf.container = nil + end + end.to raise_error {|err| + expect( err.message ).to match(/are all required/) + } end - context 'when the @container does not exist' do - before do - directories.expects(:get).once.with('my_container').returns(nil) - directories.expects(:create).once.with( - :key => 'my_container' - ).returns(container) - end + end # describe '#initialize' - it 'should create and re-use the container' do - syncer.send(:repository_object).should == container - syncer.instance_variable_get(:@repository_object).should == container - syncer.send(:repository_object).should == container - end + describe '#cloud_io' do + it 'caches a new CloudIO instance' do + CloudIO::CloudFiles.expects(:new).once.with( + :username => 'my_username', + :api_key => 'my_api_key', + :auth_url => nil, + :region => nil, + :servicenet => false, + :container => 'my_container', + :max_retries => 10, + :retry_waitsec => 30, + :segments_container => nil, + 
:segment_size => 0 + ).returns(:cloud_io) + + expect( syncer.send(:cloud_io) ).to eq :cloud_io + expect( syncer.send(:cloud_io) ).to eq :cloud_io + end + end # describe '#cloud_io' + + describe '#get_remote_files' do + let(:cloud_io) { mock } + let(:object_a) { + stub( + :name => 'my/path/dir_to_sync/some_dir/object_a', + :hash => '12345' + ) + } + let(:object_b) { + stub( + :name => 'my/path/dir_to_sync/another_dir/object_b', + :hash => '67890' + ) + } + before { syncer.stubs(:cloud_io).returns(cloud_io) } + + it 'returns a hash of relative paths and checksums for remote objects' do + cloud_io.expects(:objects).with('my/path/dir_to_sync'). + returns([object_a, object_b]) + + expect( + syncer.send(:get_remote_files, 'my/path/dir_to_sync') + ).to eq( + { 'some_dir/object_a' => '12345', 'another_dir/object_b' => '67890' } + ) end - context 'when the @container does exist' do - before do - directories.expects(:get).once.with('my_container').returns(container) - directories.expects(:create).never - end - - it 'should retrieve and re-use the container' do - syncer.send(:repository_object).should == container - syncer.instance_variable_get(:@repository_object).should == container - syncer.send(:repository_object).should == container - end + it 'returns an empty hash if no remote objects are found' do + cloud_io.expects(:objects).returns([]) + expect( syncer.send(:get_remote_files, 'foo') ).to eq({}) end + end # describe '#get_remote_files' + + describe 'Deprecations' do + include_examples 'Deprecation: #concurrency_type and #concurrency_level' end + +end end diff --git a/spec/syncer/cloud/local_file_spec.rb b/spec/syncer/cloud/local_file_spec.rb new file mode 100644 index 000000000..a8321acea --- /dev/null +++ b/spec/syncer/cloud/local_file_spec.rb @@ -0,0 +1,60 @@ +# encoding: utf-8 +require File.expand_path('../../../spec_helper.rb', __FILE__) + +module Backup +describe Syncer::Cloud::LocalFile do + + describe '.find' do + + before do + @tmpdir = Dir.mktmpdir('backup_spec') + SandboxFileUtils.activate!(@tmpdir) + FileUtils.mkdir_p File.join(@tmpdir, 'sync_dir/sub_dir') + Utilities.unstub(:utility) + end + + after do + FileUtils.rm_r(@tmpdir, :force => true, :secure => true) + end + + it 'returns a Hash of LocalFile objects, keyed by relative path' do + test_files = { + 'sync_dir/one.file' => 'c9f90c31589526ef50cc974a614038d5', + 'sync_dir/two.file' => '1d26903171cef8b1d7eb035ca049f492', + 'sync_dir/sub_dir/three.file' => '4ccdba38597e718ed00e3344dc78b6a1' + } + + Dir.chdir(@tmpdir) do + test_files.keys.each do |path| + File.open(path, 'w') {|file| file.write path } + end + bad_file = "sync_dir/bad\xFFfile" + sanitized_bad_file = "sync_dir/bad\xEF\xBF\xBDfile" + FileUtils.touch bad_file + + Logger.expects(:warn).with( + "\s\s[skipping] #{ File.expand_path(sanitized_bad_file) }\n" + + "\s\sPath Contains Invalid UTF-8 byte sequences" + ) + + local_files = described_class.find('sync_dir') + expect( local_files.keys.count ).to be 3 + local_files.each do |relative_path, local_file| + expect( local_file.path ).to eq( + File.expand_path("sync_dir/#{ relative_path }") + ) + expect( local_file.md5 ).to eq( + test_files["sync_dir/#{ relative_path }"] + ) + end + end + end + + it 'returns an empty hash if no files are found' do + expect( described_class.find(@tmpdir) ).to eq({}) + end + + end + +end +end diff --git a/spec/syncer/cloud/s3_spec.rb b/spec/syncer/cloud/s3_spec.rb index 3dead98f7..4298b0789 100644 --- a/spec/syncer/cloud/s3_spec.rb +++ b/spec/syncer/cloud/s3_spec.rb @@ -1,174 +1,206 @@ # 
encoding: utf-8 require File.expand_path('../../../spec_helper.rb', __FILE__) -describe 'Backup::Syncer::Cloud::S3' do - let(:syncer) do - Backup::Syncer::Cloud::S3.new do |s3| +module Backup +describe Syncer::Cloud::S3 do + let(:required_config) { + Proc.new do |s3| s3.access_key_id = 'my_access_key_id' s3.secret_access_key = 'my_secret_access_key' s3.bucket = 'my_bucket' - s3.region = 'my_region' end + } + let(:syncer) { Syncer::Cloud::S3.new(&required_config) } + + it_behaves_like 'a class that includes Configuration::Helpers' do + let(:default_overrides) { + { 'encryption' => :aes256, + 'storage_class' => :reduced_redundancy } + } + let(:new_overrides) { + { 'encryption' => 'aes256', + 'storage_class' => 'standard' } + } end - it 'should be a subclass of Syncer::Cloud::Base' do - Backup::Syncer::Cloud::S3. - superclass.should == Backup::Syncer::Cloud::Base - end + it_behaves_like 'a subclass of Syncer::Cloud::Base' describe '#initialize' do - after { Backup::Syncer::Cloud::S3.clear_defaults! } - - it 'should load pre-configured defaults through Syncer::Cloud::Base' do - Backup::Syncer::Cloud::S3.any_instance.expects(:load_defaults!) - syncer - end - - it 'should strip any leading slash in path' do - syncer = Backup::Syncer::Cloud::S3.new do |cloud| - cloud.path = '/cleaned/path' - end - syncer.path.should == 'cleaned/path' + it 'provides default values' do + # required + expect( syncer.access_key_id ).to eq 'my_access_key_id' + expect( syncer.secret_access_key ).to eq 'my_secret_access_key' + expect( syncer.bucket ).to eq 'my_bucket' + + # defaults + expect( syncer.region ).to be_nil + expect( syncer.encryption ).to be_nil + expect( syncer.storage_class ).to eq :standard + + # from Syncer::Cloud::Base + expect( syncer.thread_count ).to be 0 + expect( syncer.max_retries ).to be 10 + expect( syncer.retry_waitsec ).to be 30 + expect( syncer.path ).to eq 'backups' + + # from Syncer::Base + expect( syncer.syncer_id ).to be_nil + expect( syncer.mirror ).to be(false) + expect( syncer.directories ).to eq [] end - context 'when no pre-configured defaults have been set' do - it 'should use the values given' do - syncer.access_key_id.should == 'my_access_key_id' - syncer.secret_access_key.should == 'my_secret_access_key' - syncer.bucket.should == 'my_bucket' - syncer.region.should == 'my_region' - end - - it 'should use default values if none are given' do - syncer = Backup::Syncer::Cloud::S3.new - - # from Syncer::Base - syncer.path.should == 'backups' - syncer.mirror.should == false - syncer.directories.should == [] - - # from Syncer::Cloud::Base - syncer.concurrency_type.should == false - syncer.concurrency_level.should == 2 - - syncer.access_key_id.should be_nil - syncer.secret_access_key.should be_nil - syncer.bucket.should be_nil - syncer.region.should be_nil - end - end # context 'when no pre-configured defaults have been set' - - context 'when pre-configured defaults have been set' do - before do - Backup::Syncer::Cloud::S3.defaults do |cloud| - cloud.access_key_id = 'default_access_key_id' - cloud.secret_access_key = 'default_secret_access_key' - cloud.bucket = 'default_bucket' - cloud.region = 'default_region' + it 'configures the syncer' do + syncer = Syncer::Cloud::S3.new(:my_id) do |s3| + s3.access_key_id = 'my_access_key_id' + s3.secret_access_key = 'my_secret_access_key' + s3.bucket = 'my_bucket' + s3.region = 'my_region' + s3.encryption = :aes256 + s3.storage_class = :reduced_redundancy + s3.thread_count = 5 + s3.max_retries = 15 + s3.retry_waitsec = 45 + s3.path = 'my_backups' + 
s3.mirror = true + + s3.directories do + add '/this/path' + add 'that/path' end end - it 'should use pre-configured defaults' do - syncer = Backup::Syncer::Cloud::S3.new - - # from Syncer::Base - syncer.path.should == 'backups' - syncer.mirror.should == false - syncer.directories.should == [] - - # from Syncer::Cloud::Base - syncer.concurrency_type.should == false - syncer.concurrency_level.should == 2 - - syncer.access_key_id.should == 'default_access_key_id' - syncer.secret_access_key.should == 'default_secret_access_key' - syncer.bucket.should == 'default_bucket' - syncer.region.should == 'default_region' - end + expect( syncer.access_key_id ).to eq 'my_access_key_id' + expect( syncer.secret_access_key ).to eq 'my_secret_access_key' + expect( syncer.bucket ).to eq 'my_bucket' + expect( syncer.region ).to eq 'my_region' + expect( syncer.encryption ).to eq :aes256 + expect( syncer.storage_class ).to eq :reduced_redundancy + expect( syncer.thread_count ).to be 5 + expect( syncer.max_retries ).to be 15 + expect( syncer.retry_waitsec ).to be 45 + expect( syncer.path ).to eq 'my_backups' + expect( syncer.syncer_id ).to eq :my_id + expect( syncer.mirror ).to be(true) + expect( syncer.directories ).to eq ['/this/path', 'that/path'] + end - it 'should override pre-configured defaults' do - syncer = Backup::Syncer::Cloud::S3.new do |cloud| - cloud.path = 'new_path' - cloud.mirror = 'new_mirror' - cloud.concurrency_type = 'new_concurrency_type' - cloud.concurrency_level = 'new_concurrency_level' - - cloud.access_key_id = 'new_access_key_id' - cloud.secret_access_key = 'new_secret_access_key' - cloud.bucket = 'new_bucket' - cloud.region = 'new_region' + it 'requires access_key_id' do + pre_config = required_config + expect do + Syncer::Cloud::S3.new do |s3| + pre_config.call(s3) + s3.access_key_id = nil end + end.to raise_error {|err| + expect( err.message ).to match(/are all required/) + } + end - syncer.path.should == 'new_path' - syncer.mirror.should == 'new_mirror' - syncer.directories.should == [] - syncer.concurrency_type.should == 'new_concurrency_type' - syncer.concurrency_level.should == 'new_concurrency_level' - - syncer.access_key_id.should == 'new_access_key_id' - syncer.secret_access_key.should == 'new_secret_access_key' - syncer.bucket.should == 'new_bucket' - syncer.region.should == 'new_region' - end - end # context 'when pre-configured defaults have been set' - end # describe '#initialize' - - describe '#connection' do - let(:connection) { mock } - - before do - Fog::Storage.expects(:new).once.with( - :provider => 'AWS', - :aws_access_key_id => 'my_access_key_id', - :aws_secret_access_key => 'my_secret_access_key', - :region => 'my_region' - ).returns(connection) + it 'requires secret_access_key' do + pre_config = required_config + expect do + Syncer::Cloud::S3.new do |s3| + pre_config.call(s3) + s3.secret_access_key = nil + end + end.to raise_error {|err| + expect( err.message ).to match(/are all required/) + } end - it 'should establish and re-use the connection' do - syncer.send(:connection).should == connection - syncer.instance_variable_get(:@connection).should == connection - syncer.send(:connection).should == connection + it 'requires bucket' do + pre_config = required_config + expect do + Syncer::Cloud::S3.new do |s3| + pre_config.call(s3) + s3.bucket = nil + end + end.to raise_error {|err| + expect( err.message ).to match(/are all required/) + } end - end - describe '#repository_object' do - let(:connection) { mock } - let(:directories) { mock } - let(:bucket) { mock } + 
it 'validates encryption' do + pre_config = required_config + expect do + Syncer::Cloud::S3.new do |s3| + pre_config.call(s3) + s3.encryption = :aes512 + end + end.to raise_error {|err| + expect( err.message ).to match(/must be :aes256 or nil/) + } + end - before do - syncer.stubs(:connection).returns(connection) - connection.stubs(:directories).returns(directories) + it 'validates storage_class' do + pre_config = required_config + expect do + Syncer::Cloud::S3.new do |s3| + pre_config.call(s3) + s3.storage_class = :glacier + end + end.to raise_error {|err| + expect( err.message ).to match(/must be :standard or :reduced_redundancy/) + } end - context 'when the @bucket does not exist' do - before do - directories.expects(:get).once.with('my_bucket').returns(nil) - directories.expects(:create).once.with( - :key => 'my_bucket', - :location => 'my_region' - ).returns(bucket) - end + end # describe '#initialize' - it 'should create and re-use the bucket' do - syncer.send(:repository_object).should == bucket - syncer.instance_variable_get(:@repository_object).should == bucket - syncer.send(:repository_object).should == bucket - end + describe '#cloud_io' do + it 'caches a new CloudIO instance' do + CloudIO::S3.expects(:new).once.with( + :access_key_id => 'my_access_key_id', + :secret_access_key => 'my_secret_access_key', + :bucket => 'my_bucket', + :region => nil, + :encryption => nil, + :storage_class => :standard, + :max_retries => 10, + :retry_waitsec => 30, + :chunk_size => 0 + ).returns(:cloud_io) + + expect( syncer.send(:cloud_io) ).to eq :cloud_io + expect( syncer.send(:cloud_io) ).to eq :cloud_io + end + end # describe '#cloud_io' + + describe '#get_remote_files' do + let(:cloud_io) { mock } + let(:object_a) { + stub( + :key => 'my/path/dir_to_sync/some_dir/object_a', + :etag => '12345' + ) + } + let(:object_b) { + stub( + :key => 'my/path/dir_to_sync/another_dir/object_b', + :etag => '67890' + ) + } + before { syncer.stubs(:cloud_io).returns(cloud_io) } + + it 'returns a hash of relative paths and checksums for remote objects' do + cloud_io.expects(:objects).with('my/path/dir_to_sync'). + returns([object_a, object_b]) + + expect( + syncer.send(:get_remote_files, 'my/path/dir_to_sync') + ).to eq( + { 'some_dir/object_a' => '12345', 'another_dir/object_b' => '67890' } + ) end - context 'when the @bucket does exist' do - before do - directories.expects(:get).once.with('my_bucket').returns(bucket) - directories.expects(:create).never - end - - it 'should retrieve and re-use the bucket' do - syncer.send(:repository_object).should == bucket - syncer.instance_variable_get(:@repository_object).should == bucket - syncer.send(:repository_object).should == bucket - end + it 'returns an empty hash if no remote objects are found' do + cloud_io.expects(:objects).returns([]) + expect( syncer.send(:get_remote_files, 'foo') ).to eq({}) end + end # describe '#get_remote_files' + + describe 'Deprecations' do + include_examples 'Deprecation: #concurrency_type and #concurrency_level' end + +end end diff --git a/templates/cli/syncer/cloud_files b/templates/cli/syncer/cloud_files index 4e0b9c6cb..f17ecd6c5 100644 --- a/templates/cli/syncer/cloud_files +++ b/templates/cli/syncer/cloud_files @@ -1,43 +1,15 @@ ## # Rackspace Cloud Files [Syncer] # - # Available Auth URLs: - # - # - https://auth.api.rackspacecloud.com (US - Default) - # - https://lon.auth.api.rackspacecloud.com (UK) - # - # Servicenet: - # - # Set this to 'true' if Backup runs on a Rackspace server. 
- # It will avoid transfer charges and it's more performant. - # - # Mirroring: - # - # When enabled it will keep an exact mirror of your filesystem on Cloud Files. - # This means that when you remove a file from the filesystem, - # it will also remote it from Cloud Files. - # - # Concurrency: - # - # `concurrency_type` may be set to: - # - # - false (default) - # - :threads - # - :processes - # - # Set `concurrency_level` to the number of threads/processes to use. - # Defaults to 2. - # + # See the documentation on the Wiki for details. + # https://github.com/meskyanichi/backup/wiki/Syncers sync_with Cloud::CloudFiles do |cf| cf.username = "my_username" cf.api_key = "my_api_key" cf.container = "my_container" - cf.auth_url = "https://auth.api.rackspacecloud.com" - cf.servicenet = false cf.path = "/backups" cf.mirror = true - cf.concurrency_type = false - cf.concurrency_level = 2 + cf.thread_count = 10 cf.directories do |directory| directory.add "/path/to/directory/to/sync" diff --git a/templates/cli/syncer/s3 b/templates/cli/syncer/s3 index 37093ff84..7c6bb4c48 100644 --- a/templates/cli/syncer/s3 +++ b/templates/cli/syncer/s3 @@ -1,31 +1,8 @@ ## # Amazon S3 [Syncer] # - # Available Regions: - # - # - ap-northeast-1 - # - ap-southeast-1 - # - eu-west-1 - # - us-east-1 - # - us-west-1 - # - # Mirroring: - # - # When enabled it will keep an exact mirror of your filesystem on S3. - # This means that when you remove a file from the filesystem, - # it will also remote it from S3. - # - # Concurrency: - # - # `concurrency_type` may be set to: - # - # - false (default) - # - :threads - # - :processes - # - # Set `concurrency_level` to the number of threads/processes to use. - # Defaults to 2. - # + # See the documentation on the Wiki for details. + # https://github.com/meskyanichi/backup/wiki/Syncers sync_with Cloud::S3 do |s3| s3.access_key_id = "my_access_key_id" s3.secret_access_key = "my_secret_access_key" @@ -33,8 +10,7 @@ s3.region = "us-east-1" s3.path = "/backups" s3.mirror = true - s3.concurrency_type = false - s3.concurrency_level = 2 + s3.thread_count = 10 s3.directories do |directory| directory.add "/path/to/directory/to/sync" diff --git a/vagrant/spec/live.yml.template b/vagrant/spec/live.yml.template index bc95cd3ed..0507f0670 100644 --- a/vagrant/spec/live.yml.template +++ b/vagrant/spec/live.yml.template @@ -25,6 +25,25 @@ storage: container: segments_container: path: backup_testing +syncer: + cloud: + s3: + specs_enabled: false + access_key_id: + secret_access_key: + region: + bucket: + # change if needed + path: backup_testing + cloudfiles: + specs_enabled: false + username: + api_key: + auth_url: # if needed + region: # if needed + servicenet: false + container: + path: backup_testing notifier: mail: specs_enabled: false diff --git a/vagrant/spec/live/syncer/cloud_files_spec.rb b/vagrant/spec/live/syncer/cloud_files_spec.rb new file mode 100644 index 000000000..6bf2b3d54 --- /dev/null +++ b/vagrant/spec/live/syncer/cloud_files_spec.rb @@ -0,0 +1,149 @@ +# encoding: utf-8 + +require File.expand_path('../../../spec_helper', __FILE__) + +# To run these tests, you need to setup your Cloudfiles credentials in +# /vagrant/spec/live.yml +# +# It's recommended you use a dedicated Container for this, like: +# backup.testing.container +# +# Note: Expectations will occasionally fail due to eventual consistency. 
+module Backup +describe Syncer::Cloud::CloudFiles, + :if => BackupSpec::LIVE['syncer']['cloud']['cloudfiles']['specs_enabled'] == true do + + before { prepare_local_sync_files; clean_remote } + after { clean_remote } + + shared_examples 'sync test (cf)' do + + it 'works' do + create_model :my_backup, <<-EOS + Backup::Model.new(:my_backup, 'a description') do + config = BackupSpec::LIVE['syncer']['cloud']['cloudfiles'] + sync_with Cloud::CloudFiles do |cf| + cf.username = config['username'] + cf.api_key = config['api_key'] + cf.auth_url = config['auth_url'] + cf.region = config['region'] + cf.servicenet = config['servicenet'] + cf.container = config['container'] + cf.path = config['path'] + cf.thread_count = #{ use_threads ? 2 : 0 } + cf.mirror = #{ mirror } + + cf.directories do + add File.join(BackupSpec::LOCAL_SYNC_PATH, 'dir_a') + add File.join(BackupSpec::LOCAL_SYNC_PATH, 'dir_b') + end + end + end + EOS + + job = backup_perform :my_backup, :exit_status => 1 + + expect( + objects_on_remote.map {|obj| [obj.name, obj.hash] } + ).to eq( + expected_on_remote(:before_update, mirror) + ) + expect( skipped_file_logged?(job) ).to be_true + + update_local_sync_files + + job = backup_perform :my_backup, :exit_status => 1 + + expect( + objects_on_remote.map {|obj| [obj.name, obj.hash] } + ).to eq( + expected_on_remote(:after_update, mirror) + ) + expect( skipped_file_logged?(job) ).to be_true + end + + end # shared_examples 'sync test (cf)' + + context 'with threads', :live do + let(:use_threads) { true } + + context 'with mirroring' do + let(:mirror) { true } + include_examples 'sync test (cf)' + end + + context 'without mirroring' do + let(:mirror) { false } + include_examples 'sync test (cf)' + end + end + + context 'without threads', :live do + let(:use_threads) { false } + + context 'with mirroring' do + let(:mirror) { true } + include_examples 'sync test (cf)' + end + + context 'without mirroring' do + let(:mirror) { false } + include_examples 'sync test (cf)' + end + end + + private + + def cloud_io + config = BackupSpec::LIVE['syncer']['cloud']['cloudfiles'] + @cloud_io ||= CloudIO::CloudFiles.new( + :username => config['username'], + :api_key => config['api_key'], + :auth_url => config['auth_url'], + :region => config['region'], + :servicenet => config['servicenet'], + :container => config['container'], + :max_retries => 3, + :retry_waitsec => 5, + # Syncers can not use multipart upload. 
+ :segments_container => nil, + :segment_size => 0 + ) + end + + def remote_path + BackupSpec::LIVE['syncer']['cloud']['cloudfiles']['path'] + end + + def objects_on_remote + cloud_io.objects(remote_path).sort_by(&:name) + end + + def clean_remote + cloud_io.delete(objects_on_remote) + end + + def expected_on_remote(state, mirror) + case state + when :before_update + files = [['dir_a/one.file', 'd3b07384d113edec49eaa6238ad5ff00'], + ['dir_b/dir_c/three.file', 'd3b07384d113edec49eaa6238ad5ff00'], + ['dir_b/two.file', 'd3b07384d113edec49eaa6238ad5ff00']] + when :after_update + files = [['dir_a/dir_d/two.new', '14758f1afd44c09b7992073ccf00b43d'], + ['dir_a/one.file', '14758f1afd44c09b7992073ccf00b43d'], + ['dir_b/dir_c/three.file', 'd3b07384d113edec49eaa6238ad5ff00'], + ['dir_b/one.new', '14758f1afd44c09b7992073ccf00b43d']] + files << ['dir_b/two.file', 'd3b07384d113edec49eaa6238ad5ff00'] unless mirror + end + files.map {|path, md5| [File.join(remote_path, path), md5] }.sort_by(&:first) + end + + def skipped_file_logged?(job) + messages = job.logger.messages.map {|m| m.formatted_lines }.flatten + file = File.join(BackupSpec::LOCAL_SYNC_PATH, "dir_b/bad\uFFFDfile") + messages.any? {|line| line.include? "[warn] [skipping] #{ file }" } + end + +end +end diff --git a/vagrant/spec/live/syncer/s3_spec.rb b/vagrant/spec/live/syncer/s3_spec.rb new file mode 100644 index 000000000..616b7eb1b --- /dev/null +++ b/vagrant/spec/live/syncer/s3_spec.rb @@ -0,0 +1,189 @@ +# encoding: utf-8 + +require File.expand_path('../../../spec_helper', __FILE__) + +# To run these tests, you need to setup your AWS S3 credentials in +# /vagrant/spec/live.yml +# +# It's recommended you use a dedicated Bucket for this, like: +# .backup.testing +# +# Note: The S3 Bucket you use should have read-after-write consistency. +# So don't use the US Standard region. +module Backup +describe Syncer::Cloud::S3, + :if => BackupSpec::LIVE['syncer']['cloud']['s3']['specs_enabled'] == true do + + before { prepare_local_sync_files; clean_remote } + after { clean_remote } + + shared_examples 'sync test (s3)' do + + it 'works' do + create_model :my_backup, <<-EOS + Backup::Model.new(:my_backup, 'a description') do + config = BackupSpec::LIVE['syncer']['cloud']['s3'] + sync_with Cloud::S3 do |s3| + s3.access_key_id = config['access_key_id'] + s3.secret_access_key = config['secret_access_key'] + s3.region = config['region'] + s3.bucket = config['bucket'] + s3.path = config['path'] + s3.thread_count = #{ use_threads ? 2 : 0 } + s3.mirror = #{ mirror } + + s3.directories do + add File.join(BackupSpec::LOCAL_SYNC_PATH, 'dir_a') + add File.join(BackupSpec::LOCAL_SYNC_PATH, 'dir_b') + end + end + end + EOS + + job = backup_perform :my_backup, :exit_status => 1 + + expect( + objects_on_remote.map {|obj| [obj.key, obj.etag] } + ).to eq( + expected_on_remote(:before_update, mirror) + ) + + expect( skipped_file_logged?(job) ).to be_true + + update_local_sync_files + + job = backup_perform :my_backup, :exit_status => 1 + objects = objects_on_remote + + expect( + objects.map {|obj| [obj.key, obj.etag] } + ).to eq( + expected_on_remote(:after_update, mirror) + ) + + expect( skipped_file_logged?(job) ).to be_true + + expect( + objects.all? {|obj| obj.storage_class == 'STANDARD' } + ).to be(true) + + expect( + objects.all? {|obj| obj.encryption.nil? 
} + ).to be(true) + end + + end # shared_examples 'sync test (s3)' + + context 'with threads', :live do + let(:use_threads) { true } + + context 'with mirroring' do + let(:mirror) { true } + include_examples 'sync test (s3)' + end + + context 'without mirroring' do + let(:mirror) { false } + include_examples 'sync test (s3)' + end + end + + context 'without threads', :live do + let(:use_threads) { false } + + context 'with mirroring' do + let(:mirror) { true } + include_examples 'sync test (s3)' + end + + context 'without mirroring' do + let(:mirror) { false } + include_examples 'sync test (s3)' + end + end + + it 'uses :storage_class and :encryption', :live do + create_model :my_backup, <<-EOS + Backup::Model.new(:my_backup, 'a description') do + config = BackupSpec::LIVE['syncer']['cloud']['s3'] + sync_with Cloud::S3 do |s3| + s3.access_key_id = config['access_key_id'] + s3.secret_access_key = config['secret_access_key'] + s3.region = config['region'] + s3.bucket = config['bucket'] + s3.path = config['path'] + s3.storage_class = :reduced_redundancy + s3.encryption = :aes256 + + s3.directories do + add File.join(BackupSpec::LOCAL_SYNC_PATH, 'dir_a') + add File.join(BackupSpec::LOCAL_SYNC_PATH, 'dir_b') + end + end + end + EOS + + backup_perform :my_backup, :exit_status => 1 + objects = objects_on_remote + + expect( + objects.all? {|obj| obj.storage_class == 'REDUCED_REDUNDANCY' } + ).to be(true) + + expect( + objects.all? {|obj| obj.encryption == 'AES256' } + ).to be_true + end + + private + + def cloud_io + config = BackupSpec::LIVE['syncer']['cloud']['s3'] + @cloud_io ||= CloudIO::S3.new( + :access_key_id => config['access_key_id'], + :secret_access_key => config['secret_access_key'], + :region => config['region'], + :bucket => config['bucket'], + :max_retries => 3, + :retry_waitsec => 5, + # Syncers can not use multipart upload. + :chunk_size => 0 + ) + end + + def remote_path + BackupSpec::LIVE['syncer']['cloud']['s3']['path'] + end + + def objects_on_remote + cloud_io.objects(remote_path).sort_by(&:key) + end + + def clean_remote + cloud_io.delete(objects_on_remote) + end + + def expected_on_remote(state, mirror) + case state + when :before_update + files = [['dir_a/one.file', 'd3b07384d113edec49eaa6238ad5ff00'], + ['dir_b/dir_c/three.file', 'd3b07384d113edec49eaa6238ad5ff00'], + ['dir_b/two.file', 'd3b07384d113edec49eaa6238ad5ff00']] + when :after_update + files = [['dir_a/dir_d/two.new', '14758f1afd44c09b7992073ccf00b43d'], + ['dir_a/one.file', '14758f1afd44c09b7992073ccf00b43d'], + ['dir_b/dir_c/three.file', 'd3b07384d113edec49eaa6238ad5ff00'], + ['dir_b/one.new', '14758f1afd44c09b7992073ccf00b43d']] + files << ['dir_b/two.file', 'd3b07384d113edec49eaa6238ad5ff00'] unless mirror + end + files.map {|path, md5| [File.join(remote_path, path), md5] }.sort_by(&:first) + end + + def skipped_file_logged?(job) + messages = job.logger.messages.map {|m| m.formatted_lines }.flatten + file = File.join(BackupSpec::LOCAL_SYNC_PATH, "dir_b/bad\uFFFDfile") + messages.any? {|line| line.include? 
"[warn] [skipping] #{ file }" } + end + +end +end diff --git a/vagrant/spec/support/example_helpers.rb b/vagrant/spec/support/example_helpers.rb index 8a1554951..af0ecc3df 100644 --- a/vagrant/spec/support/example_helpers.rb +++ b/vagrant/spec/support/example_helpers.rb @@ -5,6 +5,7 @@ module BackupSpec CONFIG_TEMPLATE = File.readlines(File.join(PROJECT_ROOT, 'templates/cli/config')) LOCAL_STORAGE_PATH = '/home/vagrant/Storage' ALT_CONFIG_PATH = '/home/vagrant/Backup_alt' + LOCAL_SYNC_PATH = '/home/vagrant/sync_root' module ExampleHelpers @@ -143,5 +144,58 @@ def dir_contents(path) Dir["#{ path }/**/*"].map {|e| e.sub(/^#{ path }/, '') }.sort end + # Initial Files are MD5: d3b07384d113edec49eaa6238ad5ff00 + # + # ├── dir_a + # │   └── one.file + # └── dir_b + # ├── dir_c + # │   └── three.file + # ├── bad\xFFfile + # └── two.file + # + def prepare_local_sync_files + FileUtils.rm_rf LOCAL_SYNC_PATH + + %w{ dir_a + dir_b/dir_c }.each do |path| + FileUtils.mkdir_p File.join(LOCAL_SYNC_PATH, path) + end + + %W{ dir_a/one.file + dir_b/two.file + dir_b/bad\xFFfile + dir_b/dir_c/three.file }.each do |path| + File.open(File.join(LOCAL_SYNC_PATH, path), 'w') do |file| + file.puts 'foo' + end + end + end + + # Added/Updated Files are MD5: 14758f1afd44c09b7992073ccf00b43d + # + # ├── dir_a + # │   ├── dir_d (add) + # │   │   └── two.new (add) + # │   └── one.file (update) + # └── dir_b + # ├── dir_c + # │   └── three.file + # ├── bad\377file + # ├── one.new (add) + # └── two.file (remove) + # + def update_local_sync_files + FileUtils.mkdir_p File.join(LOCAL_SYNC_PATH, 'dir_a/dir_d') + %w{ dir_a/one.file + dir_b/one.new + dir_a/dir_d/two.new }.each do |path| + File.open(File.join(LOCAL_SYNC_PATH, path), 'w') do |file| + file.puts 'foobar' + end + end + FileUtils.rm File.join(LOCAL_SYNC_PATH, 'dir_b/two.file') + end + end end