From b30416ba7c5525eec60d325c066e4e378bc071d9 Mon Sep 17 00:00:00 2001 From: Brian Cobb Date: Mon, 23 Jun 2014 16:53:00 -0500 Subject: [PATCH 1/3] Factor out CookbookUpload::Archive This sets Supermarket up to alter its archive processing logic independently of its file regex logic, which will make a future (temporary) change to using libarchive transparent and revertible. --- app/models/cookbook_upload/archive.rb | 103 +++++++++++++++++++++++ app/models/cookbook_upload/parameters.rb | 79 ++++++----------- 2 files changed, 128 insertions(+), 54 deletions(-) create mode 100644 app/models/cookbook_upload/archive.rb diff --git a/app/models/cookbook_upload/archive.rb b/app/models/cookbook_upload/archive.rb new file mode 100644 index 000000000..35e024436 --- /dev/null +++ b/app/models/cookbook_upload/archive.rb @@ -0,0 +1,103 @@ +require 'rubygems/package' + +class CookbookUpload + # + # This class provides access to cookbook archives. In particular, it supports + # finding entries in the archive which have a path matching some regular + # expression, and it can read the contents of files given their path. These + # are the two primary operations required by Supermarket to locate and + # process metadata and README files. + # + class Archive + # + # Indicates that the uploaded archive may not be an upload, since it does + # not have a +path+ + # + NoPath = Class.new(RuntimeError) + + # + # Indicates that the source could not be processed + # + Error = Class.new(RuntimeError) + + # + # Creates a new Archive + # + # @param source [File] the source archive + # + def initialize(source) + @source = source + end + + # + # Returns the paths of entries in the archive which match the given regular expression + # + # @param pattern [Regexp] + # + # @return [Array] matching paths + # + def find(pattern) + matches = [] + + each do |entry| + next unless entry.full_name.match(pattern) + + matches << entry.full_name + end + + matches + end + + # + # Reads the contents of the file at the given path + # + # @param path [String] + # + # @return [String] the contents if a file exists at the given path + # @return [nil] if no such file exists + # + def read(path) + match = nil + + each do |entry| + next unless entry.full_name == path + + match = entry.read + + break + end + + match + end + + private + + # + # Iterates through each entry in the source archive + # + # @raise [NoPath] if the source has no path + # @raise [Error] if the source is not a compatible archive + # + # @yieldparam [::Gem::Package::TarReader::Entry] entry + # + # @example + # archive = CookbookUpload::Archive.new(tarball) + # archive.each do |entry| + # puts "#{entry.full_name} has the following content:\n#{entry.read}" + # end + # + def each + raise NoPath unless @source.respond_to?(:path) + + begin + Zlib::GzipReader.open(@source.path) do |gzip| + Gem::Package::TarReader.new(gzip) do |tar| + tar.each { |entry| yield entry } + end + end + rescue Zlib::GzipFile::Error => e + raise Error, e.message + end + end + end +end diff --git a/app/models/cookbook_upload/parameters.rb b/app/models/cookbook_upload/parameters.rb index 98e66d49a..202876104 100644 --- a/app/models/cookbook_upload/parameters.rb +++ b/app/models/cookbook_upload/parameters.rb @@ -1,8 +1,8 @@ require 'active_model/errors' +require 'cookbook_upload/archive' require 'cookbook_upload/metadata' require 'cookbook_upload/readme' require 'json' -require 'rubygems/package' require 'set' class CookbookUpload @@ -14,20 +14,10 @@ class Parameters attr_reader :tarball # - # Indicates that the uploaded tarball has no metdata.json entry + # @!attribute [r] archive + # @return [Archive] An interface to +tarball+ # - MissingMetadata = Class.new(RuntimeError) - - # - # Indicates that the uploaded tarball has no README entry - # - MissingReadme = Class.new(RuntimeError) - - # - # Indicates that the uploaded tarball may not be an upload, since it does - # not have a +path+ - # - TarballHasNoPath = Class.new(RuntimeError) + attr_reader :archive # # Creates a new set of cookbook upload parameters @@ -43,6 +33,7 @@ class Parameters def initialize(params) @cookbook_data = params.fetch(:cookbook) @tarball = params.fetch(:tarball) + @archive = Archive.new(@tarball) end # @@ -149,30 +140,20 @@ def parse_tarball_metadata(&block) errors = ActiveModel::Errors.new([]) begin - raise TarballHasNoPath unless tarball.respond_to?(:path) - - Zlib::GzipReader.open(tarball.path) do |gzip| - Gem::Package::TarReader.new(gzip) do |tar| - entry = tar.find do |e| - e.full_name.downcase.match(%r{^(\.\/)?[^\/]+\/metadata.json}) - end - - if entry - metadata = Metadata.new(JSON.parse(entry.read)) - else - raise MissingMetadata - end - end + path = archive.find(%r{^(\.\/)?[^\/]+\/metadata.json}).first + + if path + metadata = Metadata.new(JSON.parse(archive.read(path))) + else + errors.add(:base, I18n.t('api.error_messages.missing_metadata')) end rescue JSON::ParserError errors.add(:base, I18n.t('api.error_messages.metadata_not_json')) - rescue MissingMetadata - errors.add(:base, I18n.t('api.error_messages.missing_metadata')) rescue Virtus::CoercionError errors.add(:base, I18n.t('api.error_messages.invalid_metadata')) - rescue Zlib::GzipFile::Error + rescue Archive::Error errors.add(:base, I18n.t('api.error_messages.tarball_not_gzipped')) - rescue TarballHasNoPath + rescue Archive::NoPath errors.add(:base, I18n.t('api.error_messages.tarball_has_no_path')) end @@ -187,34 +168,24 @@ def parse_tarball_metadata(&block) # @yieldparam metadata [Readme] the cookbook's README # def extract_tarball_readme(&block) + cookbook = metadata.name readme = nil - effective_cookbook_name = metadata.name.downcase errors = ActiveModel::Errors.new([]) begin - raise TarballHasNoPath unless tarball.respond_to?(:path) - - Zlib::GzipReader.open(tarball.path) do |gzip| - Gem::Package::TarReader.new(gzip) do |tar| - entry = tar.find do |e| - e.full_name.downcase.include?("#{effective_cookbook_name}/readme") - end - - if entry - extension = entry.header.name.split('.').last.strip - contents = entry.read - - readme = Readme.new(contents: contents, extension: extension) - else - raise MissingReadme - end - end + path = archive.find(%r{\A(\.\/)?#{cookbook}\/readme(\.\w+)?\Z}i).first + + if path + readme = Readme.new( + contents: archive.read(path), + extension: path.split('.').last.strip + ) + else + errors.add(:base, I18n.t('api.error_messages.missing_readme')) end - rescue MissingReadme - errors.add(:base, I18n.t('api.error_messages.missing_readme')) - rescue Zlib::GzipFile::Error + rescue Archive::Error errors.add(:base, I18n.t('api.error_messages.tarball_not_gzipped')) - rescue TarballHasNoPath + rescue Archive::NoPath errors.add(:base, I18n.t('api.error_messages.tarball_has_no_path')) end From 9e8f5ace031ea2c00be7c636ac68fab3f97ea2ac Mon Sep 17 00:00:00 2001 From: Brian Cobb Date: Mon, 23 Jun 2014 16:59:23 -0500 Subject: [PATCH 2/3] Make metadata.json pattern more precise Namely: use \A instead of ^, include \Z in the pattern so that matches are exact, and escape the period preceeding the extension. --- app/models/cookbook_upload/parameters.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/cookbook_upload/parameters.rb b/app/models/cookbook_upload/parameters.rb index 202876104..3fde78473 100644 --- a/app/models/cookbook_upload/parameters.rb +++ b/app/models/cookbook_upload/parameters.rb @@ -140,7 +140,7 @@ def parse_tarball_metadata(&block) errors = ActiveModel::Errors.new([]) begin - path = archive.find(%r{^(\.\/)?[^\/]+\/metadata.json}).first + path = archive.find(%r{\A(\.\/)?[^\/]+\/metadata\.json\Z}).first if path metadata = Metadata.new(JSON.parse(archive.read(path))) From 21114b276a68dd677038479f1cfab56b24778235 Mon Sep 17 00:00:00 2001 From: Brian Cobb Date: Mon, 23 Jun 2014 17:23:01 -0500 Subject: [PATCH 3/3] Process archives in a backwards-compatible way For the time being, we'll use libarchive, along with a relaxed and less efficient metadata seeking process to process cookbook archives. Doing so allows the data migration to complete. This commit should revert cleanly once the backwards-compatible period is over. --- .travis.yml | 3 ++ Gemfile | 1 + Gemfile.lock | 8 ++++ app/models/cookbook_upload/archive.rb | 35 +++++++++++------- app/models/cookbook_upload/parameters.rb | 26 +++++++++---- config/locales/en.yml | 2 +- .../models/cookbook_upload/parameters_spec.rb | 20 ++++++++++ spec/models/cookbook_upload_spec.rb | 6 +-- .../not-actually-gzipped.tgz | Bin 2048 -> 0 bytes .../cookbook_fixtures/not-an-archive.tgz | 1 + 10 files changed, 76 insertions(+), 26 deletions(-) delete mode 100644 spec/support/cookbook_fixtures/not-actually-gzipped.tgz create mode 100644 spec/support/cookbook_fixtures/not-an-archive.tgz diff --git a/.travis.yml b/.travis.yml index 0fe2c46b0..eae1e7dee 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,9 @@ notifications: on_start: false # default: false rvm: - 2.0.0 +before_install: + - sudo apt-get update -qq + - sudo apt-get install -qq libarchive-dev before_script: - psql -c 'create database supermarket_test;' -U postgres - psql supermarket_test -c 'create extension pg_trgm' -U postgres diff --git a/Gemfile b/Gemfile index fecc8eb13..bf5020991 100644 --- a/Gemfile +++ b/Gemfile @@ -34,6 +34,7 @@ gem 'semverse' gem 'sitemap_generator' gem 'redis-rails' gem 'yajl-ruby' +gem 'libarchive', github: 'bcobb/winebarrel-libarchive-ruby-clone', ref: 'format-security-fix' gem 'sentry-raven', '~> 0.8.0', require: false gem 'statsd-ruby', require: 'statsd' diff --git a/Gemfile.lock b/Gemfile.lock index 23491ec67..f40064f8f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,10 @@ +GIT + remote: git://github.com/bcobb/winebarrel-libarchive-ruby-clone.git + revision: a5a748c731415cb622282350dc92c82f8de77e5d + ref: format-security-fix + specs: + libarchive (0.1.2) + GIT remote: git://github.com/gofullstack/chef-legacy.git revision: 3a27451f99f06b93b178b10e660e0b65f4dbfa99 @@ -460,6 +467,7 @@ DEPENDENCIES jbuilder kaminari launchy + libarchive! license_finder magiconf mail_view diff --git a/app/models/cookbook_upload/archive.rb b/app/models/cookbook_upload/archive.rb index 35e024436..ee12b0e7a 100644 --- a/app/models/cookbook_upload/archive.rb +++ b/app/models/cookbook_upload/archive.rb @@ -1,4 +1,4 @@ -require 'rubygems/package' +require 'libarchive' class CookbookUpload # @@ -39,10 +39,10 @@ def initialize(source) def find(pattern) matches = [] - each do |entry| - next unless entry.full_name.match(pattern) + each do |entry, _| + next unless entry.pathname.match(pattern) - matches << entry.full_name + matches << entry.pathname end matches @@ -59,10 +59,10 @@ def find(pattern) def read(path) match = nil - each do |entry| - next unless entry.full_name == path + each do |entry, archive| + next unless entry.pathname == path - match = entry.read + match = archive.read_data break end @@ -78,24 +78,31 @@ def read(path) # @raise [NoPath] if the source has no path # @raise [Error] if the source is not a compatible archive # - # @yieldparam [::Gem::Package::TarReader::Entry] entry + # @yieldparam [::Archive::Entry] entry + # @yieldparam [::Archive::Reader] archive # # @example # archive = CookbookUpload::Archive.new(tarball) - # archive.each do |entry| - # puts "#{entry.full_name} has the following content:\n#{entry.read}" + # archive.each do |entry, archive| + # puts "#{entry.pathname} has the following content:\n#{archive.read_data}" # end # def each raise NoPath unless @source.respond_to?(:path) begin - Zlib::GzipReader.open(@source.path) do |gzip| - Gem::Package::TarReader.new(gzip) do |tar| - tar.each { |entry| yield entry } + ::Archive.read_open_filename(@source.path) do |archive| + loop do + entry = archive.next_header + + if entry + yield entry, archive + else + break + end end end - rescue Zlib::GzipFile::Error => e + rescue ::Archive::Error => e raise Error, e.message end end diff --git a/app/models/cookbook_upload/parameters.rb b/app/models/cookbook_upload/parameters.rb index 3fde78473..d986231f7 100644 --- a/app/models/cookbook_upload/parameters.rb +++ b/app/models/cookbook_upload/parameters.rb @@ -140,19 +140,29 @@ def parse_tarball_metadata(&block) errors = ActiveModel::Errors.new([]) begin - path = archive.find(%r{\A(\.\/)?[^\/]+\/metadata\.json\Z}).first + paths = archive.find(%r{\A(\.\/)?[^\/]+\/metadata\.json}) - if path - metadata = Metadata.new(JSON.parse(archive.read(path))) + json, non_json = paths.partition do |path| + begin + JSON.parse(archive.read(path)) + rescue JSON::ParserError + false + end + end + + if json.empty? + if non_json.any? + errors.add(:base, I18n.t('api.error_messages.metadata_not_json')) + else + errors.add(:base, I18n.t('api.error_messages.missing_metadata')) + end else - errors.add(:base, I18n.t('api.error_messages.missing_metadata')) + metadata = Metadata.new(JSON.parse(archive.read(json.first))) end - rescue JSON::ParserError - errors.add(:base, I18n.t('api.error_messages.metadata_not_json')) rescue Virtus::CoercionError errors.add(:base, I18n.t('api.error_messages.invalid_metadata')) rescue Archive::Error - errors.add(:base, I18n.t('api.error_messages.tarball_not_gzipped')) + errors.add(:base, I18n.t('api.error_messages.tarball_not_archive')) rescue Archive::NoPath errors.add(:base, I18n.t('api.error_messages.tarball_has_no_path')) end @@ -184,7 +194,7 @@ def extract_tarball_readme(&block) errors.add(:base, I18n.t('api.error_messages.missing_readme')) end rescue Archive::Error - errors.add(:base, I18n.t('api.error_messages.tarball_not_gzipped')) + errors.add(:base, I18n.t('api.error_messages.tarball_not_archive')) rescue Archive::NoPath errors.add(:base, I18n.t('api.error_messages.tarball_has_no_path')) end diff --git a/config/locales/en.yml b/config/locales/en.yml index 0605b8768..ddea09359 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -40,7 +40,7 @@ en: missing_tarball: "Mulipart POST must include a part named 'tarball'" tarball_has_no_path: "Multipart POST part 'tarball' must be a file" non_existent_category: "Category '%{category_name}' does not exist" - tarball_not_gzipped: "Multipart POST part 'tarball' must be GZipped" + tarball_not_archive: "Multipart POST part 'tarball' must be a file archive" missing_metadata: "Multipart POST part 'tarball' must contain a metadata.json entry" missing_readme: "Multipart POST part 'tarball' must contain a README" metadata_not_json: "Tarball entry named 'metadata.json' could not be parsed as JSON" diff --git a/spec/models/cookbook_upload/parameters_spec.rb b/spec/models/cookbook_upload/parameters_spec.rb index e2bd3f4ad..942c0f5f9 100644 --- a/spec/models/cookbook_upload/parameters_spec.rb +++ b/spec/models/cookbook_upload/parameters_spec.rb @@ -60,6 +60,26 @@ def params(hash) expect(params.metadata.name).to eql('multiple') end + it 'is not extracted from metadata.json~' do + tarball = Tempfile.new('weird-metadata', 'tmp').tap do |file| + io = AndFeathers.build('weird-metadata') do |base| + base.file('metadata.json') do + JSON.dump(name: 'correct') + end + base.file('metadata.json~') do + 'this is not json' + end + end.to_io(AndFeathers::GzippedTarball, :reverse_each) + + file.write(io.read) + file.rewind + end + + params = params(cookbook: '{}', tarball: tarball) + + expect(params.metadata.name).to eql('correct') + end + it 'is blank if the tarball parameter is not a file' do params = params(cookbook: '{}', tarball: 'tarball!') diff --git a/spec/models/cookbook_upload_spec.rb b/spec/models/cookbook_upload_spec.rb index 82bdcc283..e03d2f33f 100644 --- a/spec/models/cookbook_upload_spec.rb +++ b/spec/models/cookbook_upload_spec.rb @@ -99,14 +99,14 @@ to include(I18n.t('api.error_messages.tarball_has_no_path')) end - it 'yields an error if the tarball is not GZipped' do - tarball = File.open('spec/support/cookbook_fixtures/not-actually-gzipped.tgz') + it 'yields an error if the tarball is not an archive' do + tarball = File.open('spec/support/cookbook_fixtures/not-an-archive.tgz') upload = CookbookUpload.new(user, cookbook: '{}', tarball: tarball) errors = upload.finish { |e, _| e } expect(errors.full_messages). - to include(I18n.t('api.error_messages.tarball_not_gzipped')) + to include(I18n.t('api.error_messages.tarball_not_archive')) end it 'yields an error if the tarball has no metadata.json entry' do diff --git a/spec/support/cookbook_fixtures/not-actually-gzipped.tgz b/spec/support/cookbook_fixtures/not-actually-gzipped.tgz deleted file mode 100644 index 164ea5b66ed7fe5dd575410f5e0eb07a3ca55ad6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2048 zcmeHE%MODe5ad(#2W(ei;d{VG)5NN=c&J}r#2yJD(Lgz=^ZXAum#=0A JvI9Tvz#BEbI=TP= diff --git a/spec/support/cookbook_fixtures/not-an-archive.tgz b/spec/support/cookbook_fixtures/not-an-archive.tgz new file mode 100644 index 000000000..e965047ad --- /dev/null +++ b/spec/support/cookbook_fixtures/not-an-archive.tgz @@ -0,0 +1 @@ +Hello