diff --git a/.travis.yml b/.travis.yml index 0fe2c46b0..eae1e7dee 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,9 @@ notifications: on_start: false # default: false rvm: - 2.0.0 +before_install: + - sudo apt-get update -qq + - sudo apt-get install -qq libarchive-dev before_script: - psql -c 'create database supermarket_test;' -U postgres - psql supermarket_test -c 'create extension pg_trgm' -U postgres diff --git a/Gemfile b/Gemfile index fecc8eb13..bf5020991 100644 --- a/Gemfile +++ b/Gemfile @@ -34,6 +34,7 @@ gem 'semverse' gem 'sitemap_generator' gem 'redis-rails' gem 'yajl-ruby' +gem 'libarchive', github: 'bcobb/winebarrel-libarchive-ruby-clone', ref: 'format-security-fix' gem 'sentry-raven', '~> 0.8.0', require: false gem 'statsd-ruby', require: 'statsd' diff --git a/Gemfile.lock b/Gemfile.lock index 23491ec67..f40064f8f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,10 @@ +GIT + remote: git://github.com/bcobb/winebarrel-libarchive-ruby-clone.git + revision: a5a748c731415cb622282350dc92c82f8de77e5d + ref: format-security-fix + specs: + libarchive (0.1.2) + GIT remote: git://github.com/gofullstack/chef-legacy.git revision: 3a27451f99f06b93b178b10e660e0b65f4dbfa99 @@ -460,6 +467,7 @@ DEPENDENCIES jbuilder kaminari launchy + libarchive! license_finder magiconf mail_view diff --git a/app/models/cookbook_upload/archive.rb b/app/models/cookbook_upload/archive.rb new file mode 100644 index 000000000..ee12b0e7a --- /dev/null +++ b/app/models/cookbook_upload/archive.rb @@ -0,0 +1,110 @@ +require 'libarchive' + +class CookbookUpload + # + # This class provides access to cookbook archives. In particular, it supports + # finding entries in the archive which have a path matching some regular + # expression, and it can read the contents of files given their path. These + # are the two primary operations required by Supermarket to locate and + # process metadata and README files. + # + class Archive + # + # Indicates that the uploaded archive may not be an upload, since it does + # not have a +path+ + # + NoPath = Class.new(RuntimeError) + + # + # Indicates that the source could not be processed + # + Error = Class.new(RuntimeError) + + # + # Creates a new Archive + # + # @param source [File] the source archive + # + def initialize(source) + @source = source + end + + # + # Returns the paths of entries in the archive which match the given regular expression + # + # @param pattern [Regexp] + # + # @return [Array] matching paths + # + def find(pattern) + matches = [] + + each do |entry, _| + next unless entry.pathname.match(pattern) + + matches << entry.pathname + end + + matches + end + + # + # Reads the contents of the file at the given path + # + # @param path [String] + # + # @return [String] the contents if a file exists at the given path + # @return [nil] if no such file exists + # + def read(path) + match = nil + + each do |entry, archive| + next unless entry.pathname == path + + match = archive.read_data + + break + end + + match + end + + private + + # + # Iterates through each entry in the source archive + # + # @raise [NoPath] if the source has no path + # @raise [Error] if the source is not a compatible archive + # + # @yieldparam [::Archive::Entry] entry + # @yieldparam [::Archive::Reader] archive + # + # @example + # archive = CookbookUpload::Archive.new(tarball) + # archive.each do |entry, archive| + # puts "#{entry.pathname} has the following content:\n#{archive.read_data}" + # end + # + def each + raise NoPath unless @source.respond_to?(:path) + + begin + ::Archive.read_open_filename(@source.path) do |archive| + loop do + entry = archive.next_header + + if entry + yield entry, archive + else + break + end + end + end + rescue ::Archive::Error => e + raise Error, e.message + end + end + end +end diff --git a/app/models/cookbook_upload/parameters.rb b/app/models/cookbook_upload/parameters.rb index 98e66d49a..d986231f7 100644 --- a/app/models/cookbook_upload/parameters.rb +++ b/app/models/cookbook_upload/parameters.rb @@ -1,8 +1,8 @@ require 'active_model/errors' +require 'cookbook_upload/archive' require 'cookbook_upload/metadata' require 'cookbook_upload/readme' require 'json' -require 'rubygems/package' require 'set' class CookbookUpload @@ -14,20 +14,10 @@ class Parameters attr_reader :tarball # - # Indicates that the uploaded tarball has no metdata.json entry + # @!attribute [r] archive + # @return [Archive] An interface to +tarball+ # - MissingMetadata = Class.new(RuntimeError) - - # - # Indicates that the uploaded tarball has no README entry - # - MissingReadme = Class.new(RuntimeError) - - # - # Indicates that the uploaded tarball may not be an upload, since it does - # not have a +path+ - # - TarballHasNoPath = Class.new(RuntimeError) + attr_reader :archive # # Creates a new set of cookbook upload parameters @@ -43,6 +33,7 @@ class Parameters def initialize(params) @cookbook_data = params.fetch(:cookbook) @tarball = params.fetch(:tarball) + @archive = Archive.new(@tarball) end # @@ -149,30 +140,30 @@ def parse_tarball_metadata(&block) errors = ActiveModel::Errors.new([]) begin - raise TarballHasNoPath unless tarball.respond_to?(:path) - - Zlib::GzipReader.open(tarball.path) do |gzip| - Gem::Package::TarReader.new(gzip) do |tar| - entry = tar.find do |e| - e.full_name.downcase.match(%r{^(\.\/)?[^\/]+\/metadata.json}) - end - - if entry - metadata = Metadata.new(JSON.parse(entry.read)) - else - raise MissingMetadata - end + paths = archive.find(%r{\A(\.\/)?[^\/]+\/metadata\.json}) + + json, non_json = paths.partition do |path| + begin + JSON.parse(archive.read(path)) + rescue JSON::ParserError + false end end - rescue JSON::ParserError - errors.add(:base, I18n.t('api.error_messages.metadata_not_json')) - rescue MissingMetadata - errors.add(:base, I18n.t('api.error_messages.missing_metadata')) + + if json.empty? + if non_json.any? + errors.add(:base, I18n.t('api.error_messages.metadata_not_json')) + else + errors.add(:base, I18n.t('api.error_messages.missing_metadata')) + end + else + metadata = Metadata.new(JSON.parse(archive.read(json.first))) + end rescue Virtus::CoercionError errors.add(:base, I18n.t('api.error_messages.invalid_metadata')) - rescue Zlib::GzipFile::Error - errors.add(:base, I18n.t('api.error_messages.tarball_not_gzipped')) - rescue TarballHasNoPath + rescue Archive::Error + errors.add(:base, I18n.t('api.error_messages.tarball_not_archive')) + rescue Archive::NoPath errors.add(:base, I18n.t('api.error_messages.tarball_has_no_path')) end @@ -187,34 +178,24 @@ def parse_tarball_metadata(&block) # @yieldparam metadata [Readme] the cookbook's README # def extract_tarball_readme(&block) + cookbook = metadata.name readme = nil - effective_cookbook_name = metadata.name.downcase errors = ActiveModel::Errors.new([]) begin - raise TarballHasNoPath unless tarball.respond_to?(:path) - - Zlib::GzipReader.open(tarball.path) do |gzip| - Gem::Package::TarReader.new(gzip) do |tar| - entry = tar.find do |e| - e.full_name.downcase.include?("#{effective_cookbook_name}/readme") - end - - if entry - extension = entry.header.name.split('.').last.strip - contents = entry.read - - readme = Readme.new(contents: contents, extension: extension) - else - raise MissingReadme - end - end + path = archive.find(%r{\A(\.\/)?#{cookbook}\/readme(\.\w+)?\Z}i).first + + if path + readme = Readme.new( + contents: archive.read(path), + extension: path.split('.').last.strip + ) + else + errors.add(:base, I18n.t('api.error_messages.missing_readme')) end - rescue MissingReadme - errors.add(:base, I18n.t('api.error_messages.missing_readme')) - rescue Zlib::GzipFile::Error - errors.add(:base, I18n.t('api.error_messages.tarball_not_gzipped')) - rescue TarballHasNoPath + rescue Archive::Error + errors.add(:base, I18n.t('api.error_messages.tarball_not_archive')) + rescue Archive::NoPath errors.add(:base, I18n.t('api.error_messages.tarball_has_no_path')) end diff --git a/config/locales/en.yml b/config/locales/en.yml index 0605b8768..ddea09359 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -40,7 +40,7 @@ en: missing_tarball: "Mulipart POST must include a part named 'tarball'" tarball_has_no_path: "Multipart POST part 'tarball' must be a file" non_existent_category: "Category '%{category_name}' does not exist" - tarball_not_gzipped: "Multipart POST part 'tarball' must be GZipped" + tarball_not_archive: "Multipart POST part 'tarball' must be a file archive" missing_metadata: "Multipart POST part 'tarball' must contain a metadata.json entry" missing_readme: "Multipart POST part 'tarball' must contain a README" metadata_not_json: "Tarball entry named 'metadata.json' could not be parsed as JSON" diff --git a/spec/models/cookbook_upload/parameters_spec.rb b/spec/models/cookbook_upload/parameters_spec.rb index e2bd3f4ad..942c0f5f9 100644 --- a/spec/models/cookbook_upload/parameters_spec.rb +++ b/spec/models/cookbook_upload/parameters_spec.rb @@ -60,6 +60,26 @@ def params(hash) expect(params.metadata.name).to eql('multiple') end + it 'is not extracted from metadata.json~' do + tarball = Tempfile.new('weird-metadata', 'tmp').tap do |file| + io = AndFeathers.build('weird-metadata') do |base| + base.file('metadata.json') do + JSON.dump(name: 'correct') + end + base.file('metadata.json~') do + 'this is not json' + end + end.to_io(AndFeathers::GzippedTarball, :reverse_each) + + file.write(io.read) + file.rewind + end + + params = params(cookbook: '{}', tarball: tarball) + + expect(params.metadata.name).to eql('correct') + end + it 'is blank if the tarball parameter is not a file' do params = params(cookbook: '{}', tarball: 'tarball!') diff --git a/spec/models/cookbook_upload_spec.rb b/spec/models/cookbook_upload_spec.rb index 82bdcc283..e03d2f33f 100644 --- a/spec/models/cookbook_upload_spec.rb +++ b/spec/models/cookbook_upload_spec.rb @@ -99,14 +99,14 @@ to include(I18n.t('api.error_messages.tarball_has_no_path')) end - it 'yields an error if the tarball is not GZipped' do - tarball = File.open('spec/support/cookbook_fixtures/not-actually-gzipped.tgz') + it 'yields an error if the tarball is not an archive' do + tarball = File.open('spec/support/cookbook_fixtures/not-an-archive.tgz') upload = CookbookUpload.new(user, cookbook: '{}', tarball: tarball) errors = upload.finish { |e, _| e } expect(errors.full_messages). - to include(I18n.t('api.error_messages.tarball_not_gzipped')) + to include(I18n.t('api.error_messages.tarball_not_archive')) end it 'yields an error if the tarball has no metadata.json entry' do diff --git a/spec/support/cookbook_fixtures/not-actually-gzipped.tgz b/spec/support/cookbook_fixtures/not-actually-gzipped.tgz deleted file mode 100644 index 164ea5b66..000000000 Binary files a/spec/support/cookbook_fixtures/not-actually-gzipped.tgz and /dev/null differ diff --git a/spec/support/cookbook_fixtures/not-an-archive.tgz b/spec/support/cookbook_fixtures/not-an-archive.tgz new file mode 100644 index 000000000..e965047ad --- /dev/null +++ b/spec/support/cookbook_fixtures/not-an-archive.tgz @@ -0,0 +1 @@ +Hello