Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
- NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Common Licensed.

### OGG
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `with_garbage_at_the_end.ogg` have been generated by the project contributors
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `invalid_with_garbage_at_the_end.ogg` have been generated by the project contributors

### PDF
- PDF 2.0 files downloaded from the [PDF Association public Github repository](https://github.com/pdf-association/pdf20examples). These files are licensed under the Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.
Expand All @@ -236,7 +236,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
### WAV
- c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
- c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
- c_M1F1-Alaw-AFsp.wav and invalid_d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)

### WEBP
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
Expand Down
14 changes: 14 additions & 0 deletions lib/format_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ module FormatParser
# The value will ensure the parser having it will be applied to the file last.
LEAST_PRIORITY = 99

@registered_natures = []
@registered_formats = []

# Register a parser object to be used to perform file format detection. Each parser FormatParser
# provides out of the box registers itself using this method.
#
Expand Down Expand Up @@ -68,9 +71,20 @@ def self.register_parser(callable_parser, formats:, natures:, priority: LEAST_PR
end
@parser_priorities ||= {}
@parser_priorities[callable_parser] = priority

@registered_natures |= parser_provided_natures
@registered_formats |= parser_provided_formats
end
end

def self.registered_natures
@registered_natures
end

def self.registered_formats
@registered_formats
end

# Deregister a parser object (makes FormatParser forget this parser existed). Is mostly used in
# tests, but can also be used to forcibly disable some formats completely.
#
Expand Down
2 changes: 1 addition & 1 deletion lib/format_parser/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module FormatParser
VERSION = '2.7.0'
VERSION = '2.7.1'
end
7 changes: 6 additions & 1 deletion lib/parsers/mp3_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ def call(raw_io)
io.seek(0)
return if TIFF_HEADER_BYTES.include?(safe_read(io, 4))

# Prevention against parsing WAV files.
io.seek(0)
wav_chunk_id, _wav_size, wav_riff_type = safe_read(io, 12).unpack('a4la4')
return if wav_chunk_id == 'RIFF' || wav_riff_type == 'WAVE'

# Read all the ID3 tags (or at least attempt to)
io.seek(0)
id3v1 = ID3Extraction.attempt_id3_v1_extraction(io)
Expand Down Expand Up @@ -315,5 +320,5 @@ def with_id3tag_local_configs
end
end

FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 101
Comment thread
CostinTanasoiu marked this conversation as resolved.
end
File renamed without changes.
Binary file added spec/fixtures/WAV/c_SCAM_MIC_SOL001_RUN001.wav
Binary file not shown.
32 changes: 30 additions & 2 deletions spec/format_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@
end
end

it "fixtures with 'invalid' in the filename should fail to parse" do
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
file_name = File.basename(fixture_path)
next unless file_name.include? "invalid"
File.open(fixture_path, 'rb') do |file|
FormatParser.parse(file)
end
end
end

it "fixtures without 'invalid' in the filename should be parsed successfully" do
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
file_name = File.basename(fixture_path)
next if file_name.include? "invalid"
File.open(fixture_path, 'rb') do |file|
FormatParser.parse(file)
end
end
end

it 'triggers parsers in a certain order that corresponds to the parser priorities' do
file_contents = StringIO.new('a' * 4096)

Expand Down Expand Up @@ -189,12 +209,20 @@
'FormatParser::CR3Parser',
'FormatParser::DPXParser',
'FormatParser::FLACParser',
'FormatParser::MP3Parser',
'FormatParser::OggParser',
'FormatParser::TIFFParser',
'FormatParser::WAVParser'
'FormatParser::WAVParser',
'FormatParser::MP3Parser'
])
end

it 'ensures that MP3 parser is the last one among all' do
natures = FormatParser.registered_natures
formats = FormatParser.registered_formats
prioritised_parsers = FormatParser.parsers_for(natures, formats)
parser_class_names = prioritised_parsers.map { |parser| parser.class.name }
expect(parser_class_names.last).to eq 'FormatParser::MP3Parser'
end
end

describe '.register_parser and .deregister_parser' do
Expand Down
2 changes: 1 addition & 1 deletion spec/parsers/flac_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
end

it 'raises an error when sample rate is 0' do
fpath = fixtures_dir + 'FLAC/sample_rate_0.flac'
fpath = fixtures_dir + 'FLAC/invalid_sample_rate_0.flac'

expect {
subject.call(File.open(fpath, 'rb'))
Expand Down
4 changes: 2 additions & 2 deletions spec/parsers/json_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,15 @@ def file_size(file_name)

describe 'When reading objects invalid JSON files' do
it "rejects files with corrupted JSON data" do
io = load_file 'malformed.json'
io = load_file 'invalid_malformed.json'

parsed = subject.call(io)

expect(parsed).to be_nil
end

it "rejects invalid files early without reading the whole content" do
io = load_file 'lorem_ipsum.json'
io = load_file 'invalid_lorem_ipsum.json'

parsed = subject.call(io)

Expand Down
2 changes: 1 addition & 1 deletion spec/parsers/m3u_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
end

describe 'an m3u file with missing header' do
let(:m3u_file) { 'plain_text.m3u' }
let(:m3u_file) { 'invalid_plain_text.m3u' }

it 'does not parse the file successfully' do
expect(parsed_m3u).to be_nil
Expand Down
6 changes: 6 additions & 0 deletions spec/parsers/mp3_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
expect(parsed).to be_nil
end

it 'does not misdetect a WAV' do
fpath = fixtures_dir + '/WAV/c_SCAM_MIC_SOL001_RUN001.wav'
parsed = subject.call(File.open(fpath, 'rb'))
expect(parsed).to be_nil
end

describe 'title/artist/album attributes' do
let(:parsed) { subject.call(File.open(fpath, 'rb')) }

Expand Down
2 changes: 1 addition & 1 deletion spec/parsers/ogg_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
end

it 'skips a file if it contains more than MAX_POSSIBLE_OGG_PAGE_SIZE bytes of garbage at the end' do
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/with_garbage_at_the_end.ogg', 'rb'))
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/invalid_with_garbage_at_the_end.ogg', 'rb'))
expect(parse_result).to be_nil
end

Expand Down
6 changes: 3 additions & 3 deletions spec/parsers/pdf_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@ def parse_pdf(pdf_filename)

describe 'broken PDF files should not parse' do
it 'PDF with missing version header' do
parsed_pdf = parse_pdf 'not_a.pdf'
parsed_pdf = parse_pdf 'invalid_not_a.pdf'
expect(parsed_pdf).to be_nil
end

it 'PDF 2.0 with offset start' do
parsed_pdf = parse_pdf 'PDF 2.0 with offset start.pdf'
parsed_pdf = parse_pdf 'invalid PDF 2.0 with offset start.pdf'
expect(parsed_pdf).to be_nil
end

it 'exceeds the PDF read limit' do
parsed_pdf = parse_pdf 'exceed_PDF_read_limit.pdf'
parsed_pdf = parse_pdf 'invalid_exceed_PDF_read_limit.pdf'
expect(parsed_pdf).to be_nil
end
end
Expand Down
2 changes: 1 addition & 1 deletion spec/parsers/wav_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
expect {
subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
}.to raise_error(FormatParser::IOUtils::InvalidRead)
end
end
2 changes: 1 addition & 1 deletion spec/parsers/webp_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
end

it 'does not parse files with an unrecognised variant' do
result = subject.call(File.open(fixtures_dir + 'WEBP/unrecognised-variant.webp', 'rb'))
result = subject.call(File.open(fixtures_dir + 'WEBP/invalid-unrecognised-variant.webp', 'rb'))
expect(result).to be_nil
end

Expand Down
37 changes: 37 additions & 0 deletions spec/remote_fetching_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,43 @@
expect(file_information.format).to eq(:png)
end

describe 'correctly parses WAV files without falling back to another filetype' do
['c_8kmp316.wav', 'c_SCAM_MIC_SOL001_RUN001.wav'].each do |filename|
it "parses WAV file #{filename}" do
remote_url = 'http://localhost:9399/WAV/' + filename
file_information = FormatParser.parse_http(remote_url)
expect(file_information).not_to be_nil
expect(file_information.format).to eq(:wav)
end
end
end

describe "correctly parses files over HTTP without filename hint" do
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
file_name = File.basename(fixture_path)
next if file_name.include? "invalid"

file_type_dir = fixture_path.delete_prefix(fixtures_dir).delete_suffix(file_name)
file_type_dir.delete_prefix!('/').delete_suffix!('/')
next if file_type_dir.empty?

# skipping this one because it's a special case
next if file_name == "arch_many_entries.zip"

it "parses #{file_type_dir} file: #{file_name}" do
url = "http://localhost:9399/#{file_type_dir}/#{file_name}?some_param=test".gsub(' ', '%20')
result_with_hint = FormatParser.parse_http(url, filename_hint: file_name)
result_no_hint = FormatParser.parse_http(url)

expect(result_with_hint).not_to be_nil
expect(result_no_hint).not_to be_nil

expect(result_no_hint.nature).to eq(result_with_hint.nature)
expect(result_no_hint.format).to eq(result_with_hint.format)
end
end
end

describe 'when parsing remote fixtures' do
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
filename = File.basename(fixture_path)
Expand Down