diff --git a/lib/anystyle/normalizer/names.rb b/lib/anystyle/normalizer/names.rb index a73e1c7..3843de4 100644 --- a/lib/anystyle/normalizer/names.rb +++ b/lib/anystyle/normalizer/names.rb @@ -43,7 +43,7 @@ def repeater?(value) def strip(value) value .gsub(/^[Ii]n:?\s+/, '') - .gsub(/\b[EÉeé]d(s?\.|itors?|ited|iteurs?|ité)(\s+(by|par)\s+|\b|$)?/, '') + .gsub(/\b[EÉeé]d(s?\.|itors?\.?|ited|iteurs?|ité)(\s+(by|par)\s+|\b|$)/, '') .gsub(/\b([Hh](rsg|gg?)\.|Herausgeber)\s+/, '') .gsub(/\b[Hh]erausgegeben von\s+/, '') .gsub(/\b((d|ein)er )?[Üü]ber(s\.|setzt|setzung|tragen|tragung) v(\.|on)\s+/, '') @@ -52,14 +52,21 @@ def strip(value) .gsub(/\b([Dd]ir(\.|ected))(\s+by)?\s+/, '') .gsub(/\b([Pp]rod(\.|uce[rd]))(\s+by)?\s+/, '') .gsub(/\b([Pp]erf(\.|orme[rd]))(\s+by)?\s+/, '') + .gsub(/\*/, '') .gsub(/\([^\)]*\)?/, '') + .gsub(/\[[^\]]*\)?/, '') .gsub(/[;:]/, ',') - .strip + .gsub(/^\p{^L}+|\s+\p{^L}+$/, '') + .gsub(/[\s,]+$/, '') + .gsub(/,{2,}/, ',') + .gsub(/\s+\./, '.') end def parse(value) + raise ArgumentError if value.empty? + others = value.sub!( - /(\bet\s+(al|coll)\b|\bu\.\s*a\.|(\band|\&)\s+others).*$/, '' + /(,\s+)?((\&\s+)?\bet\s+(al|coll)\b|\bu\.\s*a\b|(\band|\&)\s+others).*$/, '' ) || value.sub!(/\.\.\.|…/, '') # Add surname/initial punctuation separator for Vancouver-style names diff --git a/spec/anystyle/normalizer/names_spec.rb b/spec/anystyle/normalizer/names_spec.rb index dac24d1..4bac68f 100644 --- a/spec/anystyle/normalizer/names_spec.rb +++ b/spec/anystyle/normalizer/names_spec.rb @@ -80,14 +80,14 @@ def n(author, **opts) { family: 'Kelly', given: 'J.W.' } ]], ['Bouchard J-P.', [{ family: 'Bouchard', given: 'J.-P.' }]], - ['Edgar A. Poe; Herman Melville', [poe, melville]], + ['- Edgar A. Poe; Herman Melville', [poe, melville]], ['Poe, Edgar A., Melville, Herman', [poe, melville]], ['Aeschlimann Magnin, E.', [{ family: 'Aeschlimann Magnin', given: 'E.' }]], ['Yang, Q., Mudambi, R., & Meyer, K. E.', [ { family: 'Yang', given: 'Q.' }, { family: 'Mudambi', given: 'R.' }, { family: 'Meyer', given: 'K.E.' } - ]] + ]], ].each do |input, output| expect(n(input)).to eq(output) end @@ -99,6 +99,7 @@ def n(author, **opts) expect(n('In: D. Knuth (ed.)')).to eq([knuth]) expect(n('in: D. Knuth ed.')).to eq([knuth]) expect(n('in D. Knuth (ed)')).to eq([knuth]) + expect(n('In D. Knuth, editor')).to eq([knuth]) end it "does not strip 'ed' etc. from names" do @@ -126,16 +127,20 @@ def n(author, **opts) it "strips and resolves 'et al' / others" do expect(n('J Doe et al')).to eq([doe, others]) - expect(n('J Doe et al.')).to eq([doe, others]) + expect(n('Doe, J., et al.')).to eq([doe, others]) expect(n('J Doe u.a.')).to eq([doe, others]) expect(n('J Doe u. a.')).to eq([doe, others]) expect(n('J Doe and others')).to eq([doe, others]) expect(n('J Doe & others')).to eq([doe, others]) expect(n('J Doe et coll.')).to eq([doe, others]) - expect(n('J Doe ...')).to eq([doe, others]) end end + it "#strip" do + expect(subject.strip('Piveteau, J. (ed.).')).to eq('Piveteau, J.') + expect(subject.strip('In D. Knuth, editor.')).to eq('D. Knuth') + end + describe "Parsed Core Data" do before :all do @data = resource('parser/core.xml') @@ -148,8 +153,8 @@ def n(author, **opts) let(:lit) { @data.select { |name| !name[:literal].nil? } } let(:nam) { @data.select { |name| name[:literal].nil? } } - it "accepts more than 95% of names" do - expect(nam.length.to_f / @data.length).to be > 0.95 + it "accepts more than 98% of names" do + expect(nam.length.to_f / @data.length).to be > 0.98 end end end