From e07f243849974a17e33cda0e40c949637a3406c5 Mon Sep 17 00:00:00 2001
From: Sylvester Keil
Date: Tue, 29 May 2018 17:46:10 +0200
Subject: [PATCH] Add name repeater normalizer

Closes #98
---
Review notes (this section is ignored when the patch is applied):

* Names#normalize now accepts `prev:`, a list of previously normalized
  items. A value that consists only of punctuation and whitespace after
  trimming (see Names#repeater?) is treated as a repeater, e.g. '-----.,',
  and is replaced by the first name stored under the same key of the last
  item in `prev`, falling back to that item's first :author.
* The fallback is read from prev[-1] (the previous item). It used to read
  prev[1], which is nil whenever only one previous item is passed, as in
  the spec below.
* The lookups use Hash#dig so that a previous item lacking the current key
  reaches the :author fallback instead of raising NoMethodError.
* The value is trimmed with a non-mutating gsub, so the caller's string is
  left untouched and frozen strings are accepted.
* Names#strip additionally removes a leading run of punctuation that is
  followed by whitespace.
* Line breaks were restored from the hunk headers; the indentation inside
  the hunks is conventional two-space Ruby and should be checked against
  the target files before applying.

 lib/anystyle/normalizer/names.rb       | 27 ++++++++++++++++++--------
 spec/anystyle/normalizer/names_spec.rb | 15 ++++++++++++++
 2 files changed, 34 insertions(+), 8 deletions(-)
 create mode 100644 spec/anystyle/normalizer/names_spec.rb

diff --git a/lib/anystyle/normalizer/names.rb b/lib/anystyle/normalizer/names.rb
index 6aa2bef..b8910a8 100644
--- a/lib/anystyle/normalizer/names.rb
+++ b/lib/anystyle/normalizer/names.rb
@@ -9,8 +9,8 @@ class Names < Normalizer
 
       attr_accessor :namae
 
-      def initialize(**options)
-        super(**options)
+      def initialize(**opts)
+        super(**opts)
 
         @namae = Namae::Parser.new({
           prefer_comma_as_separator: true,
@@ -20,16 +20,26 @@ def initialize(**options)
         })
       end
 
-      def normalize(item, **opts)
-        map_values(item) do |_, value|
-          begin
-            parse(strip(value))
-          rescue
-            [{ literal: value }]
+      def normalize(item, prev: [], **opts)
+        map_values(item) do |key, value|
+          value = value.gsub(/(^[\(\[]|[,;:\)\]]+$)/, '')
+          case
+          when repeater?(value) && prev.length > 0
+            prev[-1].dig(key, 0) || prev[-1].dig(:author, 0)
+          else
+            begin
+              parse(strip(value))
+            rescue
+              [{ literal: value }]
+            end
           end
         end
       end
 
+      def repeater?(value)
+        value =~ /^[\p{P}\s]+$/
+      end
+
       def strip(value)
         value
           .gsub(/^[Ii]n:?\s+/, '')
@@ -43,6 +53,7 @@ def strip(value)
           .gsub(/\b([Pp]rod(\.|uce[rd]))(\s+by)?\s+/, '')
           .gsub(/\b([Pp]erf(\.|orme[rd]))(\s+by)?\s+/, '')
           .gsub(/\([^\)]*\)/, '')
+          .gsub(/^\p{P}+\s/, '')
           .gsub(/[;:]/, ',')
           .strip
       end
diff --git a/spec/anystyle/normalizer/names_spec.rb b/spec/anystyle/normalizer/names_spec.rb
new file mode 100644
index 0000000..dadd576
--- /dev/null
+++ b/spec/anystyle/normalizer/names_spec.rb
@@ -0,0 +1,15 @@
+module AnyStyle
+  describe Normalizer::Names do
+    let(:n) { Normalizer::Names.new }
+
+    let(:item) {{
+      author: [names(:derrida)]
+    }}
+
+    it "resolves repeaters" do
+      expect(
+        n.normalize({ author: ['-----.,'] }, prev: [n.normalize(item)])[:author][0]
+      ).to include(family: 'Derrida')
+    end
+  end
+end