Skip to content

Commit

Permalink
Pass prev context to normalizers
Browse files (browse the repository at this point in the history)
  • Loading branch information
inukshuk committed May 29, 2018
1 parent 411c60c commit e268f50
Show file tree
Hide file tree
Showing 18 changed files with 24 additions and 42 deletions.
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer.rb
Expand Up @@ -18,7 +18,7 @@ def name
self.class.name
end

def normalize(item)
def normalize(item, **opts)
raise NotImplementedError
end

Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/brackets.rb
Expand Up @@ -3,7 +3,7 @@ class Normalizer
class Brackets < Normalizer
@keys = [:'citation-number', :note]

def normalize(item)
def normalize(item, **opts)
each_value(item) do |_, value|
value.gsub!(/^[\(\[\{]|[\]\)\}]$/, '')
end
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/container.rb
Expand Up @@ -3,7 +3,7 @@ class Normalizer
class Container < Normalizer
@keys = [:'container-title']

def normalize(item)
def normalize(item, **opts)
map_values(item) do |_, value|
value.gsub(/^[Ii]n:?\s+/, '')
end
Expand Down
4 changes: 2 additions & 2 deletions lib/anystyle/normalizer/date.rb
Expand Up @@ -3,15 +3,15 @@ class Normalizer
class Date < Normalizer
@keys = [:date]

# TODO return seasons/ intervals in ISO/EDTF format
def normalize(item)
def normalize(item, **opts)
map_values(item) do |_, value|
case
when unknown?(value)
'XXXX'
when interval?(value)
value
# TODO AD/BC
# TODO Seasons
when iso?(value)
value
else
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/edition.rb
Expand Up @@ -3,7 +3,7 @@ class Normalizer
class Edition < Normalizer
@keys = [:edition]

def normalize(item)
def normalize(item, **opts)
map_values(item) do |_, value|
value
.gsub(/rev\./, 'revised')
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/journal.rb
@@ -1,7 +1,7 @@
module AnyStyle
class Normalizer
class Journal < Normalizer
def normalize(item)
def normalize(item, **opts)
if item.key?(:journal)
item[:type] = 'article-journal'
item[:journal].each { |journal| append item, :'container-title', journal }
Expand Down
3 changes: 2 additions & 1 deletion lib/anystyle/normalizer/locale.rb
Expand Up @@ -7,14 +7,15 @@ def initialize
@ld = LanguageDetector.new if defined?(LanguageDetector)
end

def normalize(item)
def normalize(item, **opts)
return item if @ld.nil? || item.key?(:language)

sample = item.values_at(
:title,
:'container-title',
# :'collection-title',
:location,
:journal,
:publisher
# :note
).flatten.compact.join(' ')
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/location.rb
Expand Up @@ -3,7 +3,7 @@ class Normalizer
class Location < Normalizer
@keys = [:location]

def normalize(item)
def normalize(item, **opts)
map_values(item) do |_, value|
location = strip value

Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/locator.rb
Expand Up @@ -5,7 +5,7 @@ class Normalizer
class Locator < Normalizer
@keys = [:isbn, :url]

def normalize(item)
def normalize(item, **opts)
map_values(item) do |key, value|
case key
when :isbn
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/names.rb
Expand Up @@ -20,7 +20,7 @@ def initialize(**options)
})
end

def normalize(item)
def normalize(item, **opts)
map_values(item) do |_, value|
begin
parse(strip(value))
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/page.rb
Expand Up @@ -3,7 +3,7 @@ class Normalizer
class Page < Normalizer
@keys = [:pages]

def normalize(item)
def normalize(item, **opts)
map_values(item) do |_, value|
pages = case value
when /(\d+)(?:\.(\d+))?(?:\((\d{4})\))?:(\d.*)/
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/publisher.rb
Expand Up @@ -3,7 +3,7 @@ class Normalizer
class Publisher < Normalizer
@keys = [:publisher]

def normalize(item)
def normalize(item, **opts)
replace_author(item) if item.key?(:author)
item
end
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/pubmed.rb
Expand Up @@ -3,7 +3,7 @@ class Normalizer
class PubMed < Normalizer
@keys = [:note]

def normalize(item)
def normalize(item, **opts)
each_value(item) do |_, value|
if (value =~ /PMID:?\s*(\d+)/)
append :pmid, $1
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/punctuation.rb
Expand Up @@ -11,7 +11,7 @@ class Punctuation < Normalizer
:location
]

def normalize(item)
def normalize(item, **opts)
each_value(item) do |_, value|
value.gsub!(/[\)\]\.,:;\p{Pd}\p{Z}\p{C}]+$/, '')
value.gsub!(/^[\(\[]/, '')
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/quotes.rb
Expand Up @@ -4,7 +4,7 @@ class Quotes < Normalizer
QUOTES = /^[«‹»›„‚“‟‘‛”’"❛❜❟❝❞⹂〝〞〟\[]|[«‹»›„‚“‟‘‛”’"❛❜❟❝❞⹂〝〞〟\]]$/
@keys = [:title, :'citation-number', :medium]

def normalize(item)
def normalize(item, **opts)
each_value(item) do |_, value|
value.gsub! QUOTES, ''
end
Expand Down
2 changes: 1 addition & 1 deletion lib/anystyle/normalizer/type.rb
@@ -1,7 +1,7 @@
module AnyStyle
class Normalizer
class Type < Normalizer
def normalize(item)
def normalize(item, **opts)
item[:type] = classify item unless item.key?(:type)
item
end
Expand Down
23 changes: 1 addition & 22 deletions lib/anystyle/normalizer/volume.rb
Expand Up @@ -3,28 +3,7 @@ class Normalizer
class Volume < Normalizer
@keys = [:volume, :pages, :date]

def normalize(item)
# TODO
#if !hash.has_key?(:pages) && volume =~ /\D*(\d+):(\d+(?:[—–-]+)\d+)/
# hash[:volume], hash[:pages] = $1.to_i, $2
# hash = normalize_pages(hash)
#else
# case volume
# when /\D*(\d+)\D+(\d+[\s\/&—–-]+\d+)/
# hash[:volume], hash[:number] = $1.to_i, $2
# when /(\d+)?\D+no\.\s*(\d+\D+\d+)/
# hash[:volume] = $1.to_i unless $1.nil?
# hash[:number] = $2
# when /(\d+)?\D+no\.\s*(\d+)/
# hash[:volume] = $1.to_i unless $1.nil?
# hash[:number] = $2.to_i
# when /\D*(\d+)\D+(\d+)/
# hash[:volume], hash[:number] = $1.to_i, $2.to_i
# when /(\d+)/
# hash[:volume] = $1.to_i
# end
#end

def normalize(item, **opts)
map_values(item, [:volume]) do |_, volume|
case volume
when /(\p{Lu}?\d+)\s?\(([^)]+)\)/
Expand Down
8 changes: 5 additions & 3 deletions lib/anystyle/parser.rb
Expand Up @@ -54,10 +54,10 @@ def learn(input)
train(input, truncate: false)
end

def normalize(hash)
def normalize(hash, **opts)
normalizers.each do |n|
begin
hash = n.normalize(hash) unless n.skip?
hash = n.normalize(hash, **opts) unless n.skip?
rescue => e
warn "Error in #{n.name} normalizer: #{e.message}"
end
Expand Down Expand Up @@ -156,7 +156,9 @@ def expand(dataset)
end

def format_hash(dataset, symbolize_keys: true)
dataset.map { |seq| normalize(seq.to_h(symbolize_keys: symbolize_keys)) }
dataset.inject([]) { |out, seq|
out << normalize(seq.to_h(symbolize_keys: symbolize_keys), prev: out)
}
end

def flatten_values(hash, skip: [])
Expand Down

0 comments on commit e268f50

Please sign in to comment.