-
Notifications
You must be signed in to change notification settings - Fork 0
/
bib_record.rb
167 lines (144 loc) · 4.93 KB
/
bib_record.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# frozen_string_literal: true
require "marc"
require "traject"
require "traject/macros/marc21_semantics"
require "json"
require "place_of_publication"
require "us_fed_doc"
require "item_record"
class BibRecord
include USFedDoc
attr_accessor :marc
attr_writer :ht_bib_key, :oclc_num, :isbn, :issn, :lccn, :title, :imprint, :pub_place, :bib_fmt, :lang, :author, :us_gov_doc_flag
# TODO: This is a reimplementation of
# https://github.com/mlibrary/traject_umich_format/blob/7d355a5be133dc86f8795954fdd2e01355758309/lib/traject/umich_format/bib_format.rb#L18
#
# Maps a record type / bibliographic level pair to a two-letter format code.
# Rules are tried in order and the first one that matches wins; when no rule
# matches, a bib_level of "s" still yields "SE", and anything else is "XX".
#
# @param rec_type [String, nil] record type character
# @param bib_level [String, nil] bibliographic level character
# @return [String] one of "BK", "CF", "VM", "MU", "MP", "SE", "MX", "XX"
def self.bib_fmt(rec_type:, bib_level:)
  all_levels = %w[a b c d m s]
  # [accepted record types, accepted bib levels, resulting code]
  rules = [
    [%w[a t], %w[a c d m], "BK"],
    [%w[m], %w[a c d m s], "CF"],
    [%w[g k o r], all_levels, "VM"],
    [%w[c d i j], all_levels, "MU"],
    [%w[e f], all_levels, "MP"],
    [%w[a], %w[b s i], "SE"],
    [%w[b p], all_levels, "MX"]
  ]

  _, _, code = rules.find do |types, levels, _code|
    types.include?(rec_type) && levels.include?(bib_level)
  end
  return code if code

  # Serial level with an otherwise unrecognised record type.
  (bib_level == "s") ? "SE" : "XX"
end
# Builds the record from a MARC-in-JSON document.
#
# @param marc_in_json [String] JSON text; it is parsed and the resulting hash
#   is handed to MARC::Record.new_from_hash
def initialize(marc_in_json)
  record_hash = JSON.parse(marc_in_json)
  @marc = MARC::Record.new_from_hash(record_hash)
end
# First value extracted from the 001 field; memoized once a truthy value is
# found (or set through the writer).
#
# @return [String, nil] nil when nothing is extracted
def ht_bib_key
  return @ht_bib_key if @ht_bib_key

  control_numbers = Traject::MarcExtractor.cached("001").extract(marc)
  @ht_bib_key = control_numbers.first
end
# OCLC numbers found in the 035$a values.
#
# A value counts when it contains "(OCoLC)", "(OCLC)", "ocm" or "ocn"
# (case-insensitive) followed by digits. Leading zeros are dropped via an
# integer round-trip, duplicates are removed, and the result is sorted as
# strings (so "123" sorts before "9").
#
# @return [Array<String>]
def oclc_num
  return @oclc_num if @oclc_num

  # Preferred way to do it, but it doesn't match existing records
  # Traject::Macros::Marc21Semantics.oclcnum_extract(o)
  pattern = /(\(oco{0,1}lc\)|ocm|ocn)(\d+)/i
  matches = Traject::MarcExtractor.cached("035a").extract(marc)
    .map { |value| pattern.match(value) }
    .compact
  normalized = matches.map { |m| m[2].to_i.to_s }
  @oclc_num = normalized.uniq.sort
end
# Whitespace-stripped, de-duplicated 020$a values (memoized).
#
# @return [Array<String>]
def isbn
  return @isbn if @isbn

  raw_values = Traject::MarcExtractor.cached("020a").extract(marc)
  @isbn = raw_values.map(&:strip).uniq
end
# Whitespace-stripped, de-duplicated 022$a values (memoized).
#
# @return [Array<String>]
def issn
  return @issn if @issn

  raw_values = Traject::MarcExtractor.cached("022a").extract(marc)
  @issn = raw_values.map(&:strip).uniq
end
# Whitespace-stripped, de-duplicated 010$a values (memoized).
#
# @return [Array<String>]
def lccn
  return @lccn if @lccn

  raw_values = Traject::MarcExtractor.cached("010a").extract(marc)
  @lccn = raw_values.map(&:strip).uniq
end
# Whitespace-stripped values extracted with the "245abcnp" spec (memoized).
# Duplicates are kept.
#
# @return [Array<String>]
def title
  return @title if @title

  raw_titles = Traject::MarcExtractor.cached("245abcnp").extract(marc)
  @title = raw_titles.map(&:strip)
end
# Imprint strings, whitespace-stripped (memoized).
#
# Uses the "260bc" spec first and only falls back to "264|*1|bc" when the
# former produced nothing.
#
# @return [Array<String>]
def imprint
  return @imprint unless @imprint.nil?

  stripped = ->(spec) { Traject::MarcExtractor.cached(spec).extract(marc).map(&:strip) }
  @imprint = stripped.call("260bc")
  @imprint = stripped.call("264|*1|bc") unless @imprint.any?
  @imprint
end
# Whether the 008 value has "u" at index 17 and "f" at index 28.
#
# @return [Boolean] false when the record has no 008 field
def u_and_f?
  fixed_field = marc["008"]&.value
  /^.{17}u.{10}f/.match?(fixed_field)
end
# PlaceOfPublication built from this record's MARC data (memoized).
#
# @return [PlaceOfPublication]
def pub_place
  return @pub_place if @pub_place

  @pub_place = PlaceOfPublication.new(marc)
end
# Format code for this record (memoized), computed by the class-level
# bib_fmt from leader characters 6 (record type) and 7 (bib level).
#
# @return [String]
def bib_fmt
  return @bib_fmt if @bib_fmt

  leader = marc.leader
  @bib_fmt = self.class.bib_fmt(rec_type: leader[6], bib_level: leader[7])
end
# First value extracted with the "008[35-37]" spec, or a single space when
# nothing is extracted (memoized).
#
# @return [String]
def lang
  return @lang if @lang

  code = Traject::MarcExtractor.cached("008[35-37]").extract(marc).first
  @lang = code || " "
end
# Author strings gathered from the "100abcd", "110abcd" and "111acd" specs,
# in that order, whitespace-stripped and de-duplicated (memoized).
#
# @return [Array<String>]
def author
  return @author if @author

  per_spec = %w[100abcd 110abcd 111acd].map do |spec|
    Traject::MarcExtractor.cached(spec).extract(marc)
  end
  @author = per_spec.flatten.map(&:strip).uniq
end
# 1 when the record looks like a US federal document, otherwise 0 (memoized).
#
# The flag is 1 only if all of these hold:
# * the 008 value has "f" at index 28,
# * the third character of pub_place.to_s is "u",
# * exception_to_rule? is false (presumably provided by USFedDoc — confirm).
#
# A record without an 008 field yields 0 instead of raising, matching the
# nil-safe handling in u_and_f?.
#
# @return [Integer] 0 or 1
def us_gov_doc_flag
  return @us_gov_doc_flag unless @us_gov_doc_flag.nil?

  fixed_field = marc["008"]&.value
  federal = !fixed_field.nil? &&
    fixed_field[28] == "f" &&
    pub_place.to_s[2] == "u" &&
    !exception_to_rule?
  @us_gov_doc_flag = federal ? 1 : 0
end
# Yields one ItemRecord per 974 field, each built from that field and
# sdr_nums. Returns an Enumerator when called without a block.
#
# @yieldparam item [ItemRecord]
# @return [Enumerator] when no block is given
def item_records
  return to_enum(:item_records) unless block_given?

  marc.each_by_tag("974") { |field_974| yield ItemRecord.new(field_974, sdr_nums) }
end
# Yields one hash per item record: the bib-level to_h merged with the item's
# to_h, so item keys win on collision. Returns an Enumerator when called
# without a block.
#
# @yieldparam row [Hash]
# @return [Enumerator] when no block is given
def hathifile_records
  return to_enum(:hathifile_records) unless block_given?

  # merge bib and item level fields
  item_records { |item| yield to_h.merge(item.to_h) }
end
# Bib-level fields as a hash. pub_place is stringified; every other value is
# whatever the corresponding reader returns. Key order is fixed.
#
# @return [Hash{Symbol => Object}]
def to_h
  identifiers = {
    oclc_num: oclc_num,
    isbn: isbn,
    issn: issn,
    lccn: lccn
  }
  bib_details = {
    title: title,
    imprint: imprint,
    ht_bib_key: ht_bib_key,
    pub_place: pub_place.to_s,
    lang: lang,
    bib_fmt: bib_fmt,
    us_gov_doc_flag: us_gov_doc_flag,
    author: author
  }
  identifiers.merge(bib_details)
end
# Item records need a map of collection code to bib record id assembled from
# the 035s and the sdrnum_prefix_map
#
# Only 035$a values starting with "sdr-" are considered. After dropping that
# marker, the remainder is tested against every prefix of every collection in
# Services.sdrnum_prefix_map; each prefix that matches contributes the text
# following it (minus one leading "."), so a value can be recorded more than
# once when several prefixes match.
#
# The extracted strings are never modified: the "sdr-" marker is removed on a
# copy so the data handed back by the extractor stays intact.
#
# @return [Hash{Object => Array<String>}] memoized; the hash creates (and
#   stores) an empty array for any key that is looked up but missing
def sdr_nums
  return @sdr_nums unless @sdr_nums.nil?

  # Built in a local so a failure part-way through cannot memoize a partial map.
  nums = Hash.new { |h, coll| h[coll] = [] }
  Traject::MarcExtractor.cached("035a").extract(marc).each do |raw|
    next unless /^sdr-/.match?(raw)

    # remove leading sdr- (non-destructively)
    sdr = raw.gsub(/^sdr-/, "")
    Services.sdrnum_prefix_map.each do |collection_code, prefixes|
      # e.g. yale"=>["yale-loc", "yale"]
      prefixes.each do |prefix|
        sdr_match = /^#{prefix}([.a-zA-Z0-9-]+)/.match(sdr)
        next if sdr_match.nil?

        nums[collection_code] << sdr_match[1].gsub(/^\./, "")
      end
    end
  end
  @sdr_nums = nums
end
end