-
Notifications
You must be signed in to change notification settings - Fork 0
/
bibtex_import.rb
143 lines (114 loc) · 2.75 KB
/
bibtex_import.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
require 'bibtex'
# Form object that turns BibTeX input (an uploaded file or a raw string) into
# Article records owned by a user.
#
# It extends ActiveModel::Naming and includes ActiveModel::Conversion, and
# reports itself as never persisted; #save saves the Article objects built
# from the bibliography rather than the import itself.
class BibtexImport
  extend ActiveModel::Naming
  include ActiveModel::Conversion

  attr_accessor :file, :bibtex_source, :user

  # @param attributes [Hash, nil] attribute names mapped to values; every pair
  #   is assigned through the matching public writer (+file=+,
  #   +bibtex_source=+, +user=+). +nil+ is treated like an empty hash.
  # @raise [NoMethodError] if a key has no public writer
  def initialize(attributes = {})
    (attributes || {}).each do |name, value|
      # public_send keeps private methods unreachable should the attribute
      # names ever come from untrusted input.
      public_send("#{name}=", value)
    end
  end

  # @return [false] an import is never stored itself
  def persisted?
    false
  end

  # Stores the uploaded file and, when it is readable, loads its content into
  # +bibtex_source+. A nil / non-readable value leaves +bibtex_source+ alone
  # instead of raising.
  #
  # @param new_file [#read, nil]
  def file=(new_file)
    @file = new_file
    @bibtex_source = new_file.read if new_file.respond_to?(:read)
  end

  # Builds (without saving) one Article per valid bibliography entry and
  # assigns the importing user to each of them.
  #
  # @return [Array<Article>]
  def new_articles
    valid_bibliography_entries.map do |entry|
      article = Article.new(entry.article_attributes)
      article.user = user
      article
    end
  end

  # Calls +save+ on every article built by #new_articles.
  #
  # @return [Array<Article>] the articles that were attempted. NOTE: this is
  #   the array itself (always truthy), not a boolean; the result of each
  #   individual +save+ call is ignored here.
  def save
    new_articles.each(&:save)
  end

  # @return [String] the upload's original filename, or 'No file' when no
  #   file (or one without +original_filename+) is attached
  def filename
    @file.try(:original_filename) || 'No file'
  end

  private

  # Wraps every BibTeX::Entry of the parsed bibliography (other element types
  # are skipped) and keeps only the wrappers that are valid.
  #
  # @return [Array<BibtexImport::Entry>]
  def valid_bibliography_entries
    bibliography.data
                .grep(BibTeX::Entry)
                .map { |data| BibtexImport::Entry.new(data) }
                .select(&:valid?)
  end

  # Parses +bibtex_source+ and applies the library's :latex conversion.
  # +to_s+ guards against a nil source when nothing was uploaded.
  def bibliography
    BibTeX::Bibliography.parse(bibtex_source.to_s).convert(:latex)
  end

  # Adapter around a single BibTeX entry that extracts the attributes and
  # identifiers needed to build an Article.
  class Entry
    # Hosts whose URL path is interpreted as a DOI.
    DOI_RESOLVER_HOSTS = %w[dx.doi.org doi.org].freeze

    # HTML-like tags that show up as \textless ... \textgreater sequences.
    LATEX_HTML_TAG = /\\textless\/?\w+\\textgreater/

    # @param data [#[]] the entry; fields are read with string keys
    #   ('title', 'author', 'doi', 'url', 'issn', 'isbn', 'pmid', 'pmcid',
    #   'note', 'annote')
    # @param doi_class [Class] instantiated with a DOI string and asked
    #   +exists?+; injectable for testing
    def initialize(data, doi_class = Doi)
      @data = data
      @doi_class = doi_class
    end

    # @return [Hash] :title, :author, :source ("bibtex") and :identifiers,
    #   suitable for Article.new
    def article_attributes
      {
        title: strip_latex_html_tags(@data['title'].to_s),
        author: strip_latex_html_tags(@data['author'].to_s),
        source: "bibtex",
        identifiers: identifiers
      }
    end

    # An entry is importable when it has a title and at least one identifier.
    def valid?
      # Build the attributes once instead of once per checked key.
      attributes = article_attributes
      attributes[:title].present? && attributes[:identifiers].any?
    end

    # @return [Array<Identifier>] one Identifier per "KEY:value" body
    def identifiers
      identifier_bodies.map { |body| Identifier.new(body: body) }
    end

    private

    # @return [Array<String>] "KEY:value" strings for every identifier that
    #   has a non-blank value, e.g. "DOI:10.1000/182"
    def identifier_bodies
      identifier_pairs
        .select { |_key, value| value.present? }
        .map { |key, value| "#{key.upcase}:#{value}" }
    end

    def identifier_pairs
      {
        doi: parse_and_validate_doi,
        url: @data['url'],
        issn: @data['issn'],
        isbn: @data['isbn'],
        pmid: parse_pmid,
        pmcid: parse_pmcid
      }
    end

    # Takes the DOI from the 'doi' field or, when that is blank, from a DOI
    # resolver URL in the note, and returns it only if +doi_class+ confirms
    # it exists.
    #
    # The result (including nil) is memoized: #valid? and #article_attributes
    # both end up here, and the existence check would otherwise be repeated
    # for every call (presumably an external lookup - confirm against Doi).
    #
    # @return [String, nil]
    def parse_and_validate_doi
      return @validated_doi if defined?(@validated_doi)

      candidate = @data['doi'].to_s.presence || doi_by_url_from(note)
      @validated_doi =
        if candidate && @doi_class.new(candidate).exists?
          candidate
        end
    end

    # Finds the first http(s) URL in +string+ that points at a DOI resolver
    # and returns its path without the leading slash.
    #
    # @return [String, nil] nil when there is no such URL or its path is empty
    def doi_by_url_from(string)
      uris = URI.extract(string, %w[http https]).map { |candidate| parse_uri(candidate) }.compact
      doi_uri = uris.find { |uri| DOI_RESOLVER_HOSTS.include?(uri.host.to_s.downcase) }
      doi_uri.path.sub('/', '').presence if doi_uri
    end

    # URI.parse that yields nil for strings it rejects, so a single odd URL
    # in a note cannot abort the whole import.
    def parse_uri(string)
      URI.parse(string)
    rescue URI::InvalidURIError
      nil
    end

    # @return [String, nil] the 'pmid' field, or the number following
    #   "PMID: " in the note / annote
    def parse_pmid
      @data['pmid'].to_s.presence || notes_text[/PMID: (\d+)/, 1]
    end

    # @return [String, nil] the 'pmcid' field, or the "PMC<digits>" token
    #   following "PMCID: " in the note / annote
    def parse_pmcid
      @data['pmcid'].to_s.presence || notes_text[/PMCID: (PMC\d+)/, 1]
    end

    # Note and annote joined by a newline so a match can never run from the
    # end of one field into the start of the other.
    def notes_text
      "#{note}\n#{annote}"
    end

    def note
      @data['note'].to_s
    end

    def annote
      @data['annote'].to_s
    end

    # Removes \textless tag\textgreater / \textless/tag\textgreater sequences.
    def strip_latex_html_tags(string)
      string.gsub(LATEX_HTML_TAG, '')
    end
  end
end