This repository has been archived by the owner on Oct 25, 2022. It is now read-only.
/
subfield.py
93 lines (71 loc) · 2.75 KB
/
subfield.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# ISIS-DM: the ISIS Data Model API
#
# Copyright (C) 2010 BIREME/PAHO/WHO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 2.1 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
MAIN_SUBFIELD_KEY = '_'
SUBFIELD_MARKER_RE = re.compile(r'\^([a-z0-9])', re.IGNORECASE)
DEFAULT_ENCODING = u'utf-8'
def expand(content, subkeys=None):
''' Parse a field into an association list of keys and subfields
>>> expand('zero^1one^2two^3three')
[('_', 'zero'), ('1', 'one'), ('2', 'two'), ('3', 'three')]
'''
if subkeys is None:
regex = SUBFIELD_MARKER_RE
elif subkeys == '':
return [(MAIN_SUBFIELD_KEY, content)]
else:
regex = re.compile(r'\^(['+subkeys+'])', re.IGNORECASE)
content = content.replace('^^', '^^ ')
parts = []
start = 0
key = MAIN_SUBFIELD_KEY
while True:
found = regex.search(content, start)
if found is None: break
parts.append((key, content[start:found.start()].rstrip()))
key = found.group(1).lower()
start = found.end()
parts.append((key, content[start:].rstrip()))
return parts
class CompositeString(object):
''' Represent an Isis field, with subfields, using
Python native datastructures
>>> author = CompositeString('John Tenniel^xillustrator',
... subkeys='x')
>>> unicode(author)
u'John Tenniel^xillustrator'
'''
def __init__(self, isis_raw, subkeys=None, encoding=DEFAULT_ENCODING):
if not isinstance(isis_raw, basestring):
raise Invalid('%r value must be unicode or str instance' % isis_raw)
self.__isis_raw = isis_raw.decode(encoding)
self.__expanded = expand(self.__isis_raw, subkeys)
def __getitem__(self, key):
for subfield in self.__expanded:
if subfield[0] == key:
return subfield[1]
else:
raise KeyError(key)
def __unicode__(self):
return self.__isis_raw
def __str__(self):
return str(self.__isis_raw)
def test():
import doctest
doctest.testmod()
if __name__=='__main__':
test()