Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
168 lines (136 sloc) 5.25 KB
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# ISO-2709 file reader
#
# Copyright (C) 2010 BIREME/PAHO/WHO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 2.1 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from struct import unpack
# Line-break characters
CR = '\r'  # carriage return, 0x0D
LF = '\n'  # line feed, 0x0A

# ECMA-48 information separators and their ISO-2709 roles
IS3 = '\x1D'  # Group Separator / ISO-2709 record separator
IS2 = '\x1E'  # Record Separator / ISO-2709 field separator
IS1 = '\x1F'  # Unit Separator

# Record label: fixed length, and the struct layout used to split it
# (the sizes in LABEL_FORMAT add up to LABEL_LEN)
LABEL_FORMAT = '5s c 4s c c 5s 3s c c c c'
LABEL_LEN = 24

# Length of the tag that starts each directory entry
TAG_LEN = 3

SUBFIELD_DELIMITER = '^'
DEFAULT_ENCODING = 'ASCII'
class IsoFile(object):
    '''Iterator over the records stored in an ISO-2709 file.

    The file is opened in binary mode when the instance is created.
    Iterating over the instance builds one ``IsoRecord`` per step; the
    iteration ends when ``IsoRecord`` raises ``StopIteration``.

    The instance can also be used as a context manager, which closes the
    underlying file on exit.

    ``encoding`` is only stored on the instance; this class does not
    decode the data it reads.
    '''

    def __init__(self, filename, encoding=DEFAULT_ENCODING):
        self.file = open(filename, 'rb')
        self.encoding = encoding

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never swallow an exception raised in the with-body

    def __iter__(self):
        return self

    def next(self):
        '''Read and return the next record from the file.'''
        return IsoRecord(self)
    __next__ = next  # Python 3 compatibility

    def read(self, size):
        '''Return up to ``size`` bytes, dropping all CR and LF characters.

        Line breaks do not count towards ``size``: reading continues until
        ``size`` other bytes were collected or the file is exhausted, so
        the result is shorter than ``size`` only at end of file.

        The result has the type returned by the binary file object
        (``bytes`` on Python 3, ``str`` on Python 2).
        '''
        # TODO: this is inefficient but works, patches accepted!
        # NOTE: our fixtures include files which have no linebreaks,
        # files with CR-LF linebreaks and files with LF linebreaks

        # The file is opened in binary mode, so the characters to delete
        # must be bytes as well; on Python 3 mixing str and bytes here
        # raises TypeError.
        linebreaks = (CR + LF).encode('ascii')
        chunks = []
        count = 0
        while count < size:
            chunk = self.file.read(size - count)
            if not chunk:
                break  # end of file
            chunk = chunk.translate(None, linebreaks)
            count += len(chunk)
            chunks.append(chunk)
        return b''.join(chunks)

    def close(self):
        '''Close the underlying file.'''
        self.file.close()
class IsoRecord(object):
    '''One ISO-2709 record, read from ``iso_file`` on construction.

    Creating an instance consumes one record from ``iso_file`` (any object
    with a ``read(size)`` method): first the label, then the directory,
    then the contents of every field listed in the directory.

    Each part of the label becomes an instance attribute named after the
    matching entry of ``label_part_names``.  The parsed ``Field`` objects
    are kept in ``self.directory``; iterating over the record yields them
    in directory order.

    Raises:
        StopIteration: when ``iso_file`` has no more data (empty label).
        ValueError: when the label or a field is shorter than expected.
    '''

    label_part_names = ('rec_len rec_status impl_codes indicator_len identifier_len'
                        ' base_addr user_defined'
                        # directory map:
                        ' fld_len_len start_len impl_len reserved').split()
    rec_len = 0

    def __init__(self, iso_file=None):
        self.iso_file = iso_file
        self.load_label()
        self.load_directory()
        self.load_fields()

    def __len__(self):
        '''Return the record length announced in the label.'''
        return self.rec_len

    def load_label(self):
        '''Read the record label and store its parts as attributes.

        Parts whose name ends in ``_len`` or ``_addr`` are converted to
        ``int``; the other parts are stored as read.
        '''
        label = self.iso_file.read(LABEL_LEN)
        if len(label) == 0:
            # nothing left to read: signals the end of the iteration
            raise StopIteration
        elif len(label) != LABEL_LEN:
            raise ValueError('Invalid record label: "%s"' % label)
        parts = unpack(LABEL_FORMAT, label)
        for name, part in zip(self.label_part_names, parts):
            if name.endswith(('_len', '_addr')):
                part = int(part)
            setattr(self, name, part)

    def show_label(self):
        '''Print every label part, one ``name : value`` pair per line.'''
        for name in self.label_part_names:
            print('%15s : %r' % (name, getattr(self, name)))

    def load_directory(self):
        '''Read the directory entries into ``self.directory``.

        The entry layout (tag, field length, start position,
        implementation-defined part) uses the sizes given by the label.
        Reading stops at the first entry that does not start with a
        digit; that single character is consumed and discarded.
        '''
        fmt_dir = '%ss %ss %ss %ss' % (TAG_LEN, self.fld_len_len,
                                       self.start_len, self.impl_len)
        entry_len = TAG_LEN + self.fld_len_len + self.start_len + self.impl_len
        self.directory = []
        while True:
            char = self.iso_file.read(1)
            if not char.isdigit():
                break
            entry = char + self.iso_file.read(entry_len - 1)
            self.directory.append(Field(*unpack(fmt_dir, entry)))

    def load_fields(self):
        '''Read the contents of each field listed in the directory.

        Sets ``field.value`` (without the trailing field separator) and,
        when the label declares indicators, ``field.indicator``.

        Raises:
            ValueError: when fewer bytes than announced could be read.
        '''
        for field in self.directory:
            if self.indicator_len > 0:
                field.indicator = self.iso_file.read(self.indicator_len)
            # XXX: lilacs30.iso has an identifier_len == 2,
            # but we need to ignore it to successfully read the field contents
            # TODO: find out when to ignore the identifier_len,
            # or fix the lilacs30.iso fixture
            #
            ##if self.identifier_len > 0: #
            ##    field.identifier = self.iso_file.read(self.identifier_len)
            value = self.iso_file.read(len(field))
            if len(value) != len(field):
                # explicit check instead of assert: asserts vanish under -O
                raise ValueError('Truncated field %r: expected %d bytes, got %d'
                                 % (field.tag, len(field), len(value)))
            field.value = value[:-1]  # remove trailing field separator
        self.iso_file.read(1)  # discard record separator

    def __iter__(self):
        '''Return a new iterator over the fields, in directory order.'''
        # Returning self here would make a for-loop call next() forever,
        # because next() is a generator function and never raises
        # StopIteration itself.
        return iter(self.directory)

    def next(self):
        '''Return a generator over the fields.

        Kept for callers that use ``record.next()`` directly; plain
        iteration goes through ``__iter__``.
        '''
        for field in self.directory:
            yield field
    __next__ = next  # Python 3 compatibility

    def dump(self):
        '''Print the tag and value of every field, one field per line.'''
        for field in self.directory:
            print('%3s %r' % (field.tag, field.value))
class Field(object):
    '''A directory entry: tag, length, start position and impl part.

    ``len`` and ``start`` are converted to ``int``; ``tag`` and ``impl``
    are stored unchanged.  ``len(field)`` returns the stored length.
    '''

    def __init__(self, tag, len, start, impl):
        self.tag, self.impl = tag, impl
        self.len, self.start = int(len), int(start)

    def __len__(self):
        return self.len

    def show(self):
        '''Print each attribute as a ``name : value`` line.'''
        attr_names = 'tag len start impl'.split()
        for attr in attr_names:
            line = '%15s : %r' % (attr, getattr(self, attr))
            print(line)
def test():
    '''Run the doctests stored in the file ``iso2709_test.txt``.'''
    from doctest import testfile
    testfile('iso2709_test.txt')
# Executing this module as a script runs its doctest suite.
if '__main__' == __name__:
    test()