# arklenna@gmail.com

import sys
import os
import re
from math import floor, ceil


class Datcom2xml(object):
    """Extract the "-DERIVATIVE (PER DEGREE)-" table from a datcom output file.

    Constructing an instance runs the whole pipeline: the input file is
    read, the table's header and data lines are collected, and every data
    line is cut into cells using column boundaries derived from the
    whitespace of its header line.  The cells are currently only printed;
    ``_writeDatcom`` is still a stub, so ``outfile`` is stored but not
    written.
    """

    ##### Constructor #####
    def __init__(self, infile, outfile):
        """Read and parse ``infile``.

        infile  -- path of the datcom output file; must be readable.
        outfile -- path intended for the XML output (not written yet).

        Raises SystemExit (status 1) after reporting on stderr when
        ``infile`` cannot be read.
        """
        #### Constants ####
        self._title_str = "-DERIVATIVE (PER DEGREE)-"
        self._header_re = re.compile(r"0\s+ALPHA")
        self._footer_re = re.compile(r"1\s+")

        #### Files ####
        if not os.access(infile, os.R_OK):
            # Format the message *before* writing it: applying % to the
            # return value of write() raises TypeError instead of reporting.
            sys.stderr.write("Could not access datcom file '%s'\n" % infile)
            # Non-zero status so calling shells can detect the failure.
            raise SystemExit(1)
        ## not sure how to check writability other than try: open(outfile, w)
        self._datcom = infile
        self._xml = outfile

        #### Containers ####
        # Both lists hold (header_number, raw_line) tuples; header_number
        # ties each data row to the header that precedes it.
        self._headers = []
        self._rows = []

        self._readDatcom()
        self._parseDatcom()
        self._writeDatcom()

    ##### Methods #####
    def _readDatcom(self):
        """Collect header and data lines from the table.

        Nothing is stored until a line containing the title string has been
        seen.  After that, header lines are numbered and stored, data lines
        are stored together with the number of the latest header, and
        reading stops at the first footer line that follows a header.
        """
        with open(self._datcom) as fh:
            print("Opened file")
            ### Set flags
            title = 0
            header = 0
            for line in fh:
                if not title:
                    ### Start storing at title
                    if self._title_str in line:
                        title += 1
                        print("Found title: %s" % line)
                    continue

                ### Stop looping at footer
                if header and self._footer_re.match(line):
                    print("Found footer: %s" % line)
                    break
                ### Find, count, and store all header rows
                if self._header_re.match(line):
                    header += 1
                    self._headers.append((header, line))
                    print("Found header: %s" % line)
                ### Store all data rows
                elif header:
                    # a line holding only "0" is a separator, not data
                    if line.strip() == "0":
                        continue
                    self._rows.append((header, line))

    @staticmethod
    def _headerBreakpoints(line):
        """Return the slice offsets that separate the columns of a header.

        The boundary between two neighbouring column titles is placed in
        the middle of the whitespace between them; when that whitespace has
        odd length the extra character goes to the right-hand column.  The
        returned list starts with 0 and has one entry per column.
        """
        # replace leading 0 with space
        line = re.sub(r"^0", " ", line)
        # find all pairs of (whitespace, non-whitespace)
        space_len = []
        col_len = []
        for m in re.finditer(r"(\s*)([^\s]+)", line):
            space_len.append(len(m.group(1)))
            col_len.append(len(m.group(2)))

        # first value is 0 for [0:x] slice
        header_breakpoints = [0]
        base = 0
        for n in range(1, len(col_len)):
            # prev col width + smaller half of trailing whitespace
            right_half = col_len[n - 1] + int(floor(0.5 * space_len[n]))
            if n == 1:
                # all of 0th whitespace
                left_half = space_len[n - 1]
            else:
                # larger half of leading whitespace
                left_half = int(ceil(0.5 * space_len[n - 1]))
            # advance to the next breakpoint
            base += left_half + right_half
            header_breakpoints.append(base)
        return header_breakpoints

    @staticmethod
    def _sliceRow(line, bp):
        """Cut ``line`` at the offsets in ``bp`` and return stripped cells.

        The last cell runs from the final breakpoint to the end of the line.
        """
        cells = []
        last = len(bp) - 1
        for i, start in enumerate(bp):
            if i == last:
                cell = line[start:]
            else:
                cell = line[start:bp[i + 1]]
            cells.append(cell.strip())
        return cells

    def _parseDatcom(self):
        """Slice every stored data row into cells and print them."""
        ### Loop through headers to detect whitespace to slice data lines
        breakpoints = dict()
        for head_num, line in self._headers:
            # store header breakpoints keyed by head_num
            breakpoints[head_num] = self._headerBreakpoints(line)

        print(breakpoints)

        ### Slice data lines based on header whitespace
        for head_num, line in self._rows:
            for cell in self._sliceRow(line, breakpoints[head_num]):
                print(cell)
            print("\n")

    def _writeDatcom(self):
        """Placeholder for the XML writer; does nothing yet."""
        pass


if __name__ == "__main__":

    if len(sys.argv) == 3:
        infile = sys.argv[1]
        outfile = sys.argv[2]
        d = Datcom2xml(infile, outfile)
    else:
        sys.stderr.write("Args: input_file output_file\n")
        # usage error: exit with a failure status
        raise SystemExit(1)


# vim:sw=4:ts=4:expandtab