/
datcom2xml.py
143 lines (120 loc) · 4.55 KB
/
datcom2xml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/python
# Lenna X. Peterson
# arklenna@gmail.com
import sys
import os
import re
from math import floor, ceil
class Datcom2xml:
    """Read the derivative table out of a datcom file and split it into cells.

    Constructing an instance runs the whole pipeline: the input file is
    scanned for the table title, the header and data lines that follow it
    are collected, and every data line is cut into columns at positions
    derived from the whitespace of its header line.  The cells are currently
    only printed; writing the output file is not implemented yet.

    All ``print`` calls take a single argument so the module behaves the same
    under Python 2 (print statement) and Python 3 (print function).
    """

    ##### Constructor #####
    def __init__(self, infile, outfile):
        """Validate *infile*, then read, parse and (eventually) write.

        Args:
            infile: path of the datcom file to read.
            outfile: path of the output file; stored but not yet used.

        Raises:
            SystemExit: with status 1 if *infile* is not readable.
        """
        #### Constants ####
        self._title_str = "-DERIVATIVE (PER DEGREE)-"
        self._header_re = re.compile(r"0\s+ALPHA")
        self._footer_re = re.compile(r"1\s+")

        #### Files ####
        if not os.access(infile, os.R_OK):
            # The format operator must be applied to the message itself, not
            # to the return value of write().
            sys.stderr.write("Could not access datcom file '%s'\n" % infile)
            raise SystemExit(1)
        # NOTE: outfile is not checked for writability; that can only be
        # established by actually opening it for writing.
        self._datcom = infile
        self._xml = outfile

        #### Containers ####
        # Both lists hold (header_number, raw_line) tuples; header_number
        # ties each data row to the header line that precedes it.
        self._headers = []
        self._rows = []

        self._readDatcom()
        self._parseDatcom()
        self._writeDatcom()

    ##### Methods #####
    def _readDatcom(self):
        """Collect header and data lines from the table.

        Lines before the title string are ignored.  After the title, every
        line matching the header pattern starts a new numbered header, lines
        that follow a header are stored as data rows (a lone "0" line is
        skipped), and the first footer line seen after a header ends the
        scan.
        """
        with open(self._datcom) as fh:
            print("Opened file")
            seen_title = False
            header = 0  # number of header lines found so far
            for line in fh:
                if not seen_title:
                    ### Start storing at title
                    if self._title_str in line:
                        seen_title = True
                        print("Found title: %s" % line)
                    continue

                ### Stop looping at footer
                if header and self._footer_re.match(line):
                    print("Found footer: %s" % line)
                    break

                ### Find, count, and store all header rows
                if self._header_re.match(line):
                    header += 1
                    self._headers.append((header, line))
                    print("Found header: %s" % line)
                ### Store all data rows
                elif header and line.strip() != "0":
                    self._rows.append((header, line))

    @staticmethod
    def _header_breakpoints(line):
        """Return the slice offsets that separate the columns of a header.

        The leading "0" of the header is treated as a space.  The first
        offset is always 0; each later offset lies in the whitespace gap
        between two adjacent column titles, at the end of the left title
        plus the smaller half of the gap.
        """
        line = re.sub(r"^0", " ", line)
        # Each match is (leading whitespace, column title); because the
        # whitespace group is greedy, a match starts where the previous
        # one ended, so match.end() is the end of that column's title.
        matches = list(re.finditer(r"(\s*)(\S+)", line))
        points = [0]
        for left, right in zip(matches, matches[1:]):
            gap = len(right.group(1))
            points.append(left.end() + gap // 2)
        return points

    @staticmethod
    def _split_row(line, points):
        """Cut *line* at the offsets in *points* and strip each piece."""
        # A trailing None makes the last slice run to the end of the line.
        bounds = zip(points, points[1:] + [None])
        return [line[start:stop].strip() for start, stop in bounds]

    def _parseDatcom(self):
        """Slice every stored data row into cells and print them.

        Prints the breakpoint table (header number -> offsets) first, then
        each cell of each row on its own line, with blank lines between rows.
        """
        ### Loop through headers to detect whitespace to slice data lines
        breakpoints = {}
        for head_num, line in self._headers:
            breakpoints[head_num] = self._header_breakpoints(line)
        print(breakpoints)

        ### Slice data lines based on header whitespace
        for head_num, line in self._rows:
            for cell in self._split_row(line, breakpoints[head_num]):
                print(cell)
            print("\n")

    def _writeDatcom(self):
        """Placeholder: writing the output file is not implemented."""
        pass
if __name__ == "__main__":
    # Script entry point: expects exactly two arguments, the input file
    # followed by the output file.
    if len(sys.argv) != 3:
        sys.stderr.write("Args: input_file output_file\n")
        # A bare SystemExit exits with status 0, which hides the failure
        # from calling scripts; 2 is the customary status for usage errors.
        raise SystemExit(2)
    infile, outfile = sys.argv[1:3]
    # Constructing the object runs the whole conversion.
    d = Datcom2xml(infile, outfile)
# vim:sw=4:ts=4:expandtab