forked from nschloe/meshio
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vtu_io.py
433 lines (361 loc) · 14.5 KB
/
vtu_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
# -*- coding: utf-8 -*-
#
'''
I/O for VTU.
.. moduleauthor:: Nico Schlömer <nico.schloemer@gmail.com>
'''
import base64
import logging
try:
from StringIO import cStringIO as BytesIO
except ImportError:
from io import BytesIO
import sys
import zlib
import numpy
from .__about__ import __version__
from .vtk_io import vtk_to_meshio_type, meshio_to_vtk_type, raw_from_cell_data
from .gmsh_io import num_nodes_per_cell
def num_bytes_to_num_base64_chars(num_bytes):
    '''Return the length of the base64 encoding of `num_bytes` raw bytes.

    Base64 maps each group of 3 input bytes (the last group possibly
    partial) onto 4 output characters.
    '''
    num_groups = (num_bytes + 2) // 3
    return num_groups * 4
def _cells_from_data(connectivity, offsets, types, cell_data_raw):
    '''Translate flat VTU cell arrays into the meshio cells/cell_data dicts.

    `connectivity` is a one-dimensional index vector
    (p00, p01, ..., p0k, p10, p11, ..., p1k, ...); `offsets[i]` points one
    past the last entry of cell i, and `types[i]` is its VTK type id.
    '''
    assert len(offsets) == len(types)

    # Collect the cell indices into bins, one per VTK type.
    # See <https://stackoverflow.com/q/47310359/353337> for better
    # alternatives.
    type_bins = {
        vtk_type: numpy.where(types == vtk_type)[0]
        for vtk_type in numpy.unique(types)
        }

    cells = {}
    cell_data = {}
    for vtk_type, idx in type_bins.items():
        meshio_type = vtk_to_meshio_type[vtk_type]
        num_nodes = num_nodes_per_cell[meshio_type]
        # The offsets point to the _end_ of the indices, so step back
        # `num_nodes` entries to recover each cell's node ids.
        node_indices = numpy.add.outer(
            offsets[idx], numpy.arange(-num_nodes, 0)
            )
        cells[meshio_type] = connectivity[node_indices]
        cell_data[meshio_type] = {
            key: value[idx] for key, value in cell_data_raw.items()
            }
    return cells, cell_data
# Map between VTU type-name strings and the corresponding numpy dtypes.
# The VTU names differ from numpy's only in capitalization.
vtu_to_numpy_type = {
    name: numpy.dtype(name.lower())
    for name in [
        'Float32', 'Float64',
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
        ]
    }
numpy_to_vtu_type = {
    dtype: name for name, dtype in vtu_to_numpy_type.items()
    }
# pylint: disable=too-many-instance-attributes
class VtuReader(object):
    '''Helper class for reading VTU files. Some properties are global to the
    file (e.g., byte_order), and instead of passing around these parameters,
    make them properties of this class.
    '''
    def __init__(self, filename):
        from lxml import etree as ET

        points = None
        point_data = {}
        cell_data_raw = {}
        cells = {}
        field_data = {}

        # libxml2 and with it lxml have a safety net for memory overflows;
        # see, e.g., <https://stackoverflow.com/q/33828728/353337>.
        # It cannot parse large files and instead throws the exception
        #
        #     lxml.etree.XMLSyntaxError: xmlSAX2Characters: huge text node
        #
        # Setting huge_tree=True removes the limit. Another alternative would
        # be to use Python's native xml parser to avoid this error,
        # import xml.etree.cElementTree as ET
        parser = ET.XMLParser(remove_comments=True, huge_tree=True)
        tree = ET.parse(filename, parser)
        root = tree.getroot()

        assert root.tag == 'VTKFile'
        assert root.attrib['type'] == 'UnstructuredGrid'
        assert root.attrib['version'] in ['0.1', '1.0'], \
            'Unknown VTU file version \'{}\'.'.format(root.attrib['version'])

        try:
            assert root.attrib['compressor'] == 'vtkZLibDataCompressor'
        except KeyError:
            # Uncompressed files don't carry the attribute at all.
            pass

        self.header_type = (
            root.attrib['header_type'] if 'header_type' in root.attrib
            else 'UInt32'
            )

        try:
            self.byte_order = root.attrib['byte_order']
            assert self.byte_order in ['LittleEndian', 'BigEndian'], \
                'Unknown byte order \'{}\'.'.format(self.byte_order)
        except KeyError:
            self.byte_order = None

        grid = None
        self.appended_data = None
        for c in root:
            if c.tag == 'UnstructuredGrid':
                assert grid is None, 'More than one UnstructuredGrid found.'
                grid = c
            else:
                assert c.tag == 'AppendedData', \
                    'Unknown main tag \'{}\'.'.format(c.tag)
                assert self.appended_data is None, \
                    'More than one AppendedData found.'
                assert c.attrib['encoding'] == 'base64'
                self.appended_data = c.text.strip()
                # The appended data always begins with a (meaningless)
                # underscore.
                assert self.appended_data[0] == '_'
                self.appended_data = self.appended_data[1:]

        assert grid is not None, 'No UnstructuredGrid found.'

        piece = None
        for c in grid:
            if c.tag == 'Piece':
                assert piece is None, 'More than one Piece found.'
                piece = c
            else:
                assert c.tag == 'FieldData', \
                    'Unknown grid subtag \'{}\'.'.format(c.tag)
                # TODO test field data
                for data_array in c:
                    field_data[data_array.attrib['Name']] = \
                        self.read_data(data_array)

        assert piece is not None, 'No Piece found.'
        num_points = int(piece.attrib['NumberOfPoints'])
        num_cells = int(piece.attrib['NumberOfCells'])

        for child in piece:
            if child.tag == 'Points':
                data_arrays = list(child)
                assert len(data_arrays) == 1
                data_array = data_arrays[0]
                assert data_array.tag == 'DataArray'
                points = self.read_data(data_array)
                num_components = int(data_array.attrib['NumberOfComponents'])
                points = points.reshape(num_points, num_components)
            elif child.tag == 'Cells':
                for data_array in child:
                    assert data_array.tag == 'DataArray'
                    cells[data_array.attrib['Name']] = \
                        self.read_data(data_array)
                assert len(cells['offsets']) == num_cells
                assert len(cells['types']) == num_cells
            elif child.tag == 'PointData':
                for c in child:
                    assert c.tag == 'DataArray'
                    point_data[c.attrib['Name']] = self.read_data(c)
            else:
                assert child.tag == 'CellData', \
                    'Unknown tag \'{}\'.'.format(child.tag)
                for c in child:
                    assert c.tag == 'DataArray'
                    cell_data_raw[c.attrib['Name']] = self.read_data(c)

        assert points is not None
        assert 'connectivity' in cells
        assert 'offsets' in cells
        assert 'types' in cells

        cells, cell_data = _cells_from_data(
            cells['connectivity'], cells['offsets'], cells['types'],
            cell_data_raw
            )

        self.points = points
        self.cells = cells
        self.point_data = point_data
        self.cell_data = cell_data
        self.field_data = field_data
        return

    def read_binary(self, data, data_type):
        '''Decode a base64-encoded, zlib-compressed VTU data block.

        :param data: the base64 text (header + compressed blocks).
        :param data_type: a VTU type name such as 'Float64'.
        :returns: a flat numpy array of the decoded values.
        '''
        # first read the number of blocks; it determines the size of the
        # header
        dtype = vtu_to_numpy_type[self.header_type]
        num_bytes_per_item = numpy.dtype(dtype).itemsize
        num_chars = num_bytes_to_num_base64_chars(num_bytes_per_item)
        byte_string = base64.b64decode(data[:num_chars])[:num_bytes_per_item]
        # numpy.fromstring is deprecated for binary input (and removed in
        # NumPy 2.0); numpy.frombuffer is the drop-in replacement.
        num_blocks = numpy.frombuffer(byte_string, dtype)[0]

        # read the entire header
        num_header_items = 3 + num_blocks
        num_header_bytes = num_bytes_per_item * num_header_items
        num_header_chars = num_bytes_to_num_base64_chars(num_header_bytes)
        byte_string = base64.b64decode(data[:num_header_chars])
        header = numpy.frombuffer(byte_string, dtype)

        # header layout:
        # num_blocks = header[0]
        # max_uncompressed_block_size = header[1]
        # last_compressed_block_size = header[2]
        block_sizes = header[3:]

        # Read the block data
        byte_array = base64.b64decode(data[num_header_chars:])
        dtype = vtu_to_numpy_type[data_type]
        num_bytes_per_item = numpy.dtype(dtype).itemsize

        # Compute the start/end byte offset of each compressed block.
        byte_offsets = \
            numpy.empty(block_sizes.shape[0]+1, dtype=block_sizes.dtype)
        byte_offsets[0] = 0
        numpy.cumsum(block_sizes, out=byte_offsets[1:])

        # Decompress each block and concatenate the results.
        block_data = numpy.concatenate([
            numpy.frombuffer(zlib.decompress(
                byte_array[byte_offsets[k]:byte_offsets[k+1]]
                ), dtype=dtype)
            for k in range(num_blocks)
            ])

        return block_data

    def read_data(self, c):
        '''Read a <DataArray> element in 'ascii', 'binary', or 'appended'
        format and return its contents as a numpy array.
        '''
        if c.attrib['format'] == 'ascii':
            # ascii: whitespace-separated values in the element text
            data = numpy.array(
                c.text.split(),
                dtype=vtu_to_numpy_type[c.attrib['type']]
                )
        elif c.attrib['format'] == 'binary':
            data = self.read_binary(c.text.strip(), c.attrib['type'])
        else:
            # appended data: offset into the file-global AppendedData blob
            assert c.attrib['format'] == 'appended', \
                'Unknown data format \'{}\'.'.format(c.attrib['format'])
            offset = int(c.attrib['offset'])
            data = self.read_binary(
                self.appended_data[offset:], c.attrib['type']
                )

        if 'NumberOfComponents' in c.attrib:
            data = data.reshape(-1, int(c.attrib['NumberOfComponents']))
        return data
def read(filename):
    '''Read a VTU file and return the tuple
    (points, cells, point_data, cell_data, field_data).
    '''
    r = VtuReader(filename)
    return r.points, r.cells, r.point_data, r.cell_data, r.field_data
def write(filename,
          points,
          cells,
          point_data=None,
          cell_data=None,
          field_data=None,
          write_binary=True,
          pretty_xml=True):
    '''Write a mesh to a VTU (VTK XML UnstructuredGrid) file.

    :param filename: destination file path.
    :param points: (n, d) array of point coordinates.
    :param cells: dict mapping meshio cell type names to index arrays.
    :param point_data: dict of per-point data arrays.
    :param cell_data: dict of dicts, per cell type then per data name.
    :param field_data: dict of global data arrays.
    :param write_binary: write base64-encoded zlib-compressed data if True,
        plain ASCII otherwise.
    :param pretty_xml: pretty-print the XML output.
    '''
    from lxml import etree as ET

    if not write_binary:
        logging.warning('VTU ASCII files are only meant for debugging.')

    point_data = {} if point_data is None else point_data
    cell_data = {} if cell_data is None else cell_data
    field_data = {} if field_data is None else field_data

    header_type = 'UInt32'

    vtk_file = ET.Element(
        'VTKFile',
        type='UnstructuredGrid',
        version='0.1',
        # Use the native endianness. Not strictly necessary, but this
        # simplifies things a bit.
        byte_order=(
            'LittleEndian' if sys.byteorder == 'little' else 'BigEndian'
            ),
        header_type=header_type,
        compressor='vtkZLibDataCompressor'
        )

    # Convert all data to the native byte order. Don't use byteswap to make
    # sure that the dtype is changed; see
    # <https://github.com/numpy/numpy/issues/10372>.
    # NOTE: the converted arrays are collected into new containers; the
    # previous code rebound the loop variable, silently discarding the
    # astype() results.
    points = points.astype(points.dtype.newbyteorder('='))
    point_data = {
        name: data.astype(data.dtype.newbyteorder('='))
        for name, data in point_data.items()
        }
    cell_data = {
        cell_type: {
            name: dat.astype(dat.dtype.newbyteorder('='))
            for name, dat in data.items()
            }
        for cell_type, data in cell_data.items()
        }
    field_data = {
        name: data.astype(data.dtype.newbyteorder('='))
        for name, data in field_data.items()
        }

    def chunk_it(array, n):
        # Split `array` into consecutive chunks of length n (the last one
        # may be shorter).
        out = []
        k = 0
        while k*n < len(array):
            out.append(array[k*n:(k+1)*n])
            k += 1
        return out

    def numpy_to_xml_array(parent, name, fmt, data):
        # Append a <DataArray> child holding `data`, either base64-encoded
        # zlib-compressed (binary) or ASCII-formatted with `fmt`.
        da = ET.SubElement(
            parent, 'DataArray',
            type=numpy_to_vtu_type[data.dtype],
            Name=name,
            )
        if len(data.shape) == 2:
            da.set('NumberOfComponents', '{}'.format(data.shape[1]))
        if write_binary:
            da.set('format', 'binary')
            max_block_size = 32768
            # tostring() was removed in NumPy 2.0; tobytes() is identical.
            data_bytes = data.tobytes()
            blocks = chunk_it(data_bytes, max_block_size)
            if not blocks:
                # An empty array still needs one (empty) block so that
                # blocks[-1] below is well-defined and the header is valid.
                blocks = [b'']
            num_blocks = len(blocks)
            last_block_size = len(blocks[-1])
            compressed_blocks = [zlib.compress(block) for block in blocks]
            # header: num_blocks, max uncompressed block size, size of the
            # last uncompressed block, then all compressed block sizes
            header = numpy.array(
                [num_blocks, max_block_size, last_block_size]
                + [len(b) for b in compressed_blocks],
                dtype=vtu_to_numpy_type[header_type]
                )
            da.text = (
                base64.b64encode(header.tobytes())
                + base64.b64encode(b''.join(compressed_blocks))
                ).decode()
        else:
            da.set('format', 'ascii')
            s = BytesIO()
            numpy.savetxt(s, data.flatten(), fmt)
            da.text = s.getvalue().decode()
        return

    comment = \
        ET.Comment('This file was created by meshio v{}'.format(__version__))
    vtk_file.insert(1, comment)

    grid = ET.SubElement(vtk_file, 'UnstructuredGrid')
    total_num_cells = sum(len(c) for c in cells.values())
    piece = ET.SubElement(
        grid, 'Piece',
        NumberOfPoints='{}'.format(len(points)),
        NumberOfCells='{}'.format(total_num_cells)
        )

    # points
    if points is not None:
        pts = ET.SubElement(piece, 'Points')
        numpy_to_xml_array(pts, 'Points', '%.11e', points)

    if cells is not None:
        cls = ET.SubElement(piece, 'Cells')
        # create connectivity, offset, type arrays
        connectivity = numpy.concatenate([
            numpy.concatenate(v) for v in cells.values()
            ])
        # offsets (each one points past the last node of its cell)
        offsets = [
            v.shape[1] * numpy.arange(1, v.shape[0]+1)
            for v in cells.values()
            ]
        # make the per-type offsets cumulative across types
        for k in range(1, len(offsets)):
            offsets[k] += offsets[k-1][-1]
        offsets = numpy.concatenate(offsets)
        # types
        types = numpy.concatenate([
            numpy.full(len(v), meshio_to_vtk_type[k])
            for k, v in cells.items()
            ])

        numpy_to_xml_array(cls, 'connectivity', '%d', connectivity)
        numpy_to_xml_array(cls, 'offsets', '%d', offsets)
        numpy_to_xml_array(cls, 'types', '%d', types)

    if point_data:
        pd = ET.SubElement(piece, 'PointData')
        for name, data in point_data.items():
            numpy_to_xml_array(pd, name, '%.11e', data)

    if cell_data:
        cd = ET.SubElement(piece, 'CellData')
        for name, data in raw_from_cell_data(cell_data).items():
            numpy_to_xml_array(cd, name, '%.11e', data)

    write_xml(filename, vtk_file, pretty_xml)
    return
def write_xml(filename, root, pretty_print=False):
    '''Serialize the XML tree rooted at `root` to `filename`.'''
    from lxml import etree as ET
    ET.ElementTree(root).write(filename, pretty_print=pretty_print)
    return