Skip to content

Commit

Permalink
Merge pull request #134 from mkuron/blockfile-py
Browse files Browse the repository at this point in the history
Blockfile parser in Python
  • Loading branch information
fweik committed Feb 24, 2014
2 parents a979f5c + f299a6d commit 00e4f0f
Showing 1 changed file with 175 additions and 0 deletions.
175 changes: 175 additions & 0 deletions tools/blockfile.py
@@ -0,0 +1,175 @@
import re
import numpy
import sys
import os
import __builtin__

col_types = {'^di': 3,
'^ext_f': 3,
'^ext_t': 3,
'^f': 3,
'^fix': 3,
'^i': 1,
'^ma': 1,
'^mo': 1,
'^omega': 3,
'^omega_b': 3,
'^omega_l': 3,
'^p': 3,
'^q$': 1,
'^qu': 4,
'^tbf': 3,
'^torque': 3,
'^torque_b': 3,
'^torque_l': 3,
'^ty': 1,
'^v': 3,
'^vi': 1,
'^vs': 2}

re_block_type = re.compile('^{([a-z_]+)\s')
re_particles = re.compile('{particles\s*{([a-z_\s]*)}((?:\s*{.*?})*)\s*}')
re_particle = re.compile('\{(\s*(?:[0-9\.-]+\s*)*)\}')
re_space = re.compile('\s')
re_variable = re.compile('\{(?!variable\s)([^\s\}]+)([^\}]*)\}')
re_int_list = re.compile('^[0-9\s]+$')
re_float_list = re.compile('^[0-9\.\s]+$')
re_int = re.compile('[0-9]+')
re_float = re.compile('[0-9\.]+')

re_col_types = {}
for col_type in col_types:
re_col_types[col_type] = re.compile(col_type)

def load_col_types(blockfile_support_tcl):
global col_types, re_col_types
col_types, re_col_types = {}, {}
with __builtin__.open(blockfile_support_tcl) as f:
for i in re.finditer('"(\^[a-z_]+\$*)"\s*{.*?; incr idx ([0-9]*)\s*}', f.read(), re.DOTALL):
var_name, var_cols = i.group(1), i.group(2)
if var_cols == '': var_cols = 1
else: var_cols = int(var_cols)
col_types[var_name] = var_cols
for col_type in col_types:
re_col_types[col_type] = re.compile(col_type)

def process(block):
"""
Processes the block and returns the block's type and the block's contents as a tuple.
"""
block_type = re_block_type.match(block).group(1)

# particle data looks like this:
# {particles {id pos v q f}
# {0 7.875 9.0 0.0 0.0 0.0 0.0 -0.6 7.29706 -1.08036 -3.39398}
# {1 7.875 9.0 1.2 0.0 0.0 0.0 -0.6 1.37885 -0.320172 -2.49209}
if block_type == 'particles':
particles = re_particles.match(block)

# get the field labels and determine how many columns a field consists of (i.e. dimensionality of vector quantities)
fields = particles.group(1).split(' ')
field_lengths = [0]*len(fields)
for i,field in enumerate(fields):
for col_type in col_types:
if re_col_types[col_type].match(field) is not None:
field_lengths[i] = col_types[col_type]
break
if field_lengths[i] == 0:
raise Exception("Field '%s' is unknown. Dimensionality of this field cannot be determined." % field)

# split the particle block into the individual particles
particles = [re_space.split(x) for x in re_particle.findall(particles.group(2))]
N = len(particles)

# create one empty numpy array per field with the appropriate dimensionality
particle_data = {}
for i,field in enumerate(fields):
dtype = float
if re_col_types['^i'].match(field) is not None or re_col_types['^ty'].match(field) is not None:
# the ID and type fields are integer, everything else is a float
dtype = int
particle_data[field] = numpy.empty((N,field_lengths[i]), dtype=dtype)

# fill the numpy arrays with the particle data
for i,particle in enumerate(particles):
fieldcount = 0
for j,field in enumerate(fields):
particle_data[field][i] = particle[fieldcount:fieldcount+field_lengths[j]]
fieldcount += field_lengths[j]

return block_type, particle_data

# variables look like this:
# {variable {box_l 18.0 18.0 480.0} {test 18.0} {stringtest test} }
if block_type == 'variable':
variables = {}
# extract all variable names and values from the block
for m in re_variable.finditer(block):
try:
# convert it to an integer/float if it's a (list of) integer/float variable(s)
value = m.group(2).strip()
if re_int_list.match(value) is not None:
value = numpy.array(re_int.findall(value), dtype=int)
elif re_float_list.match(value) is not None:
value = numpy.array(re_float.findall(value), dtype=float)
variables[m.group(1)] = value
except:
# return it as a string if we can't convert it to a number
variables[m.group(1)] = m.group(2).strip()

return block_type, variables

# any other block types are returned as string
return block_type, block[len(block_type)+1:-2].strip()

class blockfile(object):
f = None

def __init__(self, path):
"""
Opens the blockfile.
"""
self.f = __builtin__.open(path)

def __iter__(self):
"""
Iterator over all the blocks in the open blockfile.
The iterator returns each block as a tuple of block type and block contents.
"""
block = ''
while True:
block += self.f.readline() # this assumes that all blocks end with a newline
if block.count('{') == block.count('}'): # we have a full block:
yield process(block)
block = ''
if self.f.tell() == os.fstat(self.f.fileno()).st_size:
return

def __del__(self):
"""
Closes the blockfile.
"""
self.close()

def close(self):
"""
Closes the blockfile.
"""
self.f.close()

if __name__ != "__main__":
def open(path):
"""
Opens the blockfile at path.
"""
return blockfile(path)

if __name__ == "__main__":
# If this script is called directly with the argument --extract, it extracts the list of column types from scripts/blockfile_support.tcl.
if len(sys.argv) > 1 and sys.argv[1] == '--extract':
if len(sys.argv) < 2 or not os.path.exists(sys.argv[2]):
sys.stderr.write("Please specify the path to scripts/blockfile_support.tcl from Espresso.\n")
sys.exit(1)
load_col_types(sys.argv[2])
print pprint(col_types)
sys.exit()

0 comments on commit 00e4f0f

Please sign in to comment.