Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "--fields" option for glance info #13

Closed
wants to merge 12 commits into from
152 changes: 134 additions & 18 deletions pyglance/glance/compare.py
Expand Up @@ -1134,6 +1134,122 @@ def inspect_stats_library_call (afn, var_list=[ ], options_set={ }, do_document=
if doc_atend:
print >> output_channel, ('\n\n' + statistics.INSP_STATISTICS_DOC_STR)

def gather_stats_for_variable(filehandle, variable):
    """Compute inspection statistics for one variable as a flat dictionary.

    The variable's missing value (filehandle.missing_value(variable)) and its
    data (filehandle[variable]) are handed to
    statistics.StatisticalInspectionAnalysis.withSimpleData.  The analysis'
    dictionary_form() is a two-level mapping (title -> statistics); this
    function merges every inner mapping into a single dictionary so callers
    can look a statistic up by its name alone.

    If the same statistic name appears under more than one title, the entry
    merged last overwrites the earlier one.
    """
    # Look up the missing value before touching the data, so that any error
    # raised by the file handle surfaces in the same order as it always has.
    fill_value = filehandle.missing_value(variable)
    analysis = statistics.StatisticalInspectionAnalysis.withSimpleData(
        filehandle[variable], fill_value)

    flattened = {}
    for _title, section in analysis.dictionary_form().items():
        # The title is dropped on purpose: only the statistic names are kept.
        flattened.update(section)
    return flattened


def return_info_fields(file, input, var, fields):
    """Return a list of the requested fields for input[var].

    Parameters:
        file   - name of the file; returned verbatim for the "filename" field
                 and used in the warning message.
        input  - opened file object.  It is indexed with var, and its
                 get_variable_attributes(var) / get_attribute(var, name)
                 methods are used for attribute lookups.
        var    - name of the variable to describe.
        fields - list of field specifiers.  Recognised values:
            "filename"         - value of file
            "variable"         - value of var
            "shape"            - str() of the variable's shape
            "stats(" NAME ")"  - the calculated statistic NAME.  Uses the
                                 names produced by gather_stats_for_variable
                                 (statistic names only, not category names).
            ATTRIBUTE_NAME     - value of this attribute for this variable

    Returns a list with exactly one element per entry in fields, in the same
    order.  Elements can be safely passed through str() to get a suitable,
    human-friendly form.  An element is None when the value is not available
    (unknown attribute, unknown statistic, or the shape/statistics could not
    be computed because a ValueError was raised).
    """
    stats_prefix = "stats("
    result = []
    stats = None           # computed lazily, at most once per call
    stats_failed = False   # remember a failure so we neither retry nor re-warn
    for f in fields:
        if f == "filename":
            result.append(file)

        elif f == "variable":
            result.append(var)

        elif f == "shape":
            try:
                result.append(str(input[var].shape))
            except ValueError:
                result.append(None)

        # The specifier must *start* with "stats(": the statistic name is
        # sliced out by position, so matching the prefix anywhere else in the
        # string would produce a bogus name and hide a same-named attribute.
        elif f.startswith(stats_prefix) and f.endswith(")"):
            statname = f[len(stats_prefix):-1]
            if stats is None and not stats_failed:
                try:
                    stats = gather_stats_for_variable(input, var)
                except ValueError:
                    LOG.warn('Skipping variable "%s" in "%s" as unparsable.',
                             var, file)
                    stats_failed = True
            # Exactly one value is appended per requested field, including on
            # failure, so the output columns stay aligned with fields.
            if stats is not None and statname in stats:
                result.append(stats[statname])
            else:
                result.append(None)

        elif f in input.get_variable_attributes(var):
            result.append(input.get_attribute(var, f))

        else:
            result.append(None)
    return result


def info_library_call(options, *args):
    """Print per-variable information for each file named in args.

    options.fields is a comma separated list of field specifiers (see
    return_info_fields) and options.parsable_output selects the layout:

      * parsable: one line per variable, fields joined by tabs.
      * otherwise: the file name followed by ": " and the first variable's
        fields joined by spaces; further variables go on following lines,
        indented under the file name.

    Returns the number of files that could not be processed (a KeyError was
    raised while opening or reading them), capped at 255.
    """
    requested = options.fields.split(',')
    parsable = options.parsable_output

    # For backward compatibility, non-parsable output always starts with the
    # file name, so make sure it is the leading field in that mode.
    if not parsable and requested[0] != "filename":
        requested = ["filename"] + requested

    def render(path, handle, name, wanted, separator):
        # Missing values (None) are shown as empty strings.
        values = return_info_fields(path, handle, name, wanted)
        return separator.join("" if v is None else str(v) for v in values)

    failures = 0
    for fn in args:
        try:
            handle = io.open(fn)
            # Calling the opened file object yields its variable names.
            names = sorted(handle())
            # NOTE: single-argument print(...) produces the same output
            # whether print is a statement or a function.
            if parsable:
                # Build every row first so nothing is printed for a file
                # that fails part-way through.
                rows = [render(fn, handle, name, requested, "\t") + "\n"
                        for name in names]
                print("".join(rows))
            elif not names:
                print(fn + ": ")
            else:
                # The file name is printed by hand, so skip the leading
                # "filename" field when rendering each variable.
                remaining = requested[1:]
                indent = " " + " " * len(fn)
                print(fn + ": " + render(fn, handle, names[0], remaining, " "))
                for name in names[1:]:
                    print(indent + render(fn, handle, name, remaining, " "))
        except KeyError:
            LOG.warn('Unable to open / process file selection: ' + fn)
            failures += 1

    # exit code is 8-bits, limit ourselves.
    return min(failures, 255)

def main():
import optparse
usage = """
Expand Down Expand Up @@ -1181,25 +1297,25 @@ def main():
"""

def info(*args):
"""list information about a list of files
List available variables for comparison.
"""List information about variables in files

--parsable prints one variable per line, with fields of information
separated by tabs.
--fields specify one or more fields to print for each variable:
filename - The name of the file containing the variable
variable - The name of the variable
shape - The shape of the data
<AttributeName> - If the variable has the specified
attribute, it will be printed. ex: FillValue
stats(<StatName>) - Any statistic from
"glance inspectstats"
ex: stats(num_data_points)

Examples:
glance info hdffile
glance info hdffile --parsable --fields 'filename,variable,stats(min),FillValue'
"""
problems = 0
for fn in args:
try :
lal = list(io.open(fn)())
lal.sort()
if options.parsable_output:
print "".join(map(lambda x: fn+"\t"+x+"\n", lal))
else:
print fn + ': ' + ('\n ' + ' '*len(fn)).join(lal)
except KeyError :
LOG.warn('Unable to open / process file selection: ' + fn)
problems += 1
if problems > 255:
# exit code is 8-bits, limit ourselves.
problems = 255
return problems
return info_library_call(options, *args)

def stats(*args):
"""create statistics summary of variables
Expand Down
3 changes: 3 additions & 0 deletions pyglance/glance/config_organizer.py
Expand Up @@ -467,6 +467,9 @@ def set_up_command_line_options (parser) :
help="generate only html report files (no images)")
parser.add_option('-c', '--configfile', dest=OPTIONS_CONFIG_FILE_KEY, type='string', default=None,
help="set optional configuration file")
# fields to print for each variable by the "info" command
parser.add_option('--fields', type='string',
                  default='filename,variable',
                  help="comma separated list of additional fields to add to info's output. Values include filename, variable, shape, <AttributeName> and stats(StatisticName)")

# should pass/fail be tested?
parser.add_option('-x', '--doPassFail', dest=DO_TEST_PASSFAIL_KEY,
Expand Down