Skip to content

Commit

Permalink
Dump HistorySet MetaData into the correct output csv files (#987)
Browse files Browse the repository at this point in the history
* enable historyset to handle both scalar and vector meta data, and add check for NAN from code collections

* fix

* fix HistorySet

* fix reading vector meta data

* update tests

* add unit test for meta data load, print, reload

* fix

* modify test to test only the mechanics
  • Loading branch information
wangcj05 authored and alfoa committed May 10, 2019
1 parent 69f066f commit 9849909
Show file tree
Hide file tree
Showing 12 changed files with 498 additions and 100 deletions.
13 changes: 9 additions & 4 deletions framework/DataObjects/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,20 +112,21 @@ def addExpectedMeta(self,keys, params={}):
@ In, keys, set(str), keys to register
@ In, params, dict, optional, {key:[indexes]}, keys of the dictionary are the variable names,
values of the dictionary are lists of the corresponding indexes/coordinates of given variable
@ Out, None
@ Out, keys, list(str), extra keys that has been registered
"""
# TODO add option to skip parts of meta if user wants to
# remove already existing keys
keys = list(key for key in keys if key not in self.getVars()+self.indexes)
# if no new meta, move along
if len(keys) == 0:
return
return keys
# CANNOT add expected meta after samples are started
assert(self._data is None)
assert(self._collector is None or len(self._collector) == 0)
self._metavars.extend(keys)
self._orderedVars.extend(keys)
self.setPivotParams(params)
return keys

def addMeta(self, tag, xmlDict = None, node = None):
"""
Expand Down Expand Up @@ -972,7 +973,7 @@ def _convertArrayListToDataset(self,array,action='return'):
# determine dimensions for each variable
dimsMeta = {}
for name, var in new.variables.items():
if name not in self._inputs + self._outputs:
if name not in self._inputs + self._outputs + self._metavars:
continue
dims = list(var.dims)
# don't list if only entry is sampleTag
Expand Down Expand Up @@ -1554,8 +1555,12 @@ def _loadCsvMeta(self,fileName):
dims = meta.get('pivotParams',{})
if len(dims)>0:
self.setPivotParams(dims)
# vector metavars is also stored in 'DataSet/dims' node
metavars = meta.get('metavars',[])
# get dict of vector metavars
params = {key:val for key, val in dims.items() if key in metavars}
# add metadata, so we get probability weights and etc
self.addExpectedMeta(meta.get('metavars',[]))
self.addExpectedMeta(metavars,params)
# check all variables desired are available
provided = set(meta.get('inputs',[])+meta.get('outputs',[])+meta.get('metavars',[]))
# otherwise, if we have no meta XML to load from, infer what we can from the CSV, which is only the available variables.
Expand Down
28 changes: 22 additions & 6 deletions framework/DataObjects/HistorySet.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def __init__(self):
self.printTag = self.name
self._tempPivotParam = None
self._neededForReload = [] # HistorySet doesn't need anything special to load, since it's written in cluster-by-sample CSV format
self._inputMetaVars = [] # meta vars belong to the input of HistorySet, i.e. scalar
self._outputMetaVars = [] # meta vara belong to the output of HistorySet, i.e. vector

def _readMoreXML(self,xmlNode):
"""
Expand Down Expand Up @@ -126,7 +128,7 @@ def _fromCSV(self,fileName,**kwargs):
main = self._readPandasCSV(fileName+'.csv')
nSamples = len(main.index)
## collect input space data
for inp in self._inputs + self._metavars:
for inp in self._inputs + self._inputMetaVars:
data[inp] = main[inp].values
## get the sampleTag values if they're present, in case it's not just range
if self.sampleTag in main:
Expand All @@ -136,7 +138,7 @@ def _fromCSV(self,fileName,**kwargs):
# load subfiles for output spaces
subFiles = main['filename'].values
# pre-build realization spots
for out in self._outputs + self.indexes:
for out in self._outputs + self.indexes + self._outputMetaVars:
data[out] = np.zeros(nSamples,dtype=object)
# read in secondary CSVs
for i,sub in enumerate(subFiles):
Expand All @@ -150,7 +152,7 @@ def _fromCSV(self,fileName,**kwargs):
if len(set(subDat.keys()).intersection(self.indexes)) != len(self.indexes):
self.raiseAnError(IOError,'Importing HistorySet from .csv: the pivot parameters "'+', '.join(self.indexes)+'" have not been found in the .csv file. Check that the '
'correct <pivotParameter> has been specified in the dataObject or make sure the <pivotParameter> is included in the .csv files')
for out in self._outputs+self.indexes:
for out in self._outputs + self.indexes + self._outputMetaVars:
data[out][i] = subDat[out].values
# construct final data object
self.load(data,style='dict',dims=self.getDimensions())
Expand Down Expand Up @@ -190,7 +192,7 @@ def _selectiveRealization(self,rlz):
if not utils.isSingleValued(val):
# treat inputs, outputs differently TODO this should extend to per-variable someday
## inputs
if var in self._inputs:
if var in self._inputs + self._inputMetaVars:
method,indic = self._selectInput
# pivot variables are included here in "else"; remove them after they're used in operators
else:
Expand Down Expand Up @@ -256,7 +258,7 @@ def _toCSV(self,fileName,start=0,**kwargs):
# specific implementation
## write input space CSV with pointers to history CSVs
### get list of input variables to keep
ordered = list(i for i in itertools.chain(self._inputs,self._metavars) if i in keep)
ordered = list(i for i in itertools.chain(self._inputs,self._inputMetaVars) if i in keep)
### select input part of dataset
inpData = data[ordered]
### add column for realization information, pointing to the appropriate CSV
Expand All @@ -271,7 +273,7 @@ def _toCSV(self,fileName,start=0,**kwargs):
### write CSV
self._usePandasWriteCSV(fileName,inpData,ordered,keepSampleTag = self.sampleTag in keep,mode=mode)
## obtain slices to write subset CSVs
ordered = list(o for o in self.getVars('output') if o in keep)
ordered = list(o for o in itertools.chain(self._outputs,self._outputMetaVars) if o in keep)

if len(ordered):
# hierarchical flag controls the printing/plotting of the dataobject in case it is an hierarchical one.
Expand Down Expand Up @@ -300,3 +302,17 @@ def _toCSV(self,fileName,start=0,**kwargs):
self._usePandasWriteCSV(filename,rlz,ordered,keepIndex=True)
else:
self.raiseAWarning('No output space variables have been requested for DataObject "{}"! No history files will be printed!'.format(self.name))

def addExpectedMeta(self, keys, params=None):
  """
    Registers meta to look for in realizations. Overloads DataSet.addExpectedMeta so that
    scalar meta vars are tracked with the HistorySet inputs while vector (indexed) meta
    vars are tracked with the HistorySet outputs.
    @ In, keys, set(str), keys to register
    @ In, params, dict, optional, {key:[indexes]}, keys of the dictionary are the variable names,
      values of the dictionary are lists of the corresponding indexes/coordinates of given variable
    @ Out, extraKeys, list(str), new keys that have actually been registered
  """
  # avoid a shared mutable default argument; an empty dict means "no indexed meta vars"
  if params is None:
    params = {}
  # base class filters out already-known keys and registers the remainder
  extraKeys = DataSet.addExpectedMeta(self, keys, params)
  # keys without indexes are scalar -> printed alongside the HistorySet input space
  self._inputMetaVars.extend([key for key in extraKeys if key not in params])
  # keys with indexes are vector -> printed alongside the HistorySet output space
  self._outputMetaVars.extend([key for key in extraKeys if key in params])
  return extraKeys
2 changes: 2 additions & 0 deletions framework/Models/Code.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,8 @@ def evaluateSample(self, myInput, samplerType, kwargs):

csvLoader = CsvLoader.CsvLoader(self.messageHandler)
csvData = csvLoader.loadCsvFile(outFile)
if np.isnan(csvData).all():
self.raiseAnError(IOError, 'The data collected from', outputFile+'.csv', 'only contain "NAN"')
headers = csvLoader.getAllFieldNames()

## Numpy by default iterates over rows, thus we transpose the data and
Expand Down
22 changes: 17 additions & 5 deletions framework/PostProcessors/BasicStatistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,20 +254,32 @@ def initialize(self, runInfo, inputs, initDict):
inputObj = inputs[-1] if type(inputs) == list else inputs
if inputObj.type == 'HistorySet':
self.dynamic = True
metaKeys = []
inputMetaKeys = []
outputMetaKeys = []
for metric, infos in self.toDo.items():
steMetric = metric + '_ste'
if steMetric in self.steVals:
for info in infos:
prefix = info['prefix']
for target in info['targets']:
metaVar = prefix + '_ste_' + target if not self.outputDataset else metric + '_ste'
metaKeys.append(metaVar)
metaDim = inputObj.getDimensions(target)
if len(metaDim[target]) == 0:
inputMetaKeys.append(metaVar)
else:
outputMetaKeys.append(metaVar)
metaParams = {}
if not self.outputDataset:
metaParams = {key:[self.pivotParameter] for key in metaKeys} if self.dynamic else {}
if len(outputMetaKeys) > 0:
metaParams = {key:[self.pivotParameter] for key in outputMetaKeys}
else:
metaParams = {key:[self.pivotParameter,self.steMetaIndex] for key in metaKeys} if self.dynamic else {key:[self.steMetaIndex]}

if len(outputMetaKeys) > 0:
params = {key:[self.pivotParameter,self.steMetaIndex] for key in outputMetaKeys + inputMetaKeys}
metaParams.update(params)
elif len(inputMetaKeys) > 0:
params = {key:[self.steMetaIndex] for key in inputMetaKeys}
metaParams.update(params)
metaKeys = inputMetaKeys + outputMetaKeys
self.addMetaKeys(metaKeys,metaParams)

def _localReadMoreXML(self, xmlNode):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<DataObjectMetadata name="SET_Back_to_MASTER">
<DataSet type="Static">
<dims>
<avg_EXP_MaxCtoF_a>Time_aligned</avg_EXP_MaxCtoF_a>
<avg_EXP_PCT_a>Time_aligned</avg_EXP_PCT_a>
<avg_EXP_P_a>Time_aligned</avg_EXP_P_a>
<avg_REP_MaxCtoF_a>Time_aligned</avg_REP_MaxCtoF_a>
<avg_REP_PCT_a>Time_aligned</avg_REP_PCT_a>
<avg_REP_P_a>Time_aligned</avg_REP_P_a>
<avg_ste_EXP_MaxCtoF_a>Time_aligned</avg_ste_EXP_MaxCtoF_a>
<avg_ste_EXP_PCT_a>Time_aligned</avg_ste_EXP_PCT_a>
<avg_ste_EXP_P_a>Time_aligned</avg_ste_EXP_P_a>
<avg_ste_REP_MaxCtoF_a>Time_aligned</avg_ste_REP_MaxCtoF_a>
<avg_ste_REP_PCT_a>Time_aligned</avg_ste_REP_PCT_a>
<avg_ste_REP_P_a>Time_aligned</avg_ste_REP_P_a>
<sen_EXP_MaxCtoF_a_HTcoeff>Time_aligned</sen_EXP_MaxCtoF_a_HTcoeff>
<sen_EXP_MaxCtoF_a_LinPow>Time_aligned</sen_EXP_MaxCtoF_a_LinPow>
<sen_EXP_MaxCtoF_a_RodPitch>Time_aligned</sen_EXP_MaxCtoF_a_RodPitch>
<sen_EXP_MaxCtoF_a_TCool>Time_aligned</sen_EXP_MaxCtoF_a_TCool>
<sen_EXP_MaxCtoF_a_VCool>Time_aligned</sen_EXP_MaxCtoF_a_VCool>
<sen_EXP_PCT_a_HTcoeff>Time_aligned</sen_EXP_PCT_a_HTcoeff>
<sen_EXP_PCT_a_LinPow>Time_aligned</sen_EXP_PCT_a_LinPow>
<sen_EXP_PCT_a_RodPitch>Time_aligned</sen_EXP_PCT_a_RodPitch>
<sen_EXP_PCT_a_TCool>Time_aligned</sen_EXP_PCT_a_TCool>
<sen_EXP_PCT_a_VCool>Time_aligned</sen_EXP_PCT_a_VCool>
<sen_EXP_P_a_HTcoeff>Time_aligned</sen_EXP_P_a_HTcoeff>
<sen_EXP_P_a_LinPow>Time_aligned</sen_EXP_P_a_LinPow>
<sen_EXP_P_a_RodPitch>Time_aligned</sen_EXP_P_a_RodPitch>
<sen_EXP_P_a_TCool>Time_aligned</sen_EXP_P_a_TCool>
<sen_EXP_P_a_VCool>Time_aligned</sen_EXP_P_a_VCool>
<sen_REP_MaxCtoF_a_HTcoeff>Time_aligned</sen_REP_MaxCtoF_a_HTcoeff>
<sen_REP_MaxCtoF_a_LinPow>Time_aligned</sen_REP_MaxCtoF_a_LinPow>
<sen_REP_MaxCtoF_a_RodPitch>Time_aligned</sen_REP_MaxCtoF_a_RodPitch>
<sen_REP_MaxCtoF_a_TCool>Time_aligned</sen_REP_MaxCtoF_a_TCool>
<sen_REP_MaxCtoF_a_VCool>Time_aligned</sen_REP_MaxCtoF_a_VCool>
<sen_REP_PCT_a_HTcoeff>Time_aligned</sen_REP_PCT_a_HTcoeff>
<sen_REP_PCT_a_LinPow>Time_aligned</sen_REP_PCT_a_LinPow>
<sen_REP_PCT_a_RodPitch>Time_aligned</sen_REP_PCT_a_RodPitch>
<sen_REP_PCT_a_TCool>Time_aligned</sen_REP_PCT_a_TCool>
<sen_REP_PCT_a_VCool>Time_aligned</sen_REP_PCT_a_VCool>
<sen_REP_P_a_HTcoeff>Time_aligned</sen_REP_P_a_HTcoeff>
<sen_REP_P_a_LinPow>Time_aligned</sen_REP_P_a_LinPow>
<sen_REP_P_a_TCool>Time_aligned</sen_REP_P_a_TCool>
<sen_REP_P_a_VCool>Time_aligned</sen_REP_P_a_VCool>
<var_EXP_MaxCtoF_a>Time_aligned</var_EXP_MaxCtoF_a>
<var_EXP_PCT_a>Time_aligned</var_EXP_PCT_a>
<var_EXP_P_a>Time_aligned</var_EXP_P_a>
<var_REP_MaxCtoF_a>Time_aligned</var_REP_MaxCtoF_a>
<var_REP_PCT_a>Time_aligned</var_REP_PCT_a>
<var_REP_P_a>Time_aligned</var_REP_P_a>
<var_ste_EXP_MaxCtoF_a>Time_aligned</var_ste_EXP_MaxCtoF_a>
<var_ste_EXP_PCT_a>Time_aligned</var_ste_EXP_PCT_a>
<var_ste_EXP_P_a>Time_aligned</var_ste_EXP_P_a>
<var_ste_REP_MaxCtoF_a>Time_aligned</var_ste_REP_MaxCtoF_a>
<var_ste_REP_PCT_a>Time_aligned</var_ste_REP_PCT_a>
<var_ste_REP_P_a>Time_aligned</var_ste_REP_P_a>
</dims>
<general>
<inputs>avg_VCool,avg_TCool,avg_LinPow,avg_HTcoeff,avg_RodPitch,var_VCool,var_TCool,var_LinPow,var_HTcoeff,var_RodPitch</inputs>
<outputs>sen_EXP_PCT_a_VCool,sen_EXP_MaxCtoF_a_VCool,sen_EXP_P_a_VCool,sen_EXP_PCT_a_TCool,sen_EXP_MaxCtoF_a_TCool,sen_EXP_P_a_TCool,sen_EXP_PCT_a_LinPow,sen_EXP_MaxCtoF_a_LinPow,sen_EXP_P_a_LinPow,sen_EXP_PCT_a_HTcoeff,sen_EXP_MaxCtoF_a_HTcoeff,sen_EXP_P_a_HTcoeff,sen_EXP_PCT_a_RodPitch,sen_EXP_MaxCtoF_a_RodPitch,sen_EXP_P_a_RodPitch,sen_REP_PCT_a_VCool,sen_REP_MaxCtoF_a_VCool,sen_REP_P_a_VCool,sen_REP_PCT_a_TCool,sen_REP_MaxCtoF_a_TCool,sen_REP_P_a_TCool,sen_REP_PCT_a_LinPow,sen_REP_MaxCtoF_a_LinPow,sen_REP_P_a_LinPow,sen_REP_PCT_a_HTcoeff,sen_REP_MaxCtoF_a_HTcoeff,sen_REP_P_a_HTcoeff,sen_REP_PCT_a_RodPitch,sen_REP_MaxCtoF_a_RodPitch,avg_EXP_PCT_a,avg_EXP_MaxCtoF_a,avg_EXP_P_a,avg_REP_PCT_a,avg_REP_MaxCtoF_a,avg_REP_P_a,var_EXP_PCT_a,var_EXP_MaxCtoF_a,var_EXP_P_a,var_REP_PCT_a,var_REP_MaxCtoF_a,var_REP_P_a</outputs>
<pointwise_meta>avg_ste_EXP_MaxCtoF_a,avg_ste_EXP_PCT_a,avg_ste_EXP_P_a,avg_ste_HTcoeff,avg_ste_LinPow,avg_ste_REP_MaxCtoF_a,avg_ste_REP_PCT_a,avg_ste_REP_P_a,avg_ste_RodPitch,avg_ste_TCool,avg_ste_VCool,var_ste_EXP_MaxCtoF_a,var_ste_EXP_PCT_a,var_ste_EXP_P_a,var_ste_HTcoeff,var_ste_LinPow,var_ste_REP_MaxCtoF_a,var_ste_REP_PCT_a,var_ste_REP_P_a,var_ste_RodPitch,var_ste_TCool,var_ste_VCool</pointwise_meta>
<sampleTag>RAVEN_sample_ID</sampleTag>
</general>
</DataSet>

</DataObjectMetadata>
157 changes: 157 additions & 0 deletions tests/framework/PostProcessors/BasicStatistics/test_time_dep_meta.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
<?xml version="1.0" ?>
<Simulation verbosity="debug">

<TestInfo>
<name>framework/PostProcessors/BasicStatistics.timeDepMeta</name>
<author>wangc</author>
<created>2019-05-10</created>
<classesTested>PostProcessors.BasicStatistics, DataObjects.HistorySet</classesTested>
<description>
This test checks the metadata printing of a HistorySet: scalar meta data should be printed with
the input of the HistorySet, while vector meta data should be printed with the output of the HistorySet. See issue #986.
With the fix of #986, the HistorySet handles scalar and vector meta data separately.
</description>
</TestInfo>

<RunInfo>
<WorkingDir>timeDepMeta</WorkingDir>
<Sequence>Read_raw, SensPost_1, Back_to_MASTER</Sequence>
<internalParallel>True</internalParallel>
<deleteOutExtension>o,plt,rst,csv,i</deleteOutExtension>
</RunInfo>

<Files>
<Input name="myinputfile" type="">RawData</Input>
</Files>

<VariableGroups>
<Group name="GRO_SensPost_in_features_scalar">
VCool,
TCool,
LinPow,
HTcoeff,
RodPitch
</Group>
<Group name="GRO_SensPost_in_targets_Timealigned">
EXP_PCT_a,
EXP_MaxCtoF_a,
EXP_P_a,
REP_PCT_a,
REP_MaxCtoF_a,
REP_P_a
</Group>
<Group name="GRO_SensPost_out_scalar">
avg_VCool,
avg_TCool,
avg_LinPow,
avg_HTcoeff,
avg_RodPitch,
var_VCool,
var_TCool,
var_LinPow,
var_HTcoeff,
var_RodPitch
</Group>
<Group name="GRO_SensPost_out_Timealigned">
sen_EXP_PCT_a_VCool,
sen_EXP_MaxCtoF_a_VCool,
sen_EXP_P_a_VCool,
sen_EXP_PCT_a_TCool,
sen_EXP_MaxCtoF_a_TCool,
sen_EXP_P_a_TCool,
sen_EXP_PCT_a_LinPow,
sen_EXP_MaxCtoF_a_LinPow,
sen_EXP_P_a_LinPow,
sen_EXP_PCT_a_HTcoeff,
sen_EXP_MaxCtoF_a_HTcoeff,
sen_EXP_P_a_HTcoeff,
sen_EXP_PCT_a_RodPitch,
sen_EXP_MaxCtoF_a_RodPitch,
sen_EXP_P_a_RodPitch,
sen_REP_PCT_a_VCool,
sen_REP_MaxCtoF_a_VCool,
sen_REP_P_a_VCool,
sen_REP_PCT_a_TCool,
sen_REP_MaxCtoF_a_TCool,
sen_REP_P_a_TCool,
sen_REP_PCT_a_LinPow,
sen_REP_MaxCtoF_a_LinPow,
sen_REP_P_a_LinPow,
sen_REP_PCT_a_HTcoeff,
sen_REP_MaxCtoF_a_HTcoeff,
sen_REP_P_a_HTcoeff,
sen_REP_PCT_a_RodPitch,
sen_REP_MaxCtoF_a_RodPitch,
sen_REP_P_a_HTcoeff,
avg_EXP_PCT_a,
avg_EXP_MaxCtoF_a,
avg_EXP_P_a,
avg_REP_PCT_a,
avg_REP_MaxCtoF_a,
avg_REP_P_a,
var_EXP_PCT_a,
var_EXP_MaxCtoF_a,
var_EXP_P_a,
var_REP_PCT_a,
var_REP_MaxCtoF_a,
var_REP_P_a
</Group>
</VariableGroups>
<Models>
<PostProcessor name="SensPost_1" subType="BasicStatistics" verbosity="debug">
<pivotParameter>Time_aligned</pivotParameter>
<sensitivity prefix="sen">
<targets>GRO_SensPost_in_targets_Timealigned</targets>
<features>GRO_SensPost_in_features_scalar</features>
</sensitivity>
<expectedValue prefix="avg">
GRO_SensPost_in_targets_Timealigned, GRO_SensPost_in_features_scalar
</expectedValue>
<variance prefix="var">
GRO_SensPost_in_targets_Timealigned, GRO_SensPost_in_features_scalar
</variance>
</PostProcessor>
</Models>

<DataObjects>
<HistorySet name="SET_SensPost_aligned">
<Input>GRO_SensPost_in_features_scalar</Input>
<Output>GRO_SensPost_in_targets_Timealigned</Output>
<options>
<pivotParameter>Time_aligned</pivotParameter>
</options>
</HistorySet>
<HistorySet name="SET_Back_to_MASTER">
<Input>GRO_SensPost_out_scalar</Input>
<Output>GRO_SensPost_out_Timealigned</Output>
<options>
<pivotParameter>Time_aligned</pivotParameter>
</options>
</HistorySet>
</DataObjects>

<Steps>
<IOStep name="Read_raw">
<Input class="Files" type="csv">myinputfile</Input>
<Output class="DataObjects" type="HiostorySet">SET_SensPost_aligned</Output>
</IOStep>

<PostProcess name="SensPost_1">
<Input class="DataObjects" type="HistorySet">SET_SensPost_aligned</Input>
<Model class="Models" type="PostProcessor">SensPost_1</Model>
<Output class="DataObjects" type="HistorySet">SET_Back_to_MASTER</Output>
</PostProcess>

<IOStep name="Back_to_MASTER">
<Input class="DataObjects" type="HistorySet">SET_Back_to_MASTER</Input>
<Output class="OutStreams" type="Print">Back_to_MASTER</Output>
</IOStep>
</Steps>

<OutStreams>
<Print name="Back_to_MASTER">
<type>csv</type>
<source>SET_Back_to_MASTER</source>
</Print>
</OutStreams>
</Simulation>

0 comments on commit 9849909

Please sign in to comment.