Skip to content

Commit

Permalink
Implement binned ntuple diff
Browse files Browse the repository at this point in the history
I don’t really like this approach because the relative difference becomes
infinite whenever the reference histogram has an empty bin. This will be a
problem when statistics are low (which is the case for the TOPAS module tests).
  • Loading branch information
David Hall committed Jun 15, 2015
1 parent 1e80498 commit 25a98b2
Showing 1 changed file with 60 additions and 21 deletions.
81 changes: 60 additions & 21 deletions nrtest/diff/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,69 @@
from .factory import register, NumericDiff, DiffException


@register('array')
class ArrayDiff(NumericDiff):

def __init__(self, is_binary, delimiter, compare_hist, *args, **kwargs):
    """Load the test and reference arrays and compute their difference.

    Args:
        is_binary: Stored on the instance; selects how ``_read_file``
            loads the data.
        delimiter: Stored on the instance; column delimiter used by
            ``_read_file`` for text files.
        compare_hist: If true, every column is compared through
            ``_compare_column`` (binned comparison); otherwise the raw
            arrays are compared with ``_compare_array``.
        *args: Forwarded unchanged to the base-class initializer.
        **kwargs: Forwarded unchanged to the base-class initializer.

    Raises:
        DiffException: If, in histogram mode, the two arrays do not have
            the same number of columns.
    """
    super(ArrayDiff, self).__init__(*args, **kwargs)

    self.is_binary = is_binary
    self.delimiter = delimiter
    self.compare_hist = compare_hist

    # NOTE(review): path_t / path_r are presumably set by the base-class
    # initializer above -- it is not visible from this file.
    self.data_t = self._read_file(self.path_t)
    self.data_r = self._read_file(self.path_r)
    self.diff = None

    if compare_hist:
        if self.data_t.shape[1] != self.data_r.shape[1]:
            raise DiffException('Arrays have different number of columns')

        n_cols = self.data_t.shape[1]
        col_diffs = [
            self._compare_column(self.data_t[:, i_col], self.data_r[:, i_col])
            for i_col in range(n_cols)
        ]

        # column_stack accepts both 1-D results and (n, 1) column vectors,
        # whereas concatenating along axis 1 fails for 1-D results.
        if col_diffs:
            self.diff = np.column_stack(col_diffs)
    else:
        self.diff = self._compare_array(self.data_t, self.data_r)

def _read_file(self, path):
if self.is_binary:
return np.fromfile(path)
else:
return np.loadtxt(path, delimiter=self.delimiter, ndmin=1)

def _compare_column(self, col_t, col_r):
val_max = max(np.amax(col_t), np.amax(col_r))
val_min = min(np.amin(col_t), np.amin(col_r))
n_bins = 5

hist_t, _ = np.histogram(col_t, bins=n_bins, range=(val_min, val_max),
density=True)
hist_r, _ = np.histogram(col_r, bins=n_bins, range=(val_min, val_max),
density=True)

if self.data_t.shape != self.data_r.shape:
hist_t = hist_t.reshape((-1, 1))
hist_r = hist_r.reshape((-1, 1))

return self._compare_array(hist_t, hist_r)

def _compare_array(self, data_t, data_r):
if data_t.shape != data_r.shape:
raise DiffException('Inconsistent array shape')

np.seterr(divide='ignore', invalid='ignore')
self.diff = np.absolute((self.data_t - self.data_r) / self.data_r)
both_zero_ind = np.nonzero((self.data_t == 0) & (self.data_r == 0))
self.diff[both_zero_ind] = 0
diff = np.absolute((data_t - data_r) / data_r)
both_zero_ind = np.nonzero((data_t == 0) & (data_r == 0))
diff[both_zero_ind] = 0

def _read_file(self, path):
return np.loadtxt(path, ndmin=1)
return diff

def max(self):
    """Return the largest value in the stored difference array."""
    largest = np.amax(self.diff)
    return largest
Expand All @@ -33,21 +77,16 @@ def mean(self):
return np.mean(self.diff)


@register('array')
def default(path_t, path_r):
    """Create the diff registered as 'array'.

    Files are read as text with no explicit delimiter (``delimiter=None``)
    and compared column by column through histograms.
    """
    # Positional arguments: is_binary, delimiter, compare_hist.
    return ArrayDiff(False, None, True, path_t, path_r)


@register('csv')
def csv(path_t, path_r):
    """Create the diff registered as 'csv'.

    Files are read as comma-delimited text and compared element by element.
    """
    # Positional arguments: is_binary, delimiter, compare_hist.
    return ArrayDiff(False, ',', False, path_t, path_r)


@register('binarray')
def binary(path_t, path_r):
    """Create the diff registered as 'binarray'.

    Files are read as raw binary data and compared element by element.
    """
    # Positional arguments: is_binary, delimiter, compare_hist.
    return ArrayDiff(True, None, False, path_t, path_r)

0 comments on commit 25a98b2

Please sign in to comment.