From a21d579347334a3fbadb71e331ea24ea64c9b325 Mon Sep 17 00:00:00 2001 From: Jean-Paul Sartre Date: Fri, 24 Mar 2017 23:20:49 +0530 Subject: [PATCH 1/7] Add toInt helper method --- histogrammar/primitives/count.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/histogrammar/primitives/count.py b/histogrammar/primitives/count.py index 229d7ad..c91e2a9 100644 --- a/histogrammar/primitives/count.py +++ b/histogrammar/primitives/count.py @@ -194,6 +194,13 @@ def _numpy(self, data, weights, shape): def _sparksql(self, jvm, converter): return converter.Count() # TODO: handle transform + def toInt(count): + """Return intiger value of count""" + value = str(count) + end = len(value) - 1 + value = float(value[7:end]) + return int(value) + @property def children(self): """List of sub-aggregators, to make it possible to walk the tree.""" From 3a6982eaaaa655e228d1890755d878386ea6b572 Mon Sep 17 00:00:00 2001 From: Jean-Paul Sartre Date: Fri, 24 Mar 2017 23:24:56 +0530 Subject: [PATCH 2/7] Add _toArray helper method --- histogrammar/primitives/bin.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/histogrammar/primitives/bin.py b/histogrammar/primitives/bin.py index 3e47eec..cb6b487 100644 --- a/histogrammar/primitives/bin.py +++ b/histogrammar/primitives/bin.py @@ -472,6 +472,17 @@ def _numpy(self, data, weights, shape): def _sparksql(self, jvm, converter): return converter.Bin(len(self.values), self.low, self.high, self.quantity.asSparkSQL(), self.values[0]._sparksql(jvm, converter), self.underflow._sparksql(jvm, converter), self.overflow._sparksql(jvm, converter), self.nanflow._sparksql(jvm, converter)) + def _toArray(Bin): + """Converts Bin to array of frequencies""" + values = [None] * int(len(Bin.values)) + i = 0 + for value in Bin.values: + value = str(value) + end = len(value) - 1 + values[i] = float(value[7:end]) + i += 1 + return values + @property def children(self): """List of sub-aggregators, to make it possible to walk the tree.""" From 
c1c561267e561971038a02b1f9be497b3332a529 Mon Sep 17 00:00:00 2001 From: Jean-Paul Sartre Date: Fri, 24 Mar 2017 23:27:15 +0530 Subject: [PATCH 3/7] Fix toInt, by accepting self --- histogrammar/primitives/count.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/histogrammar/primitives/count.py b/histogrammar/primitives/count.py index c91e2a9..7525d7c 100644 --- a/histogrammar/primitives/count.py +++ b/histogrammar/primitives/count.py @@ -194,9 +194,9 @@ def _numpy(self, data, weights, shape): def _sparksql(self, jvm, converter): return converter.Count() # TODO: handle transform - def toInt(count): + def toInt(self): """Return intiger value of count""" - value = str(count) + value = str(self) end = len(value) - 1 value = float(value[7:end]) return int(value) From 090f24b472c77e8e9263413cadaedd15f8bc463b Mon Sep 17 00:00:00 2001 From: Jean-Paul Sartre Date: Fri, 24 Mar 2017 23:28:07 +0530 Subject: [PATCH 4/7] Fix toArray, by accepting self --- histogrammar/primitives/bin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/histogrammar/primitives/bin.py b/histogrammar/primitives/bin.py index cb6b487..49fe47e 100644 --- a/histogrammar/primitives/bin.py +++ b/histogrammar/primitives/bin.py @@ -472,11 +472,11 @@ def _numpy(self, data, weights, shape): def _sparksql(self, jvm, converter): return converter.Bin(len(self.values), self.low, self.high, self.quantity.asSparkSQL(), self.values[0]._sparksql(jvm, converter), self.underflow._sparksql(jvm, converter), self.overflow._sparksql(jvm, converter), self.nanflow._sparksql(jvm, converter)) - def _toArray(Bin): + def _toArray(self): """Converts Bin to array of frequencies""" - values = [None] * int(len(Bin.values)) + values = [None] * int(len(self.values)) i = 0 - for value in Bin.values: + for value in self.values: value = str(value) end = len(value) - 1 values[i] = float(value[7:end]) From 96ae2bfd4bc09bc20e8851984cb1ed266bee8b70 Mon Sep 17 00:00:00 2001 From: Jean-Paul Sartre 
Date: Fri, 24 Mar 2017 23:36:37 +0530 Subject: [PATCH 5/7] Add ascii command Prints an ascii histogram, similar to ascii method in histogrammar scala library. --- histogrammar/primitives/bin.py | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/histogrammar/primitives/bin.py b/histogrammar/primitives/bin.py index 49fe47e..3ad8bac 100644 --- a/histogrammar/primitives/bin.py +++ b/histogrammar/primitives/bin.py @@ -142,6 +142,51 @@ def __init__(self, num, low, high, quantity, value=Count(), underflow=Count(), o super(Bin, self).__init__() self.specialize() + def ascii(self): + """Prints ascii histogram, for debuging on headless machines""" + underflow = self.underflow.toInt() + overflow = self.overflow.toInt() + nanflow = self.nanflow.toInt() + values = [underflow] + self._toArray() + [overflow, nanflow] + min = values[0] + max = values[0] + + length = len(values) + i = 1 + while i < length: + if values[i] > max: + max = values[i] + elif values[i] < min: + min = values[i] + i += 1 + + # Map values to number of dots representing them (maximum is 63) + range = max - min + prop = 63 / range + + dots = [None] * length + i = 0 + while i < length: + dots[i] = int((values[i] - min)*prop) + i += 1 + + # Get range of values corresponding to each bin + ranges = ["underflow"] + [None] * (length - 3) + ["overflow", "nanflow"] + i = 1 + while i < (length - 2): + ranges[i] = "[" + str(self.range(i))[1:] + i += 1 + + print("{:>19}{:>65}".format(min, max)) + print(" " * 18 + "+" + "-" * 63 + "+") + + i = 0 + while i < length: + print("{:<14}{:<4}{:<65}".format(ranges[i], int(values[i]), "|" + "*" * dots[i] + " " * (63 - dots[i]) + "|")) + i += 1 + + print(" " * 18 + "+" + "-" * 63 + "+") + def histogram(self): """Return a plain histogram by converting all sub-aggregator values into :doc:`Counts `.""" out = Bin(len(self.values), self.low, self.high, self.quantity, None, self.underflow.copy(), self.overflow.copy(), self.nanflow.copy()) From 
9846d8e5e18e687d3adf33300e8da41e4ff737b1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Sartre Date: Fri, 24 Mar 2017 23:50:18 +0530 Subject: [PATCH 6/7] Combine generation of values and ranges to one loop --- histogrammar/primitives/bin.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/histogrammar/primitives/bin.py b/histogrammar/primitives/bin.py index 3ad8bac..9b0f749 100644 --- a/histogrammar/primitives/bin.py +++ b/histogrammar/primitives/bin.py @@ -161,19 +161,16 @@ def ascii(self): i += 1 # Map values to number of dots representing them (maximum is 63) + # and get range of values corresponding to each bin range = max - min prop = 63 / range dots = [None] * length - i = 0 - while i < length: - dots[i] = int((values[i] - min)*prop) - i += 1 - # Get range of values corresponding to each bin ranges = ["underflow"] + [None] * (length - 3) + ["overflow", "nanflow"] i = 1 while i < (length - 2): + dots[i] = int((values[i] - min)*prop) ranges[i] = "[" + str(self.range(i))[1:] i += 1 From 1bcac10c240e7c7b55a608cf28789f231f4d7bf2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Sartre Date: Sat, 25 Mar 2017 00:08:27 +0530 Subject: [PATCH 7/7] Rollback to 'Add ascii command' I wanted to optimise the program by combining filling of values array and filling of ranges array, but I forgot that I had changed my implementation to not iterate over certain indices for ranges array. 
--- histogrammar/primitives/bin.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/histogrammar/primitives/bin.py b/histogrammar/primitives/bin.py index 9b0f749..ab2a93f 100644 --- a/histogrammar/primitives/bin.py +++ b/histogrammar/primitives/bin.py @@ -161,16 +161,19 @@ def ascii(self): i += 1 # Map values to number of dots representing them (maximum is 63) - # and get range of values corresponding to each bin range = max - min prop = 63 / range dots = [None] * length + i = 0 + while i < length: + dots[i] = int((values[i] - min)*prop) + i += 1 + # Get range of values corresponding to each bin ranges = ["underflow"] + [None] * (length - 3) + ["overflow", "nanflow"] i = 1 while i < (length - 2): - dots[i] = int((values[i] - min)*prop) ranges[i] = "[" + str(self.range(i))[1:] i += 1 @@ -632,3 +635,4 @@ def __hash__(self): return hash((self.low, self.high, self.quantity, self.entries, tuple(self.values), self.underflow, self.overflow, self.nanflow)) Factory.register(Bin) +