Skip to content

Commit

Permalink
[SPARK-2470] PEP8 fixes to statcounter.py
Browse files Browse the repository at this point in the history
  • Loading branch information
nchammas committed Jul 20, 2014
1 parent d644477 commit b3b96cf
Showing 1 changed file with 13 additions and 12 deletions.
25 changes: 13 additions & 12 deletions python/pyspark/statcounter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,19 @@
import copy
import math


class StatCounter(object):

def __init__(self, values=[]):
self.n = 0L # Running count of our values
self.mu = 0.0 # Running mean of our values
self.m2 = 0.0 # Running variance numerator (sum of (x - mean)^2)
self.maxValue = float("-inf")
self.minValue = float("inf")

for v in values:
self.merge(v)

# Add a value into this StatCounter, updating the internal statistics.
def merge(self, value):
delta = value - self.mu
Expand All @@ -42,15 +43,15 @@ def merge(self, value):
self.maxValue = value
if self.minValue > value:
self.minValue = value

return self

# Merge another StatCounter into this one, adding up the internal statistics.
def mergeStats(self, other):
if not isinstance(other, StatCounter):
raise Exception("Can only merge Statcounters!")

if other is self: # reference equality holds
if other is self: # reference equality holds
self.merge(copy.deepcopy(other)) # Avoid overwriting fields in a weird order
else:
if self.n == 0:
Expand All @@ -59,19 +60,19 @@ def mergeStats(self, other):
self.n = other.n
self.maxValue = other.maxValue
self.minValue = other.minValue
elif other.n != 0:

elif other.n != 0:
delta = other.mu - self.mu
if other.n * 10 < self.n:
self.mu = self.mu + (delta * other.n) / (self.n + other.n)
elif self.n * 10 < other.n:
self.mu = other.mu - (delta * self.n) / (self.n + other.n)
else:
self.mu = (self.mu * self.n + other.mu * other.n) / (self.n + other.n)

self.maxValue = max(self.maxValue, other.maxValue)
self.minValue = min(self.minValue, other.minValue)

self.m2 += other.m2 + (delta * delta * self.n * other.n) / (self.n + other.n)
self.n += other.n
return self
Expand All @@ -94,7 +95,7 @@ def min(self):

def max(self):
return self.maxValue

# Return the variance of the values.
def variance(self):
if self.n == 0:
Expand Down Expand Up @@ -124,5 +125,5 @@ def sampleStdev(self):
return math.sqrt(self.sampleVariance())

def __repr__(self):
return "(count: %s, mean: %s, stdev: %s, max: %s, min: %s)" % (self.count(), self.mean(), self.stdev(), self.max(), self.min())

return "(count: %s, mean: %s, stdev: %s, max: %s, min: %s)" %
(self.count(), self.mean(), self.stdev(), self.max(), self.min())

0 comments on commit b3b96cf

Please sign in to comment.