Skip to content

Commit

Permalink
Fix: statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
janpipek committed Jun 3, 2016
1 parent 568506a commit f905bac
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 6 deletions.
15 changes: 9 additions & 6 deletions physt/histogram1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,16 @@ def mean(self):
else:
return None # TODO: or error

def std(self):
def std(self, ddof=0):
# TODO: Add DOF
if self._stats:
return np.sqrt(self.variance())
return np.sqrt(self.variance(ddof=ddof))
else:
return None # TODO: or error

def variance(self):
def variance(self, ddof=0):
# TODO: Add DOF
# http://stats.stackexchange.com/questions/6534/how-do-i-calculate-a-weighted-standard-deviation-in-excel
if self._stats:
if self.total > 0:
return (self._stats["sum2"] - self._stats["sum"] ** 2 / self.total) / self.total
Expand Down Expand Up @@ -323,7 +326,7 @@ def fill(self, value, weight=1):
self._errors2[ixbin] += weight ** 2
if self._stats:
self._stats["sum"] += weight * value
self._stats["sum2"] += (weight * value) ** 2
self._stats["sum2"] += weight * value ** 2
return ixbin

def plot(self, histtype='bar', cumulative=False, density=False, errors=False, backend="matplotlib", ax=None, **kwargs):
Expand Down Expand Up @@ -809,7 +812,7 @@ def calculate_frequencies(data, bins, weights=None, validate_bins=True, already_
frequencies[xbin] = weights[start:stop].sum()
errors2[xbin] = (weights[start:stop] ** 2).sum()
sum += (data[start:stop] * weights[start:stop]).sum()
sum2 += ((data[start:stop] * weights[start:stop]) ** 2).sum()
sum2 += ((data[start:stop]) ** 2 * weights[start:stop]).sum()

# Underflow and overflow don't make sense for unconsecutive binning.
if not bin_utils.is_consecutive(bins):
Expand Down Expand Up @@ -870,4 +873,4 @@ def fill(self, value, weight=1.0):
self._errors2[bin] += weight ** 2

self._stats["sum"] += weight * value
self._stats["sum2"] += (weight * value) ** 2
self._stats["sum2"] += weight * value ** 2
34 changes: 34 additions & 0 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
import numpy as np
import pytest
import physt
from physt.histogram1d import calculate_frequencies

# Shared sample data: four values, with per-value weights (the last value is weighted twice).
values = [1, 2, 3, 4]
weights = [1, 1, 1, 2]
# Unweighted and weighted 1D histograms, built once at import time.
h = physt.h1(values)
hw = physt.h1(values, weights=weights)

class TestStatistics(object):
    """Check the running statistics (sum, sum of squares, mean, std) of 1D histograms.

    Uses the module-level histograms ``h`` (unweighted) and ``hw`` (weighted).
    """

    def test_stats_filled_in(self):
        # sum = 1 + 2 + 3 + 4; sum2 = 1 + 4 + 9 + 16
        assert h._stats["sum"] == 10
        assert h._stats["sum2"] == 30

    def test_mean_no_weights(self):
        assert np.isclose(h.mean(), 2.5)

    def test_std_no_weights(self):
        # variance = sum2 / n - mean ** 2 = 30 / 4 - 2.5 ** 2 = 1.25
        # A float literal is used on purpose: `5/4` truncates to 1 under
        # Python 2 integer division, which would make the expectation wrong.
        assert np.allclose(h.std(), np.sqrt(1.25))

    def test_mean_weights(self):
        # weighted mean = (1 + 2 + 3 + 2 * 4) / (1 + 1 + 1 + 2) = 14 / 5
        assert np.isclose(hw.mean(), 2.8)

    def test_std_weights(self):
        # weighted variance = (46 - 14 ** 2 / 5) / 5 = 6.8 / 5
        assert np.allclose(hw.std(), np.sqrt(6.8 / 5))


if __name__ == "__main__":
    # pytest.main expects a list of command-line arguments; a bare string is
    # not accepted by current pytest releases.
    pytest.main([__file__])

0 comments on commit f905bac

Please sign in to comment.