Skip to content

Commit

Permalink
use saner column / attr names in paircount result.
Browse files Browse the repository at this point in the history
wnpairs : weighted npairs (per bin implied)

total : total (no longer per bin)

paircount.pairs.attrs['total_wnpairs'] shall be prefered than
paircount.attrs['total_wnpairs'] -- leave this change to the next
PR.
  • Loading branch information
rainwoodman committed May 15, 2018
1 parent c90bfd5 commit 167f844
Show file tree
Hide file tree
Showing 9 changed files with 57 additions and 47 deletions.
4 changes: 2 additions & 2 deletions nbodykit/algorithms/pair_counters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ def __init__(self, mode, edges, first, second, Nmu, pimax, weight, show_progress
wpairs1, wpairs2 = self.comm.allreduce(first.compute(first[weight].sum())), self.comm.allreduce(first.compute((first[weight]**2).sum()))
# for auto excluding self pairs to avoid a biased estimator. The factor 0.5 is by convention.
# In the end it will cancel out in two point function estimators.
self.attrs['weighted_npairs'] = 0.5*(wpairs1**2-wpairs2)
self.attrs['total_wnpairs'] = 0.5*(wpairs1**2-wpairs2)
self.attrs['is_cross'] = False
else:
wpairs1, wpairs2 = self.comm.allreduce(first.compute(first[weight].sum())), self.comm.allreduce(second.compute(second[weight].sum()))
self.attrs['weighted_npairs'] = 0.5*wpairs1*wpairs2
self.attrs['total_wnpairs'] = 0.5*wpairs1*wpairs2
self.attrs['is_cross'] = True

def __getstate__(self):
Expand Down
8 changes: 6 additions & 2 deletions nbodykit/algorithms/pair_counters/corrfunc/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,17 @@ def run(chunk):
if 's' in dims: dims[dims.index('s')] = 'r'

# make a new structured array
dtype = numpy.dtype([(dims[0], 'f8'), ('npairs', 'u8'), ('weightavg', 'f8')])
dtype = numpy.dtype([(dims[0], 'f8'),
('npairs', 'u8'),
('wnpairs', 'f8')])
data = numpy.zeros(pc.shape, dtype=dtype)

# copy over main results
data[dims[0]] = pc[self.binning_dims[0]+'avg']
data['npairs'] = pc['npairs']
data['weightavg'] = pc['weightavg']
data['wnpairs'] = pc['weightavg'] * pc['npairs']
# patch up when there is no pair, the weighted value shall be zero as well.
data['wnpairs'][pc['npairs'] == 0] = 0

# return the BinnedStatistic
return BinnedStatistic(dims, self.edges, data, fields_to_sum=['npairs'])
Expand Down
7 changes: 6 additions & 1 deletion nbodykit/algorithms/pair_counters/mocksurvey.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ def run(self):
- :attr:`SurveyDataPairCount.pairs`
self.pairs.attrs['total_wnpairs']: The total of wnpairs.
Attributes
----------
pairs : :class:`~nbodykit.binned_statistic.BinnedStatistic`
Expand All @@ -140,8 +142,10 @@ def run(self):
- ``r``, ``rp``, or ``theta`` : the mean separation value in the bin
- ``npairs``: the number of pairs in the bin
- ``weightavg``: the average weight value in the bin; each pair
- ``wnpairs``: the weighted npairs in the bin; each pair
contributes the product of the individual weight values
"""
from .domain import decompose_survey_data

Expand Down Expand Up @@ -179,6 +183,7 @@ def run(self):

# do the calculation
self.pairs = func(pos1, w1, pos2, w2, **attrs['config'])
self.pairs.attrs['total_wnpairs'] = self.attrs['total_wnpairs']

# squeeze the result if '1d' (single mu bin was used)
if mode == '1d':
Expand Down
3 changes: 2 additions & 1 deletion nbodykit/algorithms/pair_counters/simbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def run(self):
- ``r``, ``rp``, or ``theta`` : the mean separation value in the bin
- ``npairs``: the number of pairs in the bin
- ``weightavg``: the average weight value in the bin; each pair
- ``wnpairs``: the average weight value in the bin; each pair
contributes the product of the individual weight values
"""
# setup
Expand Down Expand Up @@ -216,6 +216,7 @@ def run(self):
# do the calculation
self.pairs = func(pos1, w1, pos2, w2, **attrs['config'])

self.pairs.attrs['total_wnpairs'] = self.attrs['total_wnpairs']

def shift_to_box_center(pos, BoxSize, comm):
"""
Expand Down
12 changes: 6 additions & 6 deletions nbodykit/algorithms/pair_counters/tests/test_1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_sim_periodic_auto(comm):
npairs, ravg, wsum = reference_paircount(pos, w, redges, source.attrs['BoxSize'])
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

# test save
r.save('paircount-test.json')
Expand Down Expand Up @@ -97,7 +97,7 @@ def test_sim_nonperiodic_auto(comm):
npairs, ravg, wsum = reference_paircount(pos, w, redges, None)
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])


@MPITest([1, 3])
Expand All @@ -122,7 +122,7 @@ def test_sim_periodic_cross(comm):
npairs, ravg, wsum = reference_paircount(pos1, None, redges, first.attrs['BoxSize'], pos2=pos2)
assert_allclose(ravg, r.pairs['r'], rtol=1e-6)
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([1])
def test_bad_los(comm):
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_survey_auto(comm):
npairs, ravg, wsum = reference_paircount(pos, w, redges, None)
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([1, 4])
def test_survey_auto_endianess(comm):
Expand Down Expand Up @@ -257,7 +257,7 @@ def test_survey_auto_endianess(comm):
npairs, ravg, wsum = reference_paircount(pos, w, redges, None)
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([1, 4])
def test_survey_cross(comm):
Expand Down Expand Up @@ -287,7 +287,7 @@ def test_survey_cross(comm):
npairs, ravg, wsum = reference_paircount(pos1, w1, redges, None, pos2=pos2, w2=w2)
assert_allclose(ravg, r.pairs['r'], rtol=1e-6)
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([1])
def test_survey_missing_columns(comm):
Expand Down
12 changes: 6 additions & 6 deletions nbodykit/algorithms/pair_counters/tests/test_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_sim_periodic_auto(comm):
npairs, ravg, wsum = reference_sim_paircount(pos, w, redges, Nmu, source.attrs['BoxSize'])
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([3])
def test_sim_diff_los(comm):
Expand All @@ -105,7 +105,7 @@ def test_sim_diff_los(comm):
npairs, ravg, wsum = reference_sim_paircount(pos, w, redges, Nmu, source.attrs['BoxSize'], los=0)
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([1, 3])
def test_sim_nonperiodic_auto(comm):
Expand All @@ -129,7 +129,7 @@ def test_sim_nonperiodic_auto(comm):
npairs, ravg, wsum = reference_sim_paircount(pos, w, redges, Nmu, None)
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])


@MPITest([1, 3])
Expand All @@ -154,7 +154,7 @@ def test_sim_periodic_cross(comm):
npairs, ravg, wsum = reference_sim_paircount(pos1, None, redges, Nmu, first.attrs['BoxSize'], pos2=pos2)
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([1, 4])
def test_survey_auto(comm):
Expand All @@ -179,7 +179,7 @@ def test_survey_auto(comm):
npairs, ravg, wsum = reference_survey_paircount(pos, w, redges, Nmu)
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])


@MPITest([1, 4])
Expand Down Expand Up @@ -209,7 +209,7 @@ def test_survey_cross(comm):
npairs, ravg, wsum = reference_survey_paircount(pos1, w1, redges, Nmu, pos2=pos2, w2=w2)
assert_allclose(ravg, r.pairs['r'])
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

# test save
r.save('paircount-test.json')
Expand Down
8 changes: 4 additions & 4 deletions nbodykit/algorithms/pair_counters/tests/test_angular.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_survey_auto(comm):
# verify with kdcount
npairs, thetaavg, wsum = reference_paircount([ra,dec], w, edges)
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([1, 4])
def test_survey_cross(comm):
Expand Down Expand Up @@ -96,7 +96,7 @@ def test_survey_cross(comm):
# verify with kdcount
npairs, thetaavg, wsum = reference_paircount([ra1,dec1], w1, edges, pos2=[ra2,dec2], w2=w2)
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

# test save
r.save('angular-paircount-test.json')
Expand Down Expand Up @@ -132,7 +132,7 @@ def test_sim_auto(comm):
# verify with kdcount
npairs, thetaavg, wsum = reference_paircount([ra,dec], w, edges)
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

@MPITest([1, 4])
def test_sim_cross(comm):
Expand All @@ -158,7 +158,7 @@ def test_sim_cross(comm):
# verify with kdcount
npairs, thetaavg, wsum = reference_paircount([ra1,dec1], w1, edges, pos2=[ra2,dec2], w2=w2)
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wsum, r.pairs['npairs'] * r.pairs['weightavg'])
assert_allclose(wsum, r.pairs['wnpairs'])

# test save
r.save('angular-paircount-test.json')
Expand Down
17 changes: 8 additions & 9 deletions nbodykit/algorithms/pair_counters/tests/test_projected.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def compute_rp_pi(r, i, j):
# initialize the return arrays
npairs = numpy.zeros(shape, dtype='i8')
rpavg = numpy.zeros(shape, dtype='f8')
weightavg = numpy.zeros(shape, dtype='f8')
wnpairs = numpy.zeros(shape, dtype='f8')

# mean rp values
rpavg.flat += numpy.bincount(multi_index, weights=rp, minlength=rpavg.size)
Expand All @@ -98,11 +98,10 @@ def compute_rp_pi(r, i, j):
npairs.flat += numpy.bincount(multi_index, minlength=npairs.size)
npairs = npairs[1:-1,1:-1]

# mean weights
weightavg.flat += numpy.bincount(multi_index, weights=w1[i]*w2[j], minlength=weightavg.size)
weightavg = weightavg[1:-1,1:-1]
wnpairs.flat += numpy.bincount(multi_index, weights=w1[i]*w2[j], minlength=wnpairs.size)
wnpairs = wnpairs[1:-1,1:-1]

return npairs, rpavg/npairs, weightavg/npairs
return npairs, rpavg/npairs, wnpairs


@MPITest([1, 3])
Expand Down Expand Up @@ -213,10 +212,10 @@ def test_survey_auto(comm):
w = gather_data(source, 'Weight')

# verify with kdcount
npairs, ravg, wavg = reference_survey_paircount(pos, w, redges, pimax)
npairs, ravg, wnpairs = reference_survey_paircount(pos, w, redges, pimax)
assert_allclose(ravg, r.pairs['rp'], rtol=1e-5)
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wavg, r.pairs['weightavg'])
assert_allclose(wnpairs, r.pairs['wnpairs'])

@MPITest([1, 4])
def test_survey_cross(comm):
Expand All @@ -242,10 +241,10 @@ def test_survey_cross(comm):
w2 = gather_data(second, 'Weight')

# verify with kdcount
npairs, ravg, wavg = reference_survey_paircount(pos1, w1, redges, pimax, pos2=pos2, w2=w2)
npairs, ravg, wnpairs = reference_survey_paircount(pos1, w1, redges, pimax, pos2=pos2, w2=w2)
assert_allclose(ravg, r.pairs['rp'], rtol=1e-3)
assert_allclose(npairs, r.pairs['npairs'])
assert_allclose(wavg, r.pairs['weightavg'])
assert_allclose(wnpairs, r.pairs['wnpairs'])

# test save
r.save('paircount-test.json')
Expand Down
33 changes: 17 additions & 16 deletions nbodykit/algorithms/paircount_tpcf/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,28 +113,29 @@ def get_filling_factor(self):

def __call__(self, NR1, NR2=None):
"""
Evaluate the expected randoms pair counts, and the weighted_npairs, returns
Evaluate the expected randoms pair counts, and the total_weight, returns
as an object that looks like the result of paircount.
"""
edges = [self.edges[d] for d in self.dims] # sequentialize it, poor API!

if NR2 is None:
R1R2 = NR1 ** 2 * self.get_filling_factor()
wnpairs = NR1 * (NR1 - 1) * 0.5
total_wnpairs = NR1 * (NR1 - 1) * 0.5
else:
R1R2 = NR1 * NR2 * self.get_filling_factor()
wnpairs = NR1 * NR2 * 0.5
total_wnpairs = NR1 * NR2 * 0.5

data = numpy.empty_like(R1R2, dtype=[('npairs', 'f8'), ('weightavg', 'f8')])
data = numpy.empty_like(R1R2, dtype=[('npairs', 'f8'), ('wnpairs', 'f8')])
data['npairs'] = R1R2
data['weightavg'] = 1
data['wnpairs'] = R1R2
pairs = WedgeBinnedStatistic(self.dims, edges, data)

R1R2 = lambda : None
R1R2.attrs = {}
R1R2.pairs = pairs
R1R2.attrs['weighted_npairs']= wnpairs
R1R2.attrs['total_wnpairs']= total_wnpairs
R1R2.pairs.attrs['total_wnpairs']= total_wnpairs

return R1R2

Expand Down Expand Up @@ -198,9 +199,9 @@ def LandySzalayEstimator(pair_counter, data1, data2, randoms1, randoms2, R1R2=No
else:
D2R1 = D1R2

fDD = R1R2.attrs['weighted_npairs']/D1D2.attrs['weighted_npairs']
fDR = R1R2.attrs['weighted_npairs']/D1R2.attrs['weighted_npairs']
fRD = R1R2.attrs['weighted_npairs']/D2R1.attrs['weighted_npairs']
fDD = R1R2.attrs['total_wnpairs']/D1D2.attrs['total_wnpairs']
fDR = R1R2.attrs['total_wnpairs']/D1R2.attrs['total_wnpairs']
fRD = R1R2.attrs['total_wnpairs']/D2R1.attrs['total_wnpairs']

nonzero = R1R2.pairs['npairs'] > 0

Expand All @@ -212,10 +213,10 @@ def LandySzalayEstimator(pair_counter, data1, data2, randoms1, randoms2, R1R2=No

# the Landy - Szalay estimator
# (DD - DR - RD + RR) / RR
DD = (D1D2.pairs['npairs']*D1D2.pairs['weightavg'])[nonzero]
DR = (D1R2.pairs['npairs']*D1R2.pairs['weightavg'])[nonzero]
RD = (D2R1.pairs['npairs']*D2R1.pairs['weightavg'])[nonzero]
RR = (R1R2.pairs['npairs']*R1R2.pairs['weightavg'])[nonzero]
DD = (D1D2.pairs['wnpairs'])[nonzero]
DR = (D1R2.pairs['wnpairs'])[nonzero]
RD = (D2R1.pairs['wnpairs'])[nonzero]
RR = (R1R2.pairs['wnpairs'])[nonzero]
xi = (fDD * DD - fDR * DR - fRD * RD)/RR + 1
CF[nonzero] = xi[:]

Expand Down Expand Up @@ -251,9 +252,9 @@ def NaturalEstimator(D1D2):
R1R2 = AnalyticUniformRandoms(mode, D1D2.pairs.dims, D1D2.pairs.edges, BoxSize)(ND1, ND2)

# and compute the correlation function as DD/RR - 1
fDD = R1R2.attrs['weighted_npairs'] / D1D2.attrs['weighted_npairs']
RR = R1R2.pairs['npairs'] * R1R2.pairs['weightavg']
DD = D1D2.pairs['npairs'] * D1D2.pairs['weightavg']
fDD = R1R2.attrs['total_wnpairs'] / D1D2.attrs['total_wnpairs']
RR = R1R2.pairs['wnpairs']
DD = D1D2.pairs['wnpairs']

CF = (DD * fDD) / RR - 1.

Expand Down

0 comments on commit 167f844

Please sign in to comment.