Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to return indexes in conditional abundance matching #913

Merged
merged 12 commits into from Jun 8, 2018
3 changes: 2 additions & 1 deletion .gitignore
Expand Up @@ -43,11 +43,12 @@ distribute-*.tar.gz
# Other
.*.swp
*~
temporary_testing_script.py
temporary_testing_script.py
*.ipynb
.ipynb_checkpoints/*
*/.ipynb_checkpoints/*
*/*/.ipynb_checkpoints/*
.pytest_cache/*

# Mac OSX
.DS_Store
46 changes: 35 additions & 11 deletions halotools/empirical_models/abunmatch/bin_free_cam.py
Expand Up @@ -3,12 +3,12 @@
import numpy as np
from ...utils import unsorting_indices
from ...utils.conditional_percentile import _check_xyn_bounds, rank_order_function
from .engines import cython_bin_free_cam_kernel
from .engines import cython_bin_free_cam_kernel, get_value_at_rank
from .tests.naive_python_cam import sample2_window_indices


def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
assume_x_is_sorted=False, assume_x2_is_sorted=False):
assume_x_is_sorted=False, assume_x2_is_sorted=False, return_indexes=False):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what the best api is - another option would be to have two functions:
conditional_abunmatch and conditional_abunmatch_indexes. I don't know how this is done elsewhere in halotools/how you would like to do it going forward. Your call :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only time I prefer creating an entirely new function is in cases where adding in some new feature considerably slows performance; if performance is mission-critical to the original function, I just write a new one. The additional algebraic operations added here are minor enough that I think what you've done here is preferable.

r"""
Given a set of input points with primary property `x` and secondary property `y`,
use conditional abundance matching to map new values `ynew` onto the input points
Expand Down Expand Up @@ -78,6 +78,9 @@ def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
`~halotools.empirical_models.conditional_abunmatch_bin_based`.

"""
if (return_indexes and add_subgrid_noise):
raise ValueError("Can't add subgrid noise when returning indexes")

x, y, nwin = _check_xyn_bounds(x, y, nwin)
x2, y2, nwin = _check_xyn_bounds(x2, y2, nwin)
nhalfwin = int(nwin/2)
Expand All @@ -102,27 +105,48 @@ def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
i2_matched = np.searchsorted(x2_sorted, x_sorted).astype('i4')

result = np.array(cython_bin_free_cam_kernel(
y_sorted, y2_sorted, i2_matched, nwin, int(add_subgrid_noise)))
y_sorted, y2_sorted, i2_matched, nwin, int(add_subgrid_noise), int(return_indexes)))

# Finish the leftmost points in pure python
iw = 0
leftmost_window_ranks = rank_order_function(y_sorted[:nwin])
for ix1 in range(0, nhalfwin):
iy2_low, iy2_high = sample2_window_indices(ix1, x_sorted, x2_sorted, nwin)
leftmost_sorted_window_y2 = np.sort(y2_sorted[iy2_low:iy2_high])
leftmost_window_ranks = rank_order_function(y_sorted[:nwin])
result[ix1] = leftmost_sorted_window_y2[leftmost_window_ranks[iw]]

if return_indexes:
leftmost_window_sorting_indexes = np.argsort(y2_sorted[iy2_low:iy2_high])
result[ix1] = iy2_low + leftmost_window_sorting_indexes[
leftmost_window_ranks[iw]
]
else:
leftmost_sorted_window_y2 = np.sort(y2_sorted[iy2_low:iy2_high])
result[ix1] = get_value_at_rank(leftmost_sorted_window_y2, leftmost_window_ranks[iw], nwin, int(add_subgrid_noise))

iw += 1

# Finish the rightmost points in pure python
iw = nhalfwin + 1
rightmost_window_ranks = rank_order_function(y_sorted[-nwin:])
for ix1 in range(npts1-nhalfwin, npts1):
iy2_low, iy2_high = sample2_window_indices(ix1, x_sorted, x2_sorted, nwin)
rightmost_sorted_window_y2 = np.sort(y2_sorted[iy2_low:iy2_high])
rightmost_window_ranks = rank_order_function(y_sorted[-nwin:])
result[ix1] = rightmost_sorted_window_y2[rightmost_window_ranks[iw]]

if return_indexes:
rightmost_window_sorting_indexes = np.argsort(y2_sorted[iy2_low:iy2_high])
result[ix1] = iy2_low + rightmost_window_sorting_indexes[
rightmost_window_ranks[iw]
]
else:
rightmost_sorted_window_y2 = np.sort(y2_sorted[iy2_low:iy2_high])
result[ix1] = get_value_at_rank(rightmost_sorted_window_y2, rightmost_window_ranks[iw], nwin, int(add_subgrid_noise))
iw += 1

if assume_x_is_sorted:

if return_indexes:
# The result indexes point to the location in y2_sorted. Undo that if required
result = result if assume_x2_is_sorted else idx_x2_sorted[result]
# The result indexes are ordered like y_sorted. Undo that if required
result = result if assume_x_is_sorted else result[unsorting_indices(idx_x_sorted)]
return result
else:
return result[unsorting_indices(idx_x_sorted)]
# The result values are ordered like y_sorted, Undo that if required
return result if assume_x_is_sorted else result[unsorting_indices(idx_x_sorted)]
2 changes: 1 addition & 1 deletion halotools/empirical_models/abunmatch/engines/__init__.py
@@ -1 +1 @@
from .bin_free_cam_kernel import cython_bin_free_cam_kernel
from .bin_free_cam_kernel import cython_bin_free_cam_kernel, get_value_at_rank
111 changes: 68 additions & 43 deletions halotools/empirical_models/abunmatch/engines/bin_free_cam_kernel.pyx
Expand Up @@ -6,7 +6,7 @@ import numpy as np
cimport cython
from ....utils import unsorting_indices

__all__ = ('cython_bin_free_cam_kernel', )
__all__ = ('cython_bin_free_cam_kernel', 'get_value_at_rank')


cdef double random_uniform():
Expand Down Expand Up @@ -110,11 +110,42 @@ def setup_initial_indices(int iy, int nwin, int npts):
return iy, init_iy_low, init_iy_high


@cython.boundscheck(False)
@cython.nonecheck(False)
@cython.wraparound(False)
cdef int _find_index(int[:] arr, int val):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _find_index function implements a blind lookup for a value in the correspondence indices array. The cython is clean (though watch out for the undeclared index variable on line 265), but I think the algorithm can be improved by taking advantage of the binary search that is already implemented, no? While the correspondence_indx2 array is not sorted, the sorted_cdf_values2 maintains a sorted order. If an original array of indices were defined at the very beginning of the engine, simply with np.arange(npts2), couldn't that array be manipulated alongside sorted_cdf_values2 to avoid the blind lookup? Naively, it seems this would be faster; it could be that the performance hit from the additional manipulations wash out the improvement, I don't know, but it seems worth testing since the cythonized binary search machinery is already in place. Let me know whether this makes sense.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to make sure I understand what you are suggesting:
In the same way that we have the correspondence_indx2 array to keep track of how to unsort the sorted_cdf_values2, we can have a another array that keeps track of where things are in the correspondence_indx2? That would make this _find_index just a lookup in that map.

I think that this has roughly the same time complexity - when we update the correspondence indicies (to shift them along) we will need to update this array too and that would be roughly O(len(nwin)) which is I think what we have now.

But I could well have missed something... I definitely don't fully understand all the internals.

(index cdefed in f05be1c)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may be right, this might not be worth the trouble. After cleaning the cython of the remaining python interactions as discussed elsewhere, could you just do a simple timing test comparing your implementation against what is currently in master? If runtimes are comparable, then it means that further optimization is not worth spending time on.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the timing test, just make sure to use at least ~1e6 points

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the new code

Not returning indexes 143.03s
Returning indexes 171.74s

In master

Not returning indexes 150.37s
... TypeError: conditional_abunmatch() got an unexpected keyword argument 'return_indexes'                  

So returning indexes is ~20% slower, probably due to that extra linear search over the window. I used 1e7 points in both x and y and a 10001 sized window.

But no change in the returning values case.

for i in range(len(arr)):
if arr[i] == val:
return i
return -1


# Public version of _get_value_at_rank as we need to call this from python.
@cython.boundscheck(False)
@cython.nonecheck(False)
@cython.wraparound(False)
def get_value_at_rank(double[:] sorted_values, int rank1, int nwin, int add_subgrid_noise):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, quick implementation comment about this get_value_at_rank function. This function is declared with def rather than cdef, which means get_value_at_rank returns a python object. If you run cython -a bin_free_cam_kernel.pyx and inspect the html output, you will see deep yellow on line 270. Click on it and see all the code cython generates to compile this line. To fix this, get_value_at_rank needs to be C-declared. See, e.g., the definition of _bisect_left_kernel. With this change, this means that get_value_at_rank will no longer be accessible by the outside world, but only accessible within cython, so you will not be permitted to add it to __all__.

In general, it's good to get in the habit of always running cython -a whenever reviewing cython code. Because cython is a superset of python, the compiler will allow you to write code with avoidable python-layer interactions without complaining, so you just have to be vigilant and always check this manually.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I defed it so that it would be accessible to the code that does the left and right edges.

But maybe a better way would be to have an internal cdef int _get_value_at_rank and then an external def get_value_at_rank that just wraps this?

Running cython -a is definitely a good idea

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bin_free_cam_kernel.html looks clean as of this commit. I also just did a simple timing test, comparing your branch to the current version of master, and it looks like there is a nearly negligible performance increase from this feature, so I think this PR is all set once it passes CI.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sweet, thanks for the comments!

return _get_value_at_rank(sorted_values, rank1, nwin, add_subgrid_noise)

@cython.boundscheck(False)
@cython.nonecheck(False)
@cython.wraparound(False)
cdef double _get_value_at_rank(double[:] sorted_values, int rank1, int nwin, int add_subgrid_noise):
if add_subgrid_noise == 0:
return sorted_values[rank1]
else:
low_rank = max(rank1 - 1, 0)
high_rank = min(rank1 + 1, nwin - 1)
low_cdf = sorted_values[low_rank]
high_cdf = sorted_values[high_rank]
return low_cdf + (high_cdf-low_cdf)*random_uniform()


@cython.boundscheck(False)
@cython.nonecheck(False)
@cython.wraparound(False)
def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int nwin,
int add_subgrid_noise=0):
int add_subgrid_noise, int return_indexes):
""" Kernel underlying the bin-free implementation of conditional abundance matching.
For the i^th element of y1, we define a window of length `nwin` surrounding the
point y1[i], and another window surrounding y2[i2_match[i]]. We calculate the
Expand Down Expand Up @@ -159,7 +190,9 @@ def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int
cdef int idx_in1, idx_out1, idx_in2, idx_out2
cdef double value_in1, value_out1, value_in2, value_out2

cdef double[:] y1_new = np.zeros(npts1, dtype='f8') - 1
cdef int[:] y1_new_indexes = np.zeros(npts1, dtype='i4') - 1
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to define these in if statements:

if return_indexes:
    cdef int[:] y1_new_indexes = np.zeros(npts1, dtype='i4') - 1
else:
    cdef double[:] y1_new_values = np.zeros(npts1, dtype='f8') - 1

but that wasn't allowed?

Defining both is a bit unnecessary, but I couldn't find a nice way around it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I don't know a way around this either. This seems fine.

cdef double[:] y1_new_values = np.zeros(npts1, dtype='f8') - 1

cdef int rank1, rank2

# Set up initial window arrays for y1
Expand All @@ -178,6 +211,7 @@ def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int
cdef int iy2_max = npts2 - nhalfwin - 1

cdef int low_rank, high_rank
cdef int index
cdef double low_cdf, high_cdf

# Ensure that any bookkeeping error in setting up the window
Expand All @@ -201,58 +235,47 @@ def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int
for iy1 in range(nhalfwin, npts1-nhalfwin):

rank1 = correspondence_indx1[nhalfwin]
iy2_match = i2_match[iy1]
# Where to center the second window (making sure we don't slide off the end)
iy2_match = min(i2_match[iy1], iy2_max)

# Stop updating the second window once we reach npts2-nwin/2
if iy2_match > iy2_max:
iy2_match = iy2_max

if iy2 > iy2_max:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure that this shouldn't be possible and this set of if if else block confused me a bit at first. I think this reads a bit easier now but I'm happy to revert if you disagree.

Copy link
Contributor

@aphearin aphearin Jun 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed this is a little easier to read, but min returns a python object, so it's better practice to use a c-imported min function instead:

from libc.math cimport fmin as c_min

After looking at the cython -a output, in this case I'm pretty sure the cython compiler figures it out anyway and there is no performance hit, so just pointing this out so you are aware of python-cython interactions.

Also watch out for this on lines 130 & 131 inside the definition of get_value_at_rank

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 thanks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, as these are ints I don't think there is a min/max func in libc.math for them. (checked https://github.com/cython/cython/blob/master/Cython/Includes/libc/math.pxd)

http://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html suggests that cython has a fast min/max for ints (as you saw in the cython -a output).

iy2 = iy2_max
else:
# Continue to slide the window along the second array
# until we find the matching point, updating the window with each iteration
while iy2 < iy2_match:
# Continue to slide the window along the second array
# until we find the matching point, updating the window with each iteration
while iy2 < iy2_match:

# Find the value coming in and the value coming out
value_in2 = y2[iy2 + nhalfwin + 1]
idx_out2 = correspondence_indx2[nwin-1]
value_out2 = sorted_cdf_values2[idx_out2]
# Find the value coming in and the value coming out
value_in2 = y2[iy2 + nhalfwin + 1]
idx_out2 = correspondence_indx2[nwin-1]
value_out2 = sorted_cdf_values2[idx_out2]

# Find the position where we will insert the new point into the second window
idx_in2 = _bisect_left_kernel(sorted_cdf_values2, value_in2)
if value_in2 > value_out2:
idx_in2 -= 1
# Find the position where we will insert the new point into the second window
idx_in2 = _bisect_left_kernel(sorted_cdf_values2, value_in2)
if value_in2 > value_out2:
idx_in2 -= 1

# Update the correspondence array
_insert_first_pop_last_kernel(&correspondence_indx2[0], idx_in2, nwin)
for j in range(1, nwin):
idx2 = correspondence_indx2[j]
correspondence_indx2[j] += _correspondence_indices_shift(
idx_in2, idx_out2, idx2)
# Update the correspondence array
_insert_first_pop_last_kernel(&correspondence_indx2[0], idx_in2, nwin)
for j in range(1, nwin):
idx2 = correspondence_indx2[j]
correspondence_indx2[j] += _correspondence_indices_shift(
idx_in2, idx_out2, idx2)

# Update the CDF window
_insert_pop_kernel(&sorted_cdf_values2[0], idx_in2, idx_out2, value_in2)
# Update the CDF window
_insert_pop_kernel(&sorted_cdf_values2[0], idx_in2, idx_out2, value_in2)

iy2 += 1
iy2 += 1

# The array sorted_cdf_values2 is now centered on the correct point of y2
# We have already calculated the rank-order of the point iy1, rank1
# So we either directly map sorted_cdf_values2[rank1] to ynew,
# or alternatively we randomly draw a value between
# sorted_cdf_values2[rank1-1] and sorted_cdf_values2[rank1+1]
if add_subgrid_noise == 0:
y1_new[iy1] = sorted_cdf_values2[rank1]
if return_indexes == 1:
index = _find_index(correspondence_indx2, rank1)
if index == -1:
raise Exception("Index {} not found in correspondence_indx2".format(rank1))
y1_new_indexes[iy1] = iy2 + nhalfwin - index
else:
low_rank = rank1 - 1
high_rank = rank1 + 1
if low_rank < 0:
low_rank = 0
elif high_rank >= nwin:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should just be if rather than elif? That is what I have in the function (or the same with min/max)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, no problem at all to leave as is - mostly I was pointing this out for pedagogical reasons: since this is one of your first cython function contributions I thought it might be helpful to be explicitly aware of all python-layer interactions. But cython -a does reveal that the cython compiler has this very well cleaned.

high_rank = nwin - 1
low_cdf = sorted_cdf_values2[low_rank]
high_cdf = sorted_cdf_values2[high_rank]
y1_new[iy1] = low_cdf + (high_cdf-low_cdf)*random_uniform()
y1_new_values[iy1] = _get_value_at_rank(sorted_cdf_values2, rank1, nwin, add_subgrid_noise)

# Move on to the next value in y1

Expand All @@ -276,4 +299,6 @@ def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int
# Update the CDF window
_insert_pop_kernel(&sorted_cdf_values1[0], idx_in1, idx_out1, value_in1)

return y1_new
if return_indexes:
return y1_new_indexes
return y1_new_values
41 changes: 41 additions & 0 deletions halotools/empirical_models/abunmatch/tests/test_bin_free_cam.py
Expand Up @@ -2,13 +2,15 @@
"""
import numpy as np
from astropy.utils.misc import NumpyRNGContext
import pytest
from ..bin_free_cam import conditional_abunmatch
from ....utils.conditional_percentile import cython_sliding_rank, rank_order_function
from .naive_python_cam import pure_python_rank_matching
from ....utils import unsorting_indices


fixed_seed = 43
fixed_seed2 = 44


def test1():
Expand Down Expand Up @@ -384,11 +386,13 @@ def test_subgrid_noise1():
result2 = conditional_abunmatch(x, y, x2, y2, nwin1, add_subgrid_noise=True)
assert np.allclose(result, result2, atol=0.1)
assert not np.allclose(result, result2, atol=0.02)
assert np.all(result - result2 != 0)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be enough to assert that we are adding noise to the leftmost/rightmost edges.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this actually ensure that noise was added to the edges? Or just that noise was added somewhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it ensures that noise was added to every point

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you're right, upon second look I see that now.


nwin2 = 1001
result = conditional_abunmatch(x, y, x2, y2, nwin2, add_subgrid_noise=False)
result2 = conditional_abunmatch(x, y, x2, y2, nwin2, add_subgrid_noise=True)
assert np.allclose(result, result2, atol=0.02)
assert np.all(result - result2 != 0)


def test_initial_sorting1():
Expand Down Expand Up @@ -503,3 +507,40 @@ def test_initial_sorting4():
assume_x_is_sorted=True, assume_x2_is_sorted=True,
add_subgrid_noise=False)
assert np.allclose(result, result4[unsorting_indices(idx_x_sorted)])

def test_no_subgrid_noise_with_return_indexes():
x, y = np.arange(5), np.arange(5)
x2, y2 = np.arange(10), np.arange(10)
nwin = 3
with pytest.raises(ValueError) as err:
conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True, return_indexes=True)
assert str(err.value) == "Can't add subgrid noise when returning indexes"


def test_return_indexes():
n1, n2 = int(1e2), int(1e2)

with NumpyRNGContext(fixed_seed):
x = np.random.uniform(0, 10, n1)
y = np.random.uniform(0, 1, n1)

with NumpyRNGContext(fixed_seed2):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I used the same seed here as above tests were passing when they shouldn't (before I had correctly reordered/reindexed the indexes). I think that was because we were in the special case of x == x2 and y == y2 + c.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, yes, good catch, sometimes I have to use different seeds for such purposes as well.

x2 = np.random.uniform(0, 10, n2)
y2 = np.random.uniform(-4, -3, n2)

nwin = 9
for sorted_x in [False, True]:
for sorted_x2 in [False, True]:
x_, y_, x2_, y2_ = x, y, x2, y2
if sorted_x:
x_, y_ = np.sort(x_), np.sort(y_)
if sorted_x2:
x2_, y2_ = np.sort(x2_), np.sort(y2_)


values = conditional_abunmatch(x_, y_, x2_, y2_, nwin, add_subgrid_noise=False,
assume_x_is_sorted=sorted_x, assume_x2_is_sorted=sorted_x2, return_indexes=False)
indexes = conditional_abunmatch(x_, y_, x2_, y2_, nwin, add_subgrid_noise=False,
assume_x_is_sorted=sorted_x, assume_x2_is_sorted=sorted_x2, return_indexes=True)

assert np.all(y2_[indexes] == values), "{}, {}".format(sorted_x, sorted_x2)