Skip to content

Commit

Permalink
Merge pull request #210 from alimanfoo/pbs-20181012
Browse files Browse the repository at this point in the history
Implement population branching statistic (PBS)
  • Loading branch information
alimanfoo committed Oct 16, 2018
2 parents d91f42e + d55148f commit ed3058c
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 12 deletions.
11 changes: 6 additions & 5 deletions allel/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
from .fst import weir_cockerham_fst, hudson_fst, \
windowed_weir_cockerham_fst, windowed_hudson_fst, patterson_fst, \
windowed_patterson_fst, blockwise_weir_cockerham_fst, \
blockwise_hudson_fst, blockwise_patterson_fst, average_hudson_fst, average_patterson_fst, \
average_weir_cockerham_fst, moving_hudson_fst, moving_patterson_fst, moving_weir_cockerham_fst
blockwise_hudson_fst, blockwise_patterson_fst, average_hudson_fst, \
average_patterson_fst, average_weir_cockerham_fst, moving_hudson_fst, \
moving_patterson_fst, moving_weir_cockerham_fst

from .distance import pairwise_distance, pairwise_dxy, pcoa, \
plot_pairwise_distance, condensed_coords, condensed_coords_between, \
Expand All @@ -36,14 +37,14 @@
from .preprocessing import StandardScaler, CenterScaler, PattersonScaler, get_scaler

from .admixture import patterson_f2, patterson_f3, patterson_d, \
blockwise_patterson_f3, blockwise_patterson_d, average_patterson_d, average_patterson_f3, \
moving_patterson_d, moving_patterson_f3
blockwise_patterson_f3, blockwise_patterson_d, average_patterson_d, \
average_patterson_f3, moving_patterson_d, moving_patterson_f3

from .selection import ehh_decay, voight_painting, xpehh, ihs, \
plot_voight_painting, fig_voight_painting, plot_haplotype_frequencies, \
plot_moving_haplotype_frequencies, haplotype_diversity, \
moving_haplotype_diversity, garud_h, moving_garud_h, nsl, xpnsl, \
standardize, standardize_by_allele_count, moving_delta_tajima_d
standardize, standardize_by_allele_count, moving_delta_tajima_d, pbs

from .sf import sfs, sfs_folded, sfs_scaled, sfs_folded_scaled, \
joint_sfs, joint_sfs_folded, joint_sfs_scaled, joint_sfs_folded_scaled, \
Expand Down
82 changes: 81 additions & 1 deletion allel/stats/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@

from allel.compat import memoryview_safe
from allel.util import asarray_ndim, check_dim0_aligned, check_integer_dtype
from allel.model.ndarray import HaplotypeArray
from allel.model.ndarray import HaplotypeArray, AlleleCountsArray
from allel.stats.window import moving_statistic, index_windows
from allel.stats.diversity import moving_tajima_d
from allel.stats.fst import moving_hudson_fst
from allel.opt.stats import pairwise_shared_prefix_lengths, paint_shared_prefixes, \
ihh01_scan, ihh_scan, nsl01_scan, nsl_scan

Expand Down Expand Up @@ -1259,3 +1260,82 @@ def standardize_by_allele_count(score, aac, bins=None, n_bins=None,
score_standardized[loc] = (score[loc] - m) / s

return score_standardized, bins


def pbs(ac1, ac2, ac3, window_size, window_start=0, window_stop=None,
window_step=None, normed=True):
"""Compute the population branching statistic (PBS) which performs a comparison
of allele frequencies between three populations to detect genome regions that are
unusually differentiated in one population relative to the other two populations.
Parameters
----------
ac1 : array_like, int
Allele counts from the first population.
ac2 : array_like, int
Allele counts from the second population.
ac3 : array_like, int
Allele counts from the third population.
window_size : int
The window size (number of variants) within which to compute PBS values.
window_start : int, optional
The variant index at which to start windowed calculations.
window_stop : int, optional
The variant index at which to stop windowed calculations.
window_step : int, optional
The number of variants between start positions of windows. If not given, defaults
to the window size, i.e., non-overlapping windows.
normed : bool, optional
If True (default), use the normalised version of PBS, also known as PBSn1 [2]_.
Otherwise, use the PBS statistic as originally defined in [1]_.
Returns
-------
pbs : ndarray, float
Windowed PBS values.
Notes
-----
The F\ :sub:`ST` calculations use Hudson's estimator.
References
----------
.. [1] Yi et al., "Sequencing of Fifty Human Exomes Reveals Adaptation to High
Altitude", Science, 329(5987): 75–78, 2 July 2010.
.. [2] Malaspinas et al., "A genomic history of Aboriginal Australia", Nature. volume
538, pages 207–214, 13 October 2016.
"""

# normalise and check inputs
ac1 = AlleleCountsArray(ac1)
ac2 = AlleleCountsArray(ac2)
ac3 = AlleleCountsArray(ac3)
check_dim0_aligned(ac1, ac2, ac3)

# compute fst
fst12 = moving_hudson_fst(ac1, ac2, size=window_size, start=window_start,
stop=window_stop, step=window_step)
fst13 = moving_hudson_fst(ac1, ac3, size=window_size, start=window_start,
stop=window_stop, step=window_step)
fst23 = moving_hudson_fst(ac2, ac3, size=window_size, start=window_start,
stop=window_stop, step=window_step)

# clip fst values to avoid infinite if fst is 1
for x in fst12, fst13, fst23:
np.clip(x, a_min=0, a_max=0.99999, out=x)

# compute fst transform
t12 = -np.log(1 - fst12)
t13 = -np.log(1 - fst13)
t23 = -np.log(1 - fst23)

# compute pbs
ret = (t12 + t13 - t23) / 2

if normed:
# compute pbs normalising constant
norm = 1 + (t12 + t13 + t23) / 2
ret = ret / norm

return ret
20 changes: 19 additions & 1 deletion allel/test/stats/test_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from allel.test.tools import assert_array_equal, assert_array_nanclose


from allel import ihs, xpehh, nsl, xpnsl, ehh_decay, voight_painting
from allel import ihs, xpehh, nsl, xpnsl, ehh_decay, voight_painting, pbs
from allel.opt.stats import ssl01_scan, nsl01_scan, ihh01_scan,\
ssl2ihh, ihh_scan

Expand Down Expand Up @@ -616,3 +616,21 @@ def test_voight_painting():
[0, 0, 0, 0]]
a, _ = voight_painting(h)
assert_array_equal(e, a)


def test_pbs():

# minimal input data, sanity check for output existence and type
ac1 = [[2, 0], [0, 2], [1, 1], [2, 0], [0, 2]]
ac2 = [[1, 1], [2, 0], [0, 2], [2, 0], [0, 2]]
ac3 = [[0, 2], [1, 1], [2, 0], [2, 0], [0, 2]]
ret = pbs(ac1, ac2, ac3, window_size=2, window_step=1)
assert isinstance(ret, np.ndarray)
assert 1 == ret.ndim
assert 4 == ret.shape[0]
assert 'f' == ret.dtype.kind
# regression check
expect = [0.52349464, 0., -0.85199356, np.nan]
assert_array_nanclose(expect, ret)
# final value is nan because variants in final window are non-segregating
assert np.isnan(ret[3])
32 changes: 27 additions & 5 deletions docs/stats/selection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,42 @@ Selection
=========

.. automodule:: allel.stats.selection

Integrated haplotype score (IHS)
--------------------------------
.. autofunction:: allel.ihs

Cross-population extended haplotype homozygosity (XPEHH)
--------------------------------------------------------
.. autofunction:: allel.xpehh

Number of segregating sites by length (NSL)
-------------------------------------------
.. autofunction:: allel.nsl
.. autofunction:: allel.xpnsl

Haplotype diversity, Garud's H statistics
-----------------------------------------
.. autofunction:: allel.garud_h
.. autofunction:: allel.moving_garud_h
.. autofunction:: allel.haplotype_diversity
.. autofunction:: allel.moving_haplotype_diversity

Population branching statistic (PBS)
------------------------------------
.. autofunction:: allel.pbs

Delta Tajima's D
----------------
.. autofunction:: allel.moving_delta_tajima_d

Utilities and plotting functions
--------------------------------
.. autofunction:: allel.standardize
.. autofunction:: allel.standardize_by_allele_count
.. autofunction:: allel.ehh_decay
.. autofunction:: allel.voight_painting
.. autofunction:: allel.plot_voight_painting
.. autofunction:: allel.fig_voight_painting
.. autofunction:: allel.haplotype_diversity
.. autofunction:: allel.moving_haplotype_diversity
.. autofunction:: allel.garud_h
.. autofunction:: allel.moving_garud_h
.. autofunction:: allel.plot_haplotype_frequencies
.. autofunction:: allel.plot_moving_haplotype_frequencies
.. autofunction:: allel.moving_delta_tajima_d

0 comments on commit ed3058c

Please sign in to comment.