### Cookbook: abba-baba introgression stats

The *ipyrad.analysis* Python module includes functions to calculate abba-baba admixture statistics, as well as several variants of these measures, and to perform signifance tests. The code in this notebook is all Python, which you can copy/paste into an IPython terminal to execute, or, preferably, run in a Jupyter notebook like this one. See the other analysis cookbooks for [instructions](http://ipyrad.readthedocs.io/analysis.html) on using Jupyter notebooks.

In [734]:
## start by loading several Python librarires
import ipyrad as ip
from ipyrad.assemble.write_outfiles import reftrick, GETCONS
import ipyrad.analysis as ipa
import numpy as np
import pandas as pd
import numba

In [735]:
import ipyparallel as ipp
ipyclient = ipp.Client()
lbview = ipyclient.load_balanced_view()

### Load finished sim assembly

In [736]:
data = ip.load_json("cli/cli.json")

  loading Assembly: cli
  from saved path: ~/Documents/ipyrad/tests/cli/cli.json


In [737]:
data.outfiles

geno : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.geno
loci : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.loci
nexus : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.nex
phy : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.phy
snpsmap : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.snps.map
snpsphy : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.snps.phy
str : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.str
ugeno : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.u.geno
usnpsphy : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.u.snps.phy
ustr : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.ustr
vcf : /home/deren/Documents/ipyrad/tests/cli/cli_outfiles/cli.vcf


### input arguments to test

In [738]:
## a dictionary with [required] key names 
## optional: additional 'p4' key for 5-taxon tests.
test = {
    'p1': ["1A_0", "1B_0", "1C_0"],
    'p2': ["1D_0"],
    'p3': ["2E_0", "2F_0"],
    'out': ["3L_0", "3J_0", "3K_0"], 
}

## optional: dict for min samples per taxon (default=1 per tax)
## used to filter loci for inclusion in data set
mindict = {
    'p1': 1,
    'p2': 1,
    'p3': 1, 
    'out': 1,
}

## loci input file
handle = data.outfiles.loci

### loci parsing function
This takes a .loci file and converts it to an array of the format: (4 or 5, n-variable-loci, maxsnps)


In [6]:
## not sure why, but I can't get this to JIT

def reffreq(refseq, iseq, consdict):
    ## empty arrays
    freq = np.zeros((1, iseq.shape[1]), dtype=np.float64)
    amseq = np.zeros((iseq.shape[0]*2, iseq.shape[1]), dtype=np.uint8)
    
    ## fill in both copies
    for seq in xrange(iseq.shape[0]):
        for col in xrange(iseq.shape[1]):
            ## expand colums with ambigs and remove N-
            base = iseq[seq][col]
            who = np.where(consdict[:, 0] == base)[0]
            
            ## resolve heteros or enter into both copies
            if np.any(who):
                who = who[0]
                amseq[(seq*2)][col] = consdict[who][0]
                amseq[(seq*2)+1][col] = consdict[who][1]
            else:
                amseq[(seq*2)][col] = base
                amseq[(seq*2)+1][col] = base
    
    ## get as frequencies
    amseq = (refseq == amseq).astype(np.float64)
    for i in xrange(amseq.shape[0]):
        freq += amseq[i]
    return freq / np.float64(amseq.shape[0])


In [7]:
def loci_to_arr(locifile, test, mindict=None):

    ## read in the input file
    with open(locifile, 'r') as infile:
        loci = infile.read().strip().split("|\n")
        nloci = len(loci)

    ## get max loc length
    maxlen = 0
    for iloc in xrange(nloci):
        lines = loci[iloc].split("\n")[:-1]
        _maxl = len(lines[0]) 
        maxlen = max(maxlen, _maxl)

    ## make the array (4 or 5)
    arr = np.zeros((nloci, len(test), maxlen), dtype=np.float64)
    
    ## if not mindict, make one that requires 1 in each taxon
    if not mindict:
        mindict = {i:1 for i in test}

    ## grab seqs just for the good guys
    for loc in xrange(nloci):    

        ## parse the locus
        lines = loci[loc].split("\n")[:-1]
        names = [i.split()[0] for i in lines]
        seqs = np.array([list(i.split()[1]) for i in lines])

        ## check that names cover the test
        covs = [sum([j in names for j in test[tax]]) >= mindict[tax] for tax in test]
        if all(covs):
        
            ## get that refseq 
            ref = np.where([i in test['out'] for i in names])[0]
            refseq = seqs[ref].view(np.uint8)
            ancestral = np.array([reftrick(refseq, GETCONS)[:, 0]])

            ## and fill it in
            iseq = reffreq(ancestral, refseq, GETCONS)
            arr[loc, -1, :iseq.shape[1]] = iseq 

            ## fill each other tax freq in test
            keys = sorted([i for i in test.keys() if i[0] == 'p'])
            for tidx, key in enumerate(keys):

                ## get idx of names in test tax
                nidx = np.where([i in test[key] for i in names])[0]
                sidx = seqs[nidx].view(np.uint8)
                ## get freq of sidx
                iseq = reffreq(ancestral, sidx, GETCONS)
                ## fill it in 
                arr[loc, tidx, :iseq.shape[1]] = iseq

    ## size-down array to the number of loci that have taxa for the test
    return arr



In [8]:
## get locus array
arr = loci_to_arr(handle, test)

### Function to convert ms to arr
This is useful for testing results

In [1039]:
import msprime as ms

tree_sequence = ms.simulate(sample_size=5, Ne=1000)
tree = next(tree_sequence.trees())
print(tree)

{0: 8, 1: 7, 2: 5, 3: 6, 4: 5, 5: 6, 6: 7, 7: 8, 8: -1}


In [1040]:
def migration_example():
    
    # M is overall symmetric migration rate, d is number of demes
    M = 0.2
    d = 3

    # Rescale m into per-generation values for msprime.
    m = M / (4 * (d - 1))
    
    # Allocate the initial sample. Because we are interested in the
    # between deme coalescence times, we choose one sample each
    # from the first two demes.
    population_configurations = [
        ms.PopulationConfiguration(sample_size=1),
        ms.PopulationConfiguration(sample_size=1),
        ms.PopulationConfiguration(sample_size=0)]
    
    # Now we set up the migration matrix. Since this is a symmetric
    # island model, we have the same rate of migration between all
    # pairs of demes. Diagonal elements must be zero.
    migration_matrix = [
        [0, m, m],
        [m, 0, m],
        [m, m, 0]]
    
    # We pass these values to the simulate function, and ask it
    # to run the required number of replicates.
    num_replicates = int(1e6)
    replicates = ms.simulate(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        num_replicates=num_replicates)
    
    # And then iterate over these replicates
    T = np.zeros(num_replicates)
    for i, trees in enumerate(replicates):
        tree = next(trees.trees())
        
        # Convert the TMRCA to coalecent units.
        T[i] = tree.get_time(tree.get_root()) / 4
        
    # Calculate analytical expectation
    analytical = d / 2. + (d - 1) / (2. * M)
    print("Observed  =", np.mean(T))
    print("Predicted =", analytical)


In [1041]:
migration_example()

('Observed  =', 6.5076328730062123)
('Predicted =', 6.5)


In [1208]:
gen = 20
Taus = np.array([0, 1, 2, 3, 4, 5]) * 1e5 * gen
Taus

array([        0.,   2000000.,   4000000.,   6000000.,   8000000.,
        10000000.])

In [1274]:

def demography(nreps):
    
    # Set the ML values of various parameters
    Ns = 500000
    gen = 20
    Taus = np.array([0, 1, 2, 3, 4, 5]) * 1e4 * gen
      
    # Migration rates C -> B and from IJ -> EF
    m_C_B = 2e-6
    m_IJ_H = 2e-6
    
    # Population IDs correspond to their indexes in pop_config.
    pop_config = [
        ms.PopulationConfiguration(sample_size=2, initial_size=Ns)
        for i in range(12)]
    
    ## migration matrix all zeros time=0
    migmat = np.zeros((12, 12)).tolist()
    
    ## set up demography
    demog = [
        ## initial migration from C -> B
        ms.MigrationRateChange(time=0, rate=m_C_B, matrix_index=(2, 1)),
        ms.MigrationRateChange(time=Taus[1]/2., rate=0),#, matrix_index=(2, 1)),

        # merge events at time 1
        ms.MassMigration(time=Taus[1], source=1, destination=0, proportion=1.0), 
        ms.MassMigration(time=Taus[1], source=5, destination=4, proportion=1.0), 
        ms.MassMigration(time=Taus[1], source=9, destination=8, proportion=1.0), 
        
        ## migration from IJ -> H
        ms.MigrationRateChange(time=Taus[1], rate=m_IJ_H, matrix_index=(8, 7)), 

        ## merge events at time 2
        ms.MassMigration(time=Taus[2], source=2, destination=0, proportion=1.0), 
        ms.MassMigration(time=Taus[2], source=6, destination=4, proportion=1.0), 
        ms.MassMigration(time=Taus[2], source=10, destination=8, proportion=1.0), 

        ## end migration at ABC and merge
        ms.MigrationRateChange(time=Taus[2], rate=0),
        ms.MassMigration(time=Taus[3], source=3, destination=0, proportion=1.0), 
        ms.MassMigration(time=Taus[3], source=7, destination=4, proportion=1.0), 
        ms.MassMigration(time=Taus[3], source=11, destination=8, proportion=1.0),   
        
        ## merge EFJH -> IJKL
        ms.MassMigration(time=Taus[4], source=8, destination=4, proportion=1.0),   
        
        ## merge ABCD -> EFJHIJKL
        ms.MassMigration(time=Taus[5], source=4, destination=0, proportion=1.0),   
    ]

    ## sim the data
    replicates = ms.simulate(
        population_configurations=pop_config,
        migration_matrix=migmat,
        demographic_events=demog,
        num_replicates=nreps,
        length=100, 
        mutation_rate=1e-9)
    
    return replicates

In [1275]:

## function to convert trees to arr
def msp_to_arr(nreps, simreps, inds):
    
    ## inds e.g., [[0,1],[2,3],[4,5],[6,7]]
    ntips = len(inds)
    
    ## array to fill, limit to 100 len
    arr = np.zeros((nreps, ntips, 100))
    
    ## iterate over reps filling arr
    for idx in xrange(nreps):
        trees = next(simreps)
        
        ## build genotype array
        shape = trees.get_num_mutations(), trees.get_sample_size()
        garr = np.empty(shape, dtype="u1")
    
        ## fill the garr
        for variant in trees.variants():
            garr[variant.index] = variant.genotypes
        ## invert
        garr = garr.T
        
        ## fill my arr with freqs
        for sidx in xrange(ntips):
            samp = inds[sidx]
            freq = garr[samp]
            freq = freq.sum(axis=0) / float(freq.shape[0])
            maxsz = min(freq.shape[0], 100)
            arr[idx, sidx, :maxsz] = freq[:maxsz]
            
    ## reduce the size of arr to min  
    minl = np.where(np.all(np.all(arr==0, axis=1) == True, axis=0))[0].min()
    arr = arr[:, :, :minl]
    
    return arr

In [1279]:
## simulate data
nreps = 10000
simreps = demography(nreps)

## A, B, C, D
inds = np.array([[0,1],[2,3],[4,5],[6,7]])
## E, F, G, H
#inds = np.array(inds) + 8

## convert to freq arr
arr = msp_to_arr(nreps, simreps, inds)
arr.shape

(10000, 4, 9)

In [1280]:
abba, baba, dstat = prop_dstat(arr)
print abba, baba, dstat

176.4375 157.0625 0.058095952024


In [1281]:
o, e, s, z = get_signif(arr, 10000)
print o, e, s, z

0.058095952024 0.056770424894 0.0357828185272 1.62357115552


### Calculate 4-taxon statistics
<br>

$
    D = \frac{\Sigma(ABBA - BABA)}{\Sigma(ABBA+BABA)}
$

<br>

$
    D_p = \frac{\Sigma ~ [ p_1 ~ (1-p_2) ~ p_3 ~ (1-p_4) ] - [(1-p_1) ~ p_2 ~ p_3 ~ (1-p_4)]}      {\Sigma ~ [ p_1 ~ (1-p_2) ~ p_3 ~ (1-p_4) ] + [(1-p_1) ~ p_2 ~ p_3 ~ (1-p_4)]}
$

<br>

In [1215]:
@numba.jit(nopython=True)
def prop_dstat(arr):
    
    ## numerator
    abba = ((1.-arr[:, 0]) * (arr[:, 1]) * (arr[:, 2]) * (1.-arr[:, 3]))  
    baba = ((arr[:, 0]) * (1.-arr[:, 1]) * (arr[:, 2]) * (1.-arr[:, 3]))
    top = abba - baba
    bot = abba + baba

    ## get statistic and avoid zero div   
    if bot.sum() != 0:
        dstat = top.sum() / float(bot.sum())
    else:
        dstat = 0
    
    return abba.sum(), baba.sum(), dstat

In [1216]:
prop_dstat(arr)

nboots = 100
boots = np.zeros((nboots, 1))

## iterate to fill boots
for bidx in xrange(nboots):
    ## sample with replacement
    lidx = np.random.randint(0, arr.shape[0], arr.shape[0])
    tarr = arr[lidx]
    abba, baba, dstat = prop_dstat(tarr)
    boots[bidx] = dstat

In [1217]:
@numba.jit(nopython=True)
def get_boots(arr, nboots=1000):
    ## hold results (nboots, [dstat, ])
    boots = np.zeros((nboots, 1))
    
    ## iterate to fill boots
    for bidx in xrange(nboots):
        ## sample with replacement
        lidx = np.random.randint(0, arr.shape[0], arr.shape[0])
        tarr = arr[lidx]
        _, _, dstat = prop_dstat(tarr)
        boots[bidx] = dstat
    
    ## return bootarr
    return boots

In [1218]:
@numba.jit(nopython=True)
def get_signif(arr, nboots):
    abba, baba, dstat = prop_dstat(arr)
    b = get_boots(arr)
    e, s = (b.mean(), b.std())
    z = np.abs(dstat) / s
    return dstat, e, s, z
    

In [1219]:
get_signif(arr, 1000)

ZeroDivisionError: division by zero

### Setup a 5-taxon test

In [1179]:
## a dictionary with [required] key names 
## optional: additional 'p4' key for 5-taxon tests.
test = {
    'p1': ["1A_0", "1B_0", "1C_0"],
    'p2': ["1D_0"],
    'p3': ["2E_0", "2F_0"],
    'p4': ["2G_0", "2H_0"],
    'out': ["3L_0", "3J_0", "3K_0"], 
}

## optional: dict for min samples per taxon (default=1 per tax)
## used to filter loci for inclusion in data set
mindict = {
    'p1': 1,
    'p2': 1,
    'p3': 1, 
    'p4': 1,
    'out': 1,
}

## loci input file
handle = data.outfiles.loci

In [1180]:
## get locus array
arr = loci_to_arr(handle, test)

## simulate data
nreps = 10000
simreps = demography(nreps)

## A, B, C, D
inds = np.array([[0,1],[2,3],[4,5],[6,7]])

## [E], [H], [I], [L], [A,B,C,D]
inds = np.array([[8,9], [14,15], [16,17], [22,23], [0,1,2,3,4,5,6,7]])

## convert to freq arr
arr = msp_to_arr(nreps, simreps, inds)
arr.shape

(10000, 5, 20)

## Calculate 5-taxon statistics

<br>

$
    D_{12} = \frac{\Sigma(ABBBA - BABBA)}{\Sigma(ABBBA+BABBA)}
$

$
    D_{1} = \frac{\Sigma(ABBAA - BABAA)}{\Sigma(ABBAA+BABAA)}
$

$
    D_{2} = \frac{\Sigma(ABABA - BAABA)}{\Sigma(ABABA+BAABA)}
$

<br>

$
    D_{p12} = \frac 
        {\Sigma ~ [ p_1 ~ (1-p_2) ~ p_3 ~ p_4 ~ (1-p_5) ] - [(1-p_1) ~ p_2 ~ p_3 ~ p_4 ~ (1-p_5)]} 
        {\Sigma ~ [ p_1 ~ (1-p_2) ~ p_3 ~ p_4 ~ (1-p_4) ] + [(1-p_1) ~ p_2 ~ p_3 ~ p_4 ~ (1-p_5)]}
$


$
    D_{p1} = \frac 
        {\Sigma ~ [ p_1 ~ (1-p_2) ~ p_3 ~ (1-p_4) ~ (1-p_5) ] - [(1-p_1) ~ p_2 ~ p_3 ~ (1-p_4) ~ (1-p_5)]} 
        {\Sigma ~ [ p_1 ~ (1-p_2) ~ p_3 ~ (1-p_4) ~ (1-p_5) ] + [(1-p_1) ~ p_2 ~ p_3 ~ (1-p_4) ~ (1-p_5)]}
$


$
    D_{p2} = \frac 
        {\Sigma ~ [ p_1 ~ (1-p_2) ~ (1-p_3) ~ p_4 ~ (1-p_5) ] - [(1-p_1) ~ p_2 ~ (1-p_3) ~ p_4 ~ (1-p_5)]} 
        {\Sigma ~ [ p_1 ~ (1-p_2) ~ (1-p_3) ~ p_4 ~ (1-p_5) ] + [(1-p_1) ~ p_2 ~ (1-p_3) ~ p_4 ~ (1-p_5)]}
$

<br>

In [1181]:
@numba.jit(nopython=True)
def prop_dstat_12(arr):
    
    ## calc
    sub = ((arr[:, 2] + arr[:, 3]) / 2.)
    abbba = ((1.-arr[:, 0]) * (arr[:, 1]) * sub * (1.-arr[:, 4]))
    babba = ((arr[:, 0]) * (1.-arr[:, 1]) * sub * (1.-arr[:, 4]))
    top = abbba - babba
    bot = abbba + babba

    ## get statistic and avoid zero div   
    if bot.sum() != 0:
        dstat = top.sum() / float(bot.sum())
    else:
        dstat = 0
    
    return abbba.sum(), babba.sum(), dstat

In [1182]:
@numba.jit(nopython=True)
def prop_dstat_1(arr):
    
    ## numerator
    sub = (arr[:, 2]) * (1.-arr[:, 3])
    abbaa = ((1.-arr[:, 0]) * (arr[:, 1]) * sub * (1.-arr[:, 4]))
    babaa = ((arr[:, 0]) * (1.-arr[:, 1]) * sub * (1.-arr[:, 4]))
    top = abbaa - babaa
    bot = abbaa + babaa

    ## get statistic and avoid zero div   
    if bot.sum() != 0:
        dstat = top.sum() / float(bot.sum())
    else:
        dstat = 0
    
    return abbaa.sum(), babaa.sum(), dstat

In [1183]:
@numba.jit(nopython=True)
def prop_dstat_2(arr):
    
    ## calc sub
    sub = (1.-arr[:, 2]) * arr[:, 3] 
    
    ## calc stats
    ababa = ((1.-arr[:, 0]) * (arr[:, 1]) * sub * (1.-arr[:, 4]))
    baaba = ((arr[:, 0]) * (1.-arr[:, 1]) * sub * (1.-arr[:, 4]))
    top = ababa - baaba
    bot = ababa + baaba

    ## get statistic and avoid zero div   
    if bot.sum() != 0:
        dstat = top.sum() / float(bot.sum())
    else:
        dstat = 0
    
    return ababa.sum(), baaba.sum(), dstat

In [1184]:
sub = (1.-arr[:, 2]) * arr[:, 3]

print arr[:5, 3, :10]
print arr[:5, 2, :10]
print sub[:5, :10]

[[ 0.   0.   0.   1.   0.   0.   0.   0.   0.   0. ]
 [ 1.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 1.   0.   0.5  0.   0.   0.   1.   0.   0.   0. ]]
[[ 0.   0.   0.   0.   0.   0.   1.   0.   1.   0. ]
 [ 0.   1.   0.5  0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   1.   0.   1.   0. ]
 [ 1.   0.   1.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.5  1.   0.   0.   0.   0.   0. ]]
[[ 0.   0.   0.   1.   0.   0.   0.   0.   0.   0. ]
 [ 1.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 1.   0.   0.5  0.   0.   0.   1.   0.   0.   0. ]]


In [1185]:
prop_dstat_12(arr), 'abbba babba'

((2036.0, 24.25, 0.9764591675767504), 'abbba babba')

In [1186]:
print prop_dstat_1(arr), 'abbaa, babaa'
print prop_dstat_2(arr), 'ababa, baaba'

(3998.5, 6.0, 0.9970033712073917) abbaa, babaa
(0.5, 42.5, -0.9767441860465116) ababa, baaba


In [1026]:
sub = (1.-arr[:, 2]) * arr[:, 3] 
sub[:20, :10]

array([[ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0.5,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  1. ,  0. ,  0. ,  0. ,  0. ,  0. ,  1. ,  0.5,  0. ],
       [ 0. ,  0. ,  1. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 1. ,  1. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0.5,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 1. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  1. ,  0. ],
       [ 0.5,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 1. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  1. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0.5,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 1. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 1. ,  0.5,  0. ,  0. ,  0

In [1021]:
prop_dstat_2(arr)

(195.125, 17.0, 0.8397171479080731)

In [1006]:
x = arr[:, 3]
y = arr[:, 2]

print x[:10, :8]
print y[:10, :8]
z = x-y
print z[:10, :8]

[[ 0.     0.     0.     0.     0.     0.     0.     0.   ]
 [ 0.125  0.125  0.125  0.     0.     0.     0.     0.   ]
 [ 0.625  0.25   0.375  0.     0.     0.     0.     0.   ]
 [ 0.     0.     0.     0.125  0.     0.25   0.125  0.125]
 [ 0.125  0.     0.     0.125  0.25   0.375  0.     0.125]
 [ 0.5    0.     0.     0.     0.25   0.     0.     0.   ]
 [ 1.     0.     0.75   0.     0.     0.     0.75   0.   ]
 [ 0.125  0.25   0.     0.25   0.125  0.     0.     0.   ]
 [ 0.     0.     0.25   0.     0.125  0.625  0.     0.   ]
 [ 0.     0.     0.     0.     0.75   0.25   0.     1.   ]]
[[ 0.25  0.    0.    1.    0.    0.5   0.    0.5 ]
 [ 0.    0.    0.    0.    0.5   0.    0.    0.  ]
 [ 0.    0.    0.    1.    0.75  0.    0.    1.  ]
 [ 0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.5   0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    1.    0.    0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    1.    0.    0.    0.5 ]
 [ 0.    0.    0.    0.    0.    0.    0.    0.  ]
 

In [1000]:
sub = (arr[:, 3] + (arr[:, 2]))
sub - arr[:, 2]#* (1.-arr[:, 2])

x = arr[:, 3] * (1.-arr[:, 2])
x[:10, :8]

array([[ 0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ],
       [ 0.125,  0.125,  0.125,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ],
       [ 0.625,  0.25 ,  0.375,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.125,  0.   ,  0.25 ,  0.125,  0.125],
       [ 0.125,  0.   ,  0.   ,  0.125,  0.25 ,  0.375,  0.   ,  0.125],
       [ 0.5  ,  0.   ,  0.   ,  0.   ,  0.25 ,  0.   ,  0.   ,  0.   ],
       [ 1.   ,  0.   ,  0.75 ,  0.   ,  0.   ,  0.   ,  0.75 ,  0.   ],
       [ 0.125,  0.25 ,  0.   ,  0.25 ,  0.125,  0.   ,  0.   ,  0.   ],
       [ 0.   ,  0.   ,  0.25 ,  0.   ,  0.125,  0.625,  0.   ,  0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.   ,  0.75 ,  0.25 ,  0.   ,  1.   ]])

In [907]:
prop_dstat_2(arr)

(79.21875, 4.9375, 0.8826587448941701)

In [892]:
x = arr[:, 3] - arr[:, 2]
x[x < 0] = 0
x


array([[ 0.   ,  0.   ,  0.   , ...,  0.   ,  0.   ,  0.   ],
       [ 0.125,  0.125,  0.125, ...,  0.   ,  0.   ,  0.   ],
       [ 0.625,  0.25 ,  0.375, ...,  0.   ,  0.   ,  0.   ],
       ..., 
       [ 0.   ,  0.5  ,  0.625, ...,  0.   ,  0.   ,  0.   ],
       [ 0.   ,  0.25 ,  0.   , ...,  0.   ,  0.   ,  0.   ],
       [ 0.25 ,  0.25 ,  0.   , ...,  0.   ,  0.   ,  0.   ]])