In [3]:
import msprime
import numpy as np


In [5]:
def get_pi(haplotypes):
    """Return the mean number of pairwise differences, summed over sites.

    Parameters
    ----------
    haplotypes : array-like, 2-D
        Matrix in which each row is one segregating site and each column is
        one sample. Any non-zero entry counts as the derived allele.

    Returns
    -------
    float
        Sum over rows of k * (n - k) / (n * (n - 1) / 2), where n is the
        number of columns and k is the number of non-zero entries in the row.
        Returns 0.0 when the input is empty or has fewer than two columns.

    Raises
    ------
    ValueError
        If a non-empty input is not two-dimensional.
    """
    haplotypes = np.asarray(haplotypes)
    ## If no seg sites in a pop then haplotypes will be 0 length
    if haplotypes.size == 0:
        return 0.0
    if haplotypes.ndim != 2:
        raise ValueError("haplotypes must be a 2-D (sites x samples) array")

    n = haplotypes.shape[1]
    ## A single sample has no pairs to compare, so n_comparisons would be 0
    ## and the division below would fail
    if n < 2:
        return 0.0
    n_comparisons = float(n) * (n - 1) / 2

    ## k[i] = number of samples with a non-zero allele at site i;
    ## k * (n - k) is the number of sample pairs that differ at that site
    k = (haplotypes != 0).sum(axis=1)
    return float((k * (n - k)).sum() / n_comparisons)

In [18]:
def _haplotypes_to_site_matrix(haps):
    """Turn an iterable of haplotype strings into a (sites x samples) int array."""
    # Each character of a haplotype string is parsed with int(); the transpose
    # puts one site per row, which is the layout get_pi iterates over.
    return np.transpose(np.array([[int(allele) for allele in hap] for hap in haps]))


# Run `reps` number of simulations accumulating pi and pi* values then average over reps
def simulate(ss=10, Ne=1e4, length=5e3, reps=10, mutation_rate=1e-7, random_seed=None):
    """Simulate `reps` replicates with msprime and print averaged diversity values.

    For every replicate three per-site values are collected (each divided by
    `length`):

    * TS   - the tree sequence's own `get_pairwise_diversity()`
    * My   - `get_pi` applied to all sampled haplotypes
    * Uniq - `get_pi` applied to the set of distinct haplotypes only

    The means over replicates are printed, together with the mean number of
    distinct haplotypes per replicate. Nothing is returned.

    Parameters
    ----------
    ss : int
        Sample size passed to msprime as `sample_size`.
    Ne : float
        Effective population size passed to msprime.
    length : float
        Locus length passed to msprime; also the divisor for per-site values.
    reps : int
        Number of independent replicates to average over (must be >= 1).
    mutation_rate : float
        Mutation rate passed to msprime.
    random_seed : int or None
        Seed passed to msprime so a run can be reproduced; None keeps the
        previous unseeded behaviour.

    Raises
    ------
    ValueError
        If `reps` is smaller than 1 (the averages would be undefined).
    """
    if reps < 1:
        raise ValueError("reps must be at least 1")

    ts_pis = []
    my_pis = []
    uniq_pis = []
    nuniq = 0
    # One call yields all replicates, so a single seed reproduces the whole batch
    # while each replicate remains an independent draw.
    replicates = msprime.simulate(sample_size=ss, Ne=Ne, length=length,
                                  mutation_rate=mutation_rate,
                                  num_replicates=reps, random_seed=random_seed)
    for tree_sequence in replicates:
        ts_pis.append(tree_sequence.get_pairwise_diversity()/length)

        haps = list(tree_sequence.haplotypes())
        my_pis.append(get_pi(_haplotypes_to_site_matrix(haps))/length)

        # pi*: same statistic, but each distinct haplotype is counted once
        uniq_haps = set(haps)
        uniq_pis.append(get_pi(_haplotypes_to_site_matrix(uniq_haps))/length)
        nuniq += len(uniq_haps)
    print("  TS: {}".format(sum(ts_pis)/reps))
    print("  My: {}".format(sum(my_pis)/reps))
    print("  Uniq (avg# {}): {}".format(float(nuniq)/reps, sum(uniq_pis)/reps))

# Ne 10000 (simulate's default Ne=1e4), 5000bp loci (simulate's default length=5e3)
for ss in [5, 10, 25, 50]:
    print("Sample size {}".format(ss))
    simulate(ss=ss, reps=100)

Sample size 5
  TS: 0.0037752
  My: 0.0037752
  Uniq (avg# 4.6): 0.00389126666667
Sample size 10
  TS: 0.00407266666667
  My: 0.00407266666667
  Uniq (avg# 8.29): 0.00426271269841
Sample size 25
  TS: 0.00393384
  My: 0.00393384
  Uniq (avg# 16.46): 0.00415783478684
Sample size 50
  TS: 0.00384919673469
  My: 0.00384919673469
  Uniq (avg# 24.88): 0.00420818508661


In [11]:
# Ne 10000 (simulate's default Ne=1e4), 50000bp loci (length=5e4)
for ss in [5, 10, 25, 50]:
    print("Sample size {}".format(ss))
    simulate(ss=ss, length=5e4, reps=100)

Sample size 5
  TS: 0.00398404
  My: 0.00398404
  Uniq: 0.00400111333333
Sample size 10
  TS: 0.00371806666667
  My: 0.00371806666667
  Uniq: 0.00374890269841
Sample size 25
  TS: 0.00428718133333
  My: 0.00428718133333
  Uniq: 0.00427695312015
Sample size 50
  TS: 0.00374426187755
  My: 0.00374426187755
  Uniq: 0.00376571732038


In [12]:
# Ne 100000 (Ne=1e5), 5000bp loci (length=5e3)
for ss in [5, 10, 25, 50]:
    print("Sample size {}".format(ss))
    simulate(ss=ss, Ne=1e5, length=5e3, reps=100)

Sample size 5
  TS: 0.038754
  My: 0.038754
  Uniq: 0.0389556
Sample size 10
  TS: 0.0421256888889
  My: 0.0421256888889
  Uniq: 0.0420385285714
Sample size 25
  TS: 0.0383534266667
  My: 0.0383534266667
  Uniq: 0.0382341022148
Sample size 50
  TS: 0.0396354677551
  My: 0.0396354677551
  Uniq: 0.0402361897382


In [13]:
# Ne 100000 (Ne=1e5), 50000bp loci (length=5e4)
for ss in [5, 10, 25, 50]:
    print("Sample size {}".format(ss))
    simulate(ss=ss, Ne=1e5, length=5e4, reps=100)

Sample size 5
  TS: 0.04143968
  My: 0.04143968
  Uniq: 0.04143968
Sample size 10
  TS: 0.0401874888889
  My: 0.0401874888889
  Uniq: 0.0401889688889
Sample size 25
  TS: 0.0405720106667
  My: 0.0405720106667
  Uniq: 0.0406147050804
Sample size 50
  TS: 0.0386323533061
  My: 0.0386323533061
  Uniq: 0.0386258339373
