In [1]:
import numpy as np
import msprime

In [2]:
tree_sequence = msprime.simulate(5, length=10)
tree = next(tree_sequence.trees())
print(tree)
tree.draw
tree_sequence = msprime.simulate(
    10000, length=100, Ne=1, mutation_rate=0.005, random_seed=19)

print("pairwise diversity - {}".format(tree_sequence.get_pairwise_diversity()))
print("# segregating sites - {}".format(len(next(tree_sequence.haplotypes()))))
tree = next(tree_sequence.trees())
print("tree length - {}".format(tree.get_time(tree.get_root())))

for h in tree_sequence.haplotypes():
    print(h)

{0: 6, 1: 5, 2: 6, 3: 7, 4: 5, 5: 8, 6: 7, 7: 8, 8: -1}
pairwise diversity - 1.38529526953
# segregating sites - 27
tree length - 1.07011862496
000010000000000010010000000
000000000000100000000000000
000010000000000010010000000
000010000000000010010000000
000000000000000000000000000
000010000000000010010000000
000010000000000010010000000
000000000000100000000000000
000010000000000010010000000
000010000000000010010000000
000010000000000010010000000
000010000000000010010000000
000000000000100000000000000
000000000000100000000000000
000010000000000010010000000
000010000000000010010000000
000000000000110000000000000
000000000000100000000000000
000010000000000010010000000
000010000000000010010000000
000010000000000010010000000
000000000000100000000000000
000010000000000010010000000
000010000000000010010000000
000000000000000000000000100
000000000000100000000000000
000010000000000010010000000
000000000000100000000000000
000000000000000000000000000
000010000000000010010000000
0000100000000000

In [48]:
def segregating_sites_example(n, theta, num_replicates):
    S = np.zeros(num_replicates)
    replicates = msprime.simulate(
        sample_size=n,
        mutation_rate=theta / 4,
        num_replicates=num_replicates)
    for j, tree_sequence in enumerate(replicates):
        S[j] = tree_sequence.get_num_mutations()
    # Now, calculate the analytical predictions
    S_mean_a = np.sum(1. / np.arange(1, n)) * theta
    S_var_a = (
        float(theta) * np.sum(1. / np.arange(1, n)) +
        theta**2 * np.sum(1. / np.arange(1, n)**2))
    print("              mean              variance")
    print("Observed      {}\t\t{}".format(np.mean(S), np.var(S)))
    print("Analytical    {:.5f}\t\t{:.5f}".format(S_mean_a, S_var_a))

In [50]:
segregating_sites_example(10, 5, 10000)

              mean              variance
Observed      11.3775		36.08919375
Analytical    14.14484		52.63903


In [103]:
## Add a mass migration event
island_pop = msprime.PopulationConfiguration(sample_size=20, initial_size=1)
meta_pop = msprime.PopulationConfiguration(sample_size=10, initial_size=20)
split_event = msprime.MassMigrationEvent(time=1, source=0, destination=1, proportion=1)
#resize_event = msprime.PopulationSizeChange(time=0.2, source=0, destination=1, proportion=1)
tree_sequence = msprime.simulate(sample_size=30, length=500, Ne=1, mutation_rate=0.005, \
                                random_seed=19, population_configurations=[island_pop, meta_pop],\
                                demographic_events=[split_event])
tree = next(tree_sequence.trees())
print(tree)
tree.draw("/Users/iovercast/Desktop/tmp.svg", 600, 600, show_times=True)



{0: 40, 1: 42, 2: 43, 3: 33, 4: 30, 5: 36, 6: 41, 7: 34, 8: 34, 9: 38, 10: 35, 11: 35, 12: 30, 13: 44, 14: 44, 15: 31, 16: 45, 17: 33, 18: 31, 19: 32, 20: 46, 21: 49, 22: 55, 23: 56, 24: 46, 25: 53, 26: 48, 27: 49, 28: 53, 29: 48, 30: 32, 31: 38, 32: 36, 33: 47, 34: 47, 35: 37, 36: 37, 37: 39, 38: 39, 39: 40, 40: 41, 41: 42, 42: 43, 43: 51, 44: 45, 45: 50, 46: 52, 47: 50, 48: 54, 49: 52, 50: 51, 51: 55, 52: 54, 53: 57, 54: 56, 55: 58, 56: 57, 57: 58, 58: -1}


In [20]:
import numpy as np
haps = tree_sequence.haplotypes()
island_haps = [next(haps) for _ in range(20)]
meta_haps = [next(haps) for _ in range(10)]

# Get S for islands
ihaps = np.transpose(np.array([list(x) for x in island_haps]))
mhaps = np.transpose(np.array([list(x) for x in meta_haps]))


TypeError: 'long' object is not iterable

In [26]:
ints_list = map(int, list(island_haps[0]))
print(ints_list)
print(np.count_nonzero(ints_list))

[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
3


In [163]:
import collections
print(len(next(tree_sequence.haplotypes())))
print("island - {}".format(collections.Counter([len(set(ihaps[x])) for x in range(len(ihaps))])[2]))
print("meta - {}".format(collections.Counter([len(set(mhaps[x])) for x in range(len(mhaps))])[2]))
dohap = 23
n = len(mhaps[10])
print(mhaps[dohap])
k = collections.Counter(mhaps[dohap])['1']
denom = n * (n - 1) / 2
print(n, k, denom)
print(float(k) * (n - k) / denom)
pi = 0
for _, hap in enumerate(mhaps):
        k = collections.Counter(hap)['1']
        pi += float(k) * (n - k) / denom
        
print(pi)

109
island - 21
meta - 81
['1' '1' '0' '0' '1' '0' '1' '1' '0' '1']
(10, 6, 45)
0.533333333333
21.8888888889
