In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'png'

In [None]:
import numpy as np
import msprime as msprime
import matplotlib.pyplot as plt

def degrees_of_separation(N=50, L=int(10), m=5e-1, seed=None, *args, **kwargs):
        '''Count pairs of variant sites whose genotype vectors differ in exactly one sample.

        Simulates an ancestry with ``msprime.sim_ancestry`` (ploidy 1), overlays
        mutations from ``msprime.BinaryMutationModel`` with
        ``msprime.sim_mutations``, and then compares the genotype vectors of every
        unordered pair of variant sites.  The "separation" of a pair is the number
        of positions at which the two genotype vectors disagree.

        Parameters
        ----------
        N : int
                First positional argument of ``msprime.sim_ancestry`` (the samples).
        L : int
                ``sequence_length`` passed to ``msprime.sim_ancestry``.
        m : float
                ``rate`` passed to ``msprime.sim_mutations``.
        seed : int or None
                ``random_seed`` used for both the ancestry and the mutation step.
        *args, **kwargs
                Accepted for call compatibility; they are ignored.

        Returns
        -------
        tuple
                ``(N, one_sep_count)``.  ``N`` is returned unchanged and
                ``one_sep_count`` is the number of site pairs with separation
                exactly 1.  It is ``0`` when no such pair exists, including when
                fewer than two variant sites were simulated.
        '''
        ts = msprime.sim_ancestry(
                N,
                ploidy=1,
                sequence_length=L,
                random_seed=seed
                )

        mts = msprime.sim_mutations(
                ts,
                model=msprime.BinaryMutationModel(),
                rate=m,
                random_seed=seed,
                discrete_genome=False
                )

        # One row per variant site; each row is that site's genotype vector.
        genotypes = np.asarray([var.genotypes for var in mts.variants()])

        one_sep_count = 0
        # Visit every unordered pair exactly once: site i against all later sites.
        for i in range(len(genotypes) - 1):
                # Row-wise count of positions where a later site disagrees with site i.
                separations = np.count_nonzero(genotypes[i + 1:] != genotypes[i], axis=1)
                one_sep_count += int(np.count_nonzero(separations == 1))

        return N, one_sep_count


In [None]:
# Number of independently seeded simulations averaged per value of N.
NO_SEEDS = 100
def one_timer(N, L=10, m=0.5):
        '''Mean of ``one_sep_count / N`` over ``NO_SEEDS`` seeded simulations.

        Calls ``degrees_of_separation`` once for each seed in
        ``1 .. NO_SEEDS`` (inclusive) and averages the returned count divided by
        ``N``.  A simulation that reports ``None`` or a non-positive count
        contributes 0 to the average.

        Parameters
        ----------
        N : int
                Forwarded to ``degrees_of_separation`` and used as the divisor of
                each count.
        L : int
                Forwarded to ``degrees_of_separation``.
        m : float
                Forwarded to ``degrees_of_separation``.

        Returns
        -------
        float
                Sum of the per-seed ratios divided by the number of simulations run.
        '''
        # Seeds start at 1 because msprime does not accept 0 as a random seed;
        # the upper bound is inclusive so exactly NO_SEEDS simulations are run.
        seeds = range(1, NO_SEEDS + 1)
        ones_ratios_def = []
        for seed in seeds:
                _, one_sep_count = degrees_of_separation(N=N, seed=seed, L=L, m=m)
                if one_sep_count is not None and one_sep_count > 0:
                        ones_ratios_def.append(one_sep_count / N)
        # Divide by the number of simulations actually run, not by the number
        # of non-zero ratios, so zero-count runs pull the mean down.
        return np.sum(ones_ratios_def) / len(seeds)

# Sweep N over the integers 3..99 and collect one_timer's result for each value.
N_range = np.arange(3,100)
ones_N = []
for N in N_range:
        print(N)  # progress: the N currently being simulated
        ones_N += [one_timer(N)]

In [None]:
# plotting

# Figure 1: ones_N against N_range on linear axes.
fig, ax = plt.subplots()
ax.set(
        xlabel=r'$N$',
        ylabel=r'Mean times $S_{g} = 1$',
        title=r'Mean times $S_{g} = 1$ vs. $N$',
)
ax.grid()
ax.plot(N_range,ones_N)

# Figure 2: the same series with both axes switched to a log scale.
fig, ax2 = plt.subplots()
ax2.set(xscale="log", yscale="log")
ax2.set(
        xlabel=r'$log(N)$',
        ylabel=r'$log$(mean times $S_{g} = 1)$',
        title=r'$log$(mean times $S_{g} = 1)$ vs. $log(N)$',
)
ax2.grid()
ax2.plot(N_range,ones_N)
plt.show()