# Generate the site frequency spectrum (SFS) as a function of sigma and population parameters for burn-in estimation

#### Chris Porras

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
rc = {'lines.linewidth': 3, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5',
      'axes.font': 'arial'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)
sns.set_palette('colorblind')
%matplotlib inline
from scipy.ndimage.filters import laplace

### Explicit and implicit mutation rate simulations

In [2]:
def SS_WF_sim(pop_size,mu,s,m,num_reps,num_gen,dims,rng=None):
    """Simulate allele frequencies on a periodic deme lattice with explicit mutation.

    Every generation the frequencies ``f`` are advanced in two steps:

    1. a deterministic change ``mu*(1-2f) - s*f*(1-f) + m*laplacian(f)``,
       after which the result is clipped to ``[0, 1]``;
    2. binomial sampling, ``Binomial(pop_size, p) / pop_size``, per deme.

    The Laplacian is the nearest-neighbour stencil with wrap-around
    boundaries, taken over the lattice axes only, so replicates never
    exchange migrants with each other.

    Parameters
    ----------
    pop_size : int
        Number of binomial draws per deme; also fixes the initial
        frequency ``1/pop_size`` in every deme.
    mu : float
        Coefficient of the ``(1-2f)`` mutation term.
    s : float
        Coefficient of the ``-f*(1-f)`` selection term.
    m : float
        Coefficient of the Laplacian (migration) term.
    num_reps : int
        Number of independent replicates.
    num_gen : int
        Number of stored generations, including the initial one (>= 1).
    dims : sequence of int
        Shape of the deme lattice (any number of spatial dimensions).
    rng : numpy.random.Generator, optional
        Random source for the binomial step. When omitted the global
        ``np.random`` state is used, as before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_gen, num_reps) + dims`` holding the
        frequency of every deme, replicate and generation.

    Notes
    -----
    The whole trajectory is held in memory, so the allocation grows as
    ``num_gen * num_reps * prod(dims)`` float64 values.
    """
    dims = tuple(dims)
    #Pre-allocate and initialize
    f = np.zeros((num_gen,num_reps)+dims)
    f[0] = 1/pop_size
    # Axis 0 of f[j] indexes replicates; only the remaining axes are spatial.
    spatial_axes = range(1, 1+len(dims))
    sampler = np.random if rng is None else rng
    for j in range(num_gen-1):
        cur = f[j]
        # Periodic nearest-neighbour Laplacian restricted to the lattice axes.
        # (Applying an n-d Laplacian to the whole array would also diffuse
        # frequencies between adjacent replicates.)
        lap = sum(np.roll(cur,1,axis=ax)+np.roll(cur,-1,axis=ax)-2*cur
                  for ax in spatial_axes)
        #Wright-Fisher diffusion w/Stepping Stone migration
        df = mu*(1-2*cur)-s*cur*(1-cur) \
        +m*lap
        #bounds allele frequencies
        p = np.clip(cur + df, 0, 1)
        #genetic drift sampling
        f[j+1] = sampler.binomial(pop_size,p)/pop_size
    return f

In [3]:
def implicit_mut_sim(pop_size,s,m,num_reps,num_gen,dims,rng=None):
    """Simulate allele frequencies on a periodic deme lattice with implicit mutation.

    Every generation the frequencies ``f`` receive the deterministic change
    ``-s*f*(1-f) + m*laplacian(f)``, are clipped to ``[0, 1]`` and are then
    resampled as ``Binomial(pop_size, p) / pop_size``. There is no mutation
    term; instead, any replicate whose lattice was entirely zero at
    generation ``j`` is reset to ``1/pop_size`` in every deme at
    generation ``j+1``.

    The Laplacian is the nearest-neighbour stencil with wrap-around
    boundaries, taken over the lattice axes only, so replicates never
    exchange migrants with each other.

    Parameters
    ----------
    pop_size : int
        Number of binomial draws per deme; also fixes the initial and the
        reset frequency ``1/pop_size``.
    s : float
        Coefficient of the ``-f*(1-f)`` selection term.
    m : float
        Coefficient of the Laplacian (migration) term.
    num_reps : int
        Number of independent replicates.
    num_gen : int
        Number of stored generations, including the initial one (>= 1).
    dims : sequence of int
        Shape of the deme lattice (any number of spatial dimensions).
    rng : numpy.random.Generator, optional
        Random source for the binomial step. When omitted the global
        ``np.random`` state is used, as before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_gen, num_reps) + dims``.
    """
    dims = tuple(dims)
    #Pre-allocate and initialize
    f = np.zeros((num_gen,num_reps)+dims)
    f[0] = 1/pop_size
    # Axis 0 of f[j] indexes replicates; only the remaining axes are spatial.
    spatial_axes = tuple(range(1, 1+len(dims)))
    sampler = np.random if rng is None else rng
    for j in range(num_gen-1):
        cur = f[j]
        # Periodic nearest-neighbour Laplacian restricted to the lattice axes.
        lap = sum(np.roll(cur,1,axis=ax)+np.roll(cur,-1,axis=ax)-2*cur
                  for ax in spatial_axes)
        #Wright-Fisher diffusion w/Stepping Stone migration
        df = -s*cur*(1-cur) + m*lap
        #bounds allele frequencies
        p = np.clip(cur + df, 0, 1)
        #genetic drift sampling w/ implicit mut
        f[j+1] = sampler.binomial(pop_size,p)/pop_size
        # if all elements of a replicate at time j are 0, reset to 1/N
        # (reduce over every lattice axis so 1-D and 3-D lattices work too)
        extinct = np.count_nonzero(cur,axis=spatial_axes)==0
        f[j+1,extinct] = 1/pop_size
    return f

#### For now, we continue with the explicit-mutation simulation (`SS_WF_sim`)

In [41]:
# Population parameters
pop_size = 10**4
mu = 10**-4
s = 10**-3
m = 10**-1
num_reps = 10
num_gen = 10**4
dims = (100,100)

In [42]:
# Run the explicit-mutation simulation with the parameters defined above.
f = SS_WF_sim(
    pop_size=pop_size,
    mu=mu,
    s=s,
    m=m,
    num_reps=num_reps,
    num_gen=num_gen,
    dims=dims,
)

In [28]:
# Square root of the ratio of the migration coefficient m to the selection coefficient s.
np.sqrt(m/s)

10.0

In [29]:
# Ratio of the mutation coefficient mu to the selection coefficient s.
mu/s

0.1

In [109]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [114]:
# Frequencies of replicate 0 in the final generation. Indexing `f` directly
# keeps this cell runnable on a fresh kernel: `df` is only created in a later cell.
f[-1, 0, :, :]

array([[0.0841, 0.0875, 0.0886, ..., 0.087 , 0.078 , 0.089 ],
       [0.0936, 0.089 , 0.0971, ..., 0.0897, 0.0875, 0.0867],
       [0.0878, 0.0823, 0.0848, ..., 0.0949, 0.0879, 0.0874],
       ...,
       [0.0907, 0.0904, 0.0891, ..., 0.0964, 0.0897, 0.0855],
       [0.0967, 0.0919, 0.0947, ..., 0.0917, 0.0946, 0.0865],
       [0.0866, 0.0837, 0.0911, ..., 0.0938, 0.0789, 0.0865]])

In [115]:
# Final-generation frequency grid of replicate 0, wrapped in a DataFrame.
df = pd.DataFrame(data=f[-1, 0, :, :])
# Standardize the grid before PCA.
# NOTE(review): StandardScaler presumably centres and scales each column — confirm
# that column-wise scaling is what is wanted for a lattice.
st_df = StandardScaler().fit_transform(df.to_numpy())

In [54]:
from sklearn.decomposition import PCA

In [116]:
# Number of principal components to keep; the same value is reused later to
# label the PC columns.
n_components = 4
pca = PCA(n_components=n_components)

In [117]:
# Fit the PCA on the standardized frequency grid.
# NOTE(review): scikit-learn presumably treats rows of st_df as samples and columns
# as features, and `fit` presumably returns the estimator itself (so `pcs` would
# alias `pca`) — confirm both against the scikit-learn docs.
pcs = pca.fit(st_df)

In [118]:
# Keep the component matrix, then show its shape. The shape expression must be
# the last line of the cell, otherwise its value is discarded without display.
comps = pcs.components_
comps.shape

In [123]:
# Explained variance of each retained component, one entry per component.
pcs.explained_variance_

array([5.41821517, 4.70765163, 4.08241802, 3.75048561])

In [124]:
# Explained-variance ratio of each retained component. In the recorded
# output the four ratios sum to roughly 0.18.
pcs.explained_variance_ratio_

array([0.05364033, 0.04660575, 0.04041594, 0.03712981])

In [122]:
# One column per principal component (PC1..PCn), one row per entry of the
# component vectors.
pcomps = pd.DataFrame(
    data=comps.transpose(),
    columns=[f'PC{i}' for i in range(1, n_components + 1)],
)
pcomps.head()

Unnamed: 0,PC1,PC2,PC3,PC4
0,0.021106,0.101044,-0.104447,0.07876
1,-0.011707,0.191023,-0.065117,0.056948
2,-0.017601,-0.013474,-0.152828,0.14507
3,-0.006579,0.006706,-0.012046,0.132605
4,-0.050268,0.097414,-0.060151,0.158749


In [94]:
import plotly.express as px

In [127]:
# A 3-D scatter needs all three coordinates; the original call supplied only
# x and y. The third principal component is used for the z axis.
fig = px.scatter_3d(pcomps, x='PC1', 
                        y='PC2',
                        z='PC3')
fig.show()