## Demographic inference of the divergence history of *Erythrura trichroa* and *Erythrura papuana*

This notebook performs a demographic inference of the divergence history of *Erythrura trichroa* and *Erythrura papuana* using `dadi`. For now I run a limited number of replicates; this will be increased in future runs. We start by importing the relevant libraries:

In [1]:
import os
import numpy as np
import dadi
import nlopt
from dadi import Numerics
from scipy.optimize import minimize

Next we provide inputs for the analysis: our `.vcf`, population IDs and names, projections and points, replicates, and seeds:

In [2]:
# --- Data inputs -------------------------------------------------------------
# Population labels and the projection size requested for each of them
# (same order in both lists).
pops = ['trichroa', 'papuana']
projections = [20, 20]

# Variant calls and the population-assignment file read alongside them.
vcf = "/home/k14m234/erythrura_assembly/results/GCF_005870125.1/QC/erythrura.pruned.vcf.gz"
popfile = "/home/k14m234/erythrura/config/pixy_pop.txt"

# --- Run settings ------------------------------------------------------------
# Seed and the NumPy generator derived from it.
seed = 1
rng = np.random.default_rng(seed)

# Replicate count and optimiser iteration budget.
# maxiter: 3 mimics the moments example; 20-100 is more realistic.
n_reps = 10
maxiter = 10

Directories for output: 

In [3]:
# Absolute results directory for this analysis; created here if it is missing.
# NOTE(review): the fitting cells visible in this notebook write to the
# relative path "dadi/" rather than to `outdir` -- confirm which is intended.
outdir = "/home/k14m234/erythrura/results/dadi"
os.makedirs(outdir, exist_ok=True)

Scaling constants: 

In [4]:
# Constants used by the fitting cells to turn dadi's scaled estimates into
# absolute units, via Nref = theta0 / (4 * mu * L).
mu = 2.3e-9            # per-site per-generation mutation rate
# NOTE(review): L is the total callable length, while the input VCF file name
# contains "pruned"; if sites were removed from the VCF, L should be reduced
# to match or Nref will be underestimated -- confirm.
L  = 1010218420        # from awk '{sum += $3-$2} END{print sum}' erythrura_callable_sites.bed 
# NOTE(review): gen_time is not applied in the fitting cells visible here, so
# the times they write out are in generations, not years.
gen_time = 1.0         # years per generation (optional; set to 1 if unknown)

Build a folded SFS and print its number of segregating sites: 

In [5]:
# Parse the VCF and the population file into dadi's data dictionary.
dd = dadi.Misc.make_data_dict_vcf(vcf, popfile)
# Joint spectrum for `pops`, projected to `projections`. polarized=False means
# no ancestral-state information is used, i.e. the spectrum is folded.
fs = dadi.Spectrum.from_data_dict(dd, pops, projections=projections, polarized=False)
# Number of segregating sites in the projected spectrum.
print(fs.S())

73881.42115007679


Sample sizes and grid sizes:  

In [6]:
# Sample sizes as recorded on the data spectrum.
ns = fs.sample_sizes
# Three grid sizes for the extrapolating model functions: the largest sample
# size padded by 20, 30 and 40 points.
pts_l = [max(ns) + pad for pad in (20, 30, 40)]

Bootstrapped datasets:

In [9]:
# Bootstrap replicate spectra for the Godambe (GIM) uncertainty estimates.
# They are generated once here and reused by every model-fitting cell.
Nboot = 100
chunk_size = int(5e6)  # fragment size passed to fragment_data_dict (presumably bp)

# Split the data dictionary into fragments that are resampled with replacement.
chunks = dadi.Misc.fragment_data_dict(dd, chunk_size)

# The data spectrum `fs` was built with polarized=False, so the bootstraps must
# be built the same way. With the default polarized=True, dadi ignores SNPs
# whose outgroup allele is missing or inconsistent, so the bootstraps would be
# drawn from a different (possibly much smaller) set of sites than the data.
boots = dadi.Misc.bootstraps_from_dd_chunks(chunks, Nboot, pops, ns, polarized=False)

# Fold only spectra that are still unfolded, so this cell works whether or not
# the call above already returned folded spectra.
boots = [b if b.folded else b.fold() for b in boots]

# Use exactly the same masked entries as the data spectrum.
for b in boots:
    b.mask = fs.mask

Define models:

In [10]:
# strict isolation
def SI(params, ns, pts):
    """Model spectrum for a split followed by divergence without migration.

    params: (nu1, nu2, T), forwarded to dadi.Integration.two_pops.
    ns: sample sizes of the returned spectrum.
    pts: number of grid points used for the integration.
    """
    nu1, nu2, T = params
    grid = dadi.Numerics.default_grid(pts)

    # One-dimensional starting density, then split into two populations.
    ancestral = dadi.PhiManip.phi_1D(grid)
    split = dadi.PhiManip.phi_1D_to_2D(grid, ancestral)

    # Integrate for time T with both migration rates fixed at zero.
    diverged = dadi.Integration.two_pops(split, grid, T, nu1=nu1, nu2=nu2, m12=0, m21=0)
    return dadi.Spectrum.from_phi(diverged, ns, (grid, grid))
si_ex = dadi.Numerics.make_extrap_log_func(SI)

# isolation-with-migration
def IM(params, ns, pts):
    """Model spectrum for a split followed by divergence with migration.

    params: (nu1, nu2, T, m12, m21), forwarded to dadi.Integration.two_pops.
    ns: sample sizes of the returned spectrum.
    pts: number of grid points used for the integration.
    """
    nu1, nu2, T, m12, m21 = params
    grid = dadi.Numerics.default_grid(pts)

    # One-dimensional starting density, then split into two populations.
    ancestral = dadi.PhiManip.phi_1D(grid)
    split = dadi.PhiManip.phi_1D_to_2D(grid, ancestral)

    # Integrate for time T with migration in both directions.
    diverged = dadi.Integration.two_pops(split, grid, T, nu1=nu1, nu2=nu2, m12=m12, m21=m21)
    return dadi.Spectrum.from_phi(diverged, ns, (grid, grid))
im_ex = dadi.Numerics.make_extrap_log_func(IM)

# secondary contact
def SC(params, ns, pts):
    """Model spectrum for isolation followed by a period of migration.

    params: (nu1, nu2, T1, T2, m12, m21). T1 is the duration of the
        migration-free phase and T2 the duration of the phase with migration;
        nu1 and nu2 are the same in both phases.
    ns: sample sizes of the returned spectrum.
    pts: number of grid points used for the integration.
    """
    nu1, nu2, T1, T2, m12, m21 = params
    grid = dadi.Numerics.default_grid(pts)

    # One-dimensional starting density, then split into two populations.
    density = dadi.PhiManip.phi_1D_to_2D(grid, dadi.PhiManip.phi_1D(grid))

    # The two phases differ only in duration and migration rates.
    for duration, rate12, rate21 in ((T1, 0, 0), (T2, m12, m21)):
        density = dadi.Integration.two_pops(
            density, grid, duration, nu1=nu1, nu2=nu2, m12=rate12, m21=rate21
        )

    return dadi.Spectrum.from_phi(density, ns, (grid, grid))
sc_ex = dadi.Numerics.make_extrap_log_func(SC)

We now run replicate model fitting, starting with strict isolation (SI): 

In [18]:
# Starting values, names and optimisation bounds for the SI parameters.
SI_params = [1, 1, 0.01]
SI_param_names = ["nu1","nu2","T"]
SI_lower = [1e-3, 1e-3, 1e-4]
SI_upper = [10,   10,   1]

# Make sure the (relative) output directory exists.
os.makedirs("dadi", exist_ok=True)

# 50 independent optimisations, each from a randomly perturbed starting point.
# Results are appended, so re-running this cell adds rows to the existing files.
for i in range(50):

    # random starting perturbation, kept inside the bounds
    p0 = dadi.Misc.perturb_params(
        SI_params, fold=1,
        upper_bound=SI_upper,
        lower_bound=SI_lower
    )

    # fit the model
    popt, ll_model = dadi.Inference.opt(
        p0, fs, si_ex, pts_l,
        lower_bound=SI_lower,
        upper_bound=SI_upper,
        algorithm=nlopt.LN_BOBYQA,
        maxeval=400,
        verbose=100
    )

    # model spectrum at the optimum
    model_fs = si_ex(popt, ns, pts_l)

    # theta and parameters in absolute units: sizes scale with Nref and times
    # with 2 * Nref (generations; gen_time is not applied here).
    theta0 = dadi.Inference.optimal_sfs_scaling(model_fs, fs)
    Nref   = theta0 / (4 * mu * L)
    nTri = popt[0] * Nref
    nPap = popt[1] * Nref
    t1  = popt[2] * 2 * Nref

    # write real params
    out1 = "\t".join(map(str, [i+1, ll_model, nTri, nPap, t1, theta0])) + "\n"
    with open("dadi/si_real.tsv", "a") as f:
        f.write(out1)

    # write model params
    out2 = "\t".join(map(str, [i+1, ll_model] + list(popt) + [theta0])) + "\n"
    with open("dadi/si_model.tsv", "a") as f:
        f.write(out2)

    # Godambe standard errors. Step sizes are tried in order of preference and
    # the first one that works is used. The previous version evaluated every
    # step size and kept the last success (1e-3 when available), so this order
    # gives the same result without the redundant GIM evaluations.
    se = None
    for eps in (1e-3, 5e-3, 1e-2):
        try:
            uncert = dadi.Godambe.GIM_uncert(si_ex, pts_l, boots, popt, fs, eps=eps)
        except np.linalg.LinAlgError:
            print(f"rep {i+1}: Godambe failed (singular) at eps={eps}; trying next step size")
            continue

        # GIM_uncert may append an uncertainty for theta; drop it if present.
        if len(uncert) == len(popt) + 1:
            se = uncert[:-1]
        else:
            se = uncert
        break

    if se is None:
        # Every step size failed. Write nothing, so that CIs left over from an
        # earlier replicate are never recorded under this replicate's id.
        print(f"rep {i+1}: Godambe failed for all step sizes; no CI written")
    else:
        ci_low  = np.array(popt) - 1.96 * np.array(se)
        ci_high = np.array(popt) + 1.96 * np.array(se)

        with open("dadi/si_ci.tsv", "a") as f:
            for name, est, se_i, lo, hi in zip(SI_param_names, popt, se, ci_low, ci_high):
                f.write("\t".join(map(str, [i+1, ll_model, name, est, se_i, lo, hi])) + "\n")

    print(i+1, ll_model, popt)

3200    , -2631.05    , array([ 4.2409     ,  1.3274     ,  0.0573657  ])
rep 1: Godambe failed (singular); skipping CI
1 -2242.095592505911 [5.0725311  2.28680631 0.0617874 ]
3300    , -2425.86    , array([ 5.24563    ,  1.4504     ,  0.0471485  ])
2 -2242.095593477997 [5.07277825 2.28663665 0.06178661]
3400    , -3668.26    , array([ 1.78635    ,  0.96036    ,  0.0206375  ])
3 -2242.0955965861453 [5.07199913 2.28698153 0.06178887]
3500    , -2255.04    , array([ 6.04214    ,  2.08812    ,  0.0613626  ])
4 -2242.0956637185095 [5.07044465 2.28663075 0.06179079]
3600    , -2301.53    , array([ 3.88709    ,  2.80661    ,  0.0687861  ])
5 -2242.0955902897567 [5.07223842 2.2867352  0.06178731]
3700    , -2284.1     , array([ 4.38489    ,  2.9784     ,  0.0673144  ])
6 -2242.0956204587046 [5.07060582 2.28692567 0.06178635]
3800    , -3865.52    , array([ 1.83736    ,  0.784213   ,  0.0180608  ])
7 -2242.095590316114 [5.0722718  2.28672066 0.06178707]
3900    , -3985.95    , array([ 1.10742 



5245.131189830356 73881.42115007679
5400    , -2270.28    , array([ 4.00701    ,  2.26809    ,  0.0630712  ])
20 -2242.095590309577 [5.0722247  2.28674451 0.06178743]
5500    , -2969.07    , array([ 1.79997    ,  4.81813    ,  0.0500372  ])
5600    , -2242.11    , array([ 5.06006    ,  2.27912    ,  0.0618236  ])
21 -2242.095590895473 [5.07209367 2.28671421 0.06178664]
22 -2242.0955980923836 [5.07256152 2.28694689 0.06178939]
5700    , -4019.02    , array([ 1.15743    ,  1.99819    ,  0.0204211  ])




9424.411727155162 73881.42115007679
23 -2242.0955904957777 [5.07216434 2.28676941 0.06178764]
5800    , -4533.36    , array([ 0.851947   ,  1.79018    ,  0.0158877  ])
5900    , -2242.1     , array([ 5.0723     ,  2.2865     ,  0.0617871  ])
24 -2242.0955975463976 [5.0723012  2.28650128 0.06178711]
6000    , -2242.1     , array([ 5.07295    ,  2.28686    ,  0.0617891  ])
25 -2242.095591733785 [5.07261877 2.28665897 0.06178747]
26 -2242.0955903116665 [5.07227801 2.28675142 0.06178743]
6100    , -3805.56    , array([ 1.08349    ,  1.92078    ,  0.0253046  ])
6200    , -2242.25    , array([ 5.02311    ,  2.27265    ,  0.0612861  ])
27 -2242.095644849092 [5.07399165 2.28681926 0.06178532]




2061.0247062676967 73881.42115007679
6300    , -2277.82    , array([ 4.16689    ,  2.11363    ,  0.0547674  ])
28 -2242.095599612686 [5.07318269 2.28663448 0.06178858]
6400    , -2445.15    , array([ 2.9452     ,  1.89604    ,  0.0588942  ])
29 -2242.0955908517826 [5.07245538 2.28666914 0.06178715]
6500    , -3801.33    , array([ 1.43097    ,  1.04433    ,  0.0199416  ])
6600    , -2242.1     , array([ 5.08282    ,  2.2851     ,  0.0617724  ])
30 -2242.095591695106 [5.07192431 2.28674739 0.06178687]
6700    , -2248.24    , array([ 5.1218     ,  2.534      ,  0.0628619  ])
31 -2242.0955904690254 [5.07238748 2.28671701 0.06178766]
6800    , -2281.46    , array([ 5.00182    ,  1.96082    ,  0.0630968  ])
rep 32: Godambe failed (singular); skipping CI
32 -2242.095590299599 [5.07231749 2.28673678 0.06178746]
6900    , -3409.74    , array([ 2.38499    ,  0.794091   ,  0.0259249  ])
33 -2242.095590343358 [5.07222366 2.28673225 0.0617875 ]
7000    , -5790.67    , array([ 0.766749   ,  1.9432  



3024.1486982865267 73881.42115007679
7600    , -2243.39    , array([ 4.76145    ,  2.35308    ,  0.061581   ])
39 -2242.0955975033885 [5.07294159 2.28680014 0.061788  ]
7700    , -2242.6     , array([ 5.27032    ,  2.28919    ,  0.0622685  ])
40 -2242.0955939870014 [5.07280497 2.28661498 0.06178651]
7800    , -2383.97    , array([ 3.06657    ,  2.21775    ,  0.0625241  ])
41 -2242.095596911794 [5.07298517 2.28670752 0.06178682]
7900    , -4997.98    , array([ 2.48662    ,  0.685824   ,  0.0085372  ])
8000    , -2242.1     , array([ 5.07591    ,  2.28653    ,  0.0617883  ])
42 -2242.0955912655854 [5.07214421 2.28682402 0.06178735]
8100    , -2242.12    , array([ 5.10503    ,  2.2952     ,  0.0618603  ])
43 -2242.0955909277905 [5.07206882 2.28675767 0.06178779]




8096.866807951625 73881.42115007679
8200    , -2242.16    , array([ 5.02606    ,  2.28621    ,  0.0619485  ])
44 -2242.0955903838735 [5.07233397 2.28670428 0.06178707]
8300    , -2243.03    , array([ 5.14835    ,  2.36613    ,  0.0626161  ])
45 -2242.095597853564 [5.07200921 2.28673741 0.06179022]
8400    , -2325.61    , array([ 3.37098    ,  2.26301    ,  0.0624045  ])
46 -2242.09559052443 [5.07217693 2.28677982 0.06178753]
8500    , -2410.94    , array([ 2.99308    ,  3.41939    ,  0.060528   ])
47 -2242.0955913991243 [5.07213136 2.28673884 0.06178606]
8600    , -2273.72    , array([ 7.30219    ,  2.07452    ,  0.0600934  ])
48 -2242.095595440427 [5.07249084 2.28651688 0.06178538]
8700    , -2472.88    , array([ 2.70007    ,  3.36489    ,  0.0628537  ])
49 -2242.0955914088404 [5.0724181  2.28665497 0.06178614]
8800    , -2450.09    , array([ 4.02672    ,  4.45419    ,  0.0674888  ])
50 -2242.0955902830265 [5.07225331 2.28674081 0.06178741]


Next up is secondary contact (SC): 

In [19]:
# Starting values, names and optimisation bounds for the SC parameters.
SC_params = [1, 1, 0.01, 0.01, 0.001, 0.001]
SC_param_names = ["nu1","nu2","T1","T2","m12","m21"]
SC_lower = [1e-3, 1e-3, 1e-4, 1e-4, 1e-6, 1e-6]
SC_upper = [10,   10,   1,   1,   0.5,   0.5]

# Make sure the (relative) output directory exists.
os.makedirs("dadi", exist_ok=True)

# CI output file; the header is written only when the file is first created.
ci_path = "dadi/sc_ci.tsv"
if not os.path.exists(ci_path):
    with open(ci_path, "w") as f:
        f.write("rep\tll\tparam\test\tse\tci_low\tci_high\n")

# 50 independent optimisations, each from a randomly perturbed starting point.
# Results are appended, so re-running this cell adds rows to the existing files.
for i in range(50):

    # random starting perturbation, kept inside the bounds
    p0 = dadi.Misc.perturb_params(
        SC_params, fold=1,
        upper_bound=SC_upper,
        lower_bound=SC_lower
    )

    # fit the model
    popt, ll_model = dadi.Inference.opt(
        p0, fs, sc_ex, pts_l,
        lower_bound=SC_lower,
        upper_bound=SC_upper,
        algorithm=nlopt.LN_BOBYQA,
        maxeval=400,
        verbose=100
    )

    # model spectrum at the optimum
    model_fs = sc_ex(popt, ns, pts_l)

    # theta and parameters in absolute units: sizes scale with Nref, times
    # with 2 * Nref (generations; gen_time is not applied here), and migration
    # rates with 1 / (2 * Nref).
    theta0 = dadi.Inference.optimal_sfs_scaling(model_fs, fs)
    Nref = theta0 / (4 * mu * L)
    nTri = popt[0] * Nref
    nPap = popt[1] * Nref
    t1 = popt[2] * 2 * Nref
    t2 = popt[3] * 2 * Nref
    m12 = popt[4] / (2 * Nref)
    m21 = popt[5] / (2 * Nref)

    # write real params
    out1 = "\t".join(map(str, [i+1, ll_model, nTri, nPap, t1, t2, m12, m21, theta0])) + "\n"
    with open("dadi/sc_real.tsv", "a") as f:
        f.write(out1)

    # write model params
    out2 = "\t".join(map(str, [i+1, ll_model] + list(popt) + [theta0])) + "\n"
    with open("dadi/sc_model.tsv", "a") as f:
        f.write(out2)

    # Godambe standard errors. Step sizes are tried in order of preference and
    # the first one that works is used. The previous version evaluated every
    # step size and kept the last success (1e-3 when available), so this order
    # gives the same result without the redundant GIM evaluations.
    se = None
    for eps in (1e-3, 5e-3, 1e-2):
        try:
            uncert = dadi.Godambe.GIM_uncert(sc_ex, pts_l, boots, popt, fs, eps=eps)
        except np.linalg.LinAlgError:
            print(f"rep {i+1}: Godambe failed (singular) at eps={eps}; trying next step size")
            continue

        # GIM_uncert may append an uncertainty for theta; drop it if present.
        if len(uncert) == len(popt) + 1:
            se = uncert[:-1]
        else:
            se = uncert
        break

    if se is None:
        # Every step size failed. Write nothing, so that CIs left over from an
        # earlier replicate are never recorded under this replicate's id.
        print(f"rep {i+1}: Godambe failed for all step sizes; no CI written")
    else:
        ci_low  = np.array(popt) - 1.96 * np.array(se)
        ci_high = np.array(popt) + 1.96 * np.array(se)

        with open(ci_path, "a") as f:
            for name, est, se_i, lo, hi in zip(SC_param_names, popt, se, ci_low, ci_high):
                f.write("\t".join(map(str, [i+1, ll_model, name, est, se_i, lo, hi])) + "\n")

    print(i+1, ll_model, popt)

8900    , -2878.03    , array([ 3.4181     ,  1.01526    ,  0.00393982 ,  0.036696   ,  0.000765333,  0.0034631  ])
9000    , -2243.41    , array([ 4.96294    ,  2.34013    ,  0.00316028 ,  0.0585963  ,  0.00488118 ,  0.00321445 ])
1 -2242.278706161022 [4.99166010e+00 2.34187867e+00 5.47779154e-03 5.62039242e-02
 5.70363153e-03 3.47797399e-03]
9100    , -2516.52    , array([ 3.24433    ,  1.45127    ,  0.0394531  ,  0.00683718 ,  0.00112814 ,  0.00228653 ])
9200    , -2242.29    , array([ 5.11613    ,  2.26517    ,  0.0616255  ,  0.0001     ,  0.00211232 ,  0.00138806 ])
2 -2242.2284861743424 [5.06535383e+00 2.28727042e+00 6.16667939e-02 1.00000000e-04
 2.11191239e-03 1.39455160e-03]
9300    , -2365.6     , array([ 3.01147    ,  2.42443    ,  0.0025127  ,  0.0588226  ,  1e-06      ,  0.0040397  ])
9400    , -2242.03    , array([ 5.04294    ,  2.29795    ,  0.0001     ,  0.0616456  ,  8.45685e-06,  0.00891425 ])
3 -2242.0085135107074 [5.07033146e+00 2.28618601e+00 1.00000000e-04 6.17034

Lastly, we'll run isolation-with-migration (IM):

In [20]:
# Starting values, names and optimisation bounds for the IM parameters.
IM_params = [1, 1, 0.01, 0.001, 0.001]
IM_param_names = ["nu1","nu2","T","m12","m21"]
IM_lower = [1e-3, 1e-3, 1e-4, 1e-6, 1e-6]
IM_upper = [10,   10,   1,   0.5,   0.5]

# Make sure the (relative) output directory exists, so this cell can also be
# run without the SI/SC cells having created it first.
os.makedirs("dadi", exist_ok=True)

# 50 independent optimisations, each from a randomly perturbed starting point.
# Results are appended, so re-running this cell adds rows to the existing files.
for i in range(50):

    # random starting perturbation, kept inside the bounds
    p0 = dadi.Misc.perturb_params(
        IM_params, fold=1,
        upper_bound=IM_upper,
        lower_bound=IM_lower
    )

    # fit the model
    popt, ll_model = dadi.Inference.opt(
        p0, fs, im_ex, pts_l,
        lower_bound=IM_lower,
        upper_bound=IM_upper,
        algorithm=nlopt.LN_BOBYQA,
        maxeval=400,
        verbose=100
    )

    # model spectrum at the optimum
    model_fs = im_ex(popt, ns, pts_l)

    # theta and parameters in absolute units: sizes scale with Nref, times
    # with 2 * Nref (generations; gen_time is not applied here), and migration
    # rates with 1 / (2 * Nref).
    theta0 = dadi.Inference.optimal_sfs_scaling(model_fs, fs)
    Nref = theta0 / (4 * mu * L)
    nTri = popt[0] * Nref
    nPap = popt[1] * Nref
    t1 = popt[2] * 2 * Nref
    m12 = popt[3] / (2 * Nref)
    m21 = popt[4] / (2 * Nref)

    # write real params
    out1 = "\t".join(map(str, [i+1, ll_model, nTri, nPap, t1, m12, m21, theta0])) + "\n"
    with open("dadi/im_real.tsv", "a") as f:
        f.write(out1)

    # write model params
    out2 = "\t".join(map(str, [i+1, ll_model] + list(popt) + [theta0])) + "\n"
    with open("dadi/im_model.tsv", "a") as f:
        f.write(out2)

    # Godambe standard errors. Step sizes are tried in order of preference and
    # the first one that works is used. The previous version evaluated every
    # step size and kept the last success (1e-3 when available), so this order
    # gives the same result without the redundant GIM evaluations.
    se = None
    for eps in (1e-3, 5e-3, 1e-2):
        try:
            uncert = dadi.Godambe.GIM_uncert(im_ex, pts_l, boots, popt, fs, eps=eps)
        except np.linalg.LinAlgError:
            print(f"rep {i+1}: Godambe failed (singular) at eps={eps}; trying next step size")
            continue

        # GIM_uncert may append an uncertainty for theta; drop it if present.
        if len(uncert) == len(popt) + 1:
            se = uncert[:-1]
        else:
            se = uncert
        break

    if se is None:
        # Every step size failed. Write nothing, so that CIs left over from an
        # earlier replicate are never recorded under this replicate's id.
        print(f"rep {i+1}: Godambe failed for all step sizes; no CI written")
    else:
        ci_low  = np.array(popt) - 1.96 * np.array(se)
        ci_high = np.array(popt) + 1.96 * np.array(se)

        with open("dadi/im_ci.tsv", "a") as f:
            for name, est, se_i, lo, hi in zip(IM_param_names, popt, se, ci_low, ci_high):
                f.write("\t".join(map(str, [i+1, ll_model, name, est, se_i, lo, hi])) + "\n")

    print(i+1, ll_model, popt)

17900   , -2402.32    , array([ 2.79547    ,  2.44195    ,  0.0596482  ,  0.00108506 ,  0.000561187])
1 -2242.0429726240377 [5.05695646e+00 2.28899747e+00 6.18034710e-02 3.21310399e-04
 2.16173943e-03]
18000   , -2666.94    , array([ 2.31389    ,  1.63521    ,  0.0419875  ,  1e-06      ,  1e-06      ])
18100   , -2242       , array([ 5.06978    ,  2.28894    ,  0.0617921  ,  0.000334195,  0.00417272 ])
2 -2241.997253074808 [5.07376704e+00 2.28626969e+00 6.17989218e-02 3.31797612e-04
 4.18398285e-03]
18200   , -2242.23    , array([ 5.13124    ,  2.24041    ,  0.0618461  ,  0.0016216  ,  0.0044672  ])
3 -2241.9458166089917 [5.07078543e+00 2.28586605e+00 6.18051709e-02 1.82270331e-03
 4.53086832e-03]
18300   , -2750.36    , array([ 3.88545    ,  1.27729    ,  0.0347753  ,  0.00217128 ,  0.000671955])
18400   , -2242.01    , array([ 5.07104    ,  2.28587    ,  0.0617876  ,  0.00176106 ,  0.00145572 ])
4 -2242.0127907875594 [5.07162855e+00 2.28602787e+00 6.17919383e-02 1.76164875e-03
 1.456

Write parameters with CIs: