## MS-SMC' validation

In [4]:
from concurrent.futures import ProcessPoolExecutor
from typing import Tuple
import numpy as np
import pandas as pd
import ipcoal
import toyplot, toyplot.svg, toyplot.png
import toytree
from scipy import stats

### Parameters

In [11]:
# recombination rate handed to ipcoal.Model(recomb=...) and used in the analytical
# rate calculation — presumably per site per generation; confirm against ipcoal
RECOMB = 2e-9
# root height that every species tree is rescaled to (edge lengths are in generations)
SPECIES_TREE_HEIGHT = 1e6
# lowest and highest Ne tested, and how many evenly spaced Ne values lie between them
NEFF_MIN = 50_000
NEFF_MAX = 500_000
NEFF_VALS = 10
# seed of the generator that draws one simulation seed per replicate
SEED = 123
# number of loci simulated per call to get_sim_and_analytical()
NLOCI = 4

### Generate a species tree for the model with 1 tip, 2 tips, or 8 tips

In [12]:
# one-tip species tree parsed from newick, rescaled to SPECIES_TREE_HEIGHT and drawn;
# the trailing semicolon keeps the notebook from echoing draw()'s return value
sptree = toytree.tree("(a);")
sptree = sptree.mod.edges_scale_to_root_height(treeheight=SPECIES_TREE_HEIGHT, include_stem=True)
sptree.draw('p');

In [13]:
# two-tip balanced species tree, rescaled to SPECIES_TREE_HEIGHT and drawn
sptree = toytree.rtree.baltree(2)
sptree = sptree.mod.edges_scale_to_root_height(treeheight=SPECIES_TREE_HEIGHT, include_stem=True)
sptree.draw('p');

In [14]:
# eight-tip balanced species tree, rescaled to SPECIES_TREE_HEIGHT and drawn
sptree = toytree.rtree.baltree(8)
sptree = sptree.mod.edges_scale_to_root_height(treeheight=SPECIES_TREE_HEIGHT, include_stem=True)
sptree.draw('p');

In [25]:
def get_sptree(ntips: int) -> toytree.ToyTree:
    """Build the species tree used for one validation scenario.

    Parameters
    ----------
    ntips: int
        Number of tips. For 1 the tree is parsed from the newick string "(a);",
        otherwise it is generated by `toytree.rtree.baltree(ntips)`.

    Returns
    -------
    ToyTree
        The tree after rescaling to SPECIES_TREE_HEIGHT with include_stem=True.
    """
    # the one-tip case is written out as newick rather than generated by baltree
    unscaled = toytree.tree("(a);") if ntips == 1 else toytree.rtree.baltree(ntips)
    return unscaled.mod.edges_scale_to_root_height(SPECIES_TREE_HEIGHT, include_stem=True)

### Get waiting distances to the first tree-change and topology-change events


In [26]:
def get_sim_and_analytical(sptree: toytree.ToyTree, nsamples: int, neff: int, nsites: int, seed: int, smc: bool) -> np.ndarray:
    """Return analytical and simulated probabilities and waiting distances for tree-change
    and topo-change events, one row per simulated locus.
    
    Analytical results are computed for the first genealogy in a locus. Simulated results 
    are found by examining subsequent genealogies in the tree sequence until each event
    type is observed. 
    
    This uses the global variables RECOMB and NLOCI (see above).
    
    Parameters
    ----------
    sptree: ToyTree
        A species tree with edge lengths in units of generations
    nsamples: int
        Number of haploid samples per species tree tip.
    neff: int
        A diploid effective population size applied to all sptree edges.
    nsites: int
        Number of linked sites to simulate.
    seed: int
        A seed for the random number generator
    smc: bool
        If True simulate under ancestry_model="smc_prime", else under "hudson".

    Returns
    -------
    np.ndarray
        Float array of shape (NLOCI, 8); the columns are listed in the comment block
        above the `results` allocation below.
    """
    model = ipcoal.Model(
        sptree, 
        Ne=neff, 
        seed_trees=seed, 
        nsamples=nsamples, 
        recomb=RECOMB,
        record_full_arg=True,
        ancestry_model="smc_prime" if smc else "hudson",  # SMC' when requested, otherwise Hudson
    )
    imap = model.get_imap_dict()
    
    # store results in arrays
    # 0 = smc_tree_probs
    # 1 = smc_topo_probs
    # 2 = smc_tree_dists
    # 3 = smc_topo_dists
    # 4 = sim_tree_dists
    # 5 = sim_topo_dists
    # 6 = sum_edge_lengths of genealogy 0
    # 7 = event type
    results = np.zeros(shape=(NLOCI, 8))
    
    # iterate over number of loci (replicates)
    for lidx in range(NLOCI):
        
        # get FULL tree sequence given the parameterized ipcoal.Model
        ts = next(model._get_tree_sequence_generator(nsites=nsites))
        
        # get SIMPLE tree sequence for fetching simplified trees at breakpoints
        # from the FULL `ts`, since no-change breakpoints disappear in `sts`.
        sts = ts.simplify(filter_sites=False)

        # get the starting tree and its sum branch lengths
        tree0 = sts.at(0, sample_lists=True)
        tsumlen0 = tree0.get_total_branch_length()
        results[lidx, 6] = tsumlen0

        # compute analytical probabilities of change given tree0
        toy0 = toytree.tree(tree0.as_newick(node_labels=model.tipdict))
        results[lidx, 0] = ipcoal.smc.get_probability_of_tree_change(model.tree, toy0, imap)
        results[lidx, 1] = ipcoal.smc.get_probability_of_topology_change(model.tree, toy0, imap)
        
        # compute analytical E[waiting distances] given tree0: the event rate is
        # (total edge length) x P(event) x RECOMB, and the expected distance is 1 / rate
        tree_rate = tsumlen0 * results[lidx, 0] * RECOMB
        topo_rate = tsumlen0 * results[lidx, 1] * RECOMB
        results[lidx, 2] = stats.expon.mean(scale=1 / tree_rate)
        results[lidx, 3] = stats.expon.mean(scale=1 / topo_rate)
        
        # get the next simulated tree (simplified tree at end of interval 0 of non-simple ts)
        tree1 = sts.at(ts.at(0).interval[1], sample_lists=True)
        
        # store type of first event: 0=no-change, 1=tree-change, 2=topo-change
        # (a nonzero KC distance is truthy, so each `if` asks "do the trees differ?")
        if tree0.kc_distance(tree1, lambda_=1):      # diff in blens only
            if tree0.kc_distance(tree1, lambda_=0):  # diff in topology only
                # a topology change is also recorded as the first tree change
                results[lidx, 7] = 2
                results[lidx, 4] = tree1.interval[0]
                results[lidx, 5] = tree1.interval[0]
            else:
                results[lidx, 7] = 1
                results[lidx, 4] = tree1.interval[0]
        else:
            results[lidx, 7] = 0

        # if waiting distances already found then continue to next locus
        # (column 5 is only nonzero once the topo-change distance has been stored)
        if results[lidx, 5]:
            continue
        
        # iterate over trees from index 2 -> End to get waiting distances until the 
        # first observed tree and topo-change events from simulation.
        # NOTE(review): the scan is bounded only by 999999; `ts.at_index(idx)` presumably
        # raises once idx passes the last tree if no topo-change occurs within `nsites`
        # — confirm that nsites is always long enough for the Ne values used.
        for idx in range(2, 999999):
            interval = ts.at_index(idx).interval
            tree1 = sts.at(interval[0], sample_lists=True)
                
            # if no difference in branch lengths then go to next idx
            if tree0.kc_distance(tree1, lambda_=1):     # diff in blens only
                # keep only the first tree-change distance
                if not results[lidx, 4]:
                    results[lidx, 4] = tree1.interval[0]
                if tree0.kc_distance(tree1, lambda_=0):  # diff in topology only
                    results[lidx, 5] = tree1.interval[0]
                    break
    return results

In [29]:
# example results array: one row per locus (NLOCI rows) and 8 columns, in the order
# listed in the comment block inside get_sim_and_analytical()
sptree = get_sptree(8)
get_sim_and_analytical(sptree, nsamples=1, neff=100000, nsites=500_000, seed=123, smc=False)

array([[5.54764539e-01, 1.53804181e-01, 1.57655692e+02, 5.68656759e+02,
        1.03000000e+02, 2.37900000e+03, 5.71678234e+06, 1.00000000e+00],
       [5.52203657e-01, 2.80267923e-01, 1.86793057e+02, 3.68032873e+02,
        9.90000000e+01, 9.90000000e+01, 4.84741270e+06, 0.00000000e+00],
       [5.38887852e-01, 2.11229182e-01, 2.04351239e+02, 5.21340846e+02,
        1.11400000e+03, 1.11400000e+03, 4.54040231e+06, 0.00000000e+00],
       [5.46230438e-01, 1.19758179e-01, 2.23907949e+02, 1.02126917e+03,
        4.18000000e+02, 8.74000000e+02, 4.08812904e+06, 0.00000000e+00]])

## ----

## Compare results of the Hudson and SMC' methods

In [39]:
def distribute_jobs(sptree: toytree.ToyTree, nsamples: int):
    """Run get_sim_and_analytical() in a process pool for every replicate, Ne value and
    ancestry model, then save the stacked results to two .npy files.

    Reads the globals NLOCI, NREPS, NEFF_MIN, NEFF_MAX, NEFF_VALS and SEED.

    Parameters
    ----------
    sptree: ToyTree
        Species tree passed unchanged to every job.
    nsamples: int
        Number of samples per species tree tip, passed unchanged to every job.

    Returns
    -------
    None
        Writes "./validation_full_{ntips}pops.npy" and "./validation_smc_{ntips}pops.npy",
        each of shape (NLOCI * NREPS, NEFF_VALS, 8).
    """
    # Ne values to test over
    nes = np.linspace(NEFF_MIN, NEFF_MAX, NEFF_VALS).astype(int)

    # one zero-filled output array per ancestry model
    shape = (NLOCI * NREPS, NEFF_VALS, 8)
    arrays = {"smc": np.zeros(shape), "full": np.zeros(shape)}

    # submit every job; futures are keyed by (Ne index, model name, replicate)
    futures = {}
    with ProcessPoolExecutor(max_workers=50) as pool:
        rng = np.random.default_rng(SEED)
        for rep in range(NREPS):
            # a new seed per replicate, reused for each Ne value and both models
            seed = rng.integers(1e9)
            for nidx, neff in enumerate(nes):
                # the smc' job is submitted first, then the full (non-smc') job
                for name, use_smc in (("smc", True), ("full", False)):
                    futures[(nidx, name, rep)] = pool.submit(
                        get_sim_and_analytical,
                        sptree=sptree,
                        nsamples=nsamples,
                        neff=neff,
                        nsites=int(550000 - neff),
                        seed=seed,
                        smc=use_smc,
                    )

    # the pool has shut down by now; copy each job's NLOCI rows into its slot
    for (nidx, name, rep), future in futures.items():
        rows = slice(NLOCI * rep, NLOCI * (rep + 1))
        arrays[name][rows, nidx, :] = future.result()

    np.save(f"./validation_full_{sptree.ntips}pops.npy", arrays["full"])
    np.save(f"./validation_smc_{sptree.ntips}pops.npy", arrays["smc"])

### RUN it

In [40]:
# NLOCI loci per job and NREPS seeds per Ne value give NLOCI * NREPS rows per Ne value.
# NOTE(review): the earlier note here read "10K reps for each job, split into 100
# parallelizable jobs", and the summary table printed further down reports
# nsamples=10000; the values below give 10 * 10 = 100 — confirm which settings
# produced the saved .npy files.
NLOCI = 10
NREPS = 10

In [41]:
# one population: 8 samples from the single tip
sptree = get_sptree(1)
distribute_jobs(sptree, nsamples=8)

In [110]:
# two populations: 4 samples per tip (8 in total)
sptree = get_sptree(2)
distribute_jobs(sptree, nsamples=4)

In [106]:
# eight populations: 1 sample per tip (8 in total)
sptree = get_sptree(8)
distribute_jobs(sptree, nsamples=1)

### Load results

In [42]:
def load_data(path, neff_min=None, neff_max=None):
    """Return a saved results array summarized per Ne value as a dataframe.

    The array is expected to have three axes (replicate, Ne value, column), as written
    by distribute_jobs(), with the last axis holding:

    # 0 = smc_tree_probs
    # 1 = smc_topo_probs
    # 2 = smc_tree_dists
    # 3 = smc_topo_dists
    # 4 = sim_tree_dists
    # 5 = sim_topo_dists
    # 6 = sum_edge_lengths of genealogy 0
    # 7 = event type

    Parameters
    ----------
    path: str
        Path to the .npy file to load.
    neff_min, neff_max: int or None
        Lowest and highest Ne used to label the rows. When None (default) the
        NEFF_MIN / NEFF_MAX globals are used.

    Returns
    -------
    pandas.DataFrame
        One row per Ne value. The row labels are evenly spaced between the two Ne
        bounds, and the number of rows and the `nsamples` column are taken from the
        shape of the loaded array rather than from notebook globals, so files created
        with other NLOCI / NREPS / NEFF_VALS settings are summarized correctly.
        The `*_error_perc` columns are |sim - analytical| / sim (a ratio, not
        multiplied by 100).
    """
    results = np.load(path)
    nreplicates, nvalues = results.shape[:2]

    # only fall back to the globals when the caller gave no bounds
    lowest = NEFF_MIN if neff_min is None else neff_min
    highest = NEFF_MAX if neff_max is None else neff_max

    # name each column of the last axis once
    tree_prob, topo_prob = results[:, :, 0], results[:, :, 1]
    tree_expect, topo_expect = results[:, :, 2], results[:, :, 3]
    tree_observed, topo_observed = results[:, :, 4], results[:, :, 5]
    sum_edge_lengths, event_type = results[:, :, 6], results[:, :, 7]

    # absolute and relative differences between simulated and analytical distances
    tree_abs_error = abs(tree_observed - tree_expect)
    topo_abs_error = abs(topo_observed - topo_expect)
    tree_rel_error = tree_abs_error / tree_observed
    topo_rel_error = topo_abs_error / topo_observed

    def ci95(values):
        """Return one (2.5th, 97.5th percentile) pair per Ne value, rounded to 5 places."""
        return tuple(zip(
            np.percentile(values, 2.5, axis=0).round(5),
            np.percentile(values, 97.5, axis=0).round(5),
        ))

    newdata = pd.DataFrame(
        data={
            'tree_smc_prob': tree_prob.mean(axis=0),
            'topo_smc_prob': topo_prob.mean(axis=0),
            'tree_smc_dist': tree_expect.mean(axis=0),
            'topo_smc_dist': topo_expect.mean(axis=0),
            # event type: 0=no-change, 1=tree-change, 2=topo-change
            'tree_sim_prob': (event_type != 0).mean(0),
            'topo_sim_prob': (event_type == 2).mean(0),
            'tree_sim_dist': tree_observed.mean(axis=0),
            'topo_sim_dist': topo_observed.mean(axis=0),

            'tree_smc_prob_CI95': ci95(tree_prob),
            'topo_smc_prob_CI95': ci95(topo_prob),
            'tree_smc_dist_CI95': ci95(tree_expect),
            'topo_smc_dist_CI95': ci95(topo_expect),

            # column name kept for existing users; it holds the mean sum of edge lengths
            'tmrca': sum_edge_lengths.mean(axis=0),

            # error
            'tree_dist_error': tree_abs_error.mean(axis=0),
            'tree_dist_error_perc': tree_rel_error.mean(axis=0),
            'topo_dist_error': topo_abs_error.mean(axis=0),
            'topo_dist_error_perc': topo_rel_error.mean(axis=0),
            'tree_dist_error_perc_sem': stats.sem(tree_rel_error, axis=0),
            'topo_dist_error_perc_sem': stats.sem(topo_rel_error, axis=0),
        },
        index=np.linspace(lowest, highest, nvalues).astype(int),
    )
    # number of replicate rows actually present in the file
    newdata['nsamples'] = nreplicates
    return newdata

In [334]:
# summarize the 2-population results simulated under the full (Hudson) model;
# the bare `data` expression displays the table in the notebook
data = load_data("./validation_full_2pops.npy")
data

Unnamed: 0,tree_smc_prob,topo_smc_prob,tree_smc_dist,topo_smc_dist,tree_sim_prob,topo_sim_prob,tree_sim_dist,topo_sim_dist,tree_smc_prob_CI95,topo_smc_prob_CI95,tree_smc_dist_CI95,topo_smc_dist_CI95,tmrca,tree_dist_error,tree_dist_error_perc,topo_dist_error,topo_dist_error_perc,tree_dist_error_perc_sem,topo_dist_error_perc_sem,nsamples
50000,0.478757,0.111213,676.268799,3654.401925,0.4774,0.1093,675.3757,3582.1332,"(0.35682, 0.58968)","(0.03462, 0.23607)","(400.46634, 1094.71949)","(1130.48907, 9634.67349)",1631340.0,505.554692,5.853635,2699.725566,7.403688,0.30218,0.615305,10000
100000,0.646151,0.225855,363.881016,1321.674949,0.6474,0.2282,362.5759,1303.2978,"(0.53602, 0.72513)","(0.07393, 0.45016)","(219.17838, 579.45744)","(411.92082, 3439.35414)",2258646.0,268.997142,5.543616,968.946987,7.391848,0.255541,0.54398,10000
150000,0.709904,0.313065,260.326753,701.879038,0.7104,0.3126,265.0879,717.6677,"(0.6106, 0.77856)","(0.11164, 0.51751)","(154.84618, 415.39722)","(285.81673, 1775.83932)",2897600.0,193.774727,4.547354,528.130149,5.009915,0.179925,0.244005,10000
200000,0.740947,0.367595,207.779292,471.085088,0.743,0.3687,210.0198,475.3567,"(0.64008, 0.80942)","(0.14678, 0.56616)","(119.75943, 332.09229)","(216.09206, 1117.71679)",3512331.0,155.109291,4.57659,353.180564,5.537589,0.177342,0.277451,10000
250000,0.75843,0.399244,175.429929,365.796188,0.7595,0.405,175.4391,364.2903,"(0.64994, 0.82855)","(0.16764, 0.59391)","(101.35483, 291.65201)","(179.65041, 839.06861)",4090813.0,128.001204,4.574884,268.482098,5.378818,0.169407,0.329674,10000
300000,0.770349,0.421542,152.285179,300.711882,0.7724,0.4228,154.2638,304.1593,"(0.65554, 0.84176)","(0.18132, 0.61643)","(86.30296, 252.68551)","(152.85254, 656.09956)",4672879.0,112.047357,4.324776,223.17694,5.3175,0.153298,0.256465,10000
350000,0.778406,0.434678,134.704801,257.274349,0.781,0.4326,135.8442,259.2106,"(0.66137, 0.85099)","(0.19871, 0.63489)","(75.19905, 227.33365)","(133.13225, 514.27454)",5245334.0,98.78475,4.257851,191.532145,4.968878,0.14666,0.214669,10000
400000,0.783426,0.442923,120.886196,227.550116,0.786,0.4406,121.9483,228.4511,"(0.65704, 0.85879)","(0.19749, 0.64864)","(66.14219, 206.37715)","(118.10011, 444.08066)",5849824.0,88.909483,4.069404,167.146392,4.562119,0.128534,0.174756,10000
450000,0.789172,0.450712,110.766089,204.42916,0.7896,0.4551,112.1042,209.1004,"(0.66305, 0.86532)","(0.20441, 0.65532)","(60.65237, 189.04474)","(109.50605, 387.53868)",6347644.0,81.431988,4.071231,154.139916,4.597892,0.12483,0.170724,10000
500000,0.791365,0.452936,101.172475,186.493868,0.7933,0.4496,101.1601,185.2469,"(0.6602, 0.86852)","(0.20559, 0.66422)","(54.35087, 175.71241)","(99.2669, 354.14556)",6969181.0,73.586737,3.775336,135.633137,4.189815,0.114481,0.151901,10000


# Plot results

### Plot probabilities

In [44]:
def plot_probs(data, topo: bool=False, smc: bool=False):
    """Plot analytical and simulated probabilities of tree or topology change against Ne.

    The analytical mean is drawn as a line with large markers on top of a shaded band
    taken from the matching `*_smc_prob_CI95` column; the simulated proportion is
    overlaid as small black squares.

    Reads the module-level `sptree` (for the top-axis labels) and the NEFF_MIN /
    NEFF_MAX globals (for the tick positions, matching the other plot functions).

    Parameters
    ----------
    data: pandas.DataFrame
        Summary table as returned by load_data(), indexed by Ne.
    topo: bool
        If True plot the topology-change columns, else the tree-change columns.
    smc: bool
        Not used inside the function; kept so existing calls keep working.

    Returns
    -------
    toyplot.Canvas
    """
    smckey = "topo_smc_" if topo else "tree_smc_"
    simkey = "topo_sim_" if topo else "tree_sim_"
    color = toytree.color.COLORS2[0]
    canvas = toyplot.Canvas(width=350, height=300)

    # setup axes: the top x axis shares y with the bottom one
    axb = canvas.cartesian(margin=65)
    axt = axb.share("y")
    axb.x.label.text = "N<sub>e</sub> (diploid)"
    axt.x.label.text = "Sptree edge lengths (coal units)"
    axb.y.label.text = "P(topo-change | S,G)" if topo else "P(tree-change | S,G)"
    axb.y.domain.max = 1
    axb.y.domain.min = 0
    axb.x.domain.min = 0

    # style axes
    for ax in (axb.x, axt.x, axb.y):
        ax.domain.show = False
        ax.ticks.show = True
        ax.ticks.near = 7.5
        ax.ticks.far = 0
        ax.ticks.labels.offset = 12
        ax.ticks.labels.style["font-size"] = 16
        ax.label.offset = 35
        ax.label.style["font-size"] = 16
        ax.ticks.style["stroke-width"] = 3
        ax.spine.style["stroke-width"] = 3

    # ticks at four evenly spaced Ne values; the top axis relabels them as the
    # shortest non-root edge length divided by 2Ne
    min_edge = np.min([i.dist for i in sptree if not i.is_root()])
    ne_ticks = np.linspace(NEFF_MIN, NEFF_MAX, 4)
    axb.x.ticks.locator = toyplot.locator.Explicit(ne_ticks)
    axt.x.ticks.locator = toyplot.locator.Explicit(
        ne_ticks,
        (min_edge / (2 * ne_ticks)).round(1),
    )

    # plot data
    axb.hlines([0, 0.5, 1.0], style={"stroke": "grey", "stroke-width": 2, "stroke-dasharray": "6,6", "stroke-opacity": 0.7})
    axb.fill(
       data.index,
       [i[0] for i in data[smckey + "prob_CI95"]],
       [i[1] for i in data[smckey + "prob_CI95"]],
       opacity=0.33,
    )
    axb.plot(data.index, data[smckey + "prob"], stroke_width=2, color=color)
    style = dict(opacity=0.8, color='black', mstyle={"stroke": "none"})
    axb.scatterplot(data.index, data[smckey + "prob"], size=12, color=color, opacity=0.7)
    axb.scatterplot(data.index, data[simkey + "prob"], size=6, marker='s', **style)
    return canvas

In [45]:
for nspecies in [1, 2, 8]:
    # plot_probs() reads the module-level `sptree` for its top axis labels, so keep it
    # matched to the dataset being plotted
    sptree = get_sptree(nspecies)
    for smc in [True, False]:
        # load the array simulated under the model selected by `smc`; previously the
        # "full" file was loaded for both values, so each figure was drawn twice
        simtype = "smc" if smc else "full"
        data = load_data(f"./validation_{simtype}_{nspecies}pops.npy")
        for topo in [True, False]:
            canvas = plot_probs(data, topo=topo, smc=smc)
            ttt = 'topo' if topo else 'tree'
            # NOTE: if re-enabled, add {simtype} to the file name so that the two
            # models do not overwrite each other
            #toyplot.svg.render(canvas, f"../manuscript/figures/alternatives/validation-probs-{nspecies}pops-{ttt}.svg")


## Plot distances

In [47]:
def plot_distances(data, topo: bool=False, log: bool=True, smc: bool=False):
    """Plot analytical and simulated waiting distances to a tree or topology change
    against Ne.

    Reads the module-level `sptree` and the NEFF_MIN / NEFF_MAX globals.

    Parameters
    ----------
    data: pandas.DataFrame
        Summary table as returned by load_data(), indexed by Ne.
    topo: bool
        If True plot the topology-change columns, else the tree-change columns.
    log: bool
        If True the shared y axis is set to a log scale with domain 50 to 1e4,
        otherwise the domain is 10 to 10000.
    smc: bool
        Not used inside the function.

    Returns
    -------
    toyplot.Canvas
    """
    kind = "topo" if topo else "tree"
    expected_key = f"{kind}_smc_dist"
    observed_key = f"{kind}_sim_dist"
    line_color = toytree.color.COLORS2[0]
    canvas = toyplot.Canvas(width=350, height=300)

    # bottom axes carry Ne; the top axes share y and relabel x in coalescent units
    bottom = canvas.cartesian(margin=65)
    top = bottom.share("y")
    bottom.x.label.text = "N<sub>e</sub> (diploid)"
    top.x.label.text = "Sptree edge lengths (coal units)"
    bottom.y.label.text = f"Distance to {kind}-change"
    bottom.x.domain.min = 0

    # y domain depends on whether a log scale was requested
    if log:
        top.y.scale = "log"
        ymin, ymax = 50, 1e4
    else:
        ymin, ymax = 10, 10000
    bottom.y.domain.min = ymin
    bottom.y.domain.max = ymax

    # identical styling for the three visible axes
    for axis in (bottom.x, top.x, bottom.y):
        axis.domain.show = False
        axis.ticks.show = True
        axis.ticks.near = 7.5
        axis.ticks.far = 0
        axis.ticks.style["stroke-width"] = 3
        axis.ticks.labels.offset = 12
        axis.ticks.labels.style["font-size"] = 16
        axis.label.offset = 35
        axis.label.style["font-size"] = 16
        axis.spine.style["stroke-width"] = 3

    # four Ne ticks; the top axis labels them as (shortest non-root edge) / 2Ne
    shortest_edge = np.min([node.dist for node in sptree if not node.is_root()])
    ne_ticks = np.linspace(NEFF_MIN, NEFF_MAX, 4)
    bottom.x.ticks.locator = toyplot.locator.Explicit(ne_ticks)
    top.x.ticks.locator = toyplot.locator.Explicit(
        ne_ticks,
        (shortest_edge / (2 * ne_ticks)).round(1),
    )
    top.y.ticks.locator = toyplot.locator.Explicit([100, 1000, 10000])

    # dashed guide lines at each decade
    guide_style = {"stroke": "grey", "stroke-width": 2, "stroke-dasharray": "6,6", "stroke-opacity": 0.7}
    bottom.hlines([100, 1000, 10000], style=guide_style)

    # band from the stored percentile pairs, then the analytical line and markers,
    # then the simulated means as small black squares
    bounds = data[f"{expected_key}_CI95"]
    bottom.fill(
        data.index,
        [pair[0] for pair in bounds],
        [pair[1] for pair in data[f"{expected_key}_CI95"]],
        opacity=0.33,
    )
    bottom.plot(data.index, data[expected_key], stroke_width=3, color=line_color)
    bottom.scatterplot(data.index, data[expected_key], size=12, marker="o", color=line_color, opacity=0.7)
    bottom.scatterplot(
        data.index, data[observed_key], size=6, marker='s',
        opacity=0.8, color='black', mstyle={"stroke": "none"},
    )
    return canvas

In [48]:
for nspecies in [1, 2, 8]:
    # plot_distances() reads the module-level `sptree` for its top axis labels, so keep
    # it matched to the dataset being plotted
    sptree = get_sptree(nspecies)
    for smc in [True, False]:
        # load the array simulated under the model selected by `smc`; previously the
        # "full" file was loaded for both values, so each figure was drawn twice
        simtype = "smc" if smc else "full"
        data = load_data(f"./validation_{simtype}_{nspecies}pops.npy")
        for topo in [True, False]:
            canvas = plot_distances(data, topo=topo, log=True, smc=smc)
            ttt = 'topo' if topo else 'tree'
            # NOTE: if re-enabled, add {simtype} to the file name so that the two
            # models do not overwrite each other
            #toyplot.svg.render(canvas, f"../manuscript/figures/alternatives/validation-dists-{nspecies}pops-{ttt}.svg")


## Plot error

In [310]:
def plot_error(data_full, data_smc, topo:bool=False):
    """Plot the percent error for waiting distances between the analytical expectation
    and result of stochastic simulation, when data were simulated under either the
    full coalescent with recombination, or the SMC'.

    Each dataset is drawn as a line over a band of +/- one `*_dist_error_perc_sem`
    around its `*_dist_error_perc` column. Both the band and the line of a dataset use
    that dataset's own index as x values; the bands previously used the index of an
    unrelated module-level `data` table.

    Reads the module-level `sptree` and the NEFF_MIN / NEFF_MAX globals.

    Parameters
    ----------
    data_full: pandas.DataFrame
        load_data() summary of the simulations under the full model.
    data_smc: pandas.DataFrame
        load_data() summary of the simulations under the SMC' model.
    topo: bool
        If True plot the topology-change columns, else the tree-change columns.

    Returns
    -------
    toyplot.Canvas
    """
    key = "topo_" if topo else "tree_"
    color = toytree.color.COLORS2[0]
    canvas = toyplot.Canvas(width=350, height=300)

    # setup axes
    axb = canvas.cartesian(margin=65)
    axt = axb.share("y")
    axb.x.label.text = "N<sub>e</sub> (diploid)"
    axt.x.label.text = "Sptree edge lengths (coal units)"
    axb.y.label.text = "Mean percent error"
    axb.x.domain.min = 0
    axb.y.domain.min = 0
    axb.y.domain.max = 10

    # style axes
    for ax in (axb.x, axt.x, axb.y):
        ax.domain.show = False
        ax.ticks.show = True
        ax.ticks.near = 7.5
        ax.ticks.far = 0
        ax.ticks.labels.offset = 12
        ax.ticks.labels.style["font-size"] = 16
        ax.label.offset = 35
        ax.label.style["font-size"] = 16
        ax.ticks.style["stroke-width"] = 3
        ax.spine.style["stroke-width"] = 3

    # four Ne ticks; the top axis labels them as (shortest non-root edge) / 2Ne
    min_edge = np.min([i.dist for i in sptree if not i.is_root()])
    ne_ticks = np.linspace(NEFF_MIN, NEFF_MAX, 4)
    axb.x.ticks.locator = toyplot.locator.Explicit(ne_ticks)
    axt.x.ticks.locator = toyplot.locator.Explicit(
        ne_ticks,
        (min_edge / (2 * ne_ticks)).round(1),
    )

    axb.hlines([5], style={"stroke": "grey", "stroke-width": 2, "stroke-dasharray": "6,6", "stroke-opacity": 0.7})

    # plot data: band for the full model (default color), then for the SMC' (black)
    full_err = data_full[key + "dist_error_perc"]
    full_sem = data_full[key + "dist_error_perc_sem"]
    smc_err = data_smc[key + "dist_error_perc"]
    smc_sem = data_smc[key + "dist_error_perc_sem"]
    axb.fill(
       data_full.index,
       full_err - full_sem,
       full_err + full_sem,
       opacity=0.33,
    )
    axb.fill(
       data_smc.index,
       smc_err - smc_sem,
       smc_err + smc_sem,
       opacity=0.33, color='black',
    )
    style = dict(opacity=0.8, color='black', mstyle={"stroke": "none"})
    axb.plot(data_full.index, full_err, size=6, marker="o", color=color, opacity=0.7)
    axb.plot(data_smc.index, smc_err, size=6, marker='s', **style)
    return canvas

In [313]:
for nspecies in [1, 2, 8]:
    # plot_error() reads the module-level `sptree` for its top axis labels, so keep it
    # matched to the dataset being plotted
    sptree = get_sptree(nspecies)
    # the two summaries do not depend on `topo`, so load them once per species tree
    fdata = load_data(f"./validation_full_{nspecies}pops.npy")
    sdata = load_data(f"./validation_smc_{nspecies}pops.npy")
    for topo in [True, False]:
        canvas = plot_error(fdata, sdata, topo=topo)
        #toyplot.svg.render(canvas, f"../manuscript/figures/alternatives/error-{nspecies}-{topo}.svg")