In [1]:
import pandas as pd
import numpy as np
import math
import os
import pickle
from collections import OrderedDict
from IPython.display import display, clear_output, Markdown, HTML
import csv

from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import plotly.io as pio
from plotly.colors import DEFAULT_PLOTLY_COLORS

from simulator import simulate as _simulate
from simulator_plotting import *

init_notebook_mode(connected=True)

def simulate(landscape, **kwargs):
    """Thin wrapper around the C extension's ``simulate``.

    Every entry of ``landscape`` that is not already a ``list`` (e.g. a pandas
    Series or a numpy array) is converted with its ``tolist()`` method before
    the extension is called. All keyword arguments are forwarded unchanged.

    Raises
    ------
    TypeError
        ``'invalid landscape'`` when ``landscape`` is not iterable or one of
        its entries is neither a list nor an object with ``tolist()``.
    """
    converted = []
    try:
        for entry in landscape:
            if not isinstance(entry, list):
                entry = entry.tolist()
            converted.append(entry)
    except (TypeError, AttributeError) as err:
        raise TypeError('invalid landscape') from err

    return _simulate(converted, **kwargs)


# Drug pairs used by the switching experiments: each entry is a list of the two
# landscape rows looked up by name in dataset2.
# NOTE(review): dataset2 is not defined anywhere in the visible notebook;
# it must already exist in the kernel for this cell to run.
pairs = [
    [dataset2.loc[drug] for drug in combo]
    for combo in [('CTX', 'SAM'), ('AM', 'AMC'), ('ZOX', 'CXM')]
]

# Saving results as CSV
For use in R with ggmuller

In [2]:
def results_to_csv(results, short_name, name=None):
    """Export one simulation result as CSV files (for ggmuller in R).

    Three files are written under ``march/`` (the directory is created when
    missing):

    * ``{short_name}_name.txt``: the display name wrapped in double quotes.
    * ``{short_name}_tree.csv``: one ``Parent``/``Identity`` row for every
      genotype whose ``results['phylogeny']`` entry is neither -1 nor -2.
    * ``{short_name}_pop.csv``: ``Generation``/``Identity``/``Population`` rows
      for every 10th timestep, restricted to genotype 0 and the genotypes
      listed in the tree file. Populations above 1 are log10-transformed,
      everything else is written as 0.

    Parameters
    ----------
    results : dict
        Needs ``'trace'`` (mapping genotype bit-string -> per-timestep
        population sequence) and ``'phylogeny'`` (sequence whose i-th entry is
        the parent of genotype i).
        NOTE(review): -1 / -2 presumably mark the root and genotypes that never
        appeared; confirm against the simulator.
    short_name : str
        Used to build the output file names.
    name : str, optional
        Human-readable title; defaults to ``short_name``.

    Returns
    -------
    tuple of str
        ``(tree_file, pop_file)``.
    """
    if not name:
        name = short_name
    # np.savetxt / DataFrame.to_csv do not create missing directories.
    os.makedirs('march', exist_ok=True)
    tree_file = 'march/{}_tree.csv'.format(short_name)
    pop_file = 'march/{}_pop.csv'.format(short_name)
    np.savetxt('march/{}_name.txt'.format(short_name), ['"' + name + '"'], fmt='%s')

    tr = results['trace']
    phylo = results['phylogeny']

    # Genotype labels are zero-padded binary strings as wide as the trace keys
    # (4 characters for the 4-locus landscapes used in this notebook).
    first_key = next(iter(tr))
    label = '{{0:0>{}b}}'.format(len(first_key)).format

    # (parent, child) pairs for every genotype that has a real parent.
    edges = [(parent, ident) for ident, parent in enumerate(phylo)
             if parent != -2 and parent != -1]
    treedf = pd.DataFrame(data={
        'Parent': [label(parent) for parent, _ in edges],
        'Identity': [label(ident) for _, ident in edges],
    })
    # index_label=False omits the index header field (easier import in R).
    treedf.to_csv(tree_file, quoting=csv.QUOTE_NONNUMERIC, index_label=False)

    # Genotype 0 is always exported alongside the genotypes in the tree.
    kept = {ident for _, ident in edges} | {0}
    n_steps = len(tr[first_key])
    df = pd.DataFrame(tr)
    gens = []
    idents = []
    pops = []
    for step in range(0, n_steps, 10):  # only every 10th timestep, for a smoother plot
        for ident, pop in enumerate(df.loc[step]):
            if ident in kept:
                gens.append(step)
                idents.append(label(ident))
                # log transform for a more reasonable plot
                pops.append(math.log10(pop) if pop > 1 else 0)
    newdf = pd.DataFrame(data={'Generation': gens, 'Identity': idents, 'Population': pops})
    newdf = newdf.set_index('Generation')
    newdf.to_csv(pop_file, quoting=csv.QUOTE_NONNUMERIC)
    return tree_file, pop_file

In [3]:
# Export a single-landscape run (dataset1[2], row 4) at carrying capacity 10^9.
# NOTE(review): dataset1 is not defined in the visible notebook; it must already be in the kernel.
results_to_csv(simulate([dataset1[2].iloc[4].tolist()], carrying_cap=10**9), 'ds1_2_4_K9', 'P. vivax treated with 53.60uM pyrimethamine at K=10^9')

('march/ds1_2_4_K9_tree.csv', 'march/ds1_2_4_K9_pop.csv')

In [None]:
# Same landscape as the K=10^9 export, with carrying capacity 10^6.
results_to_csv(simulate([dataset1[2].iloc[4].tolist()], carrying_cap=10**6), 'ds1_2_4_K6', 'P. vivax treated with 53.60uM pyrimethamine at K=10^6')

In [None]:
# fails
# NOTE(review): the failure mode at K=10^4 is not recorded here (no traceback was kept);
# re-run and capture the exception before relying on this cell.
results_to_csv(simulate([dataset1[2].iloc[4].tolist()], carrying_cap=10**4), 'ds1_2_4_K4', 'P. vivax treated with 53.60uM pyrimethamine at K=10^4')

In [10]:
# Export an AM/AMC switching run with frequency=200; no display name is given,
# so results_to_csv falls back to the short name for the title file.
results_to_csv(simulate(pairs[1], frequency=200), 'AM+AMC at f=200')

('march/AM+AMC at f=200_tree.csv', 'march/AM+AMC at f=200_pop.csv')

In [12]:
simulate(pairs[1], frequency=200)

{'trace': {'0000': array([1000000000,  999999991,  999999982, ...,          0,          0,
                  0], dtype=int64),
  '0001': array([0, 2, 5, ..., 0, 0, 0], dtype=int64),
  '0010': array([0, 3, 6, ..., 0, 0, 0], dtype=int64),
  '0011': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '0100': array([0, 2, 5, ..., 0, 0, 0], dtype=int64),
  '0101': array([ 0,  0,  0, ...,  9, 10,  9], dtype=int64),
  '0110': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '0111': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '1000': array([0, 2, 4, ..., 0, 0, 0], dtype=int64),
  '1001': array([0, 0, 0, ..., 5, 5, 5], dtype=int64),
  '1010': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '1011': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '1100': array([0, 0, 0, ..., 7, 7, 6], dtype=int64),
  '1101': array([        0,         0,         0, ..., 999999964, 999999965,
         999999966], dtype=int64),
  '1110': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '1111': array([ 0,  0,  0, ..., 14, 14

In [156]:
# Export one static (single-landscape) run per drug, using the drug name as the file stem.
for n in ['CTX', 'SAM', 'AM', 'AMC', 'ZOX', 'CXM']:
    results_to_csv(simulate([dataset2.loc[n].tolist()]), n)

In [158]:
# CTX and SAM one switch only
# durations has one entry per landscape in the pair (75 then 1125).
# NOTE(review): presumably timesteps spent on each landscape; confirm against the simulator.
results = simulate(pairs[0], durations=[75, 1125])
results_to_csv(results, 'CTX+SAM', 'CTX + SAM with a single switch')
# Last expression: show T_f for this run.
results['T_f']

125

In [159]:
# ZOX and CXM, single switch (durations 50 then 1150)
results = simulate(pairs[2], durations=[50, 1150])
results_to_csv(results, 'ZOX+CXM', 'ZOX + CXM with a single switch')
# Last expression: show T_f for this run.
results['T_f']

72

In [160]:
# 5 switches: six alternating AM/AMC segments (five of 75, then a final one of 1000)
AM = dataset2.loc['AM'].tolist()
AMC = dataset2.loc['AMC'].tolist()

results = simulate([AM, AMC, AM, AMC, AM, AMC], durations=[75, 75, 75, 75, 75, 1000])
results_to_csv(results, 'AM+AMC', 'AM + AMC with 5 switches')
# Last expression: show T_f for this run.
results['T_f']

529

In [162]:
# Run the dataset1[2] row-4 landscape at K=10^9 and keep the result in `res`
# (reused by later cells), then dump the raw result dict and plot it.
res = simulate([dataset1[2].iloc[4].tolist()], carrying_cap=10**9)
display(res)
plot_simulation(res)

{'trace': {'0000': array([1000000000,  999999983,  999999946, ...,          0,          0,
                  0], dtype=int64),
  '0001': array([0, 3, 7, ..., 0, 0, 0], dtype=int64),
  '0010': array([ 0,  5, 21, ...,  0,  0,  0], dtype=int64),
  '0011': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '0100': array([0, 3, 8, ..., 0, 0, 0], dtype=int64),
  '0101': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '0110': array([     0,      0,      0, ..., 472838, 462094, 451595], dtype=int64),
  '0111': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '1000': array([ 0,  6, 18, ...,  0,  0,  0], dtype=int64),
  '1001': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '1010': array([ 0,  0,  0, ..., 36, 36, 36], dtype=int64),
  '1011': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '1100': array([0, 0, 0, ..., 6, 7, 7], dtype=int64),
  '1101': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
  '1110': array([        0,         0,         0, ..., 999527096, 999537840,
         999548340], dtype=int64)

In [163]:
results_to_csv(res, 'ds1_2_4_K9', 'P. vivax treated with 53.60uM pyrimethamine at K=10^9')

('march/ds1_2_4_K9_tree.csv', 'march/ds1_2_4_K9_pop.csv')

In [176]:
# log10 of every genotype's population at row 1200 of the trace; empty genotypes stay at 0.
[np.log10(i) if i != 0 else 0 for i in pd.DataFrame(res['trace']).iloc[1200]]

[0,
 0,
 0,
 0,
 0,
 0,
 5.6547491248285935,
 0,
 0,
 0,
 1.5563025007672873,
 0,
 0.8450980400142568,
 0,
 8.999803802243628,
 1.3424226808222062]

In [180]:
8.999803802243628 / (5.6547491248285935 + 1.5563025007672873 + 0.8450980400142568 + 8.999803802243628 + 1.3424226808222062) /2

0.2445814709275659

In [184]:
plot_simulation(res, ['0000', '0001', '0010', '0100','1000','0011','0110','1010','1110','0111','1111','1100'])

In [3]:
r = dataset1[2].iloc[4].tolist()
r

[0.0,
 0.011000000000000001,
 1.396,
 0.82,
 0.48700000000000004,
 0.963,
 1.4869999999999999,
 1.4709999999999999,
 1.006,
 1.234,
 1.429,
 1.0,
 1.011,
 1.077,
 1.51,
 1.393]

## Pure Python simulator that records all paths

In [2]:
num_loci = 4
num_genotypes = 1 << num_loci

# Module-level accumulator filled by all_paths(); it starts with the trivial
# path [0] and is never reset, so repeated calls keep adding to it.
paths = [[0]]
def all_paths(r, current_path=[0]):
    """Recursively record every strictly fitness-increasing mutational path.

    Starting from the last genotype of ``current_path``, each single-locus
    mutant with a strictly higher ``r`` value extends the path. New paths are
    appended to the module-level ``paths`` list and explored further.
    Nothing is returned.

    ``current_path`` is never mutated (extensions are built with ``+``), so
    the mutable default is not shared state.
    """
    here = current_path[-1]
    neighbours = [here ^ (1 << locus) for locus in range(num_loci)]
    uphill = [g for g in neighbours if r[g] > r[here]]
    for step in uphill:
        extended = current_path + [step]
        if extended in paths:
            continue
        paths.append(extended)
        all_paths(r, extended)
#all_paths(r)
#[["{0:0>4b}".format(n) for n in path] for path in paths]

def greedy_path(r, seed=0):
    """Follow the steepest single-mutation ascent from ``seed``.

    At each step the single-locus neighbours of the current genotype are
    scanned in locus order, and the walk moves to a neighbour whenever it beats
    the best value seen so far in that scan. The scan therefore ends on the
    fittest improving neighbour (the first one on ties). The walk stops when no
    neighbour is strictly better.

    Returns the visited genotypes as a tuple, starting with ``seed``.
    Relies on the module-level ``num_loci``.
    """
    visited = [seed]
    position = seed
    improved = True
    while improved:
        improved = False
        # Neighbours are computed once per step, from the genotype the step started at.
        for neighbour in [position ^ (1 << locus) for locus in range(num_loci)]:
            if r[neighbour] > r[position]:
                position = neighbour
                improved = True
        if improved:
            visited.append(position)
    return tuple(visited)

def local_optima(r):
    """List the local optima of ``r``, excluding the global optimum.

    A genotype counts as a local optimum when none of its single-locus
    neighbours has a strictly higher value. The index returned by
    ``np.argmax(r)`` is then removed from the list.

    Relies on the module-level ``num_loci``.
    """
    peaks = [
        genotype
        for genotype, rate in enumerate(r)
        # The inner list is built in full so every neighbour is always looked up.
        if not any([r[genotype ^ (1 << locus)] > rate for locus in range(num_loci)])
    ]
    peaks.remove(np.argmax(r))
    return peaks

In [86]:
r = dataset1[2].iloc[4].tolist()
','.join(['{0:0>4b}'.format(g) for g in local_optima(r)])

'1001,1110'

In [3]:
def _collapse_regressions(path):
    """Return ``path`` with every loop back to an already-visited genotype cut out."""
    collapsed = []
    i = 0
    while i < len(path):
        genotype = path[i]
        collapsed.append(genotype)
        # Resume right after the LAST occurrence of this genotype, so that
        # paths revisiting it more than once are collapsed correctly.
        last = len(path) - 1 - path[::-1].index(genotype)
        i = last + 1
    return tuple(collapsed)

def remove_regressions(N):
    """Merge lineages that differ only by detours back to an earlier genotype.

    Parameters
    ----------
    N : dict or tuple
        Mapping of path (tuple of genotypes) to count. A single path tuple is
        also accepted, in which case the collapsed tuple is returned.

    Returns
    -------
    dict or tuple
        For a dict, the same object is modified in place and returned: every
        path containing a regression is removed and its count is added to the
        collapsed path. E.g. ``(0, 2, 6, 2, 6, 14)`` is folded into
        ``(0, 2, 6, 14)``.
    """
    if isinstance(N, tuple):
        return _collapse_regressions(N)
    merged = {}
    for path in list(N):
        new_path = _collapse_regressions(path)
        if new_path != path:
            merged[new_path] = merged.get(new_path, 0) + N.pop(path)
    for p, n in merged.items():
        N[p] = N.get(p, 0) + n
    return(N)
def remove_zeros(N):
    """Delete every path whose count is 0 from ``N`` (in place) and return ``N``."""
    # Collect the keys first: a dict cannot change size while it is iterated.
    for extinct in [path for path, n in N.items() if n == 0]:
        del N[extinct]
    return(N)

In [484]:
remove_regressions((0, 2, 0, 2, 6, 14))

(0, 2, 6, 14)

In [562]:
r = dataset1[2].iloc[4].tolist()
#r = dataset2.loc['SAM'].tolist()

{(0, 2, 6): 411299,
 (0, 2, 6, 14): 984986174,
 (0, 2, 6, 2, 6, 14): 14602353,
 (0, 2, 6, 2, 6, 14, 15): 11,
 (0, 2, 6, 2, 6, 14, 6): 60,
 (0, 2, 6, 14, 10): 17,
 (0, 2, 6, 2, 6, 14, 12): 5,
 (0, 2, 6, 14, 12): 2,
 (0, 2, 6, 2, 6, 14, 10): 16,
 (0, 2, 6, 14, 15): 10,
 (0, 2, 6, 14, 6): 54}

In [6]:
# Pure-Python simulator that tracks every lineage by its full mutational path.

# Genotypes are integers in range(num_genotypes); each bit is one locus.
num_loci = 4
num_genotypes = 1 << num_loci

def simulate_py(landscape, K = 10**9, P = 10**-8, seed_genotype = 0, simulation_length = 1200):
    """Stochastic growth / mutation / rescaling simulation keyed by lineage path.

    Parameters
    ----------
    landscape : object with ``tolist()``
        Per-genotype growth rates; each step a lineage of size n grows to
        ``n * exp(r[genotype])``.
    K : int
        Initial size of the seed lineage and the total the population is
        rescaled to at the end of every timestep.
    P : float
        Mutation rate: the number of mutants drawn per genotype and timestep
        is Poisson with mean ``n * P``.
    seed_genotype : int
        Genotype of the single starting lineage.
    simulation_length : int
        Number of timesteps to run (the loop stops early if the population
        total reaches 0).

    Returns
    -------
    dict
        ``'N'``: mapping path tuple -> final count (zero-count paths included),
        ``'trace'``: list of per-genotype population rows, one per timestep
        (row 0 is the initial state),
        ``'opt'``: ``np.argmax`` of the landscape,
        ``'crit'``: first timesteps at which the optimal genotype's count
        exceeded 0, 0.5*K and 0.99*K (``None`` where never reached).

    Uses the global numpy random state; seed it beforehand for reproducibility.
    """
    r = landscape.tolist()
    # N maps a lineage's path (tuple of genotypes, oldest first) to its size.
    N = {(seed_genotype,): K}
    # NOTE(review): first_appearances is filled in below but never returned or read elsewhere.
    first_appearances = {(seed_genotype,): (0,)}
    trace = []
    trace.append([K if i == seed_genotype else 0 for i in range(num_genotypes)])
    optimal_genotype = np.argmax(r)
    critical_times = [None, None, None]
    for t in range(1, simulation_length+1):
        # growth: multiply by exp(r), rounding the fractional part up at random
        # with probability equal to that fraction.
        for path, n in N.items():
            fractpart, intpart = math.modf(n * math.exp(r[path[-1]]))
            if np.random.random() < fractpart:
                intpart += 1
            N[path] = int(intpart)
        # mutation: for each genotype, collect [total population, non-empty lineages].
        mutants = [[0, []] for i in range(num_genotypes)]
        for path, n in N.items():
            if n == 0:
                continue
            mutants[path[-1]][0] += n
            mutants[path[-1]][1].append(path)
        for i, (n, paths) in enumerate(mutants):
            if n == 0:
                continue
            mu = n * P
            n = np.random.poisson(mu) # sampled number of mutants (mean mu)
            for j in range(n):
                # NOTE(review): the parent lineage is drawn uniformly among the lineages of
                # this genotype, not in proportion to lineage size; confirm this is intended.
                # NOTE(review): if `paths` is emptied by the removals below while draws
                # remain, np.random.randint(0) would raise.
                mutant_path = paths[np.random.randint(len(paths))]
                mutant_genotype = mutant_path[-1]
                # Flip one uniformly chosen locus.
                locus = np.random.randint(num_loci)
                mutated_genotype = mutant_genotype ^ (1 << locus);
                mutated_path = (*mutant_path, mutated_genotype)
                if mutated_path in N:
                    N[mutated_path] += 1
                else:
                    N[mutated_path] = 1
                    first_appearances[mutated_path] = first_appearances[mutant_path] + (t,)
                # Move the individual out of its parent lineage.
                N[mutant_path] -= 1
                if N[mutant_path] == 0:
                    paths.remove(mutant_path)
        # death: rescale every lineage so the total is (approximately) K again,
        # using the same random rounding as in the growth step.
        sigmaN = sum(N.values())
        if sigmaN == 0:
            break
        for path, n in N.items():
            fractpart, intpart = math.modf((n / sigmaN) * K)
            if np.random.random() < fractpart:
                intpart += 1
            N[path] = int(intpart)
        # record-keeping: per-genotype totals for this timestep
        row = [0]*num_genotypes
        for path, n in N.items():
            row[path[-1]] += n
        trace.append(row)
        # Record the first timestep at which each threshold is exceeded.
        for i, multiplier in enumerate([0, 0.5, 0.99]):
            if critical_times[i] is not None:
                continue
            if row[optimal_genotype] > multiplier * K:
                critical_times[i] = t
    # clean-up (currently disabled, so N still contains zero-count and regressing paths)
    #print(len(N))
    #N = remove_zeros(N)
    #N = remove_regressions(N)
    #{tuple(["{0:0>4b}".format(n) for n in k]):v for (k, v) in N.items()}
    return {'N': N,'trace': trace, 'opt': optimal_genotype, 'crit': critical_times}

In [7]:
simulate_py(dataset1[2].iloc[4])

{'N': {(0,): 0,
  (0, 4): 0,
  (0, 2): 0,
  (0, 1): 0,
  (0, 8): 0,
  (0, 2, 10): 0,
  (0, 2, 6): 202135,
  (0, 2, 3): 0,
  (0, 2, 0): 0,
  (0, 2, 0, 8): 0,
  (0, 8, 12): 0,
  (0, 2, 6, 7): 0,
  (0, 2, 6, 2): 0,
  (0, 2, 6, 14): 992882823,
  (0, 2, 6, 4): 0,
  (0, 2, 6, 2, 6): 0,
  (0, 2, 6, 2, 10): 0,
  (0, 2, 6, 2, 0): 0,
  (0, 2, 6, 2, 3): 0,
  (0, 2, 6, 2, 6, 2): 0,
  (0, 2, 6, 2, 6, 7): 0,
  (0, 2, 6, 2, 6, 14): 6914863,
  (0, 2, 6, 2, 6, 4): 0,
  (0, 2, 6, 2, 6, 2, 10): 0,
  (0, 2, 6, 2, 6, 2, 3): 0,
  (0, 2, 6, 2, 6, 2, 0): 0,
  (0, 2, 6, 2, 6, 2, 6): 0,
  (0, 2, 6, 14, 12): 3,
  (0, 2, 6, 2, 6, 14, 6): 53,
  (0, 2, 6, 2, 6, 14, 12): 3,
  (0, 2, 6, 14, 15): 12,
  (0, 2, 6, 14, 10): 19,
  (0, 2, 6, 2, 6, 14, 15): 15,
  (0, 2, 6, 2, 6, 14, 10): 16,
  (0, 2, 6, 14, 6): 58,
  (0, 2, 6, 14, 6, 7): 0,
  (0, 2, 6, 14, 6, 4): 0,
  (0, 2, 6, 14, 6, 14): 0,
  (0, 2, 6, 14, 6, 14, 10): 0,
  (0, 2, 6, 14, 6, 14, 6): 0,
  (0, 2, 6, 14, 6, 14, 15): 0,
  (0, 2, 6, 2, 6, 14, 6, 14): 0,
  (0, 2,

In [75]:
dataset1[2].iloc[4]

Genotype
0000    0.000
0001    0.011
0010    1.396
0011    0.820
0100    0.487
0101    0.963
0110    1.487
0111    1.471
1000    1.006
1001    1.234
1010    1.429
1011    1.000
1100    1.011
1101    1.077
1110    1.510
1111    1.393
Name: 53.60μM, dtype: float64

In [543]:
# Reshape a list-of-rows trace into the {genotype label: [population per step]} layout
# passed to plot_simulation, with placeholder -1 values for T_1 / T_d / T_f.
# NOTE(review): `trace` is not defined at top level in the visible notebook; presumably the
# 'trace' entry of a simulate_py() result left in the kernel. Confirm before re-running.
df = pd.DataFrame(trace, columns=["{0:0>4b}".format(n) for n in range(num_genotypes)])
tr = {k:list(v.values()) for (k,v) in df.to_dict().items()}
plot_simulation({'trace': tr, 'T_1': -1, 'T_d': -1, 'T_f': -1}, genotypes_to_plot=["{0:0>4b}".format(n) for n in (0, 1, 9, 13, 15)])

In [544]:
plot_simulation(simulate([r]))

## Preliminary analysis tables

In [34]:
def many_simulations(landscape, param={}, num=100):
    """Run ``simulate_py`` ``num`` times on one landscape and summarise the runs.

    Parameters
    ----------
    landscape : object with ``tolist()``
        Passed unchanged to ``simulate_py``; its list form is used for the
        greedy path and the optima.
    param : dict
        Extra keyword arguments for ``simulate_py``. It is only read, never
        modified, so the shared default is harmless.
    num : int
        Number of repetitions.

    Returns
    -------
    dict
        Summary row with the keys ``'Success rate'``, ``'# of paths'``,
        ``'Path frequencies'``, ``'Greedy path'``, ``'Greedy rate'``,
        ``'Avg time to fixation'``, ``'Global optimum'`` and ``'Local optima'``.
        A run counts as a success when its third critical time (optimal
        genotype above 0.99*K) is set. ``'Greedy rate'`` is NaN when no
        individuals were counted at all.

    NOTE(review): path counts include zero-count paths returned by
    ``simulate_py``, so ``'# of paths'`` counts every lineage that ever
    appeared, not only the surviving ones.
    """
    r = landscape.tolist()
    greedy = greedy_path(r)
    # The greedy path is always present so its rate can be reported even at 0.
    paths = {greedy: 0}
    fixation_times = []
    global_optimum = '{0:0>4b}'.format(np.argmax(r))
    loc_optima = ', '.join(['{0:0>4b}'.format(g) for g in local_optima(r)])
    for _ in range(num):
        results = simulate_py(landscape, **param)
        fixed_at = results['crit'][2]
        if fixed_at is not None:
            fixation_times.append(fixed_at)
        for path, count in results['N'].items():
            paths[path] = paths.get(path, 0) + count

    total = sum(paths.values())
    return {
        'Success rate': len(fixation_times) / num,
        '# of paths': len(paths),
        # A list rather than a live dict view, so the row is self-contained.
        'Path frequencies': list(paths.values()),
        'Greedy path': ','.join(['{0:0>4b}'.format(g) for g in greedy]),
        # Undefined (NaN) rather than a ZeroDivisionError when nothing survived.
        'Greedy rate': paths[greedy] / total if total else float('nan'),
        'Avg time to fixation': '{} ± {:.1f}'.format(np.mean(fixation_times), np.std(fixation_times)) if len(fixation_times) > 0 else '',
        'Global optimum': global_optimum,
        'Local optima': loc_optima
    }

# Column order of the summary table built by many_landscapes(); the names must match
# the keys of the dict returned by many_simulations().
column_names =  ['Success rate', '# of paths', 'Path frequencies', 'Greedy path', 
                 'Greedy rate', 'Local optima', 'Global optimum', 'Avg time to fixation']   

def many_landscapes(param={}, df=dataset2):
    """Summarise ``many_simulations`` for every row (landscape) of ``df``.

    ``param`` is forwarded to ``many_simulations``; for the landscape named
    ``'ZOX'`` it is extended with ``simulation_length=10000``. A progress line
    is displayed per landscape and cleared at the end.

    Returns a DataFrame indexed by landscape name, with columns ordered as in
    ``column_names``.

    Note: the default for ``df`` is bound when this cell runs, so ``dataset2``
    must already exist at definition time.
    """
    rows = []
    for name, ls in df.iterrows():
        display('Running simulations on {}...'.format(name))
        run_param = dict(param, simulation_length=10000) if name == 'ZOX' else param
        summary = many_simulations(ls, run_param)
        summary['Name'] = name
        rows.append(summary)
    clear_output()
    table = pd.DataFrame(rows)
    table = table.set_index('Name')
    return table.reindex(column_names, axis='columns')

### Figure 2

In [35]:
df = many_landscapes()

In [36]:
table1 = ['Greedy path', 'Local optima', 'Global optimum']
df[table1]

Unnamed: 0_level_0,Greedy path,Local optima,Global optimum
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AMP,10011,"0011, 0110",1111
AM,10,0010,1101
CEC,100,"0100, 1110",11
CTX,100011,"0011, 0110, 1010",1111
ZOX,1000110111,1001,111
CXM,100,0100,111
CRO,100,"0011, 0100, 1010",1111
AMC,100,0100,1101
CAZ,10101,"0011, 0101",110
CTT,100,"0100, 1000, 1101, 1110",111


### Figure 3

In [37]:
table2 = ['Success rate', 'Greedy rate', 'Avg time to fixation']
df[table2]

Unnamed: 0_level_0,Success rate,Greedy rate,Avg time to fixation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AMP,0.0,0.999996,
AM,0.0,1.0,
CEC,0.0,1.0,
CTX,0.01,0.99,1030.0 ± 0.0
ZOX,1.0,0.992158,3856.3 ± 27.9
CXM,0.0,1.0,
CRO,0.0,0.6,
AMC,0.0,1.0,
CAZ,1.0,0.0,124.43 ± 6.3
CTT,0.0,1.0,


## Comparing switching frequencies with violin plots

In [2]:
def highlight_yes(v):
    """Cell-wise Styler callback: bold CSS for the value ``'Yes'``, empty string otherwise."""
    return 'font-weight: bold' if v == 'Yes' else ''
def highlight_max(s):
    """Column-wise Styler callback: italic CSS for entries equal to the column maximum."""
    top = s.max()
    styles = []
    for is_top in s == top:
        styles.append('font-style: italic' if is_top else '')
    return styles

def pathway_analysis(landscape, num=100, pretty=False, **kwargs):
    """Run ``simulate`` ``num`` times and tabulate the evolutionary pathways taken.

    Parameters
    ----------
    landscape : list or object with ``tolist()``
        Either a list of landscapes (each with ``tolist()``) or one landscape.
        A single landscape is wrapped in a one-element list, because the
        ``simulate`` wrapper expects a sequence of landscapes.
    num : int
        Number of simulation runs.
    pretty : bool
        When true, display a styled table plus summary lines and return
        ``None``; otherwise return the summary dict described below.
    **kwargs
        Forwarded to ``simulate``.

    Returns
    -------
    dict or None
        ``'paths'``: DataFrame with one row per pathway (``Pathway``,
        ``Successful?``, ``Greedy?``, ``Number of appearances``, ``Weight``),
        ``'global_optimum'``: taken from the first run that reports one,
        ``'max_weight'``: largest ``Weight`` (NaN when no run succeeded),
        ``'success_count'``: number of runs with ``T_f != -1``,
        ``'success_rate'``: share of observed pathways that succeeded at least
        once (a per-pathway ratio, not a per-run ratio),
        ``'dominant_path'``: pathway with the largest weight, or ``''``,
        ``'times'``: the ``T_f`` of every run, in order.
    """
    if isinstance(landscape, list):
        landscape = [ls.tolist() for ls in landscape]
    else:
        landscape = [landscape.tolist()]

    greedy = ''
    paths = {}  # pathway -> [appearances, successful appearances]
    times = []  # T_f's
    global_optimum = ''
    for _ in range(num):
        results = simulate(landscape, **kwargs)
        if not global_optimum:
            global_optimum = results['global_optimum']
        if not greedy:
            greedy = ','.join(results['greedy_path'])
            # The greedy pathway is always listed, even if it never occurs.
            paths[greedy] = [0, 0]
        actual_path = ','.join(results['actual_path'])
        tally = paths.setdefault(actual_path, [0, 0])
        tally[0] += 1
        if results['T_f'] != -1:
            tally[1] += 1
        times.append(results['T_f'])

    total_success_count = sum(sc for _, sc in paths.values())
    num_paths = sum(c > 0 for c, _ in paths.values())
    num_successful_paths = sum(sc > 0 for _, sc in paths.values())
    success_rate = num_successful_paths / num_paths if num_paths else float('nan')

    data = []
    for path, (count, success_count) in paths.items():
        successful = success_count > 0
        row = OrderedDict()
        row['Pathway'] = path
        row['Successful?'] = 'Yes' if successful else 'No'
        # Compare by value: identity of equal strings is an implementation detail.
        row['Greedy?'] = 'Yes' if path == greedy else 'No'
        row['Number of appearances'] = count
        row['Weight'] = success_count / total_success_count if successful else None
        data.append(row)
    df = pd.DataFrame(data, columns=['Pathway', 'Successful?', 'Greedy?',
                                     'Number of appearances', 'Weight'])

    # The Weight column is object-typed when every entry is None; coerce a
    # numeric view so max / idxmax behave the same in every case.
    weights = pd.to_numeric(df['Weight'], errors='coerce')
    max_weight = weights.max()
    if pd.isna(max_weight):
        dominant_path = ""
    else:
        dominant_path = df.loc[weights.idxmax(), 'Pathway']

    if pretty:
        # Build the Styler only when it is displayed. Styler.map replaces the
        # deprecated Styler.applymap in recent pandas; fall back on older ones.
        styler = df.style
        cellwise = getattr(styler, 'map', None) or styler.applymap
        s = cellwise(highlight_yes, subset='Successful?').apply(highlight_max, subset='Weight')
        display(s)
        display(Markdown(f'Global optimum: {global_optimum}'))
        display(Markdown(f'Weight of the dominant successful pathway: {max_weight}'))
        display(Markdown(f'Success rate: {success_rate}'))
        return None
    return {
        'paths': df,
        'global_optimum': global_optimum,
        'max_weight': max_weight,
        'success_count': total_success_count,
        'success_rate': success_rate,
        'dominant_path': dominant_path,
        'times': times
    }

def box_switching(landscapes, frequencies=[200,175,150,125,100,75,50,25,20,15,10,5,1], k=9, num=100, **kargs):
    """Build a violin figure of fixation times for a pair of landscapes.

    For each switching frequency, ``pathway_analysis`` is run ``num`` times at
    carrying capacity ``10**k``. The figure contains one violin per frequency
    (fixation times of successful runs, i.e. ``T_f != -1``), a horizontal line
    at each violin's mean, and black markers on a secondary axis showing
    ``success_count / num``.

    Extra keyword arguments are forwarded to ``pathway_analysis``;
    ``carrying_cap`` and ``frequency`` are always overridden here.

    Returns the ``go.Figure`` (nothing is displayed or saved).
    """
    global figin  # declared here but never assigned in this function
    pair_label = '{} + {}'.format(landscapes[0].name, landscapes[1].name)
    sim_kwargs = dict(kargs, carrying_cap=10**k)

    traces = []
    rate_x = []
    rate_y = []
    mean_lines = []
    for position, freq in enumerate(frequencies):
        sim_kwargs['frequency'] = freq
        analysis = pathway_analysis(landscapes, num=num, **sim_kwargs)
        fixed = [t for t in analysis['times'] if t != -1]
        tick = 'f={}'.format(freq)

        traces.append(dict(type='violin', y=fixed, name=tick, points=False))
        rate_x.append(tick)
        rate_y.append(analysis['success_count'] / num)

        if fixed:
            # Horizontal mean marker spanning the violin's slot on the x axis.
            mean_time = np.mean(fixed)
            mean_lines.append(dict(
                type='line',
                xref='x',
                yref='y',
                x0=position - 0.4,
                x1=position + 0.4,
                y0=mean_time,
                y1=mean_time,
                line={'color': DEFAULT_PLOTLY_COLORS[position % 10]},
            ))

    # Success rate per frequency, drawn against the right-hand axis.
    traces.append(go.Scatter(
        x=rate_x,
        y=rate_y,
        name='Success rate',
        yaxis='y2',
        marker=dict(color='rgb(0, 0, 0)'),
        mode='markers',
    ))

    time_axis = dict(title='Time to fixation', range=[0,1400])
    rate_axis = dict(
        title='Success rate',
        overlaying='y',
        side='right',
        range=[-0.2,1.2],
        tickvals=[0,0.2,0.4,0.6,0.8,1.0],
        zeroline=False,
    )
    layout = go.Layout(
        title='Running {} simulations on {} at different frequencies, K=10<sup>{}</sup>'.format(num, pair_label, k),
        xaxis=dict(
            title='Switching frequency',
            tickvals=['f={}'.format(f) for f in frequencies],
        ),
        yaxis=time_axis,
        yaxis2=rate_axis,
        showlegend=False,
        shapes=mean_lines,
    )
    return go.Figure(data=traces, layout=layout)

In [3]:
# Render and save one violin figure per drug pair at the default carrying capacity (k=9).
# The PNG is named after the two landscape names joined by '+'.
for p in pairs:
    fig = box_switching(p)
    iplot(fig, show_link=False)
    pio.write_image(fig, 'report/violins/{}.png'.format('+'.join([ls.name for ls in p])), scale=3)

# Repeat for the first pair only at k=7; the file name gets a ' 7' suffix.
p = pairs[0]
fig = box_switching(p, k=7)
iplot(fig, show_link=False)
pio.write_image(fig, 'report/violins/{} 7.png'.format('+'.join([ls.name for ls in p])), scale=3)

## Basin size and switching

In [10]:
def basin_size(landscape, num=10, **kwargs):
    """Estimate how many of the 16 seeds end on each final genotype.

    For every ``seed`` in ``range(16)``, ``simulate`` is run ``num`` times
    with ``seed=seed`` added to ``kwargs``. The last entry of each run's
    ``'actual_path'`` is tallied.

    Parameters
    ----------
    landscape : list or object with ``tolist()``
        A list of landscapes (each with ``tolist()``) or a single landscape.
        A single landscape is wrapped in a one-element list, because the
        ``simulate`` wrapper expects a sequence of landscapes.
    num : int
        Repetitions per seed.
    **kwargs
        Forwarded to ``simulate``; any ``seed`` entry is overridden.

    Returns
    -------
    dict
        Final genotype -> tally divided by ``num``, i.e. the average number of
        seeds (out of 16) that ended there. Genotypes never reached are
        absent from the dict.
    """
    if isinstance(landscape, list):
        landscape = [ls.tolist() for ls in landscape]
    else:
        landscape = [landscape.tolist()]
    param = dict(kwargs)
    counts = {}
    for seed in range(16):
        param['seed'] = seed
        for _ in range(num):
            final_genotype = simulate(landscape, **param)['actual_path'][-1]
            counts[final_genotype] = counts.get(final_genotype, 0) + 1
    return {gen: count / num for gen, count in counts.items()}

#{ls : basin_size([dataset2.loc[ls]]) for ls in dataset2.index}

In [11]:
# Switching frequencies sampled for every carrying capacity below.
frequencies=[200,100,75,50,25,20,15,10,5,1]

# One figure per carrying capacity 10**k: basin size of each pair's optimal
# genotype as a function of switching frequency.
for k in [9,7,6,5,4]:

    data=[go.Scatter(
        # basin_size() omits genotypes that were never the final one, so a missing
        # optimum means a basin size of 0 rather than an error.
        y=[basin_size(p, frequency=f, carrying_cap=10**k, timesteps=2000).get(opt, 0) for f in frequencies],
        x=frequencies,
        name=' + '.join([ls.name for ls in p])
    ) for p,opt in zip(pairs, ['1111', '1101', '0111'])]

    layout=go.Layout(
        xaxis=dict(
            # Reversed range: the x axis runs from the largest value down to the smallest.
            range=[210,-10],
            tickvals=[200,150,100,75,50,25,20,15,10,5,1],
            zeroline=False,
            title='Switching frequency'
        ),
        yaxis=dict(
            title='Basin size'
        ),
        title=f'Basin size of the optimal genotype at different frequencies, K=10<sup>{k}</sup>',
        width=1000
    )
    fig = go.Figure(data=data, layout=layout)
    iplot(fig, show_link=False)
    pio.write_image(fig, 'report/opt freq {}.png'.format(k), scale=3)

### Figure 1

In [8]:
# Figure 1 data: one box trace per carrying capacity (10^9, 10^7, 10^5).
# x_offset shifts the three traces slightly apart around each landscape's integer position.
data=[]
for k, x_offset in zip([9, 7, 5], [-.1, 0, .1]):
    x=[]
    y=[]
    # NOTE(review): range(15) assumes dataset2 has at least 15 rows; confirm.
    for g in range(15):
        # 100 runs per landscape; only runs with T_f != -1 contribute a point.
        times = [simulate([dataset2.iloc[g]], carrying_cap=10**k, timesteps=3000)['T_f'] for i in range(100)]
        for t in times:
            if t != -1:
                x.append(g + x_offset)
                y.append(t)
    data.append(dict(type='box', x=x, y=y, name='10<sup>{}</sup>'.format(k)))

In [9]:
# Figure 1 layout: integer x positions 0..14 are relabelled with the dataset2 index,
# matching the g + x_offset positions used when the box data was built.
layout = go.Layout(
    xaxis=dict(
        range=[-1,15],
        tickvals=list(range(15)),
        ticktext=dataset2.index.tolist(),
        zeroline=False,
        title='Landscape'
    ),
    yaxis=dict(
        range=[0,3100],
        title='Time to fixation'
    ),
    title='Running simulations using three different carrying capacities',
    width=1000,
    height=500,
    # Horizontal legend placed above the plot area.
    legend=dict(orientation="h", y=1.1, x=0),
    margin=go.layout.Margin(
        l=50,
        r=10,
        b=35,
        t=90,
        pad=0
    )
)

# Uses the `data` list built in the previous cell.
fig = go.Figure(data=data, layout=layout)
iplot(fig, show_link=False, config={"displayModeBar": False})
pio.write_image(fig, 'report/K comparison box1.png', scale=3)

### Figure 8

In [26]:
# Figure 8: basin sizes of every static landscape, one stacked subplot per landscape.
data=[]
for name in dataset2.index:
    bs = basin_size([dataset2.loc[name]])
    display(bs)
    # Start every genotype at 0 so genotypes missing from `bs` are still plotted.
    y_vals = {'{0:0>4b}'.format(i) : 0 for i in range(num_genotypes)}
    for k,v in bs.items():
        y_vals[k] = v
    data.append(go.Scatter(
        y=list(y_vals.values()),
        name=name
    ))
fig = tools.make_subplots(rows=len(data), cols=1, print_grid=False)
for i, d in enumerate(data, 1):
    fig.append_trace(d, i, 1)
    # x ticks are the 16 genotype positions, labelled with their 4-bit strings.
    fig['layout'][f'xaxis{i}'].update(
        ticktext=[format(i, '04b') for i in range(16)],
        tickvals=list(range(16)),
        range=[-0.1,15.1],
        zeroline=False
    )
    # The y axis title carries the landscape name, since the legend is hidden.
    fig['layout'][f'yaxis{i}'].update(
        range=[-1,17],
        tickvals=[0,8,16],
        title=d.name,
        showticklabels=True
    )
    
fig['layout'].update(
    height=1050,
    width=850,
    showlegend=False,
    title='Basin sizes for static landscapes',
    margin=go.layout.Margin(r=20,l=50,t=50, b=30)
)
iplot(fig, show_link=False)
#pio.write_image(fig, 'report/figx2.png', scale=3)

{'0011': 5.0, '1111': 10.0, '0110': 1.0}

{'0010': 6.0, '1101': 10.0}

{'0100': 3.0, '0011': 6.0, '1110': 7.0}

{'0011': 4.5, '0110': 3.0, '1111': 6.3, '1010': 2.2}

{'0011': 7.0, '0111': 8.0, '1001': 1.0}

{'0100': 4.0, '0111': 12.0}

{'0100': 3.9, '0011': 2.1, '1010': 3.0, '1111': 7.0}

{'0100': 3.1, '1101': 12.9}

{'0110': 10.9, '0101': 4.0, '0011': 1.1}

{'0100': 3.0, '0111': 7.3, '1000': 3.7, '1101': 1.0, '1110': 1.0}

{'1111': 16.0}

{'0101': 9.0, '0110': 5.0, '0011': 2.0}

{'1111': 14.0, '1010': 2.0}

{'0101': 14.0, '1000': 2.0}

{'0000': 1.0, '1111': 11.0, '1010': 3.0, '0011': 1.0}

In [23]:
simulate([dataset2.loc['ZOX']])['actual_path']

['0000', '0010', '0011']