# Running many simulations to reveal the effects of population size

In [1]:
import pandas as pd
import numpy as np
import math
import os

from bokeh.palettes import d3

from IPython.display import display, display_html, display_markdown, clear_output

from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import plotly.io as pio

from simulator import *
from simulator_plotting import *

# Initialise plotly's offline notebook mode so the iplot() calls further down
# can render figures inline in this notebook.
init_notebook_mode(connected=True)

In [2]:
def many_simulations(landscape, param=None, num=100):
    """Run ``simulate`` repeatedly on one landscape and summarise the outcomes.

    Parameters
    ----------
    landscape
        Passed unchanged as the first positional argument of ``simulate``.
    param : dict, optional
        Keyword arguments forwarded to every ``simulate`` call. ``None``
        (the default) forwards no extra arguments.
    num : int
        Number of simulation runs. Must be positive: the rates below divide
        by ``num``.

    Returns
    -------
    dict
        ``'Success rate'``: fraction of runs whose ``T_f`` is not -1.
        ``'# of paths'``: number of distinct path keys recorded (the greedy
        path is always registered, even if it was never taken).
        ``'Path frequencies'``: tuple of per-path counts in insertion order,
        so the greedy path's count comes first.
        ``'Greedy path'`` / ``'Greedy rate'``: the comma-joined greedy path
        and the fraction of runs that followed it.
        ``'Avg time to fixation'``: sum of ``T_f`` over successful runs
        divided by ``num`` (all runs), i.e. failed runs contribute zero.
        ``'Global optimum'`` / ``'Local optima'``: taken from the first run.
    """
    kwargs = {} if param is None else param
    success_count = 0
    T_f_sum = 0
    paths = {}
    greedy_path = None
    global_optimum = ''
    local_optima = ''
    for run in range(num):
        results = simulate(landscape, **kwargs)
        if results['T_f'] != -1:
            success_count += 1
            T_f_sum += results['T_f']
        if run == 0:
            # Capture the per-landscape facts exactly once. Keying this on the
            # run index (rather than on the truthiness of the stored value)
            # keeps an empty greedy path from re-registering on every
            # iteration and wiping its counter back to zero.
            greedy_path = ','.join(results['greedy_path'])
            paths[greedy_path] = 0
            global_optimum = results['global_optimum']
            local_optima = ', '.join(results['local_optima'])
        actual_path = ','.join(results['actual_path'])
        paths[actual_path] = paths.get(actual_path, 0) + 1
    return {
        'Success rate': success_count / num,
        '# of paths': len(paths),
        # Snapshot the counts: a dict view stays tied to ``paths`` and cannot
        # be pickled along with the resulting DataFrame.
        'Path frequencies': tuple(paths.values()),
        'Greedy path': greedy_path,
        'Greedy rate': paths[greedy_path] / num,
        'Avg time to fixation': T_f_sum / num,
        'Global optimum': global_optimum,
        'Local optima': local_optima
    }

# Column order used when presenting the summary tables.
column_names = [
    'Success rate',
    '# of paths',
    'Path frequencies',
    'Greedy path',
    'Greedy rate',
    'Local optima',
    'Global optimum',
    'Avg time to fixation',
]

def many_landscapes(param=None, df=dataset2):
    """Run ``many_simulations`` on every row of ``df`` and tabulate the results.

    Parameters
    ----------
    param : dict, optional
        Keyword arguments handed to ``many_simulations`` (and from there to
        ``simulate``). ``None`` (the default) means no overrides.
    df : pandas.DataFrame
        One landscape per row; the row index is used as the landscape name
        and the row values (as a list) as the landscape itself.

    Returns
    -------
    pandas.DataFrame
        One row per landscape, indexed by ``'Name'``, with the columns
        arranged in ``column_names`` order.
    """
    # Normalise here so the callee always receives a real dict.
    kwargs = {} if param is None else param
    data = []
    for name, ls in df.iterrows():
        landscape = ls.tolist()
        display('Running simulations on {}...'.format(name))
        row = many_simulations(landscape, kwargs)
        row['Name'] = name
        data.append(row)
    # Remove the accumulated progress messages once every landscape is done.
    clear_output()
    return pd.DataFrame(data).set_index('Name').reindex(column_names, axis='columns')

## Analysis of path frequencies for all drugs

### K=10^9

In [3]:
# Baseline run: no overrides are passed, so simulate() uses its own defaults
# (the section heading indicates K=10^9 -- TODO confirm against simulator).
K1 = many_landscapes()
# Bare expression so the notebook renders the summary table.
K1

Unnamed: 0_level_0,Success rate,# of paths,Path frequencies,Greedy path,Greedy rate,Local optima,Global optimum,Avg time to fixation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMP,0.0,1,(100),10011,1.0,"0011, 0110",1111,0.0
AM,0.0,1,(100),10,1.0,0010,1101,0.0
CEC,0.0,1,(100),100,1.0,"0100, 1110",11,0.0
CTX,0.0,1,(100),100011,1.0,"0011, 0110, 1010",1111,0.0
ZOX,0.0,2,"(0, 100)",1000110111,0.0,1001,111,0.0
CXM,0.0,1,(100),100,1.0,0100,111,0.0
CRO,0.0,3,"(65, 13, 22)",100,0.65,"0011, 0100, 1010",1111,0.0
AMC,0.0,1,(100),100,1.0,0100,1101,0.0
CAZ,1.0,2,"(0, 100)",10101,0.0,"0011, 0101",110,126.47
CTT,0.0,1,(100),100,1.0,"0100, 1000, 1101, 1110",111,0.0


### K=10^8

In [4]:
# Carrying capacity 10^8 with mutation probability 10^-7.
K2 = many_landscapes({'carrying_cap': int(1.0e8), 'prob_mutation': 1.0e-7})
# Bare expression so the notebook renders the summary table.
K2

Unnamed: 0_level_0,Success rate,# of paths,Path frequencies,Greedy path,Greedy rate,Local optima,Global optimum,Avg time to fixation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMP,0.0,2,"(98, 2)",10011,0.98,"0011, 0110",1111,0.0
AM,0.0,1,(100),10,1.0,0010,1101,0.0
CEC,0.0,1,(100),100,1.0,"0100, 1110",11,0.0
CTX,0.0,1,(100),100011,1.0,"0011, 0110, 1010",1111,0.0
ZOX,0.0,2,"(0, 100)",1000110111,0.0,1001,111,0.0
CXM,0.0,1,(100),100,1.0,0100,111,0.0
CRO,0.0,3,"(52, 29, 19)",100,0.52,"0011, 0100, 1010",1111,0.0
AMC,0.0,1,(100),100,1.0,0100,1101,0.0
CAZ,1.0,2,"(0, 100)",10101,0.0,"0011, 0101",110,111.39
CTT,0.0,1,(100),100,1.0,"0100, 1000, 1101, 1110",111,0.0


### K=10^7

In [5]:
# Carrying capacity 10^7 with mutation probability 10^-6.
K3 = many_landscapes({'carrying_cap': int(1.0e7), 'prob_mutation': 1.0e-6})
# Bare expression so the notebook renders the summary table.
K3

Unnamed: 0_level_0,Success rate,# of paths,Path frequencies,Greedy path,Greedy rate,Local optima,Global optimum,Avg time to fixation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMP,0.0,2,"(98, 2)",10011,0.98,"0011, 0110",1111,0.0
AM,0.0,1,(100),10,1.0,0010,1101,0.0
CEC,0.0,1,(100),100,1.0,"0100, 1110",11,0.0
CTX,0.0,1,(100),100011,1.0,"0011, 0110, 1010",1111,0.0
ZOX,0.0,2,"(0, 100)",1000110111,0.0,1001,111,0.0
CXM,0.02,3,"(98, 1, 1)",100,0.98,0100,111,13.61
CRO,0.0,3,"(30, 44, 26)",100,0.3,"0011, 0100, 1010",1111,0.0
AMC,0.01,2,"(99, 1)",100,0.99,0100,1101,10.14
CAZ,1.0,2,"(0, 100)",10101,0.0,"0011, 0101",110,96.12
CTT,0.0,1,(100),100,1.0,"0100, 1000, 1101, 1110",111,0.0


### K=10^6

In [6]:
# Carrying capacity 10^6 with mutation probability 10^-5.
K4 = many_landscapes({'carrying_cap': int(1.0e6), 'prob_mutation': 1.0e-5})
# Bare expression so the notebook renders the summary table.
K4

Unnamed: 0_level_0,Success rate,# of paths,Path frequencies,Greedy path,Greedy rate,Local optima,Global optimum,Avg time to fixation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMP,0.0,2,"(91, 9)",10011,0.91,"0011, 0110",1111,0.0
AM,0.0,1,(100),10,1.0,0010,1101,0.0
CEC,0.0,3,"(93, 1, 6)",100,0.93,"0100, 1110",11,0.0
CTX,0.05,3,"(94, 4, 2)",100011,0.94,"0011, 0110, 1010",1111,44.34
ZOX,0.0,2,"(0, 100)",1000110111,0.0,1001,111,0.0
CXM,0.31,3,"(69, 30, 1)",100,0.69,0100,111,182.21
CRO,0.0,3,"(14, 68, 18)",100,0.14,"0011, 0100, 1010",1111,0.0
AMC,0.13,4,"(87, 10, 1, 2)",100,0.87,0100,1101,92.35
CAZ,1.0,2,"(0, 100)",10101,0.0,"0011, 0101",110,82.38
CTT,0.02,2,"(98, 2)",100,0.98,"0100, 1000, 1101, 1110",111,11.75


In [7]:
# Smaller populations: carrying capacities 10^5, 10^4 and 10^3, each paired
# with a mutation probability ten times larger than the previous one.
K5, K6, K7 = [
    many_landscapes({'carrying_cap': capacity, 'prob_mutation': mutation_prob})
    for capacity, mutation_prob in [
        (int(1.0e5), 1.0e-4),
        (int(1.0e4), 1.0e-3),
        (int(1.0e3), 1.0e-2),
    ]
]

In [9]:
# Render the summary table for the carrying_cap=10^3 run.
K7

Unnamed: 0_level_0,Success rate,# of paths,Path frequencies,Greedy path,Greedy rate,Local optima,Global optimum,Avg time to fixation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMP,0.0,16,"(18, 16, 9, 2, 11, 4, 4, 2, 11, 6, 6, 3, 3, 3,...",10011,0.18,"0011, 0110",1111,0.0
AM,0.0,12,"(16, 27, 4, 4, 34, 1, 2, 1, 2, 6, 1, 2)",10,0.16,0010,1101,0.0
CEC,0.0,7,"(0, 3, 48, 37, 5, 4, 3)",100,0.0,"0100, 1110",11,0.0
CTX,0.0,6,"(0, 14, 45, 38, 1, 2)",100011,0.0,"0011, 0110, 1010",1111,0.0
ZOX,0.0,6,"(65, 24, 3, 4, 3, 1)",1000110111,0.65,1001,111,0.0
CXM,0.0,6,"(0, 18, 19, 57, 5, 1)",100,0.0,0100,111,0.0
CRO,0.0,9,"(0, 34, 25, 26, 3, 4, 3, 3, 2)",100,0.0,"0011, 0100, 1010",1111,0.0
AMC,0.0,7,"(0, 16, 4, 38, 34, 4, 4)",100,0.0,0100,1101,0.0
CAZ,0.0,3,"(0, 81, 19)",10101,0.0,"0011, 0101",110,0.0
CTT,0.0,5,"(0, 36, 8, 26, 30)",100,0.0,"0100, 1000, 1101, 1110",111,0.0


In [8]:
# Figure 1: a 2x2 grid of bar charts, one per carrying capacity, showing the
# average time to fixation for each landscape.
data = [
    go.Bar(x=df.index.tolist(), y=df['Avg time to fixation'].tolist())
    for df in [K1, K2, K3, K4]
]
fig = tools.make_subplots(
    rows=2,
    cols=2,
    subplot_titles=(
        'K=10<sup>9</sup>', 'K=10<sup>8</sup>',
        'K=10<sup>7</sup>', 'K=10<sup>6</sup>',
    ),
    print_grid=False
)
# Fill the grid row by row: trace 1 -> (1, 1), 2 -> (1, 2), 3 -> (2, 1), 4 -> (2, 2).
for i, trace in enumerate(data, 1):
    fig.append_trace(trace, (i - 1) // 2 + 1, (i - 1) % 2 + 1)
# Every subplot shares the same y-axis label.
for n in (1, 2, 3, 4):
    fig['layout']['yaxis{}'.format(n)].update(title='Avg. time to fixation')
fig['layout'].update(showlegend=False)
iplot(fig, show_link=False)
pio.write_image(fig, 'fig1.pdf', width=1056, height=816)

# Figure 2: mean success rate (left) and total number of distinct paths
# (right) across all landscapes, plotted against carrying capacity.
capacity_labels = [
    '10<sup>9</sup>', '10<sup>8</sup>',
    '10<sup>7</sup>', '10<sup>6</sup>',
    '10<sup>5</sup>', '10<sup>4</sup>',
    '10<sup>3</sup>',
]
all_runs = [K1, K2, K3, K4, K5, K6, K7]
success_trace = go.Scatter(
    x=list(capacity_labels),
    y=[df['Success rate'].mean() for df in all_runs]
)
paths_trace = go.Scatter(
    x=list(capacity_labels),
    y=[df['# of paths'].sum() for df in all_runs]
)
data = [success_trace, paths_trace]
fig = tools.make_subplots(rows=1, cols=2, print_grid=False)
for column, trace in enumerate(data, 1):
    fig.append_trace(trace, 1, column)
for axis_name in ('xaxis1', 'xaxis2'):
    fig['layout'][axis_name].update(title='Carrying capacity')
fig['layout']['yaxis1'].update(title='Avg. success rate')
fig['layout']['yaxis2'].update(title='Total paths exposed')
fig['layout'].update(showlegend=False)
iplot(fig, show_link=False)
pio.write_image(fig, 'fig2.pdf', width=1056, height=544)

## Analysis of path frequencies for switching between selected pairs of drugs

In [10]:
# Drug pairs whose landscapes are simulated together.
# NOTE(review): 'SAM' and 'FEP' are not among the row names shown in the
# per-drug result tables built from dataset2 -- confirm dataset2 contains
# them, otherwise the .loc lookups below raise KeyError.
pairs = [('CTX', 'SAM'), ('ZOX', 'CXM'), ('AM', 'AMC'), ('CTT', 'CAZ'), ('FEP', 'CAZ')]

# One record per pair: a display name plus both landscapes as plain lists.
data = [
    {
        'Name': '{} + {}'.format(first, second),
        'Landscape 1': dataset2.loc[first].tolist(),
        'Landscape 2': dataset2.loc[second].tolist()
    }
    for first, second in pairs
]
pair_df = pd.DataFrame(data).set_index('Name')
# Values passed as the 'frequency' keyword in the pair simulations.
frequencies = [200, 100, 50]

In [33]:
# Pair simulations for four carrying capacities. Each of K1..K4 becomes a list
# holding one result table per entry of `frequencies`.
# NOTE: this rebinds K1-K4, which earlier cells assigned as single DataFrames;
# cells that expect the DataFrame form must not be re-run after this one.
K1, K2, K3, K4 = [
    [
        many_landscapes(
            {'frequency': f, 'carrying_cap': capacity, 'prob_mutation': mutation_prob},
            pair_df
        )
        for f in frequencies
    ]
    for capacity, mutation_prob in [
        (int(1.0e9), 1.0e-8),
        (int(1.0e8), 1.0e-7),
        (int(1.0e7), 1.0e-6),
        (int(1.0e6), 1.0e-5),
    ]
]

In [65]:
# Figures 3a-3d: for each carrying capacity, a grouped bar chart of average
# time to fixation per drug pair, with one bar group member per frequency.
panel_titles = ['K=10<sup>9</sup>', 'K=10<sup>8</sup>', 'K=10<sup>7</sup>', 'K=10<sup>6</sup>']
panel_letters = ['a', 'b', 'c', 'd']
for K, dfset, letter in zip(panel_titles, [K1, K2, K3, K4], panel_letters):
    data = [
        go.Bar(
            x=df.index.tolist(),
            y=df['Avg time to fixation'].tolist(),
            name=freq
        )
        for df, freq in zip(dfset, frequencies)
    ]

    layout = go.Layout(
        title=K,
        barmode='group',
        xaxis=dict(title='Drug pair'),
        yaxis=dict(title='Avg. time to fixation'),
        width=600,
        height=400
    )

    fig = go.Figure(data=data, layout=layout)
    iplot(fig, show_link=False)
    # One PDF per panel: fig3a.pdf ... fig3d.pdf.
    pio.write_image(fig, 'fig3{}.pdf'.format(letter))

In [66]:
# Figure 4: mean success rate over all drug pairs versus carrying capacity,
# one line per entry of `frequencies`.
data = []
for i, f in enumerate(frequencies):
    mean_success = [dfset[i]['Success rate'].mean() for dfset in [K1, K2, K3, K4]]
    data.append(go.Scatter(
        x=['10<sup>9</sup>', '10<sup>8</sup>',
           '10<sup>7</sup>', '10<sup>6</sup>'],
        y=mean_success,
        name=f,
    ))

# The third trace starts hidden; it is only listed in the legend.
data[2].update(visible='legendonly')

layout = go.Layout(
    xaxis=dict(title='Carrying capacity'),
    yaxis=dict(title='Avg. success rate')
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, show_link=False)
pio.write_image(fig, 'fig4.pdf')

In [67]:
# Figure 5: average time to fixation versus carrying capacity, one line per
# entry of `frequencies`. For each (capacity, frequency) combination the mean
# is taken over the drug pairs whose 'Avg time to fixation' is non-zero.
data = []
for i, f in enumerate(frequencies):
    averages = []
    for df in [K1, K2, K3, K4]:
        times = [t for t in df[i]['Avg time to fixation'].tolist() if t != 0]
        # If every pair reported 0 there is nothing to average; record None
        # (plotted as a missing point) instead of dividing by zero.
        averages.append(sum(times) / len(times) if times else None)
    data.append(go.Scatter(
        x=['10<sup>9</sup>', '10<sup>8</sup>',
           '10<sup>7</sup>', '10<sup>6</sup>'],
        y=averages,
        name=f
    ))

layout = go.Layout(
    xaxis = dict(title='Carrying capacity'),
    yaxis = dict(title='Avg. time to fixation')
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, show_link=False)
pio.write_image(fig, 'fig5.pdf')