# Simulating a new dataset
[Reference](http://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0122283&type=printable)

In [1]:
# Grab our code from before
include("simulator.jl");
include("switching.jl");

We first obtain our new landscapes from a CSV file.

In [2]:
# Load the raw table. Its first column supplies the drug names used below; the
# remaining columns hold the growth rates.
barlow_raw = CSV.read("barlow.csv"; nullable=false)
# Transpose the numeric part so that every drug becomes a column.
barlow = DataFrame(Matrix(barlow_raw[2:end])')
# Name the columns after the drugs. `Symbol` converts each name verbatim; `parse`
# would evaluate the name as Julia source and return an expression or a number
# for any name that is not a plain identifier.
names!(barlow, map(Symbol, barlow_raw[1]))
# The order of the genotypes needs to match our current code: tag every row with
# its genotype (in the order the rows arrive), sort on that tag, then drop it.
barlow[:Sort] = [ "0000", "1000", "0100", "0010", "0001", "1100", "1010", "1001",
                  "0110", "0101", "0011", "1110", "1101", "1011", "0111", "1111" ]
sort!(barlow, cols=:Sort)
delete!(barlow, :Sort)
barlow

Unnamed: 0,AMP,AM,CEC,CTX,ZOX,CXM,CRO,AMC,CAZ,CTT,SAM,CPR,CPD,TZP,FEP
1,1.851,1.778,2.258,0.16,0.993,1.748,1.092,1.435,2.134,2.125,1.879,1.743,0.595,2.679,2.59
2,2.082,1.782,1.996,0.085,0.805,1.7,0.287,1.573,2.656,1.922,2.533,1.662,0.245,2.906,2.572
3,1.948,2.042,2.151,1.936,2.069,2.07,2.554,1.061,2.618,2.804,0.133,1.763,2.604,2.427,2.393
4,2.434,1.752,2.648,2.348,2.683,1.938,3.042,1.457,2.688,0.588,0.094,1.785,3.043,0.141,2.832
5,2.024,1.448,2.396,1.653,1.698,2.94,2.88,1.672,2.042,3.291,2.456,2.018,1.761,3.038,2.44
6,2.198,1.544,1.846,0.138,2.01,2.173,0.656,1.625,2.756,2.888,2.437,2.05,1.471,3.309,2.808
7,2.033,1.184,2.23,2.295,2.138,2.918,2.732,0.073,2.924,3.082,0.083,2.042,2.91,2.528,2.652
8,0.034,0.063,0.214,2.269,2.688,3.272,0.436,0.068,0.251,3.508,0.094,0.218,3.096,0.143,0.611
9,1.57,1.72,0.234,0.185,1.106,0.423,0.83,1.417,0.288,3.238,2.198,1.553,0.432,2.709,2.067
10,2.165,2.008,0.172,0.14,1.171,1.578,0.54,1.351,0.576,2.966,2.57,0.256,0.388,2.5,2.446


## Analysis of landscapes

In [3]:
"""
    find_optima(landscape)

Return `(global_optimum, local_optima)` for `landscape`, both expressed as indices.

`global_optimum` is `indmax(landscape)`. `local_optima` collects every other index
`i` in `1:num_genotypes` whose value is strictly greater than the values at all
indices listed in row `i` of the global `mutational_neighbors`.
"""
function find_optima(landscape)
    best = indmax(landscape)
    # largest value among the neighbors of genotype g
    fittest_neighbor(g) = maximum(landscape[n] for n in mutational_neighbors[g, :])
    is_peak(g) = landscape[g] > fittest_neighbor(g) && g != best
    peaks = Int64[g for g in 1:num_genotypes if is_peak(g)]
    return best, peaks
end

# Optima of every drug landscape in the current dataset. `colwise` hands back each
# column's result inside a wrapper, so the first element is taken to recover the
# (global_optimum, local_optima) tuple — presumably a one-element array; confirm
# against the installed DataFrames version.
optima = map(first, colwise(find_optima, barlow))

"""
    indicestostring(indices)

Return a user-readable string listing `genotypes[i]` for every `i` in `indices`,
separated by `", "`. An empty `indices` yields `""`.
"""
function indicestostring(indices)
    # `join` places the separator only between elements, so no manual counting
    # or repeated string concatenation is needed.
    return join((genotypes[i] for i in indices), ", ")
end

# Summary table: drug names plus the optima found for each landscape
df = DataFrame()
df[:Drug] = barlow_raw[1]
df[:LocalOptima] = map(o -> indicestostring(o[2]), optima)
df[:GlobalOptimum] = map(o -> genotypes[o[1]], optima)

# Simulate every drug landscape starting from "0000" and record, per drug:
#   - the genotype whose value in row 1200 of the trace exceeds half of
#     carrying_capacity ("" when none does)
#   - the third critical time returned by the simulation
#   - whether that dominant genotype differs from the landscape's maximum
dominantgenotypes = String[]
timetofix = Int64[]
isstuck = String[]
for col in eachcol(barlow)
    # col[2] is the vector of growth rates for this drug
    ct, trace, trajectory = run_simulation(col[2], "0000")
    maxgenotype = findmax(trace[1200,:]) # (largest value, its index)
    dominantgenotype =
        maxgenotype[1] > 0.5 * carrying_capacity ? genotypes[maxgenotype[2]] : ""
    push!(dominantgenotypes, dominantgenotype)
    push!(timetofix, ct[3])
    push!(isstuck, dominantgenotype != genotypes[indmax(col[2])] ? "Yes" : "No")
end
df[:DominantGenotype] = dominantgenotypes
df[:TimeToFixation] = timetofix
df[:Stuck] = isstuck

# Show the table ordered by global optimum, largest label first
sorteddf = sort(df, cols=:GlobalOptimum, rev=true)

Unnamed: 0,Drug,LocalOptima,GlobalOptimum,DominantGenotype,TimeToFixation,Stuck
1,AMP,"0011, 0110",1111,11,0,Yes
2,CTX,"0011, 0110, 1010",1111,11,0,Yes
3,CRO,"0011, 0100, 1010",1111,100,0,Yes
4,SAM,,1111,1111,447,No
5,CPD,1010,1111,1111,420,No
6,FEP,"0000, 0011, 1010",1111,0,0,Yes
7,AM,0010,1101,10,0,Yes
8,AMC,0100,1101,100,0,Yes
9,ZOX,1001,111,11,0,Yes
10,CXM,0100,111,100,0,Yes


## CTX and SAM
CTX alone:

In [4]:
# Simulate the CTX landscape on its own from "0000", then plot the trace using
# the critical times and trajectory the simulation returned.
criticaltimes, trace, trajectory = run_simulation(barlow[:CTX], "0000")
plot_abundance(trace, criticaltimes, trajectory)

SAM alone:

In [5]:
# Simulate the SAM landscape on its own from "0000", then plot the trace using
# the critical times and trajectory the simulation returned.
criticaltimes, trace, trajectory = run_simulation(barlow[:SAM], "0000")
plot_abundance(trace, criticaltimes, trajectory, vlines=1)

CTX followed by SAM:

In [6]:
# Each pair is (landscape, number of timesteps to apply it): CTX for 60, then SAM.
landscape_set = [(barlow[:CTX], 60), (barlow[:SAM], 1140)]
# 1200 is presumably the total number of timesteps — confirm against
# run_simulation_specific.
criticaltimes, trace = run_simulation_specific(landscape_set, "0000", 1200)
# No trajectory is returned here, so the genotypes to plot are listed by hand.
plot_abundance(trace, criticaltimes, ["0000", "0001", "0010", "0100", "0011", "1011", "1101", "1111"], vlines=1) #0100

## ZOX and CXM
ZOX alone:

In [7]:
# Simulate the ZOX landscape on its own from "0000". The returned trajectory is
# not used; the genotypes to plot are listed by hand.
criticaltimes, trace, trajectory = run_simulation(barlow[:ZOX], "0000")
plot_abundance(trace, criticaltimes, ["0000", "0010", "0011", "0111"])

CXM alone:

In [8]:
# Simulate the CXM landscape on its own from "0000". The returned trajectory is
# not used; the genotypes to plot are listed by hand.
criticaltimes, trace, trajectory = run_simulation(barlow[:CXM], "0000")
plot_abundance(trace, criticaltimes, ["0000", "0001", "0010", "0100", "1100", "0110", "0101"])

ZOX followed by CXM:

In [9]:
# Each pair is (landscape, number of timesteps to apply it): ZOX for 100, then CXM.
landscape_set = [(barlow[:ZOX],100),(barlow[:CXM],1100)]
# 1200 is presumably the total number of timesteps — confirm against
# run_simulation_specific.
criticaltimes, trace = run_simulation_specific(landscape_set, "0000", 1200)
# No trajectory is returned here, so the genotypes to plot are listed by hand.
plot_abundance(trace, criticaltimes, ["0000", "0010", "0011", "0111", "1011", "0100"], vlines=1)

## AM and AMC
AM alone:

In [10]:
# Simulate the AM landscape on its own from "0000" and plot the trace; no
# genotype list is passed to plot_abundance here.
criticaltimes, trace, trajectory = run_simulation(barlow[:AM], "0000")
plot_abundance(trace, criticaltimes)

AMC alone:

In [11]:
# Simulate the AMC landscape on its own from "0000" and plot the trace; no
# genotype list is passed to plot_abundance here.
criticaltimes, trace, trajectory = run_simulation(barlow[:AMC], "0000")
plot_abundance(trace, criticaltimes)

If we start with AMC and switch to AM, we do indeed reach the global optimum:

In [12]:
# Each pair is (landscape, number of timesteps to apply it): AMC for 200, then AM.
landscape_set = [(barlow[:AMC],200),(barlow[:AM],1000)]
# 1200 is presumably the total number of timesteps — confirm against
# run_simulation_specific.
criticaltimes, trace = run_simulation_specific(landscape_set, "0000", 1200)
# No trajectory is returned here, so the genotypes to plot are listed by hand.
plot_abundance(trace, criticaltimes, ["0000", "0010", "0100", "1001", "1100", "0101", "1101"])

Although not always... Sometimes the population still gets stuck on 0010.

In [15]:
# Same AMC (200 timesteps) -> AM schedule as the previous cell, run again to show
# a different outcome from an identical setup.
landscape_set = [(barlow[:AMC],200),(barlow[:AM],1000)]
criticaltimes, trace = run_simulation_specific(landscape_set, "0000", 1200)
plot_abundance(trace, criticaltimes, ["0000", "0010", "0100", "1001", "1100", "0101", "1101"])

The following code runs the simulation 100 times and outputs the number of times the global optimum appears.

In [16]:
# AMC for 200 timesteps, then AM.
# NOTE(review): AM is given 1100 timesteps here but 1000 in the single runs of
# this schedule — confirm the difference is harmless.
landscape_set = [(barlow[:AMC],200),(barlow[:AM],1100)]
# Run the schedule 100 times, keeping the third critical time of every run.
manysims = map(i -> run_simulation_specific(landscape_set, "0000", 1200)[1][3], 1:100)
# Number of runs whose recorded time is nonzero.
count(t -> t != 0, manysims)

64

We can reach the global optimum 100% of the time by switching back and forth between AMC and AM. In the following simulation, the population is exposed to AMC for 200 timesteps, then AM for 100 timesteps, then AMC for 100 timesteps, and then AM for the remainder of the simulation.

In [17]:
# Each pair is (landscape, number of timesteps to apply it):
# AMC for 200, AM for 100, AMC for 100, then AM for the rest of the run.
landscape_set = [(barlow[:AMC],200),(barlow[:AM],100),(barlow[:AMC],100),(barlow[:AM],1000)]
# 1200 is presumably the total number of timesteps — confirm against
# run_simulation_specific.
criticaltimes, trace = run_simulation_specific(landscape_set, "0000", 1200)
# No trajectory is returned here, so the genotypes to plot are listed by hand.
plot_abundance(trace, criticaltimes, ["0000", "0010", "0100", "0001", "1001", "1100", "0101", "1101", "1011", "1000"])

Indeed, if we run the simulation 100 times, the global optimum appears 100 times.

In [18]:
# AMC for 200 timesteps, AM for 100, AMC for 100, then AM for the rest of the run.
landscape_set = [(barlow[:AMC],200),(barlow[:AM],100),(barlow[:AMC],100),(barlow[:AM],1000)]
# Run the schedule 100 times, keeping the third critical time of every run.
manysims = map(i -> run_simulation_specific(landscape_set, "0000", 1200)[1][3], 1:100)
# Number of runs whose recorded time is nonzero.
count(t -> t != 0, manysims)

100