In [1]:
"""
A 1000 run of the gillespied function, with constant inputs 
"""

"A 1000 run of the gillespied function, with constant inputs \n"

In [2]:
using Distributed

In [3]:
length(Sys.cpu_info())
addprocs(3) ;

In [4]:
@everywhere using Random, Distributions, Plots, DelimitedFiles

In [5]:
@everywhere struct SPN
    init::Vector{Real}
    k::Vector{Real}
    S::Array{Real}
    Tmax::Real
    dt::Real
    function SPN(init, k, S, Tmax, dt)
        new(init, k, S, Tmax, dt)
    end
end

In [6]:
@everywhere init(N::SPN) = Float64.(N.init)
@everywhere k(N::SPN) = Float64.(N.k)
@everywhere S(N::SPN) = Float64.(N.S)
@everywhere Tmax(N::SPN) = Float64.(N.Tmax)
@everywhere dt(N::SPN) = Float64.(N.dt)
@everywhere n(N::SPN) = trunc(Int, N.Tmax/N.dt)

In [7]:
@everywhere function hazard(x::Vector{Float64}, th::Vector{Float64}, error::Float64)::Vector{Float64}
    k = th[1:5]
    Kc = th[6:7]
    if error>=0
        # deining the k1, & k2 and then subbing them into a vecotr is WAY WAY quicker
        # compared to calcing them in the vector
        k1 = k[1]+error*Kc[1]
        k2 = k[2]+error*Kc[1]
        return [x[1], x[2], x[1], x[2], x[1]].*[k1, k2, k[3], k[4], k[5]]
    else 
        k1 = 2*k[1]/(1+exp(-error*Kc[2]))
        k2 = 2*k[2]/(1+exp(-error*Kc[2]))
        return [x[1], x[2], x[1], x[2], x[1]].*[k1, k2, k[3], k[4], k[5]]
    end
end

In [8]:
@everywhere function hazzy(x::Vector{Float64}, th::Vector{Float64})::Vector{Float64}
    return [x[1], x[2], x[1], x[2], x[1]].*th[1:5]
end

In [9]:
@everywhere const post = [[2,0,0,0,1] [0,2,0,0,1]]
@everywhere const pre = [[1,0,1,0,1] [0,1,0,1,0]]
@everywhere const SS = post - pre
@everywhere const kk = [3.06e-8, 3.06e-8, 3.06e-8, 3.06e-8, 0.0, 8.99e-9, 2e-3];

In [10]:
@everywhere function gen_inits(μ::Real, σ::Real, α::Real, β::Real)::Vector{Float64}
    CC = rand(Normal(μ, σ))
    hh = rand(Beta(α, β))
    return round.( [CC*(1-hh), CC*hh] )
end

"""
When specifying the parameters as ::Float64 the function is consistently a slower
...weird but okay.
"""

"When specifying the parameters as ::Float64 the function is consistently a slower\n...weird but okay.\n"

In [11]:
@everywhere function gillespied(N)
    c = k(N)
    x = init(N)
    δt = dt(N)
    nn = n(N)
    SS = S(N)
    tt = 0.0
    xmat = fill(-1, (2, nn))
    i = 1
    target = 0.0
    C0 = sum(x)
    while i <= nn
        error = C0 - sum(x)
        h = hazzy(x, c)
        h0 = sum(h)
        if h0<1e-10
            xmat[:,i:nn] = fill(0.0, (2,nn-i+1))
            return xmat'
        else
            Exp = Exponential(1/h0)
            tt = tt + rand(Exp)
        end
        while tt>=target && i<=nn
            xmat[:,i] = x
            i += 1
            target += δt
        end
        Cat = Categorical(h/h0)
        r = rand(Cat)
        x += SS'[:,r]
    end
    return xmat'
end

In [12]:
@everywhere function replace_nan(x)
    """
    replaces NaN's caused by 0/0 in mutation load calculation
    """
    for i=eachindex(x)
        x[i] = isnan(x[i]) ? 0.0 : x[i]
    end
end

In [13]:
@everywhere function raw_to_summ(sims)::Array{Float64}
    """
    converts the species populations from the gillespie algorithm to 
    copy number and mutation load
    """
    Nsim = size(sims)[3] # no. of simulations
    n = size(sims)[1] # length of one simulation
    out = Array{Float64}(undef, n,2,Nsim)
    for i=1:Nsim
        copy_num = sum(sims[:,:,i], dims=2)
        mut_load = sims[:,2,i]./copy_num
        replace_nan(copy_num)
        replace_nan(mut_load)
        out[:,:,i] = hcat(copy_num, mut_load)
    end
    out
end

In [14]:
@everywhere function quantiles(sims, p)
    """
    returns quantile summaries from simulations
    """
    Nsim = size(sims)[3] # Nsim: number of simulations
    n = size(sims)[1] # length of one simulation
    out = Array{Float64}(undef, n,length(p),2)
    for t=1:n
        out[t,:,1] = quantile([sims[t,1,i] for i=1:Nsim], p)
        out[t,:,2] = quantile([sims[t,2,i] for i=1:Nsim], p)
    end
    out
end

In [15]:
@everywhere Nsim = 10000
@everywhere Tsim = 80*365*24*3600
@everywhere δt = 24*3600 ; 

In [16]:
Ntest = SPN([100,100], kk, SS, Tsim, δt)
@time gillespied(Ntest);
"""
a single run takes 0.548 seconds
"""

  0.582585 seconds (2.91 M allocations: 172.890 MiB, 7.48% gc time, 91.32% compilation time)


"a single run takes 0.548 seconds\n"

In [17]:
"""
simulations_single = Array{Float64}(undef, n(Ntest), 2, Nsim)
@time for i=1:Nsim
    simulations_single[:,:,i] = gillespied(Ntest)
end
This is a lot slower (doubly slow) than the previous version without the SPN structure
For 1000 simulations 26.536 seconds
"""

"simulations_single = Array{Float64}(undef, n(Ntest), 2, Nsim)\n@time for i=1:Nsim\n    simulations_single[:,:,i] = gillespied(Ntest)\nend\nThis is a lot slower (doubly slow) than the previous version without the SPN structure\nFor 1000 simulations 26.536 seconds\n"

In [18]:
#summ_single = raw_to_summ(simulations_single);
#qnts_single = quantiles(summ_single, [0.025,0.1,0.5,0.9,0.975]) ;

In [19]:
# The arguments are: 1) a function 'f' and 2) a list with the input.
@everywhere function simulation_map(f, lst)
    np = nworkers()            # Number of processes available.
    Nsim  = length(lst)  # Number of elements to apply the function.
    nn = n(lst[1]) # dimension for output
    output = Array{Any}(undef, nn,2,Nsim) # Where we will write the results. As we do not know
                             # the type (Integer, Tuple...) we write "Any"
    i = 1
    nextidx() = (idx = i; i += 1; idx) # Function to know which is the next work item.
                                       # In this case it is just an index.
    @sync begin #@sync: must complete all jobs in block
        for p = 1:np # loops through all processes (workers)
            if p != myid() || np == 1 # first worker used only if all others are busy 
                @async begin # launch several tasks simultaneaously
                    while true
                        idx = nextidx()
                        if idx > Nsim
                            break
                        end
                        output[:,:,idx] = fetch(remotecall(f, p, lst[idx]))
                    end
                end
            end
        end
    end
    output
end

In [20]:
N_lst = [ Ntest for i=1:Nsim ] ;

In [21]:
@time raw_simulations = simulation_map(gillespied, N_lst) ;
"""
seems to be about as quick as the gillespied algorithm was before the SPN structure
1000 simulations takes ~15 seconds (13.592 s)
10000 simulations: ~170 seconds
"""

169.864269 seconds (4.73 M allocations: 8.840 GiB, 22.07% gc time, 0.09% compilation time)


"seems to be about as quick as the gillespied algorithm was before the SPN structure\n1000 simulations takes ~15 seconds (13.592 s)\n"

In [22]:
simulations = raw_to_summ(raw_simulations);
sims_qntl = quantiles(simulations, [0.025,0.25,0.5,0.75,0.975]) ;

In [23]:
writedlm("Simulations/CN_qnt_gill_jl.txt", sims_qntl[:,:,1])
writedlm("Simulations/ML_qnt_gill_jl.txt", sims_qntl[:,:,2])

In [24]:
function distributions_t(sims, t, Tsim, δt)
    t_tot = [δt:δt:Tsim;]
    Nsim = size(sims)[3]
    nt = length(t)
    sim_t = Array{Float64}(undef, Nsim,nt,2)
    for i=1:Nsim
        for j=1:nt
            sim_t[i,j,1] = sims[findall(x->x==t[j], t_tot),1,i][1]
            sim_t[i,j,2] = sims[findall(x->x==t[j], t_tot),2,i][1]
        end
    end
    sim_t
end


distributions_t (generic function with 1 method)

In [25]:
dist_sims = distributions_t(simulations, [10:10:80;]*365*24*3600, Tsim, δt) ; 

In [26]:
writedlm("Simulations/CN_ts_gill_jl.txt", dist_sims[:,:,1])
writedlm("Simulations/ML_ts_gill_jl.txt", dist_sims[:,:,2])