In [27]:
using Markdown
using InteractiveUtils
using Random, Distributions, StatsBase, Parameters, DataFrames, Plots
using CSV, Tables
using Pkg

#physical constants
const kt_boltzmann = 0.001987204118 * (273.15 + 37)
const ΔΔG = Normal(1.0, 1.7)

#fixed parameters
const G = 10 # number_of_genes
const sim_length = 500 # number of generations 500
const F = -5.0 # initial_free_energy (of all proteins)

#variable parameters (some will be modified in the loop)
U = Poisson(5.0) # mutation_rate - 4.5
#L will be defined in the loop
N = 10000 # start_popsize
K = 10000 # carrying_capacity
R = 9 # fecundity - 4
const Rep = 1 #Replicate No.
const start_fitness = prod([1 / (1 + ℯ^(ΔG/kt_boltzmann)) for ΔG in fill(F, G)])

#mutable struct representing an individual bacteriophage in the simulation
mutable struct Virus
    μ_counts::Vector{Int64} #number of mutations that have occurred in each gene
    ΔG_list::Vector{Float64} #free energy (ΔG) for each gene
    fitness::Float64 #calculated fitness of the virus, affecting its reproductive success
    zombie::Bool #flag indicating whether the virus has become inviable (fitness <= 0)
end

#updates the fitness of a given virus based on the current free energy (ΔG) of its genes
function update_fitness!(virus::Virus)
    virus.fitness = prod([1 / (1 + ℯ^(ΔG/kt_boltzmann)) for ΔG in virus.ΔG_list])
end

#applies mutation to virus, derived from Poisson Distribution 'U'
function mutate!(virus::Virus, L) # L is now an argument
    number_of_mutations = only(rand(U, 1))
    ΔΔG_values = rand(ΔΔG, number_of_mutations)
    mutgene_coord = rand((1:G), number_of_mutations)
    for (index, gene_id) in enumerate(mutgene_coord)
        virus.μ_counts[gene_id] += 1
        virus.ΔG_list[gene_id] = virus.ΔG_list[gene_id] + ΔΔG_values[index]
        if rand() < L #Simplified random number generation
            virus.fitness = 0
        end
    end
    (virus.fitness > 0) && (update_fitness!(virus))
    if virus.fitness <= 0
        virus.zombie = true
    end
    return virus.fitness
end

#creates new offspring virus by copying parent + its mutations
function reproduce(parent::Virus, L)
    sprog = deepcopy(parent)
    mutate!(sprog, L)
    return sprog
end

#creates initial virus population
function initialize_population()
    initial_population = [Virus(zeros(Int, G), fill(F, G), start_fitness, false) for _ in 1:N]
    return initial_population
end

#used to implement fitness-proportional selection
function get_weights(populace)
    weights = [v.fitness for v in populace]
    return Weights(weights / sum(weights))
end

#round a floating-point number to an integer
function probabilistic_round(number)
    frac = abs(number - floor(number))
    if rand() < frac # Simplified random number generation
        return ceil(Int, number)
    end
    return floor(Int, number)
end

#creates empty DataFrame
function initialize_report()
    report = DataFrame(psiz = Int[], q1fit = Float64[], meanfit = Float64[],
        q2fit = Float64[], maxfit = Float64[], minfree = Float64[],
        meanfree = Float64[], maxfree = Float64[], minmut = Float64[],
        meanmut = Float64[], maxmut = Float64[])
    return report
end

#updates DatFrame
function report_update!(populace, report)
    push!(report,
        [length(populace), #psiz
        quantile([v.fitness for v in populace], 0.25), #q1fit
        mean([v.fitness for v in populace]), #meanfit
        median([v.fitness for v in populace]), #q2fit
        maximum([v.fitness for v in populace]), #maxfit
        mean([minimum(v.ΔG_list) for v in populace]), #minfree
        mean([mean(v.ΔG_list) for v in populace]), #meanfree
        mean([maximum(v.ΔG_list) for v in populace]), #maxfree
        minimum([sum(v.μ_counts) for v in populace]), #minmut
        mean([sum(v.μ_counts) for v in populace]), #meanmut
        maximum([sum(v.μ_counts) for v in populace]), #maxmut
        ])
end

#creates four plots
function plot_simulation(report)
    abscissa = range(1, size(report, 1))
    p1 = plot(abscissa, report.psiz, ylims = (0, maximum(report.psiz)),
        label = "pop size", linewidth = 3,
        title = "A")
    p2 = plot(abscissa, [report.q1fit, report.meanfit, report.q2fit, report.maxfit],
        label = ["Q1 fitness" "mean fitness" "median fitness" "max fitness"], linewidth = 3, title = "B")
    p3 = plot(abscissa, [report.minfree, report.meanfree, report.maxfree],
        label = ["min ΔG" "mean ΔG" "max ΔG"],
        linewidth = 3, title = "C")
    p4 = plot(abscissa, [report.minmut, report.meanmut, report.maxmut],
        label = ["min # μ count" "mean # μ" "max # μ"],
        linewidth = 3, title = "D")
    plot(p1, p2, p3, p4, titleloc = :left, titlefont = font(20), layout = (2, 2), size = (1000, 700))
end

#simulates one generation of the population. fitness pushed to next generation
function synchronized_generation(populace, L) # L is now an argument
    next_generation = []
    for parent in populace
        for r in 1:probabilistic_round(R * parent.fitness)
            child = reproduce(parent, L)
            (child.fitness > 0) && (push!(next_generation, child)) # could be a zombie test
        end
    end
    return next_generation
end

#runs main sim and collects data into DataFrame
function synchronized_simulation(L) # L is now an argument
    population = initialize_population()
    population_size = N
    report = initialize_report()
    SIM_DURATION = sim_length
    while SIM_DURATION > 0
        SIM_DURATION -= 1
        if SIM_DURATION % 20 == 0 # can change reporting frequency here
            println("generation:", sim_length - SIM_DURATION)
        end
        report_update!(population, report)
        population = synchronized_generation(population, L) # Pass L here
        population_size = length(population)

        if population_size > K
            population = sample(population, K, replace = false)
            population_size = K

        elseif population_size == 0
            SIM_DURATION = 0
        end
    end
    return report
end

#simulation run with the paramaters at the top 
function run_sim(seed::Int, output_dir::String, L::Float64, N::Int, U_poisson::Poisson, R::Int, K::Int) # L, N, U, R, K are now arguments
    Random.seed!(seed)

    variable1 = "L$(L)"
    variable2 = "N$(N)"
    variable3 = "U$(Int(mean(U_poisson)))" # Extract mean from Poisson
    variable4 = "K$(K)"
    variable5 = "R$(R)"
    seed_str = "$seed"

    csv_filename = joinpath(output_dir, "$(variable1)_$(variable2)_$seed_str.csv")
    png_filename = joinpath(output_dir, "$(variable1)_$(variable2)_$seed_str.png")

    synchronized_report = synchronized_simulation(L) # Pass L here

    #Add new columns for L, N, seed, U, R, K
    synchronized_report[!, :L] .= L
    synchronized_report[!, :N] .= N
    synchronized_report[!, :seed] .= seed
    synchronized_report[!, :U] .= Int(mean(U_poisson)) # Store the mean of Poisson U
    synchronized_report[!, :R] .= R
    synchronized_report[!, :K] .= K

    CSV.write(csv_filename, synchronized_report)
    plot_simulation(synchronized_report) # Plot after adding columns to report if they affect plot
    png(png_filename)

    #Combine to master CSV document based on N
    master_csv_dir = joinpath(base_dir, "master_csvs")
    mkpath(master_csv_dir)
    master_csv_filename = joinpath(master_csv_dir, "master_N$(N).csv")

    #Check if master CSV exists to determine if headers are needed
    if !isfile(master_csv_filename) || filesize(master_csv_filename) == 0
        CSV.write(master_csv_filename, synchronized_report, header=true)
    else
        CSV.write(master_csv_filename, synchronized_report, append=true, header=false)
    end

    return csv_filename, png_filename
end

base_dir = "C:\\Users\\jade-\\Desktop\\Simulation\\N=K=10000"
mkpath(base_dir)

mut_dir = joinpath(base_dir, "U$(Int(mean(U)))") # Use Int(mean(U)) for directory name
group_dir = joinpath(mut_dir, "R$(R)_N$(N)_K$(K)") # L is removed from here

num_runs = 5

#Main loop over L values
for L in 0.0:0.1:1.0
    println("Running simulations for L = $L")
    L_dir = joinpath(group_dir, "L$(L)") # Create a directory for each L
    mkpath(L_dir)

    for i in 1:num_runs
        seed = rand(Int)
        println("  Run $i, seed = $seed")
        run_dir = joinpath(L_dir, "run_$i")
        mkpath(run_dir)
        # Pass all relevant parameters to run_sim
        run_sim(seed, run_dir, L, N, U, R, K)
    end
    println("Simulations for L = $L complete.\n")
end

println("All simulation runs complete.")

Running simulations for L = 0.0
  Run 1, seed = 8863481429565871728
generation:20
generation:40
generation:60
generation:80
generation:100
generation:120
generation:140
generation:160
generation:180
generation:200
generation:220
generation:240
generation:260
generation:280
generation:300
generation:320
generation:340
generation:360
generation:380
generation:400
generation:420
generation:440
generation:460
generation:480
generation:500
  Run 2, seed = 4173060926133299913
generation:20
generation:40
generation:60
generation:80
generation:100
generation:120
generation:140
generation:160
generation:180
generation:200
generation:220
generation:240
generation:260
generation:280
generation:300
generation:320
generation:340
generation:360
generation:380
generation:400
generation:420
generation:440
generation:460
generation:480
generation:500
  Run 3, seed = 5268974514795451946
generation:20
generation:40
generation:60
generation:80
generation:100
generation:120
generation:140
generation:160
ge

  Run 4, seed = -7505417974138592022
  Run 5, seed = 3009881097975746351
Simulations for L = 0.9 complete.

Running simulations for L = 1.0
  Run 1, seed = -3340000596145790305
  Run 2, seed = 5098848500588579190
  Run 3, seed = -7091871157515044724
  Run 4, seed = 6120517164211091937
  Run 5, seed = 4331323525556270354
Simulations for L = 1.0 complete.

All simulation runs complete.
