In [None]:
using CSV
using DataFrames
using PyPlot
using Statistics
using Printf

# Set the matplotlib default font (sans-serif, size 16) for the figures drawn below.
rc("font", family="sans-serif", size=16)

In [None]:
"""
    firstQuarter(array)

Return the 0.25 quantile (first quartile) of `array`.
"""
function firstQuarter(array)
    return quantile(array, 0.25)
end
"""
    thirdQuarter(array)

Return the 0.75 quantile (third quartile) of `array`.
"""
function thirdQuarter(array)
    return quantile(array, 0.75)
end

# Load the experiment overview and the recorded Bellman results.
# The paths are built with `joinpath` so that they resolve on every platform, not only
# on Windows where "\\" is the path separator.
# NOTE(review): recent CSV.jl releases require an explicit sink, i.e.
# `CSV.read(path, DataFrame)` -- confirm against the installed version.
overview_df = CSV.read(joinpath("experiments_deep", "overview_timed.csv"))
bellman_df = CSV.read(joinpath("experiments_deep", "bellman_notable.csv"))


"""
    getBellman(dataset, consideredLength, remainingPoints)

Look up the MSE stored in the global `bellman_df` for the row whose `dataset`,
`length` and `remaining_points` columns equal the given arguments.

Returns the `mse` value of the first matching row.
Throws an error naming the lookup key when no row matches.
"""
function getBellman(dataset, consideredLength, remainingPoints)
    relevantEntries = filter(
        row -> row[:dataset] == dataset &&
            row[:length] == consideredLength &&
            row[:remaining_points] == remainingPoints,
        bellman_df
    )
    mseValues = relevantEntries.mse
    # Fail with the lookup key instead of an opaque BoundsError on `[1]`.
    if isempty(mseValues)
        error("no Bellman entry for dataset=$dataset, length=$consideredLength, " *
              "remaining_points=$remainingPoints")
    end
    return first(mseValues)
end


"""
    getBellmanTime(consideredLength, remainingPoints)

Return the mean of the `time` column over all rows of the global `bellman_df` whose
`length` and `remaining_points` columns equal the given arguments.

The data set is deliberately not part of the lookup: the run time of Bellman does not
depend on the underlying data (same run time across data sets of the same length).

Throws an error naming the lookup key when no row matches, rather than returning the
`NaN` that `mean` yields for an empty column.
"""
function getBellmanTime(consideredLength, remainingPoints)
    relevantEntries = filter(
        row -> row[:length] == consideredLength &&
            row[:remaining_points] == remainingPoints,
        bellman_df
    )
    times = relevantEntries.time
    if isempty(times)
        error("no Bellman timing for length=$consideredLength, " *
              "remaining_points=$remainingPoints")
    end
    return mean(times)
end

"""
    formatSpeedUp(speedUp)

Format a speed-up factor as a short label for a plot annotation.

The value is printed with three significant digits (`%.3g`). If that takes more than
four characters it is printed with two decimals (`%.2f`) instead. Labels shorter than
three characters are prefixed with two spaces.
"""
function formatSpeedUp(speedUp)
    label = @sprintf("%.3g", speedUp)
    if length(label) > 4
        label = @sprintf("%.2f", speedUp)
    end
    if length(label) < 3
        label = "  " * label
    end
    return label
end

"""
    plotTimeUntilBellman(dataset)

Plot, for every considered length of `dataset`, the Bellman run time next to the median
time the "PLA-GA" runs needed to reach the Bellman MSE.

The experiments are the rows of the global `overview_df` that belong to `dataset`, have
`loop` set and whose `description` contains "contains special". For each of them the
per-run log `experiments_deep/<name>.csv` is read, and for every `run_id` the smallest
`total_time` with `best_valid_mse <= ` Bellman MSE is taken as that run's finishing time.

Side effects: prints the mean finishing time per length to stdout and draws into a new
figure. Each Bellman point is annotated with the ratio Bellman time / median finishing
time.

Throws an error if, for some experiment, not every run reached the Bellman MSE.
"""
function plotTimeUntilBellman(dataset)
    relevantEntries = filter(
        row -> row[:dataset] == dataset && row[:loop] &&
            occursin("contains special", row[:description]),
        overview_df
    )
    sort!(relevantEntries, [:consideredLength])

    bellmanTimes = Float64[]
    consideredLengths = Float64[]        # in thousands of points
    absoluteTimes = AbstractVector[]     # per experiment: finishing time of each run

    for i in 1:size(relevantEntries, 1)
        consideredLength = relevantEntries.consideredLength[i]
        remainingPoints = relevantEntries.remaining_points[i]
        experimentName = relevantEntries.name[i]
        bellman_mse = getBellman(dataset, consideredLength, remainingPoints)

        df = CSV.read("experiments_deep/" * experimentName * ".csv")
        numberRuns = length(unique(df.run_id))

        # Keep only the log rows at which a run was already at least as good as Bellman;
        # the earliest such row per run is the time that run needed.
        df_successful = filter(row -> row[:best_valid_mse] <= bellman_mse, df)
        finish_df = by(df_successful, :run_id, :total_time => minimum)
        numberSuccessful = size(finish_df, 1)

        # Runs that never reached the Bellman MSE are missing from `finish_df` and would
        # silently bias the statistics, so refuse to continue. An explicit check is used
        # because `@assert` may be disabled and carries no context.
        if numberSuccessful != numberRuns
            error("only $numberSuccessful of $numberRuns runs of \"$experimentName\" " *
                  "reached the Bellman MSE $bellman_mse")
        end

        push!(bellmanTimes, getBellmanTime(consideredLength, remainingPoints))
        push!(consideredLengths, consideredLength / 1000)
        push!(absoluteTimes, finish_df.total_time_minimum)
    end

    for (lengthInThousands, times) in zip(consideredLengths, absoluteTimes)
        println(lengthInThousands, " ", mean(times))
    end

    medianTimes = median.(absoluteTimes)

    # figaspect(0.6) gives a figure size with a height/width ratio of 0.6.
    w, h = plt[:figaspect](0.6)
    figure(figsize=(w, h))
    plot(consideredLengths, bellmanTimes, label="Bellman", linewidth=2, marker="o")
    plot(consideredLengths, medianTimes, label="PLA-GA", linewidth=2, marker="o")

    for i in eachindex(bellmanTimes)
        speedUp = bellmanTimes[i] / medianTimes[i]
        # Fixed offsets (in data units) place the label up and to the left of the marker.
        annotate(formatSpeedUp(speedUp), (consideredLengths[i] - 7, bellmanTimes[i] + 100))
    end

    xlabel("n in thousands")
    ylabel("WCT in seconds")
    legend(loc="upper left")
    ylim(0)
end

In [None]:
# One entry per data set: (data set name, x-axis limits, y-axis limits).
plotSpecs = [
    ("Ham", (0, 93), (0, 13000)),
    ("Rock", (0, 103), (0, 17000)),
    ("subject103_5", (0, 103), (0, 17000)),
    ("subject103_6", (0, 103), (0, 17000))
]

# Index of the `plotSpecs` entry to render.
spec = 1
ds = first(plotSpecs[spec])

plotTimeUntilBellman(ds)

# Apply the axis limits configured for the selected data set, then export the figure.
xlim(plotSpecs[spec][2])
ylim(plotSpecs[spec][3])
tight_layout()
savefig("Scaling $ds.pdf", bbox_inches="tight")