# Benchmark of file formats

In [None]:
using DrWatson
@quickactivate "dare"

using DataFrames

using ReinforcementLearning
using IntervalSets
using LinearAlgebra
using ControlSystems
using CUDA
using Plots

include(srcdir("nodeconstructor.jl"))
include(srcdir("save_all_episodes_hook.jl"))

include(srcdir("env.jl"));

In [None]:
using Arrow
using CSV
using Serialization
using JLSO
using JSONTables
using CodecZlib
using ZipFile
using JDF
using StatsPlots # for charts
using Mmap # for compression

## Create Env data

In [None]:
# Connection matrix handed to NodeConstructor as `CM`.
CM = [0.0 1.0; -1.0 0.0]

# One source entry; "fltr" selects the "LCL" filter variant.
source = Dict{Any,Any}(
    "pwr" => 45000.0,
    "v_rip" => 0.01556109320329396,
    "vdc" => 750,
    "i_rip" => 0.10108821490394984,
    "fltr" => "LCL",
    "R1" => 0.4022094955070556,
    "R2" => 0.4022094955070556,
    "R_C" => 0.0006447094780419011,
    "L1" => 0.001005523738767639,
    "L2" => 0.001005523738767639,
    "C" => 2.302533850149647e-5,
)
source_list = Any[source]

# One load entry with an "RLC" impedance.
load = Dict{Any,Any}(
    "impedance" => "RLC",
    "R" => 30236.0,
    "L" => 57.042,
    "C" => 39.18,
)
load_list = Any[load]

# One cable entry.
cable = Dict{Any,Any}(
    "R" => 6.84059,
    "L" => 0.00250127,
    "C" => 3.7898e-6,
)
cable_list = Any[cable]

# Top-level parameter dictionary: the three component lists plus the grid settings.
parameters = Dict{Any,Any}(
    "source" => source_list,
    "cable" => cable_list,
    "load" => load_list,
    "grid" => Dict("fs" => 10000.0, "phase" => 1, "v_rms" => 230),
);

In [None]:
# Build the node description for one source and one load from the parameter dictionary.
nc = NodeConstructor(;
    num_sources = 1,
    num_loads = 1,
    parameters = parameters,
    CM = CM,
);

In [None]:
# Unpack the four values returned by get_sys for this node configuration.
(A, B, C, D) = get_sys(nc);

In [None]:
# Limits looked up when building the normalisation array: "i_lim" and "v_lim".
limits = Dict{String,Int}("i_lim" => 20, "v_lim" => 600);

In [None]:
# Collect one limit per state name: names starting with "i" get limits["i_lim"],
# names starting with "u" get limits["v_lim"]; any other name contributes nothing.
states = get_states(nc)
norm_array = Any[]
for name in states
    if startswith(name, "i")
        push!(norm_array, limits["i_lim"])
        continue
    end
    startswith(name, "u") && push!(norm_array, limits["v_lim"])
end

In [None]:
# Column counts of A and B.
ns = size(A, 2);
na = size(B, 2);

In [None]:
# `ts` is the time step; `V_source` is the value later handed to SimEnv as `v_dc`.
ts, V_source = 1e-5, 300;

In [None]:
# Initial state: one zero per column of A.
x0 = zeros(size(A, 2))

# Zero-order-hold discretisation of (A, B) with step `ts`:
#   Ad = exp(A * ts)
#   Bd = A⁻¹ (Ad - I) B
Ad = exp(A * ts)
# The formula needs the identity matrix `I` (from LinearAlgebra), not the output
# matrix `C`; using `C` is only correct when `C` happens to be the identity.
Bd = A \ (Ad - I) * B;

In [None]:
# Assemble the simulation environment from the continuous and discretised matrices.
env = SimEnv(;
    A = A,
    B = B,
    C = C,
    Ad = Ad,
    Bd = Bd,
    norm_array = norm_array,
    x0 = x0,
    v_dc = V_source,
    ts = rationalize(ts),
    convert_state_to_cpu = true,
);

In [None]:
# Hook instance passed to `run`; its `df` field is the table the benchmarks write and read.
h = SaveAllEpisodes()

In [None]:
# Random policy over the environment's action space.
policy = env |> action_space |> RandomPolicy

In [None]:
# Run the policy in the environment with the hook attached, stopping after 10000 episodes.
run(policy, env, StopAfterEpisode(10_000), h)

In [None]:
# Display the data held in the hook's `df` field.
h.df

## Write data

In [None]:
# Time every writer twice on h.df. `@time` prints its report and `@elapsed`
# stores the seconds in the `*write1` / `*write2` variables used by the chart.
# The JDF.jl benchmark is disabled.

println("First run")
println("CSV.jl")
csvwrite1 = @elapsed(@time(CSV.write("bigdf1.csv", h.df)))
println("Serialization")
serializewrite1 = @elapsed(@time(open(out -> serialize(out, h.df), "bigdf.bin", "w")))
# println("JDF.jl")
# jdfwrite1 = @elapsed @time JDF.save("bigdf.jdf", h.df)
println("JLSO.jl")
jlsowrite1 = @elapsed(@time(JLSO.save("bigdf.jlso", :data => h.df)))
println("Arrow.jl")
arrowwrite1 = @elapsed(@time(Arrow.write("bigdf.arrow", h.df)))
println("JSONTables.jl arraytable")
jsontablesawrite1 = @elapsed(@time(open(out -> arraytable(out, h.df), "bigdf1.json", "w")))
println("JSONTables.jl objecttable")
jsontablesowrite1 = @elapsed(@time(open(out -> objecttable(out, h.df), "bigdf2.json", "w")))

println("Second run")
println("CSV.jl")
csvwrite2 = @elapsed(@time(CSV.write("bigdf1.csv", h.df)))
println("Serialization")
serializewrite2 = @elapsed(@time(open(out -> serialize(out, h.df), "bigdf.bin", "w")))
# println("JDF.jl")
# jdfwrite2 = @elapsed @time JDF.save("bigdf.jdf", h.df)
println("JLSO.jl")
jlsowrite2 = @elapsed(@time(JLSO.save("bigdf.jlso", :data => h.df)))
println("Arrow.jl")
arrowwrite2 = @elapsed(@time(Arrow.write("bigdf.arrow", h.df)))
println("JSONTables.jl arraytable")
jsontablesawrite2 = @elapsed(@time(open(out -> arraytable(out, h.df), "bigdf1.json", "w")))
println("JSONTables.jl objecttable")
jsontablesowrite2 = @elapsed(@time(open(out -> objecttable(out, h.df), "bigdf2.json", "w")))

In [None]:
# Grouped bar chart of the write timings: one pair of bars (1st run, 2nd run) per format.
# Each label is repeated twice (`inner = 2`), so the timing vector must list
# (run 1, run 2) for each format in the same order as the labels.
groupedbar(
    # Exclude JSONTables.jl arraytable due to timing
    repeat(["CSV.jl", "Serialization", "JLSO.jl", "Arrow.jl", "JSONTables.jl\nobjecttable"],
            inner = 2),
    [csvwrite1, csvwrite2,
     serializewrite1, serializewrite2,      # was serializewrite1 twice
     jlsowrite1, jlsowrite2,
     arrowwrite1, arrowwrite2,
     jsontablesowrite1, jsontablesowrite2], # was jsontablesowrite2 twice
    group = repeat(["1st", "2nd"], outer = 5),
    ylab = "Second",
    title = "Write Performance\nDataFrame: bigdf\nSize: $(size(h.df))"
)

## Used storage

In [None]:
# Files produced by the write benchmark. "bigdf.jlso" is included so that every
# benchmarked format also appears in the size comparison.
data_files = ["bigdf1.csv", "bigdf.bin", "bigdf.jlso", "bigdf.arrow", "bigdf1.json", "bigdf2.json"]
# One row per file with its on-disk size in bytes.
df = DataFrame(file = data_files, size = filesize.(data_files))
# append!(df, DataFrame(file = "episode_data/bigdf.jdf", size=reduce((x,y)->x+y.size,
#                                                       stat.(joinpath.("bigdf.jdf", readdir("bigdf.jdf"))),
#                                                       init=0)))
# Sort by size, smallest first; as the last expression this also displays the table.
sort!(df, :size)

In [None]:
# Bar chart of file size per format, converted from bytes to MB (divide by 1024^2).
@df df plot(
    :file,
    :size / 1024^2,
    seriestype = :bar,
    title = "Format File Size (MB)",
    label = "Size",
    ylab = "MB",
)

## Read data

In [None]:
# Time every reader twice on the files written by the write benchmark. `@time` prints
# its report and `@elapsed` stores the seconds in the `*read1` / `*read2` variables
# used by the chart. The JDF.jl benchmark is disabled.
# Arrow is timed in two parts: building a DataFrame from the Arrow table, then `copy`
# of that DataFrame; the chart reports both the first part and the sum.
# NOTE(review): only the CSV and Arrow readers build a DataFrame here; the other
# readers return whatever their load function yields — confirm the comparison is intended.
println("First run")
println("CSV.jl")
csvread1 = @elapsed @time CSV.read("bigdf1.csv", DataFrame)
println("Serialization")
serializeread1 = @elapsed @time open(deserialize, "bigdf.bin")
# println("JDF.jl")
# jdfread1 = @elapsed @time JDF.load("bigdf.jdf") |> DataFrame
println("JLSO.jl")
jlsoread1 = @elapsed @time JLSO.load("bigdf.jlso")
println("Arrow.jl")
arrowread1 = @elapsed @time df_tmp = Arrow.Table("bigdf.arrow") |> DataFrame
arrowread1copy = @elapsed @time copy(df_tmp)
println("JSONTables.jl arraytable")
jsontablesaread1 = @elapsed @time open(jsontable, "bigdf1.json")
println("JSONTables.jl objecttable")
jsontablesoread1 = @elapsed @time open(jsontable, "bigdf2.json")
println("Second run")
# Label the CSV timing in the second run too, matching the first run.
println("CSV.jl")
csvread2 = @elapsed @time CSV.read("bigdf1.csv", DataFrame)
println("Serialization")
serializeread2 = @elapsed @time open(deserialize, "bigdf.bin")
# println("JDF.jl")
# jdfread2 = @elapsed @time JDF.load("bigdf.jdf") |> DataFrame
println("JLSO.jl")
jlsoread2 = @elapsed @time JLSO.load("bigdf.jlso")
println("Arrow.jl")
arrowread2 = @elapsed @time df_tmp = Arrow.Table("bigdf.arrow") |> DataFrame
arrowread2copy = @elapsed @time copy(df_tmp)
println("JSONTables.jl arraytable")
jsontablesaread2 = @elapsed @time open(jsontable, "bigdf1.json")
println("JSONTables.jl objecttable")
jsontablesoread2 = @elapsed @time open(jsontable, "bigdf2.json");

In [None]:
# Grouped bar chart of the read timings: one pair of bars (1st run, 2nd run) per format.
# The JSONTables.jl arraytable timings are left out because they take much longer.
# "Arrow.jl\ncopy" is the Arrow read time plus the time to copy the resulting DataFrame.
let formats = ["CSV.jl", "Serialization", "JLSO.jl", "Arrow.jl", "Arrow.jl\ncopy",
               # "JSON\narraytable",
               "JSON\nobjecttable"],
    timings = [csvread1, csvread2,
               serializeread1, serializeread2,
               jlsoread1, jlsoread2,
               arrowread1, arrowread2,
               arrowread1 + arrowread1copy, arrowread2 + arrowread2copy,
               # jsontablesaread1, jsontablesaread2,
               jsontablesoread1, jsontablesoread2]

    groupedbar(
        repeat(formats, inner = 2),
        timings,
        group = repeat(["1st", "2nd"], outer = 6),
        ylab = "Second",
        title = "Read Performance\nDataFrame: bigdf\nSize: $(size(h.df))"
    )
end