In [1]:
using PyCall

┌ Info: Precompiling PyCall [438e738f-606a-5dbb-bf0a-cddfbfd45ab0]
└ @ Base loading.jl:1273


In [2]:
gym = pyimport("gym")

PyObject <module 'gym' from '/home/d9w/.julia/conda/3/lib/python3.7/site-packages/gym/__init__.py'>

In [3]:
# Build the CartPole-v1 environment through the Python `gym` module imported via PyCall.
cartpole = gym.make("CartPole-v1")

PyObject <TimeLimit<CartPoleEnv<CartPole-v1>>>

In [4]:
# Take a single random step to obtain one real (state, action, reward, next_state) sample.
state = cartpole.reset()
action = cartpole.action_space.sample()
# `step` is destructured as a 4-tuple here; the trailing info value is discarded.
next_state, reward, done, _ = cartpole.step(action)

([-0.047999404970835186, 0.19569221823410166, -0.018208187493774175, -0.2824486691725574], 1.0, false, Dict{Any,Any}())

In [5]:
# Inspect the Julia types that the sampled values have after crossing the PyCall boundary.
typeof(state), typeof(action), typeof(reward), typeof(next_state)

(Array{Float64,1}, Int64, Float64, Array{Float64,1})

In [6]:
# Benchmark parameters. Declared `const` so that functions reading them as
# globals (e.g. `batch_size` in the sampling benchmarks) see a fixed type, and
# written as integer literals rather than converted from floating point.
const replay_buffer_size = 1_000_000  # maximum number of transitions a buffer holds
const nb_samples = 2_000_000          # insertions per insertion benchmark
const nb_batches = 10_000             # `sample` calls per sampling benchmark
const batch_size = 50                 # transitions drawn per `sample` call

50

In [7]:
# Common supertype of every replay-buffer implementation in this notebook;
# the `append`/`sample` methods and the benchmark helpers dispatch on it.
abstract type Buffer end
# Common supertype of the transition structs, so that a single container
# element type (`Transition`) can hold either concrete transition kind.
abstract type Transition end

In [8]:
"""
    GoodTransition(state, action, reward, next_state)

Fully typed transition record: every field has a concrete type.

`state` and `next_state` are declared as `Vector{Float64}` (the type PyCall
returns for the CartPole observations in this notebook). The previous
`Array{Float64}` annotation left the dimension unspecified, which is an
abstract type and therefore defeats the purpose of a "typed" struct.
"""
struct GoodTransition <: Transition
    state::Vector{Float64}
    action::Int64
    reward::Float64
    next_state::Vector{Float64}
end

In [9]:
"""
    BadTransition(state, action, reward, next_state)

Transition record whose four fields are all left as `Any`. It serves as the
untyped comparison case in the benchmarks of this notebook.
"""
struct BadTransition <: Transition
    state::Any
    action::Any
    reward::Any
    next_state::Any
end

In [10]:
# Wrap the same sampled step in both the typed and the untyped transition struct;
# these two instances are the payloads inserted by every benchmark below.
t = GoodTransition(state, action, reward, next_state)
tbad = BadTransition(state, action, reward, next_state)

BadTransition([-0.048005595216497256, 0.0003095122831035524, -0.01852863128277015, 0.016022189449798746], 1, 1.0, [-0.047999404970835186, 0.19569221823410166, -0.018208187493774175, -0.2824486691725574])

In [11]:
tqdm = pyimport("tqdm")

"""
    test_insertion_tqdm(buffer::Buffer, nb_samples::Int, transition::Transition)

Append `transition` to `buffer` `nb_samples` times while a Python `tqdm`
progress bar tracks the loop.

The environment is not touched here: the same pre-built `transition` is
inserted on every iteration, so no environment reset is needed.
"""
function test_insertion_tqdm(buffer::Buffer, nb_samples::Int, transition::Transition)
    # `tqdm.trange` is iterated only for its progress display; the yielded
    # counter itself is unused.
    for _ in tqdm.trange(nb_samples)
        append(buffer, transition)
    end
end

"""
    test_sampling_tqdm(buffer::Buffer, nb_batches::Int; batch_size::Int=batch_size)

Draw `nb_batches` batches from `buffer` with `sample`, showing a Python `tqdm`
progress bar.

# Keywords
- `batch_size`: number of transitions per batch. Defaults to the global
  `batch_size`, so existing two-argument calls behave as before; inside the
  loop it is a typed local rather than a global lookup.
"""
function test_sampling_tqdm(buffer::Buffer, nb_batches::Int; batch_size::Int=batch_size)
    for _ in tqdm.trange(nb_batches)
        sample(buffer, batch_size)
    end
end

test_sampling_tqdm (generic function with 1 method)

In [12]:
# Benchmark helper: print a header line, then run `nb_samples` calls of
# `append(buffer, transition)` under `@timev`, which reports elapsed time and
# allocation statistics for the loop.
function test_insertion_timev(buffer::Buffer, nb_samples::Int, transition::Transition)
    println("Insertion of ", nb_samples, " samples:")
    # The whole loop is timed as a single expression, not each call separately.
    @timev for i in 1:nb_samples
        append(buffer, transition)
    end
end

"""
    test_sampling_timev(buffer::Buffer, nb_batches::Int; batch_size::Int=batch_size)

Print a header line, then time `nb_batches` calls of `sample(buffer, batch_size)`
with `@timev`.

# Keywords
- `batch_size`: number of transitions per batch. Defaults to the global
  `batch_size`, so existing two-argument calls behave as before; the timed
  loop reads a typed local instead of looking up a global on each iteration.
"""
function test_sampling_timev(buffer::Buffer, nb_batches::Int; batch_size::Int=batch_size)
    println("Sampling of ", nb_batches, " batches:")
    # The whole loop is timed as a single expression.
    @timev for i in 1:nb_batches
        sample(buffer, batch_size)
    end
end

test_sampling_timev (generic function with 1 method)

In [13]:
"""
    ReplayBuffer1(capacity::Int64)

Replay buffer backed by a growable `Vector{Transition}`. `capacity` is the
maximum number of transitions kept; the one-argument constructor starts with
an empty vector.
"""
struct ReplayBuffer1 <: Buffer
    data::Vector{Transition}
    capacity::Int64
end

# Convenience constructor: empty storage, given capacity.
ReplayBuffer1(capacity::Int64) = ReplayBuffer1(Transition[], capacity)

"""
    append(buffer::ReplayBuffer1, t::Transition)

Push `t` onto the back of `buffer.data`. If the vector already holds
`buffer.capacity` elements, the first (oldest) element is removed beforehand.
Returns the underlying vector, as `push!` does.
"""
function append(buffer::ReplayBuffer1, t::Transition)
    store = buffer.data
    # Evict the oldest entry once the buffer is full.
    length(store) == buffer.capacity && popfirst!(store)
    return push!(store, t)
end

"""
    sample(buffer::ReplayBuffer1, batch_size::Int)

Return `batch_size` elements of `buffer.data` drawn with `rand`.
"""
sample(buffer::ReplayBuffer1, batch_size::Int) = rand(buffer.data, batch_size)

sample (generic function with 1 method)

In [14]:
"""
    ReplayBuffer2(capacity::Int64)

Replay buffer backed by a pre-allocated `Vector{Transition}` of length
`capacity` that is written to at a moving position.

# Fields
- `data`: storage, allocated with `undef` entries by the one-argument constructor.
- `capacity`: length of `data`.
- `i`: index of the next slot to write (starts at 1).
- `filled`: set to `true` once the write index has wrapped around.
"""
mutable struct ReplayBuffer2 <: Buffer
    data::Vector{Transition}
    capacity::Int64
    i::Int64
    filled::Bool
end

# Convenience constructor: uninitialised storage, write index at 1, not yet filled.
ReplayBuffer2(capacity::Int64) =
    ReplayBuffer2(Vector{Transition}(undef, capacity), capacity, 1, false)

"""
    append(buffer::ReplayBuffer2, t::Transition)

Write `t` into the slot at `buffer.i` and advance the write index. When the
index would move past `buffer.capacity`, it wraps back to 1 and `buffer.filled`
is set to `true`.

Returns `nothing` on an ordinary advance and `1` when the index wraps.
"""
function append(buffer::ReplayBuffer2, t::Transition)
    slot = buffer.i
    buffer.data[slot] = t
    if slot < buffer.capacity
        # Ordinary case: just move to the next slot.
        buffer.i = slot + 1
        return nothing
    end
    # Last slot was just written: mark the buffer full and wrap around.
    buffer.filled = true
    buffer.i = 1
    return 1
end

"""
    sample(buffer::ReplayBuffer2, batch_size::Int)

Return `batch_size` transitions drawn with `rand` from the written part of
`buffer`.

Once the buffer has wrapped (`buffer.filled`), the whole `data` vector is
sampled. Before that, only slots `1:buffer.i-1` have been assigned, so sampling
is restricted to that prefix. The prefix is taken as a view, so it is not
copied on every call.
"""
function sample(buffer::ReplayBuffer2, batch_size::Int)
    if buffer.filled
        return rand(buffer.data, batch_size)
    end
    # Slots from `buffer.i` onward are still `undef`; never sample them.
    written = view(buffer.data, 1:(buffer.i - 1))
    return rand(written, batch_size)
end

sample (generic function with 2 methods)

In [15]:
using DataStructures
"""
    ReplayBuffer3(capacity::Int64)

Replay buffer backed by a `CircularDeque{Transition}` from DataStructures.
`capacity` is both the deque's allocated size and the limit checked by `append`.
"""
struct ReplayBuffer3 <: Buffer
    data::CircularDeque{Transition}
    capacity::Int64
end

# Convenience constructor: empty deque sized for `capacity` elements.
ReplayBuffer3(capacity::Int64) =
    ReplayBuffer3(CircularDeque{Transition}(capacity), capacity)

"""
    append(buffer::ReplayBuffer3, t::Transition)

Push `t` onto the back of the deque. If the deque already holds
`buffer.capacity` elements, the front (oldest) element is removed beforehand.
Returns whatever `push!` returns for the deque.
"""
function append(buffer::ReplayBuffer3, t::Transition)
    deque = buffer.data
    # Make room by dropping the oldest entry once the deque is full.
    length(deque) == buffer.capacity && popfirst!(deque)
    return push!(deque, t)
end

"""
    sample(buffer::ReplayBuffer3, batch_size::Int)

Return a vector of `batch_size` transitions picked at random positions of the
deque.

The number of stored elements is obtained with `length(buffer.data)` rather
than by reading the deque's internal `n` field, so the method relies only on
the container's public interface.
"""
function sample(buffer::ReplayBuffer3, batch_size::Int)
    inds = rand(1:length(buffer.data), batch_size)
    return [buffer.data[i] for i in inds]
end

sample (generic function with 3 methods)

In [16]:
# ReplayBuffer1 with typed transitions: tqdm-driven runs first, then @timev runs
# on the same buffer (already at capacity after the first insertion pass).
memory = ReplayBuffer1(replay_buffer_size)
test_insertion_tqdm(memory, nb_samples, t)
test_sampling_tqdm(memory, nb_batches)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.033650 seconds (1 allocation: 0 bytes)
elapsed time (ns): 33650101


  0%|                                              | 0/2000000 [00:00<?, ?it/s]  3%|▉                             | 60735/2000000 [00:00<00:03, 607347.68it/s]  7%|██                           | 140432/2000000 [00:00<00:02, 654030.24it/s] 12%|███▌                         | 241532/2000000 [00:00<00:02, 731515.70it/s] 15%|████▍                        | 307599/2000000 [00:00<00:02, 708715.48it/s] 20%|█████▊                       | 401406/2000000 [00:00<00:02, 764811.63it/s] 25%|███████▎                     | 502646/2000000 [00:00<00:01, 825365.30it/s] 29%|████████▌                    | 587778/2000000 [00:00<00:01, 796949.95it/s] 35%|██████████▏                  | 700311/2000000 [00:00<00:01, 873409.55it/s] 41%|███████████▊                 | 813838/2000000 [00:00<00:01, 938339.17it/s] 46%|█████████████▎               | 921106/2000000 [00:01<00:01, 974969.30it/s] 51%|██████████████▎             | 1020226/2000000 [00:01<00:01, 835982.30it/s] 56%|███████████████▊            | 1128

realloc() calls:   1
Sampling of 10000 batches:
  0.012889 seconds (10.00 k allocations: 4.730 MiB)
elapsed time (ns): 12888662
bytes allocated:   4960000
pool allocs:       10000


In [17]:
# without tqdm: fresh ReplayBuffer1, insertion and sampling timed twice with @timev
# (the first insertion pass starts from an empty buffer, the second from a full one)
memory = ReplayBuffer1(replay_buffer_size)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.025682 seconds (20 allocations: 17.001 MiB)
elapsed time (ns): 25681733
bytes allocated:   17826496
pool allocs:       6
non-pool GC allocs:9
malloc() calls:    1
realloc() calls:   4
Sampling of 10000 batches:
  0.014975 seconds (10.00 k allocations: 4.730 MiB)
elapsed time (ns): 14975079
bytes allocated:   4960000
pool allocs:       10000
Insertion of 2000000 samples:
  0.032196 seconds (1 allocation: 16.000 MiB)
elapsed time (ns): 32195624
bytes allocated:   16777216
realloc() calls:   1
Sampling of 10000 batches:
  0.012500 seconds (10.00 k allocations: 4.730 MiB)
elapsed time (ns): 12499507
bytes allocated:   4960000
pool allocs:       10000


In [18]:
# with untyped transitions: same sequence as the typed ReplayBuffer1 run,
# but inserting the BadTransition instance `tbad`
memory = ReplayBuffer1(replay_buffer_size)
test_insertion_tqdm(memory, nb_samples, tbad)
test_sampling_tqdm(memory, nb_batches)
test_insertion_timev(memory, nb_samples, tbad)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.037837 seconds (1 allocation: 0 bytes)
elapsed time (ns): 37836858
realloc() calls:   1
Sampling of 10000 batches:
  0.015089 seconds (10.00 k allocations: 4.730 MiB)
elapsed time (ns): 15088945
bytes allocated:   4960000
pool allocs:       10000



  0%|                                              | 0/2000000 [00:00<?, ?it/s]  5%|█▍                            | 99036/2000000 [00:00<00:01, 990356.22it/s] 10%|██▊                          | 196522/2000000 [00:00<00:01, 985650.46it/s] 14%|███▉                         | 271300/2000000 [00:00<00:01, 891773.40it/s] 19%|█████▍                       | 378640/2000000 [00:00<00:01, 939460.91it/s] 24%|███████                      | 484397/2000000 [00:00<00:01, 972025.97it/s] 30%|████████▋                    | 596932/2000000 [00:00<00:01, 914469.10it/s] 35%|██████████                   | 695369/2000000 [00:00<00:01, 934373.28it/s] 40%|███████████▋                 | 809836/2000000 [00:00<00:01, 988874.03it/s] 46%|████████████▉               | 925862/2000000 [00:00<00:01, 1034724.57it/s] 51%|██████████████▍             | 1027172/2000000 [00:01<00:01, 897265.81it/s] 57%|███████████████▉            | 1139845/2000000 [00:01<00:00, 955651.92it/s] 62%|█████████████████▍          | 124

In [19]:
# ReplayBuffer2 (pre-allocated vector with wrap-around index), typed transitions:
# tqdm-driven runs first, then @timev runs on the same buffer.
memory = ReplayBuffer2(replay_buffer_size)
test_insertion_tqdm(memory, nb_samples, t)
test_sampling_tqdm(memory, nb_batches)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.002682 seconds
elapsed time (ns): 2681918
Sampling of 10000 batches:
  0.012090 seconds (10.00 k allocations: 4.730 MiB)
elapsed time (ns): 12089850
bytes allocated:   4960000
pool allocs:       10000



  0%|                                              | 0/2000000 [00:00<?, ?it/s]  5%|█▍                          | 103729/2000000 [00:00<00:01, 1037288.52it/s]  9%|██▌                          | 175673/2000000 [00:00<00:01, 915895.23it/s] 14%|████▏                        | 285855/2000000 [00:00<00:01, 964730.72it/s] 19%|█████▌                       | 385247/2000000 [00:00<00:01, 973304.80it/s] 25%|██████▉                     | 493332/2000000 [00:00<00:01, 1003251.65it/s] 29%|████████▍                    | 577701/2000000 [00:00<00:01, 930389.36it/s] 34%|█████████▉                   | 686759/2000000 [00:00<00:01, 973276.54it/s] 40%|███████████▏                | 803194/2000000 [00:00<00:01, 1023671.89it/s] 45%|█████████████                | 901660/2000000 [00:00<00:01, 921958.39it/s] 50%|██████████████              | 1007935/2000000 [00:01<00:01, 960116.49it/s] 56%|███████████████▏           | 1124220/2000000 [00:01<00:00, 1013102.83it/s] 61%|█████████████████▏          | 122

In [20]:
# without tqdm: fresh ReplayBuffer2, insertion and sampling timed twice with @timev
memory = ReplayBuffer2(replay_buffer_size)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.003688 seconds
elapsed time (ns): 3687722
Sampling of 10000 batches:
  0.014673 seconds (10.00 k allocations: 4.730 MiB)
elapsed time (ns): 14672556
bytes allocated:   4960000
pool allocs:       10000
Insertion of 2000000 samples:
  0.003341 seconds
elapsed time (ns): 3341376
Sampling of 10000 batches:
  0.022213 seconds (10.00 k allocations: 4.730 MiB)
elapsed time (ns): 22212982
bytes allocated:   4960000
pool allocs:       10000


In [21]:
# with untyped transitions: same sequence as the typed ReplayBuffer2 run,
# but inserting the BadTransition instance `tbad`
memory = ReplayBuffer2(replay_buffer_size)
test_insertion_tqdm(memory, nb_samples, tbad)
test_sampling_tqdm(memory, nb_batches)
test_insertion_timev(memory, nb_samples, tbad)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.003395 seconds
elapsed time (ns): 3395242
Sampling of 10000 batches:
  0.023782 seconds (10.00 k allocations: 4.730 MiB)
elapsed time (ns): 23781751
bytes allocated:   4960000
pool allocs:       10000



  0%|                                              | 0/2000000 [00:00<?, ?it/s]  5%|█▍                            | 99892/2000000 [00:00<00:01, 998918.57it/s] 10%|██▊                          | 193885/2000000 [00:00<00:01, 980446.06it/s] 15%|████▏                       | 301226/2000000 [00:00<00:01, 1006596.95it/s] 18%|█████▎                       | 367885/2000000 [00:00<00:01, 873006.42it/s] 24%|██████▉                      | 476205/2000000 [00:00<00:01, 926969.26it/s] 30%|████████▌                    | 591874/2000000 [00:00<00:01, 985694.30it/s] 35%|█████████▊                  | 697174/2000000 [00:00<00:01, 1004964.51it/s] 40%|███████████▍                 | 791833/2000000 [00:00<00:01, 927861.96it/s] 45%|█████████████▏               | 909298/2000000 [00:00<00:01, 990277.31it/s] 51%|█████████████▊             | 1021203/2000000 [00:01<00:00, 1025684.26it/s] 56%|███████████████▋            | 1123479/2000000 [00:01<00:00, 928925.17it/s] 62%|█████████████████▎          | 123

In [22]:
# ReplayBuffer3 (CircularDeque-backed), typed transitions:
# tqdm-driven runs first, then @timev runs on the same buffer.
memory = ReplayBuffer3(replay_buffer_size)
test_insertion_tqdm(memory, nb_samples, t)
test_sampling_tqdm(memory, nb_batches)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.011384 seconds
elapsed time (ns): 11383516
Sampling of 10000 batches:
  0.041396 seconds (40.00 k allocations: 9.918 MiB)
elapsed time (ns): 41395613
bytes allocated:   10400000
pool allocs:       40000



  0%|                                              | 0/2000000 [00:00<?, ?it/s]  5%|█▍                            | 95077/2000000 [00:00<00:02, 950764.11it/s] 10%|██▉                          | 201263/2000000 [00:00<00:01, 981572.50it/s] 15%|████▍                        | 304922/2000000 [00:00<00:01, 997450.77it/s] 19%|█████▍                       | 373574/2000000 [00:00<00:01, 878134.87it/s] 24%|██████▉                      | 482473/2000000 [00:00<00:01, 932287.02it/s] 30%|████████▋                    | 596944/2000000 [00:00<00:01, 987246.29it/s] 35%|██████████▏                  | 703155/2000000 [00:00<00:01, 914117.95it/s] 41%|███████████▉                 | 820244/2000000 [00:00<00:01, 978491.54it/s] 46%|█████████████▎               | 918046/2000000 [00:00<00:01, 978349.61it/s] 51%|██████████████▎             | 1022260/2000000 [00:01<00:00, 996649.64it/s] 56%|███████████████▋            | 1121003/2000000 [00:01<00:00, 887664.34it/s] 61%|█████████████████▏          | 122

In [23]:
# without tqdm: fresh ReplayBuffer3, insertion and sampling timed twice with @timev
memory = ReplayBuffer3(replay_buffer_size)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)
test_insertion_timev(memory, nb_samples, t)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.014940 seconds
elapsed time (ns): 14940080
Sampling of 10000 batches:
  0.048817 seconds (40.00 k allocations: 9.918 MiB)
elapsed time (ns): 48817220
bytes allocated:   10400000
pool allocs:       40000
Insertion of 2000000 samples:
  0.010601 seconds
elapsed time (ns): 10600678
Sampling of 10000 batches:
  0.053972 seconds (40.00 k allocations: 9.918 MiB, 34.19% gc time)
elapsed time (ns): 53971604
gc time (ns):      18454133
bytes allocated:   10400000
pool allocs:       40000
GC pauses:         1


In [24]:
# with untyped transitions: same sequence as the typed ReplayBuffer3 run,
# but inserting the BadTransition instance `tbad`
memory = ReplayBuffer3(replay_buffer_size)
test_insertion_tqdm(memory, nb_samples, tbad)
test_sampling_tqdm(memory, nb_batches)
test_insertion_timev(memory, nb_samples, tbad)
test_sampling_timev(memory, nb_batches)

Insertion of 2000000 samples:
  0.007042 seconds
elapsed time (ns): 7041657
Sampling of 10000 batches:
  0.038978 seconds (40.00 k allocations: 9.918 MiB)
elapsed time (ns): 38977791
bytes allocated:   10400000
pool allocs:       40000



  0%|                                              | 0/2000000 [00:00<?, ?it/s]  4%|█                             | 71313/2000000 [00:00<00:02, 713128.98it/s]  7%|█▉                           | 133366/2000000 [00:00<00:02, 682571.49it/s] 10%|██▉                          | 200506/2000000 [00:00<00:02, 679179.44it/s] 14%|████                         | 281973/2000000 [00:00<00:02, 624734.81it/s] 17%|████▊                        | 332665/2000000 [00:00<00:02, 584011.82it/s] 20%|█████▋                       | 395511/2000000 [00:00<00:02, 596656.17it/s] 23%|██████▌                      | 454592/2000000 [00:00<00:02, 594889.52it/s] 26%|███████▌                     | 517902/2000000 [00:00<00:02, 605856.28it/s] 30%|████████▌                    | 592191/2000000 [00:00<00:02, 641346.18it/s] 33%|█████████▍                   | 654365/2000000 [00:01<00:02, 546141.84it/s] 36%|██████████▍                  | 721601/2000000 [00:01<00:02, 578733.64it/s] 39%|███████████▍                 | 78