In [72]:
include("/Users/johnbrother/Documents/Codes/julia/RNN-RF/MCTS-RF_env.jl")

calc_score (generic function with 1 method)

In [3]:

using Distributions
using StatsBase

mutable struct Node
    visit_count::Int
    prior::Float32
    value_sum::Float32
    children::Dict{Int, Node}
    action::Int
end

function init_node(prior::Float32)
    return Node(0, prior, 0.0, Dict(), 0)
end

function has_children(node::Node)
    return length(node.children) > 0
end

function st_value(node::Node)
    if node.visit_count == 0
        return 0.0
    else
        return node.value_sum / node.visit_count
    end
end

st_value (generic function with 1 method)

In [4]:
#it stores sigle game history and child visit pi
mutable struct Agent
    #counts
    history::Vector{Int}
    branch_left::Vector{Int}
    child_visit_pi::Vector{Vector{Float32}}
end

function init_agt()
    return Agent([], [-1], [])
end

#finishの判定
function is_finish(env::Env, agt::Agent)
    #max_turnに達したか、branchがなくなって最後のstateがval_numに達したか
    return env.max_turn == length(agt.history) || (length(agt.branch_left) == 0 && agt.history[end]<=env.val_num)
end

#行動可能なactionのリストを返す
function legal_action(env::Env, agt::Agent)
    if(isempty(agt.history))
        return [i for i in 1:env.act_ind]
    elseif(agt.history[end]>env.val_num && agt.history[end]<=env.val_num+env.br_num)
        return [i for i in 1:env.act_ind if(i!=agt.history[end])]
    else
        return [i for i in 1:env.act_ind]
    end
end

#historyにactionを追加し、branch_leftを更新
function apply!(agt::Agent, act::Int)
    push!(agt.history, act)
    if act <= env.val_num
        pop!(agt.branch_left)
    end
end

#child_visit_piの計算
function store_search_statistics!(root::Node, agt::Agent)
    agt.child_visit_pi = zeros(Float32, env.act_ind)
    sum_visits = sum([child.visit_count for child in root.children])
    for child in root.children
        agt.child_visit_pi[child.action] = child.visit_count/sum_visits
    end
end

function make_image(env::Env, agt::Agent, turn::Int)
    input_data = zeros(Int, env.input_dim)
    for act_ind in 1:env.act_ind
        ind = findall(x->x==act_ind, agt.history[1:turn])
        for it in ind
            input_data[(act_ind-1)*env.max_turn+it] = 1
        end
    end
    return input_data
    #append!(copy(agt.history[1:turn]), zeros(Int, env.max_turn-turn))
end

function make_target(env::Env,agt::Agent, turn::Int)
    return [agt.child_visit_pi[turn]; calc_score(agt.history, env)]
end

function add_exploration_noise!(env::Env, node::Node)
    actions = Int.(keys(node.children))
    noise = Dirichlet(env.α * ones(Float32, length(actions)))
    for it in 1:length(actions)
        node.children[actions[it]].prior = node.children[actions[it]].prior * (1-env.frac) + noise[it] * env.frac
    end
end

function ucb_score(env::Env, parent::Node, child::Node)
    pb_c = log((parent.visit_count + env.Cb + 1) / env.Cb) + env.Ci
    pb_c *= sqrt(parent.visit_count) / (child.visit_count + 1)
    prior_score = pb_c * child.prior
    value_score = child.value_sum / (child.visit_count + 1)
    return prior_score + value_score
end

function select_child(env::Env, node::Node)
    actions = Int.(keys(node.children))
    children = [node.children[a] for a in actions]
    it = findmax([ucb_score(env, node, child) for child in children])
    return actions[it], children[it]
end

function backpropagate!(search_path::Vector{Node}, value::Float32)
    for node in search_path
        node.value_sum += value
        node.visit_count += 1
    end
end

function select_action(root::Node)
    actions = Int.(keys(root.children))
    visits = [child.visit_count for child in root.children]
    return actions[argmax(visits)]
end

select_action (generic function with 1 method)

In [5]:
function evaluate!(env::Env, agt::Agent,node::Node, model)
    @show Y = model(make_image(env, agt, length(agt.history)))
    @show value = Y[end] 
    @show pol_log = Y[1:end-1]
    @show A = legal_action(env, agt)
    @show policy = softmax(pol_log[a] for a in legal_action(env, agt))
    #
    #=
    for it in 1:length(A)
        node.children[A[it]] = init_node(policy[it])
    end=#
    return value
end

evaluate! (generic function with 1 method)

In [74]:
en = init_Env(["10", "2", "100", "0.3", "0.25", "100", "4", "10.0", "0.2", "1.0", "0.7", "0.5", "100", "0.1", "0.1"])

Env(10, 2, 2, 3, 1, 6, 60, 128, 7, 100, 10, 128, 0.0001f0, 0.9f0, Step{Float32, Base.Iterators.Repeated{Int64}}(0.2f0, 0.1f0, Base.Iterators.Repeated{Int64}(20)), 100, 0.3f0, 0.25f0, 100, 4, 10.0f0, 0.2f0, 1.0f0, 0.7f0, 0.5f0, ComplexF32[-2.0f0 + 0.0f0im 0.0f0 + 0.0f0im 0.0f0 + 0.0f0im -0.7f0 + 0.0f0im; 0.0f0 - 0.0f0im 1.0f0 + 0.0f0im -0.7f0 + 0.0f0im 0.0f0 + 0.0f0im; 0.0f0 - 0.0f0im -0.7f0 - 0.0f0im 1.0f0 + 0.0f0im 0.0f0 + 0.0f0im; -0.7f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 + 0.0f0im], ComplexF32[0.0f0 + 0.0f0im -0.2f0 + 0.0f0im -0.2f0 + 0.0f0im 0.0f0 + 0.0f0im; -0.2f0 - 0.0f0im 0.0f0 + 0.0f0im 0.0f0 + 0.0f0im -0.2f0 + 0.0f0im; -0.2f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 + 0.0f0im -0.2f0 + 0.0f0im; 0.0f0 - 0.0f0im -0.2f0 - 0.0f0im -0.2f0 - 0.0f0im 0.0f0 + 0.0f0im], 0.0062831854f0, 100, 0.1f0, 0.1f0)

In [7]:
ag = init_agt()

Agent(Int64[], [-1], Vector{Float32}[])

In [8]:
root0 = init_node(Float32(0.0))

Node(0, 0.0f0, 0.0f0, Dict{Int64, Node}(), 0)

In [9]:
model = Chain(Dense(zeros(Float32, en.output,en.input_dim)))
#Chain(zeros(Float32, en.input_dim, en.output))

Chain(
  Dense(60 => 7),                       [90m# 427 parameters[39m[36m  (all zero)[39m
) 

In [16]:
function evaluate!(env::Env, agt::Agent,node::Node, model)
    Y = model(make_image(env, agt, length(agt.history)))
    value = Y[end] 
    pol_log = Y[1:end-1]
    A = legal_action(env, agt)
    policy = softmax([pol_log[a] for a in A])
    
    for it in 1:length(A)
        node.children[A[it]] = init_node(policy[it])
    end
    return value
end

evaluate! (generic function with 1 method)

In [17]:

value = evaluate!(en, ag, root0, model)

Y = model(make_image(env, agt, length(agt.history))) = Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
value = Y[end] = 0.0f0
pol_log = Y[1:end - 1] = Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
A = legal_action(env, agt) = [1, 2, 3, 4, 5, 6]
policy = softmax([pol_log[a] for a = legal_action(env, agt)]) = Float32[0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667]


0.0f0

In [21]:
function add_exploration_noise!(env::Env, node::Node)
    actions = Int.(keys(node.children))
    noise = rand(Dirichlet(env.α * ones(Float32, length(actions))))
    for it in 1:length(actions)
        node.children[actions[it]].prior = node.children[actions[it]].prior * (1-env.frac) + noise[it] * env.frac
    end
end

add_exploration_noise! (generic function with 1 method)

In [22]:
add_exploration_noise!(en, root0)

In [42]:
function ucb_score(env::Env, parent::Node, child::Node)
    pb_c = log((parent.visit_count + env.Cb + 1) / env.Cb) + env.Ci
    pb_c *= sqrt(parent.visit_count) / (child.visit_count + 1)
    prior_score = pb_c * child.prior
    value_score = child.value_sum / (child.visit_count + 1)
    return prior_score + value_score
end

function select_child(env::Env, node::Node)
    actions = Int.(keys(node.children))
    children = [node.children[a] for a in actions]
    score_v = [ucb_score(env, node, child) for child in children]
    it = rand(findall(x -> x==maximum(score_v), score_v))
    return actions[it], children[it]
end

select_child (generic function with 1 method)

In [43]:
action_test, nodes_test = select_child(en, root0)

actions = Int.(keys(node.children)) = [5, 4, 6, 2, 3, 1]
children = [node.children[a] for a = actions] = Node[Node(0, 0.12507942f0, 0.0f0, Dict{Int64, Node}(), 0), Node(0, 0.21653697f0, 0.0f0, Dict{Int64, Node}(), 0), Node(0, 0.2223872f0, 0.0f0, Dict{Int64, Node}(), 0), Node(0, 0.12534882f0, 0.0f0, Dict{Int64, Node}(), 0), Node(0, 0.18564755f0, 0.0f0, Dict{Int64, Node}(), 0), Node(0, 0.12500003f0, 0.0f0, Dict{Int64, Node}(), 0)]
score_v = [ucb_score(env, node, child) for child = children] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
it = rand(findall((x->begin
                    #= /Users/johnbrother/Documents/Codes/julia/RNN-RF/test_batch.ipynb:13 =#
                    x == maximum(score_v)
                end), score_v)) = 1


(5, Node(0, 0.12507942f0, 0.0f0, Dict{Int64, Node}(), 0))

In [49]:
using ParameterSchedulers

In [76]:
model = Chain(Dense(en.input_dim=>en.middle_dim, relu), BatchNorm(en.middle_dim), Dense(en.middle_dim=>en.middle_dim, relu), BatchNorm(en.middle_dim), Dense(en.middle_dim=>en.output))

Chain(
  Dense(60 => 128, relu),               [90m# 7_808 parameters[39m
  BatchNorm(128),                       [90m# 256 parameters[39m[90m, plus 256[39m
  Dense(128 => 128, relu),              [90m# 16_512 parameters[39m
  BatchNorm(128),                       [90m# 256 parameters[39m[90m, plus 256[39m
  Dense(128 => 7),                      [90m# 903 parameters[39m
) [90m        # Total: 10 trainable arrays, [39m25_735 parameters,
[90m          # plus 4 non-trainable, 512 parameters, summarysize [39m103.355 KiB.

In [77]:
opt = ParameterSchedulers.Scheduler(en.scheduler, Momentum())

Scheduler(Step{Float32, Base.Iterators.Repeated{Int64}}(0.2f0, 0.1f0, Base.Iterators.Repeated{Int64}(20)), Momentum(0.01, 0.9, IdDict{Any, Any}()))

In [78]:
input_data = rand(Int, en.input_dim, 10)
target_data = rand(Float32, en.output, 10)

7×10 Matrix{Float32}:
 0.580108  0.861642   0.168259  0.851203  …  0.475472  0.117645  0.147819
 0.729251  0.0405666  0.394469  0.685885     0.966118  0.178081  0.80836
 0.761326  0.0225155  0.368836  0.534373     0.518768  0.892446  0.530313
 0.366759  0.109585   0.554901  0.457887     0.538702  0.278796  0.46016
 0.941067  0.110541   0.834772  0.676243     0.930275  0.847453  0.420174
 0.446698  0.269922   0.200423  0.572285  …  0.584625  0.86627   0.538078
 0.95214   0.907361   0.201611  0.631629     0.498009  0.496413  0.679903

In [98]:
function loss(x,y)
    y1 = model(x)
    return sum([((y1[end,i]-y[end,i])^2 - (y1[1:end-1, i])' * log.(y[1:end-1, i])) for i in 1:10])/10 + (1f-4)*sum(Flux.params(model)[1].^2)
end

loss (generic function with 1 method)

In [90]:
params_squared = reshape(Flux.params(model), (1, length(Flux.params(model)))) .^ 2

MethodError: MethodError: no method matching reshape(::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}}, ::Tuple{Int64, Int64})

Closest candidates are:
  reshape(!Matched::StructArrays.StructArray{T}, ::Tuple{Vararg{Int64, N}} where N) where T
   @ StructArrays ~/.julia/packages/StructArrays/dNQpc/src/structarray.jl:476
  reshape(!Matched::StructArrays.StructArray{T}, ::Tuple{Vararg{Union{Colon, Int64}}}) where T
   @ StructArrays ~/.julia/packages/StructArrays/dNQpc/src/structarray.jl:476
  reshape(!Matched::StructArrays.StructArray{T}, ::Tuple{Vararg{Union{Colon, Integer}}}) where T
   @ StructArrays ~/.julia/packages/StructArrays/dNQpc/src/structarray.jl:476
  ...


In [97]:
sum(Flux.params(model)[1].^2)

127.82771f0

In [99]:
Flux.train!(loss, Flux.params(model), [(input_data, target_data)], opt)

In [40]:
x = [1]
rand(x)

1

In [41]:
rand(x, 20)

20-element Vector{Int64}:
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1

In [4]:
x = [(i, rand(1:8)) for i in 1:8]

8-element Vector{Tuple{Int64, Int64}}:
 (1, 4)
 (2, 2)
 (3, 3)
 (4, 7)
 (5, 6)
 (6, 7)
 (7, 6)
 (8, 2)

In [5]:
[ i*j for (i,j) in x]

8-element Vector{Int64}:
  4
  4
  9
 28
 30
 42
 42
 16

In [8]:
a = [1, 3, 5, 7, 9, 3, 5, 7]
findall(x->x==3, a)

2-element Vector{Int64}:
 2
 6

In [58]:
using Flux

In [19]:
test_batch = [ (rand(Float32,2), rand(Float32,1)) for i in 1:3]

3-element Vector{Tuple{Vector{Float32}, Vector{Float32}}}:
 ([0.523947, 0.3047166], [0.21535373])
 ([0.13237017, 0.8566941], [0.06494528])
 ([0.9243327, 0.81498635], [0.3492868])

In [59]:
test_imag = rand(Float32,2, 100)
test_target = rand(Float32,1, 100)

1×100 Matrix{Float32}:
 0.320748  0.94637  0.392362  0.88544  …  0.296158  0.5328  0.00955468

In [60]:
model = Chain(Dense(2,3, tanh), BatchNorm(3), Dense(3,1))

Chain(
  Dense(2 => 3, tanh),                  [90m# 9 parameters[39m
  BatchNorm(3),                         [90m# 6 parameters[39m[90m, plus 6[39m
  Dense(3 => 1),                        [90m# 4 parameters[39m
) [90m        # Total: 6 trainable arrays, [39m19 parameters,
[90m          # plus 2 non-trainable, 6 parameters, summarysize [39m588 bytes.

In [61]:
opt = ADAM()

Adam(0.001, (0.9, 0.999), 1.0e-8, IdDict{Any, Any}())

In [35]:
model(rand(Int, 2,100))

1×100 Matrix{Float32}:
 0.668815  1.67469  0.668815  -0.668815  …  0.668815  -0.668815  0.668815

In [62]:
loss(x, y) =  sum([(y[i] .- model(x)[i]).^2 for i=1:length(y) ])/length(y)

loss (generic function with 2 methods)

In [64]:
Flux.train!(loss, Flux.params(model), [(test_imag, test_target)], opt)

In [1]:
using SymPy

In [2]:
x = symbols("x")

x

In [18]:
ex1 = sin(x)

sin(x)

In [20]:
M1 = [1.0 0.0; 0.0 1.0]
M2 = [0.0 1.0; 1.0 0.0]

2×2 Matrix{Float64}:
 0.0  1.0
 1.0  0.0

In [21]:
ex2 = M1 + M2*ex1

2×2 Matrix{Sym}:
 1.00000000000000        1.0*sin(x)
       1.0*sin(x)  1.00000000000000

In [24]:
ex3 = ex2.integrate((x, 0.0, x))

2×2 Matrix{Sym}:
            1.0*x  1.0 - cos(x)
 1.0 - cos(x)             1.0*x

In [8]:
t = collect(1:10)

10-element Vector{Int64}:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10

In [17]:
@time A = [N(ex1(i)) for i in t]

  0.039646 seconds (37.73 k allocations: 2.323 MiB, 74.94% compilation time)


10-element Vector{Int64}:
   4
   9
  16
  25
  36
  49
  64
  81
 100
 121

In [15]:
typeof(A)

Vector{Sym}[90m (alias for [39m[90mArray{Sym, 1}[39m[90m)[39m

In [25]:
using LinearAlgebra
using Flux
using ParameterSchedulers
using SymPy

#=
struct Env
    max_turn::Int
    num_player::Int
    val_num::Int
    br_num::Int
    fn_num::Int
    act_ind::Int
    input_dim::Int
    middle_dim::Int
    output::Int

    #training parameter
    training_step::Int
    checkpoint_interval::Int
    batch_size::Int
    η::Float32
    momentum::Float32
    scheduler

    num_simulation::Int
    α::Float32
    frac::Float32

    t_step::Int
    HS_size::Int
    Ω::Float32
    ξ::Float32
    Jz::Float32
    Jx::Float32
    hz::Float32
    H_0::Hermitian{ComplexF32, Matrix{ComplexF32}}
    V_t::Hermitian{ComplexF32, Matrix{ComplexF32}}
    dt::Float32

    Cb::Int
    Ci::Float32
    C::Float32 #L2 norm weight
end=#

x = symbols("x")
sx = sin(x)

sin(x)

In [41]:
using ParameterSchedulers
#max_turn, num_player, num_simulation, α, frac, t_step, HS_size, Ω, ξ, Jz, Jx, hz, Cb, Ci, C
function init_Env(args::Vector{String})
    max_turn = parse(Int, args[1])
    num_player = parse(Int, args[2])
    val_num::Int = 2
    br_num::Int = 3
    fn_num::Int = 2
    act_ind = val_num+br_num+fn_num
    input_dim = act_ind*max_turn
    middle_dim = 128
    output =  act_ind + 1

    #training parameter
    training_step = 100000
    checkpoint_interval = 1000
    batch_size = 1024
    η = 1f-4
    momentum = 0.9
    scheduler = Step(λ = 2f-1, γ = Float32(0.1), step_sizes = 20000)


    num_simulation = parse(Int, args[3])
    α = parse(Float32, args[4])
    frac = parse(Float32, args[5])

    t_step = parse(Int, args[6])
    HS_size = parse(Int, args[7])
    Ω = parse(Float32, args[8])
    ξ = parse(Float32, args[9])
    Jz = parse(Float32, args[10])
    Jx = parse(Float32, args[11])
    hz = parse(Float32, args[12])
    H_0 = Hermitian([ -Jz-2hz 0 0 -Jx; 0 Jz -Jx 0; 0 -Jx Jz 0; -Jx 0 0 -Jz+2hz])
    V_t = Hermitian([ 0 -ξ -ξ 0; -ξ 0 0 -ξ; -ξ 0 0 -ξ; 0 -ξ -ξ 0])
    dt = 2pi/t_step/Ω

    Cb = parse(Int, args[13])
    Ci = parse(Float32, args[14])
    C = parse(Float32, args[15])

    return Env(max_turn, num_player, val_num, br_num, fn_num, act_ind, input_dim, middle_dim, output, training_step, checkpoint_interval, batch_size, η, momentum, scheduler, num_simulation, α, frac, t_step, HS_size, Ω, ξ, Jz, Jx, hz, H_0, V_t, dt, Cb, Ci, C)
end

init_Env (generic function with 1 method)

In [42]:
en = init_Env(["10", "2", "100", "0.3", "0.25", "100", "4", "10.0", "0.2", "1.0", "0.7", "0.5", "100", "0.1", "0.1"])

Env(10, 2, 2, 3, 2, 7, 70, 128, 8, 100000, 1000, 1024, 0.0001f0, 0.9f0, Step{Float32, Base.Iterators.Repeated{Int64}}(0.2f0, 0.1f0, Base.Iterators.Repeated{Int64}(20000)), 100, 0.3f0, 0.25f0, 100, 4, 10.0f0, 0.2f0, 1.0f0, 0.7f0, 0.5f0, ComplexF32[-2.0f0 + 0.0f0im 0.0f0 + 0.0f0im 0.0f0 + 0.0f0im -0.7f0 + 0.0f0im; 0.0f0 - 0.0f0im 1.0f0 + 0.0f0im -0.7f0 + 0.0f0im 0.0f0 + 0.0f0im; 0.0f0 - 0.0f0im -0.7f0 - 0.0f0im 1.0f0 + 0.0f0im 0.0f0 + 0.0f0im; -0.7f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 + 0.0f0im], ComplexF32[0.0f0 + 0.0f0im -0.2f0 + 0.0f0im -0.2f0 + 0.0f0im 0.0f0 + 0.0f0im; -0.2f0 - 0.0f0im 0.0f0 + 0.0f0im 0.0f0 + 0.0f0im -0.2f0 + 0.0f0im; -0.2f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 + 0.0f0im -0.2f0 + 0.0f0im; 0.0f0 - 0.0f0im -0.2f0 - 0.0f0im -0.2f0 - 0.0f0im 0.0f0 + 0.0f0im], 0.0062831854f0, 100, 0.1f0, 0.1f0)

In [105]:
function calc_Kt(history::Vector{Int}, env::Env)
    MV = []
    his = copy(history)
    #println(length(his))
    for it in 1:length(his)
        sw = pop!(his)
        if(sw==1)
            push!(MV, env.H_0)
        elseif(sw==2)
            push!(MV, env.V_t*sin(x))
        elseif(sw==3)
            A = pop!(MV)
            B = pop!(MV)
            C = A + B
            push!(MV, C)
        elseif(sw==4)
            A = pop!(MV)
            B = pop!(MV)
            C = -1.0im*(A*B - B*A)
            push!(MV, C)
        elseif(sw==5)
            A = pop!(MV)
            B = pop!(MV)
            C = (A*B + B*A)/2
            push!(MV, C)
        elseif(sw==6)
            A = pop!(MV)
            B = A.integrate((x, 0, x))
            push!(MV, B)
        end
        #@show MV
    end
    t = collect(0:env.dt:2pi)

    Ks = MV[end]
    #println(Ks)
    Kt::Vector{Hermitian{ComplexF32, Matrix{ComplexF32}}} = [Hermitian(N(Ks.subs(x,t[i]))) for i in 1:env.t_step]
    #Kt = [Hermitian(N(Ks.subs(x,t[i]))) for i in 1:env.t_step]
    return Kt
end

calc_Kt (generic function with 1 method)

In [106]:
Kt = calc_Kt([3, 6, 2, 1], en)

100-element Vector{Hermitian{ComplexF32, Matrix{ComplexF32}}}:
 [-2.0f0 + 0.0f0im 0.0f0 + 0.0f0im 0.0f0 + 0.0f0im -0.7f0 + 0.0f0im; 0.0f0 - 0.0f0im 1.0f0 + 0.0f0im -0.7f0 + 0.0f0im 0.0f0 + 0.0f0im; 0.0f0 - 0.0f0im -0.7f0 - 0.0f0im 1.0f0 + 0.0f0im 0.0f0 + 0.0f0im; -0.7f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 + 0.0f0im]
 [-2.0f0 + 0.0f0im -3.947829f-6 + 0.0f0im -3.947829f-6 + 0.0f0im -0.7f0 + 0.0f0im; -3.947829f-6 - 0.0f0im 1.0f0 + 0.0f0im -0.7f0 + 0.0f0im -3.947829f-6 + 0.0f0im; -3.947829f-6 - 0.0f0im -0.7f0 - 0.0f0im 1.0f0 + 0.0f0im -3.947829f-6 + 0.0f0im; -0.7f0 - 0.0f0im -3.947829f-6 - 0.0f0im -3.947829f-6 - 0.0f0im 0.0f0 + 0.0f0im]
 [-2.0f0 + 0.0f0im -1.579116f-5 + 0.0f0im -1.579116f-5 + 0.0f0im -0.7f0 + 0.0f0im; -1.579116f-5 - 0.0f0im 1.0f0 + 0.0f0im -0.7f0 + 0.0f0im -1.579116f-5 + 0.0f0im; -1.579116f-5 - 0.0f0im -0.7f0 - 0.0f0im 1.0f0 + 0.0f0im -1.579116f-5 + 0.0f0im; -0.7f0 - 0.0f0im -1.579116f-5 - 0.0f0im -1.579116f-5 - 0.0f0im 0.0f0 + 0.0f0im]
 [-2.0f0 + 0.0f0im -3.55

In [66]:
test_his = [3, 6, 2, 1]
MV0 = []

Any[]

In [67]:
#A = pop!(test_his)
push!(MV0, en.H_0)
MV0[1]

4×4 Hermitian{ComplexF32, Matrix{ComplexF32}}:
 -2.0+0.0im   0.0+0.0im   0.0+0.0im  -0.7+0.0im
  0.0-0.0im   1.0+0.0im  -0.7+0.0im   0.0+0.0im
  0.0-0.0im  -0.7-0.0im   1.0+0.0im   0.0+0.0im
 -0.7-0.0im   0.0-0.0im   0.0-0.0im   0.0+0.0im

In [71]:
typeof(en.H_0+en.V_t*sx)

Matrix{Sym}[90m (alias for [39m[90mArray{Sym, 2}[39m[90m)[39m

In [68]:
push!(MV0, en.V_t*sx)
@show MV0

MV0 = Any[ComplexF32[-2.0f0 + 0.0f0im 0.0f0 + 0.0f0im 0.0f0 + 0.0f0im -0.7f0 + 0.0f0im; 0.0f0 - 0.0f0im 1.0f0 + 0.0f0im -0.7f0 + 0.0f0im 0.0f0 + 0.0f0im; 0.0f0 - 0.0f0im -0.7f0 - 0.0f0im 1.0f0 + 0.0f0im 0.0f0 + 0.0f0im; -0.7f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 + 0.0f0im], Sym[0 -0.200000002980232*sin(x) -0.200000002980232*sin(x) 0; -0.200000002980232*sin(x) 0 0 -0.200000002980232*sin(x); -0.200000002980232*sin(x) 0 0 -0.200000002980232*sin(x); 0 -0.200000002980232*sin(x) -0.200000002980232*sin(x) 0]]


2-element Vector{Any}:
 ComplexF32[-2.0f0 + 0.0f0im 0.0f0 + 0.0f0im 0.0f0 + 0.0f0im -0.7f0 + 0.0f0im; 0.0f0 - 0.0f0im 1.0f0 + 0.0f0im -0.7f0 + 0.0f0im 0.0f0 + 0.0f0im; 0.0f0 - 0.0f0im -0.7f0 - 0.0f0im 1.0f0 + 0.0f0im 0.0f0 + 0.0f0im; -0.7f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 - 0.0f0im 0.0f0 + 0.0f0im]
 Sym[0 -0.200000002980232*sin(x) -0.200000002980232*sin(x) 0; -0.200000002980232*sin(x) 0 0 -0.200000002980232*sin(x); -0.200000002980232*sin(x) 0 0 -0.200000002980232*sin(x); 0 -0.200000002980232*sin(x) -0.200000002980232*sin(x) 0]

In [64]:
using SymPy

In [69]:
A = pop!(MV0)
B = A.integrate((x, 0, x))
push!(MV0, B)
@show MV0

PyCall.PyError: PyError ($(Expr(:escape, :(ccall(#= /Users/johnbrother/.julia/packages/PyCall/twYvK/src/pyfncall.jl:43 =# @pysym(:PyObject_Call), PyPtr, (PyPtr, PyPtr, PyPtr), o, pyargsptr, kw))))) <class 'ValueError'>
ValueError('Invalid limits given: ((array([1], dtype=int64), 0, array([1], dtype=int64)),)')
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/matrices/matrices.py", line 501, in integrate
    return self.applyfunc(lambda x: x.integrate(*args, **kwargs))
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/matrices/common.py", line 1989, in applyfunc
    return self._eval_applyfunc(f)
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/matrices/common.py", line 1931, in _eval_applyfunc
    out = self._new(self.rows, self.cols, [f(x) for x in self])
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/matrices/common.py", line 1931, in <listcomp>
    out = self._new(self.rows, self.cols, [f(x) for x in self])
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/matrices/matrices.py", line 501, in <lambda>
    return self.applyfunc(lambda x: x.integrate(*args, **kwargs))
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/core/expr.py", line 3726, in integrate
    return integrate(self, *args, **kwargs)
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/integrals/integrals.py", line 1564, in integrate
    integral = Integral(*args, **kwargs)
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/integrals/integrals.py", line 96, in __new__
    obj = AddWithLimits.__new__(cls, function, *symbols, **assumptions)
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/concrete/expr_with_limits.py", line 547, in __new__
    pre = _common_new(cls, function, *symbols,
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/concrete/expr_with_limits.py", line 57, in _common_new
    limits, orientation = _process_limits(*symbols, discrete=discrete)
  File "/Users/johnbrother/.julia/conda/3/lib/python3.9/site-packages/sympy/concrete/expr_with_limits.py", line 194, in _process_limits
    raise ValueError('Invalid limits given: %s' % str(symbols))


In [78]:
A = pop!(MV0)
B = pop!(MV0)
C = A+B
push!(MV0, C)
@show MV0

MV0 = Any[Sym[-2.00000000000000 0.200000002980232*cos(x) - 0.200000002980232 0.200000002980232*cos(x) - 0.200000002980232 -0.699999988079071; 0.200000002980232*cos(x) - 0.200000002980232 1.00000000000000 -0.699999988079071 0.200000002980232*cos(x) - 0.200000002980232; 0.200000002980232*cos(x) - 0.200000002980232 -0.699999988079071 1.00000000000000 0.200000002980232*cos(x) - 0.200000002980232; -0.699999988079071 0.200000002980232*cos(x) - 0.200000002980232 0.200000002980232*cos(x) - 0.200000002980232 0]]


1-element Vector{Any}:
 Sym[-2.00000000000000 0.200000002980232*cos(x) - 0.200000002980232 0.200000002980232*cos(x) - 0.200000002980232 -0.699999988079071; 0.200000002980232*cos(x) - 0.200000002980232 1.00000000000000 -0.699999988079071 0.200000002980232*cos(x) - 0.200000002980232; 0.200000002980232*cos(x) - 0.200000002980232 -0.699999988079071 1.00000000000000 0.200000002980232*cos(x) - 0.200000002980232; -0.699999988079071 0.200000002980232*cos(x) - 0.200000002980232 0.200000002980232*cos(x) - 0.200000002980232 0]

In [59]:
M = MV0[end]

UndefVarError: UndefVarError: `MV0` not defined

In [80]:
t = collect(0:en.dt:2pi)

1000-element Vector{Float64}:
 0.0
 0.0062831854447722435
 0.012566370889544487
 0.01884955633431673
 0.025132741779088974
 0.03141592722386122
 0.03769911266863346
 0.043982298113405704
 0.05026548355817795
 0.05654866900295019
 ⋮
 6.226636775769293
 6.2329199612140656
 6.239203146658838
 6.24548633210361
 6.251769517548382
 6.2580527029931545
 6.264335888437927
 6.270619073882699
 6.276902259327471

In [89]:
[Hermitian(N(M.subs(x, t[i]))) for i in 1:en.t_step]

100-element Vector{Hermitian{Real, Matrix{Real}}}:
 [-2.0 0 0 -0.699999988079071; 0 1.0 -0.699999988079071 0; 0 -0.699999988079071 1.0 0; -0.699999988079071 0 0 0]
 [-2.0 -3.947829004313785e-6 -3.947829004313785e-6 -0.699999988079071; -3.947829004313785e-6 1.0 -0.699999988079071 -3.947829004313785e-6; -3.947829004313785e-6 -0.699999988079071 1.0 -3.947829004313785e-6; -0.699999988079071 -3.947829004313785e-6 -3.947829004313785e-6 0]
 [-2.0 -1.57911601636751e-5 -1.57911601636751e-5 -0.699999988079071; -1.57911601636751e-5 1.0 -0.699999988079071 -1.57911601636751e-5; -1.57911601636751e-5 -0.699999988079071 1.0 -1.57911601636751e-5; -0.699999988079071 -1.57911601636751e-5 -1.57911601636751e-5 0]
 [-2.0 -3.552952592367209e-5 -3.552952592367209e-5 -0.699999988079071; -3.552952592367209e-5 1.0 -0.699999988079071 -3.552952592367209e-5; -3.552952592367209e-5 -0.699999988079071 1.0 -3.552952592367209e-5; -0.699999988079071 -3.552952592367209e-5 -3.552952592367209e-5 0]
 [-2.0 -6.316214704737977

In [52]:
A = [1, 2, 3, 4 ,5]

5-element Vector{Int64}:
 1
 2
 3
 4
 5

In [58]:
[ (try a = A[i] catch a=8 end) for i in 1:10]

ErrorException: syntax: missing last argument in "A[i]:" range expression 

In [30]:
using Flux

In [31]:
model = Chain(Dense(3, 8, relu), Dense(8, 8, relu), Flux.Parallel(vcat, Chain(Dense(8, 4, relu), Dense(4,4), softmax), Dense(8, 1, tanh)))

Chain(
  Dense(3 => 8, relu),                  [90m# 32 parameters[39m
  Dense(8 => 8, relu),                  [90m# 72 parameters[39m
  Parallel(
    vcat,
    Chain(
      Dense(8 => 4, relu),              [90m# 36 parameters[39m
      Dense(4 => 4),                    [90m# 20 parameters[39m
      NNlib.softmax,
    ),
    Dense(8 => 1, tanh),                [90m# 9 parameters[39m
  ),
) [90m                  # Total: 10 arrays, [39m169 parameters, 1.441 KiB.

In [33]:
input = rand(Float32, 3)

3-element Vector{Float32}:
 0.26861
 0.9730785
 0.04579234

In [34]:
out = model(input)

5-element Vector{Float32}:
 0.27015024
 0.24141714
 0.2559261
 0.2325065
 0.16742308

In [35]:
using Distributed

In [36]:
addprocs(4)

4-element Vector{Int64}:
 2
 3
 4
 5

In [44]:
using SharedArrays

In [61]:
A = []

Any[]

In [68]:
@everywhere include("MCTS-RF_env.jl")

In [76]:
@everywhere include("MCTS-RF_agt.jl")

In [73]:
A = SharedVector([agt = init_agt() for i in 1:4])

4-element SharedVector{typeof(init_agt)}:
 init_agt (generic function with 1 method)
 init_agt (generic function with 1 method)
 init_agt (generic function with 1 method)
 init_agt (generic function with 1 method)

In [74]:
@everywhere using Flux

In [77]:
@distributed for i in 1:4
    agt = A[i]
    model = Dense(zeros(Float32, env.output,env.input_dim))
    play_physics!(en, agt, model)
end

Task (runnable) @0x00007f0924a2f080

In [78]:
A[1]

init_agt (generic function with 1 method)

In [1]:
using CUDA
using Flux

In [2]:
Flux.GPU_BACKEND

"CUDA"

In [4]:
model_cpu = Chain(Dense(120, 128), BatchNorm(128), Dense(128, 128, relu), BatchNorm(128), Dense(128, 128, relu), Parallel(vcat, Chain(Dense(128,6, tanh), Dense(6,6)), Chain(Dense(128, 1, tanh), Dense(1,1))))

Chain(
  Dense(120 => 128),                    [90m# 15_488 parameters[39m
  BatchNorm(128),                       [90m# 256 parameters[39m[90m, plus 256[39m
  Dense(128 => 128, relu),              [90m# 16_512 parameters[39m
  BatchNorm(128),                       [90m# 256 parameters[39m[90m, plus 256[39m
  Dense(128 => 128, relu),              [90m# 16_512 parameters[39m
  Parallel(
    vcat,
    Chain(
      Dense(128 => 6, tanh),            [90m# 774 parameters[39m
      Dense(6 => 6),                    [90m# 42 parameters[39m
    ),
    Chain(
      Dense(128 => 1, tanh),            [90m# 129 parameters[39m
      Dense(1 => 1),                    [90m# 2 parameters[39m
    ),
  ),
) [90m        # Total: 18 trainable arrays, [39m49_971 parameters,
[90m          # plus 4 non-trainable, 512 parameters, summarysize [39m198.777 KiB.

In [5]:
model_gpu = gpu(model_cpu)

Chain(
  Dense(120 => 128),                    [90m# 15_488 parameters[39m
  BatchNorm(128),                       [90m# 256 parameters[39m[90m, plus 256[39m
  Dense(128 => 128, relu),              [90m# 16_512 parameters[39m
  BatchNorm(128),                       [90m# 256 parameters[39m[90m, plus 256[39m
  Dense(128 => 128, relu),              [90m# 16_512 parameters[39m
  Parallel(
    vcat,
    Chain(
      Dense(128 => 6, tanh),            [90m# 774 parameters[39m
      Dense(6 => 6),                    [90m# 42 parameters[39m
    ),
    Chain(
      Dense(128 => 1, tanh),            [90m# 129 parameters[39m
      Dense(1 => 1),                    [90m# 2 parameters[39m
    ),
  ),
) [90m        # Total: 18 trainable arrays, [39m49_971 parameters,
[90m          # plus 4 non-trainable, 512 parameters, summarysize [39m3.195 KiB.

In [6]:
x_c = rand(Int32, 120, 1024)
y_c = rand(Float32, 7, 1024)

7×1024 Matrix{Float32}:
 0.944783   0.546551  0.306746   0.995058    …  0.175194  0.430924  0.223316
 0.826923   0.59864   0.0632063  0.738193       0.234876  0.151102  0.158971
 0.161442   0.1614    0.354368   0.279563       0.908646  0.386788  0.523774
 0.326405   0.554026  0.141668   0.359125       0.277155  0.127923  0.263541
 0.0600685  0.307791  0.367611   0.548013       0.599808  0.471523  0.598647
 0.18569    0.487638  0.765314   0.426737    …  0.55001   0.359032  0.708868
 0.501412   0.792106  0.983987   0.00653476     0.17026   0.705653  0.685402

In [7]:
x_g = gpu(x_c)
y_g = gpu(y_c)

7×1024 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.944783   0.546551  0.306746   0.995058    …  0.175194  0.430924  0.223316
 0.826923   0.59864   0.0632063  0.738193       0.234876  0.151102  0.158971
 0.161442   0.1614    0.354368   0.279563       0.908646  0.386788  0.523774
 0.326405   0.554026  0.141668   0.359125       0.277155  0.127923  0.263541
 0.0600685  0.307791  0.367611   0.548013       0.599808  0.471523  0.598647
 0.18569    0.487638  0.765314   0.426737    …  0.55001   0.359032  0.708868
 0.501412   0.792106  0.983987   0.00653476     0.17026   0.705653  0.685402

In [27]:
function loss_g(x, y, z)
    y1 = model_gpu(x)
    return sum([((y1[end,i]-y[end,i])^2 - target[1:end-1,i]' * log.(softmax(y1[1:end-1,i]))) for i in 1:z])/z + sum(Flux.params(model)[1].^2)/z
end

loss_g (generic function with 2 methods)

In [8]:
loss_g(x, y) = sum((y - model_gpu(x)).^2)/1024

loss_g (generic function with 1 method)

In [9]:
opt = ADAM()

Adam(0.001, (0.9, 0.999), 1.0e-8, IdDict{Any, Any}())

In [14]:
@time Flux.train!(loss_g, Flux.params(model_gpu), [(x_g, y_g)], opt)

  0.004044 seconds (4.83 k allocations: 347.859 KiB)


In [15]:
loss_c(x, y) = sum((y - model_cpu(x)).^2)/1024

loss_c (generic function with 1 method)

In [22]:
@time Flux.train!(loss_c, Flux.params(model_cpu), [(x_c, y_c)], opt)

  0.024550 seconds (1.39 k allocations: 16.310 MiB)


In [28]:
@time Flux.train!(loss_g, Flux.params(model_gpu), [(gpu(x_c), gpu(y_c), 1024)], opt)

ErrorException: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore are only permitted from the REPL for prototyping purposes.
If you did intend to index this array, annotate the caller with @allowscalar.

In [1]:
maximum([1,3])

3

In [2]:
dic_test = Dict()

Dict{Any, Any}()

In [3]:
dic_test[1] = 5
dic_test[3] = 8

8

In [4]:
maximum(keys(dic_test))

3

In [5]:
dic_test[maximum(keys(dic_test))]

8