In [20]:
include("MCTS-RF_env.jl")
include("MCTS-RF_agt.jl")

# Replay buffer that stocks the record of each individual game.
mutable struct ReplayBuffer
    # Stored games, oldest first: save_game! pops from the front and pushes to the back.
    buffer::Vector{Agent}
    # Size threshold that save_game! compares the current length against before evicting.
    buffer_size::Int
    # Number of games drawn per call by sample_batch.
    batch_size::Int
    #count::Int
end

"""
    init_buffer(buffer_size::Int, batch_size::Int)

Create an empty `ReplayBuffer` with the given size threshold and sampling batch size.
"""
function init_buffer(buffer_size::Int, batch_size::Int)
    # A typed empty vector matches the `Vector{Agent}` field directly, instead of
    # allocating a `Vector{Any}` that the constructor then has to convert.
    return ReplayBuffer(Agent[], buffer_size, batch_size)
end


"""
    save_game!(buffer::ReplayBuffer, agt::Agent)

Append the finished game `agt` to `buffer` and discard the oldest games so that at
most `buffer.buffer_size` games are retained. Returns the underlying game vector.
"""
function save_game!(buffer::ReplayBuffer, agt::Agent)
    push!(buffer.buffer, agt)
    # Trim after appending: checking `length > buffer_size` before the push let the
    # buffer settle at `buffer_size + 1` games. `max(..., 0)` keeps a non-positive
    # capacity from popping an already empty vector.
    capacity = max(buffer.buffer_size, 0)
    while length(buffer.buffer) > capacity
        popfirst!(buffer.buffer)
    end
    return buffer.buffer
end
#=
@everywhere function save_game!(buffer, agt::Agent, buffer_size::Int)
    if length(buffer) > buffer_size
        popfirst!(buffer)
    end
    push!(buffer, agt)
end=#

#=function sample_batch!(env::Env, buffer::ReplayBuffer, imag::SharedArray, target::SharedArray)
    games = sample(buffer.buffer, weights([length(agt.history) for agt in buffer.buffer]), buffer.batch_size, replace=false)
    g_turn = [(g, rand(1:length(g.history))) for g in games]
    #imag = SharedArray(zeros(Int, env.input_dim, buffer.batch_size))
    #target = SharedArray(zeros(Float32, env.output, buffer.batch_size))
    @sync @distributed for it in 1:buffer.batch_size
        g, turn = g_turn[it]
        imag[:,it] = make_image(env, g, turn)
        target[:,it] = make_target(env, g, turn)
    end
    #return imag, target
    #return [(make_image(env, g, turn), make_target(env, g, turn)) for (g, turn) in g_turn]
    #return [make_image(env, g, turn) for (g, turn) in g_turn], [make_target(env, g, turn) for (g, turn) in g_turn]
end=#

# Holds the trained networks together with the fallback model used before any training.
mutable struct Storage
    # Trained models keyed by an integer; latest_model returns the one with the largest key.
    storage::Dict{Int, Chain}
    # Model returned by latest_model while `storage` is still empty.
    random_out::Chain
    # Float32 values keyed by an integer vector. Not read or written by the active code
    # in this file; presumably a score cache keyed by move history (TODO confirm).
    scores::Dict{Vector{Int}, Float32}
end

"""
    init_storage(env)

Build a fresh `Storage`: no trained models, an empty score table, and a fallback
model consisting of a single `Dense` layer whose `env.output` by `env.input_dim`
weight matrix is all zeros.
"""
function init_storage(env)
    zero_weights = zeros(Float32, env.output, env.input_dim)
    fallback = Chain(Dense(zero_weights))
    return Storage(Dict{Int, Chain}(), fallback, Dict{Vector{Int}, Float32}())
end

"""
    latest_model(storage::Storage)

Return the stored model with the largest key, or `storage.random_out` when no
model has been stored yet.
"""
function latest_model(storage::Storage)
    models = storage.storage
    isempty(models) && return storage.random_out
    return models[maximum(keys(models))]
end


#cpu並列化予定
#=function sample_batch!(env::Env, buffer::ReplayBuffer, scores::Dict{Vector{Int}, Float32})
    games = sample(buffer.buffer, weights([length(agt.history) for agt in buffer.buffer]), buffer.batch_size, replace=false)
    g_turn = [(g, rand(1:length(g.history))) for g in games]
    imag = SharedArray(zeros(Int, env.input_dim, buffer.batch_size))
    target = SharedArray(zeros(Float32, env.output, buffer.batch_size))
    @sync @distributed for it in 1:buffer.batch_size
        g, turn = g_turn[it]
        imag[:,it] = make_image(env, g, turn)
        target[:,it] = make_target(env, g, scores, turn)
    end
    tar_data = sdata(target)
    for it in 1:buffer.batch_size
        g, turn = g_turn[it]
        scores[g.history] = tar_data[end,it]
    end

    return sdata(imag), tar_data
end=#

"""
    sample_batch(env::Env, buffer::ReplayBuffer)

Draw `buffer.batch_size` distinct games from `buffer`, weighted by the length of
their history, and pick one uniformly random turn from each of them.

Returns `(imag, target)`: an `env.input_dim` by `batch_size` matrix of `Int` and an
`env.output` by `batch_size` matrix of `Float32`, whose columns are filled from
`make_image` and `make_target` respectively. Every drawn sample is also printed
to standard output.
"""
function sample_batch(env::Env, buffer::ReplayBuffer)
    n_samples = buffer.batch_size
    history_lengths = [length(agt.history) for agt in buffer.buffer]
    games = sample(buffer.buffer, weights(history_lengths), n_samples, replace=false)
    # One random turn per drawn game, chosen in the order the games were drawn.
    picks = [(game, rand(1:length(game.history))) for game in games]

    imag = zeros(Int, env.input_dim, n_samples)
    target = zeros(Float32, env.output, n_samples)
    for (it, (g, turn)) in enumerate(picks)
        imag[:, it] = make_image(env, g, turn)
        target[:, it] = make_target(env, g, turn)
        println("------------")
        println("sample $(it)")
        println("history: $(g.history),  turn:$(turn)")
        println("target; $(target[:,it])")
    end
    return imag, target
end



# CPU parallelization planned.
"""
    run_selfplay(env::Env, buffer::ReplayBuffer, storage::Storage)

Play `env.num_player` games with `play_physics!`, each using the latest model in
`storage`, print every game's history and child-visit statistics, and save each
game into `buffer`.
"""
function run_selfplay(env::Env, buffer::ReplayBuffer, storage::Storage)
    foreach(1:env.num_player) do it
        game = play_physics!(env, latest_model(storage))
        println("game$(it): $(game.history)")
        println("child_visit: $(game.child_visit_pi)")
        save_game!(buffer, game)
    end
end

# CPU parallelization planned.
"""
    run_selfplay(env::Env, buffer::ReplayBuffer, storage::Storage, ratio::Float32)

Play `env.num_player` games with `play_physics!`, each using the latest model in
`storage` and forwarding `ratio` to `play_physics!`, print every game's history
and child-visit statistics, and save each game into `buffer`.
"""
function run_selfplay(env::Env, buffer::ReplayBuffer, storage::Storage, ratio::Float32)
    foreach(1:env.num_player) do it
        game = play_physics!(env, latest_model(storage), ratio)
        println("game$(it): $(game.history)")
        println("child_visit: $(game.child_visit_pi)")
        save_game!(buffer, game)
    end
end

#gpu並列化予定
#=function loss(image::SharedMatrix{Int}, target::SharedMatrix{Float32}, env::Env, model)
    y1 = model(image)
    return sum([((y1[end,i]-target[end,i])^2 - target[1:end-1,i]' * log.(softmax(y1[1:end-1,i]).+1f-6)) for i in 1:env.batch_size])/env.batch_size + env.C * sum(Flux.params(model)[1].^2)
end=#

"""
    sqnorm(x)

Return the sum of the squared absolute values of the elements of `x`.
"""
function sqnorm(x)
    return sum(abs2, x)
end

"""
    loss(image::Matrix{Int}, target::Matrix{Float32}, env::Env, model)

Training loss of `model` on one batch.

For each of the first `env.batch_size` columns, the squared difference between the
last row of the model output and the last row of `target` is combined with the
cross-entropy between the remaining target rows and the softmax of the remaining
output rows (with `1f-6` added inside the logarithm). The per-column terms are
summed and divided by `env.batch_size`, and `env.C` times the sum of `sqnorm`
over `Flux.params(model)` is added as a penalty.
"""
function loss(image::Matrix{Int}, target::Matrix{Float32}, env::Env, model)
    prediction = model(image)

    # Loss contribution of a single column k of the batch.
    function sample_loss(k)
        value_error = (prediction[end, k] - target[end, k])^2
        policy_term = target[1:end-1, k]' * log.(softmax(prediction[1:end-1, k]) .+ 1f-6)
        return value_error - policy_term
    end

    per_sample = [sample_loss(k) for k in 1:env.batch_size]
    data_term = sum(per_sample) / env.batch_size
    penalty = env.C * sum(sqnorm, Flux.params(model))
    return data_term + penalty
end


"""
    tanh10(x)

Return `tanh(x / 10)` multiplied by the `Float32` constant 10.
"""
function tanh10(x)
    return 10f0 * tanh(x / 10)
end

"""
    tanh2(x)

Return `tanh(x)` multiplied by the `Float32` constant 2.
"""
function tanh2(x)
    return 2f0 * tanh(x)
end

# GPU parallelization planned.
"""
    train_model!(env::Env, buffer::ReplayBuffer, storage::Storage)

Build a new network, train it on batches sampled from `buffer`, store it in
`storage`, and return the loss averaged over all update steps.

The network is an input `Dense` layer, `env.depth` blocks that each add a
`BatchNorm` + `Dense` branch to a plain `Dense` branch, and two output heads
concatenated with `vcat`: `env.act_ind` outputs through `tanh` and one output
through `tanh10`. `env.batch_num` batches are drawn once with `sample_batch` and
then reused for `env.training_step` passes of ADAM updates.

The trained model is stored under the key `env.training_step`, so as long as that
value is unchanged a later call replaces the model saved by an earlier one.
"""
function train_model!(env::Env, buffer::ReplayBuffer, storage::Storage)
    # Layers are constructed in the same left-to-right order as a single nested
    # `Chain(...)` call would, so random weight initialisation is unaffected.
    input_layer = Dense(env.input_dim, env.middle_dim)
    residual_blocks = Tuple(
        Chain(
            Parallel(
                +,
                Chain(BatchNorm(env.middle_dim), Dense(env.middle_dim, env.middle_dim, relu)),
                Dense(env.middle_dim, env.middle_dim, relu),
            ),
            identity,
        ) for i in 1:env.depth
    )
    heads = Flux.Parallel(
        vcat,
        Dense(env.middle_dim, env.act_ind, tanh),
        Dense(env.middle_dim, 1, tanh10),
    )
    model = Chain(input_layer, residual_blocks..., Flux.flatten, heads)

    opt = ADAM()

    # Typed containers: sample_batch returns a Matrix{Int} and a Matrix{Float32}.
    image_batches = Matrix{Int}[]
    target_batches = Matrix{Float32}[]
    for _ in 1:env.batch_num
        image_batch, target_batch = sample_batch(env, buffer)
        push!(image_batches, image_batch)
        push!(target_batches, target_batch)
    end

    # Collect the trainable parameters once instead of on every update step.
    ps = Flux.params(model)
    n_updates = env.batch_num * env.training_step
    l = 0.0
    for _ in 1:env.training_step
        for (images, targets) in zip(image_batches, target_batches)
            val, grads = Flux.withgradient(ps) do
                loss(images, targets, env, model)
            end
            Flux.Optimise.update!(opt, ps, grads)
            l += val / n_updates
        end
    end
    storage.storage[env.training_step] = model

    return l
end

train_model! (generic function with 1 method)

In [22]:
# Build the environment from a positional list of string-encoded settings, then create
# the model storage for it. The meaning of each position is determined by init_Env,
# whose definition is not visible in this part of the file.
env = init_Env(["10", "10", "128", "16", "800", "8", "1", "500", "0.3", "0.25", "50", "4", "20.0", "0.4", "1.0", "0.7", "0.5", "128", "1.25", "0.00001", "0"])
storage = init_storage(env)

Storage(Dict{Int64, Chain}(), Chain(Dense(60 => 7)), Dict{Vector{Int64}, Float32}())

In [23]:
# Average training loss of each iteration, in order.
ld = Float64[]

for it in 1:10
    println("=============")
    println("it=$(it);")

    # NOTE(review): the buffer is recreated on every iteration, so each training round
    # only sees the games generated in that same iteration. Confirm this is intended.
    replay_buffer = init_buffer(1000, env.batch_size)

    # Ratio passed to self-play, chosen by iteration number. The former `it < 10`
    # check nested inside the `it >= 10` branch could never be true and is dropped;
    # the values below are exactly the ones that were reachable.
    ratio = if it < 10
        Float32(10.0)
    elseif it < 20
        Float32(0.05)
    elseif it < 30
        Float32(0.01)
    else
        Float32(0.002)
    end
    run_selfplay(env, replay_buffer, storage, ratio)
    ll = train_model!(env, replay_buffer, storage)

    println("loss_average: $(ll)")
    push!(ld, ll)

    # Every tenth iteration, play five evaluation games with a ratio of 1f-6 and
    # print their histories and scores.
    if it % 10 == 0
        for tes in 1:5
            game = play_physics!(env, latest_model(storage), 1f-6)
            score = calc_score(game.history, env)
            println("$(game.history), score:$(score)")
        end
    end
end

it=1;


game1: [1]
child_visit: Vector{Float32}[[0.228, 0.124, 0.126, 0.206, 0.174, 0.142]]
game2: [2]
child_visit: Vector{Float32}[[0.124, 0.222, 0.204, 0.192, 0.132, 0.126]]
game3: [5, 6, 3, 2, 3, 1, 6, 1, 2]
child_visit: Vector{Float32}[[0.162, 0.126, 0.144, 0.132, 0.31, 0.126], [0.162, 0.16, 0.212, 0.166, 0.0, 0.3], [0.0, 0.252, 0.312, 0.202, 0.234, 0.0], [0.15, 0.252, 0.0, 0.2, 0.246, 0.152], [0.124, 0.144, 0.294, 0.128, 0.126, 0.184], [0.306, 0.176, 0.0, 0.158, 0.152, 0.208], [0.126, 0.124, 0.144, 0.156, 0.158, 0.292], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0], [0.376, 0.624, 0.0, 0.0, 0.0, 0.0]]
game4: [4, 3, 6, 4, 1, 1, 6, 1, 1]
child_visit: Vector{Float32}[[0.124, 0.126, 0.126, 0.376, 0.124, 0.124], [0.222, 0.196, 0.238, 0.0, 0.15, 0.194], [0.23, 0.152, 0.0, 0.15, 0.166, 0.302], [0.0, 0.214, 0.274, 0.324, 0.188, 0.0], [0.29, 0.18, 0.15, 0.0, 0.23, 0.15], [0.278, 0.14, 0.202, 0.124, 0.128, 0.128], [0.126, 0.128, 0.164, 0.15, 0.174, 0.258], [0.576, 0.424, 0.0, 0.0, 0.0, 0.0], [0.624, 0.376, 0.

game6: [5, 6, 3, 2, 6, 3, 1, 2, 2]
child_visit: Vector{Float32}[[0.134, 0.124, 0.162, 0.18, 0.258, 0.142], [0.186, 0.158, 0.15, 0.19, 0.0, 0.316], [0.0, 0.188, 0.326, 0.276, 0.21, 0.0], [0.23, 0.288, 0.0, 0.182, 0.15, 0.15], [0.16, 0.168, 0.126, 0.15, 0.132, 0.264], [0.0, 0.252, 0.364, 0.196, 0.188, 0.0], [0.614, 0.386, 0.0, 0.0, 0.0, 0.0], [0.414, 0.586, 0.0, 0.0, 0.0, 0.0], [0.376, 0.624, 0.0, 0.0, 0.0, 0.0]]
game7: [1]
child_visit: Vector{Float32}[[0.282, 0.124, 0.206, 0.124, 0.134, 0.13]]
game8: [1]
child_visit: Vector{Float32}[[0.286, 0.132, 0.158, 0.124, 0.174, 0.126]]
game9: [4, 2, 5, 3, 6, 5, 1, 1, 1, 2]
child_visit: Vector{Float32}[[0.128, 0.126, 0.126, 0.268, 0.156, 0.196], [0.15, 0.322, 0.208, 0.0, 0.156, 0.164], [0.13, 0.128, 0.124, 0.124, 0.368, 0.126], [0.15, 0.184, 0.282, 0.156, 0.0, 0.228], [0.178, 0.172, 0.0, 0.162, 0.154, 0.334], [0.0, 0.188, 0.264, 0.258, 0.29, 0.0], [0.542, 0.458, 0.0, 0.0, 0.0, 0.0], [0.604, 0.396, 0.0, 0.0, 0.0, 0.0], [0.616, 0.384, 0.0, 0.0, 0.0,

------------
sample 1
history: [4, 3, 6, 4, 1, 1, 6, 1, 1],  turn:7
target; Float32[0.126, 0.128, 0.164, 0.15, 0.174, 0.258, 4.636553]


------------
sample 2
history: [4, 2, 5, 3, 6, 5, 1, 1, 1, 2],  turn:1
target; Float32[0.128, 0.126, 0.126, 0.268, 0.156, 0.196, -0.89178914]
------------
sample 3
history: [1],  turn:1
target; Float32[0.286, 0.132, 0.158, 0.124, 0.174, 0.126, 4.636553]


------------
sample 4
history: [4, 5, 6, 4, 2, 3, 1, 1, 1, 1],  turn:1
target; Float32[0.182, 0.13, 0.174, 0.192, 0.152, 0.17, 0.027052775]


------------
sample 5
history: [5, 6, 3, 2, 3, 1, 6, 1, 2],  turn:3
target; Float32[0.0, 0.252, 0.312, 0.202, 0.234, 0.0, -0.23397714]


------------
sample 6
history: [5, 6, 3, 2, 6, 3, 1, 2, 2],  turn:5
target; Float32[0.16, 0.168, 0.126, 0.15, 0.132, 0.264, 3.093681]


------------
sample 7
history: [2],  turn:1
target; Float32[0.124, 0.222, 0.204, 0.192, 0.132, 0.126, -1.389799]
------------
sample 8
history: [1],  turn:1
target; Float32[0.228, 0.124, 0.126, 0.206, 0.174, 0.142, 4.636553]


loss_average: 1.8996705028694123
it=2;


game1: [6, 3, 6, 4, 2, 6, 2, 1]
child_visit: Vector{Float32}[[0.084, 0.02, 0.08, 0.148, 0.102, 0.566], [0.0, 0.078, 0.606, 0.23, 0.086, 0.0], [0.26, 0.192, 0.0, 0.14, 0.13, 0.278], [0.0, 0.05, 0.044, 0.846, 0.06, 0.0], [0.242, 0.312, 0.202, 0.0, 0.174, 0.07], [0.062, 0.122, 0.24, 0.15, 0.15, 0.276], [0.0, 0.396, 0.208, 0.198, 0.198, 0.0], [0.306, 0.156, 0.168, 0.132, 0.12, 0.118]]


game2: [1]
child_visit: Vector{Float32}[[0.362, 0.038, 0.092, 0.152, 0.102, 0.254]]


game3: [5, 3, 2, 6, 5, 4, 1, 1, 2, 1]
child_visit: Vector{Float32}[[0.15, 0.03, 0.094, 0.174, 0.312, 0.24], [0.25, 0.06, 0.356, 0.276, 0.0, 0.058], [0.148, 0.38, 0.0, 0.134, 0.148, 0.19], [0.126, 0.084, 0.14, 0.196, 0.22, 0.234], [0.0, 0.204, 0.288, 0.202, 0.306, 0.0], [0.056, 0.12, 0.22, 0.342, 0.0, 0.262], [0.614, 0.386, 0.0, 0.0, 0.0, 0.0], [0.512, 0.488, 0.0, 0.0, 0.0, 0.0], [0.376, 0.624, 0.0, 0.0, 0.0, 0.0], [0.618, 0.382, 0.0, 0.0, 0.0, 0.0]]


game4: [1]
child_visit: Vector{Float32}[[0.412, 0.022, 0.13, 0.154, 0.1, 0.182]]


game5: [6, 3, 2, 1]
child_visit: Vector{Float32}[[0.276, 0.022, 0.106, 0.144, 0.098, 0.354], [0.0, 0.032, 0.664, 0.156, 0.148, 0.0], [0.228, 0.334, 0.0, 0.218, 0.122, 0.098], [0.47, 0.078, 0.132, 0.036, 0.088, 0.196]]


game6: [4, 6, 3, 4, 3, 1, 2, 2, 1, 1]
child_visit: Vector{Float32}[[0.194, 0.02, 0.136, 0.236, 0.18, 0.234], [0.09, 0.062, 0.374, 0.0, 0.068, 0.406], [0.0, 0.122, 0.404, 0.37, 0.104, 0.0], [0.226, 0.148, 0.0, 0.412, 0.1, 0.114], [0.04, 0.164, 0.36, 0.0, 0.326, 0.11], [0.518, 0.482, 0.0, 0.0, 0.0, 0.0], [0.45, 0.55, 0.0, 0.0, 0.0, 0.0], [0.404, 0.596, 0.0, 0.0, 0.0, 0.0], [0.566, 0.434, 0.0, 0.0, 0.0, 0.0], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0]]


game7: [1]
child_visit: Vector{Float32}[[0.284, 0.032, 0.184, 0.154, 0.106, 0.24]]


game8: [6, 3, 2, 1]
child_visit: Vector{Float32}[[0.106, 0.024, 0.082, 0.146, 0.112, 0.53], [0.0, 0.08, 0.662, 0.158, 0.1, 0.0], [0.212, 0.386, 0.0, 0.132, 0.114, 0.156], [0.384, 0.092, 0.132, 0.108, 0.104, 0.18]]


game9: [6, 3, 2, 6, 5, 4, 1, 2, 2]
child_visit: Vector{Float32}[[0.172, 0.022, 0.198, 0.208, 0.166, 0.234], [0.0, 0.032, 0.606, 0.164, 0.198, 0.0], [0.222, 0.326, 0.0, 0.14, 0.13, 0.182], [0.292, 0.084, 0.116, 0.062, 0.096, 0.35], [0.0, 0.234, 0.204, 0.172, 0.39, 0.0], [0.048, 0.14, 0.2, 0.328, 0.0, 0.284], [0.666, 0.334, 0.0, 0.0, 0.0, 0.0], [0.428, 0.572, 0.0, 0.0, 0.0, 0.0], [0.418, 0.582, 0.0, 0.0, 0.0, 0.0]]


game10: [5, 3, 6, 4, 5, 1, 1, 2, 2, 2]
child_visit: Vector{Float32}[[0.18, 0.04, 0.094, 0.158, 0.294, 0.234], [0.052, 0.074, 0.516, 0.302, 0.0, 0.056], [0.14, 0.144, 0.0, 0.128, 0.112, 0.476], [0.0, 0.054, 0.152, 0.752, 0.042, 0.0], [0.074, 0.248, 0.2, 0.0, 0.286, 0.192], [0.532, 0.468, 0.0, 0.0, 0.0, 0.0], [0.634, 0.366, 0.0, 0.0, 0.0, 0.0], [0.414, 0.586, 0.0, 0.0, 0.0, 0.0], [0.378, 0.622, 0.0, 0.0, 0.0, 0.0], [0.382, 0.618, 0.0, 0.0, 0.0, 0.0]]
------------
sample 1
history: [1],  turn:1
target; Float32[0.412, 0.022, 0.13, 0.154, 0.1, 0.182, 4.636553]


------------
sample 2
history: [6, 3, 2, 6, 5, 4, 1, 2, 2],  turn:4
target; Float32[0.292, 0.084, 0.116, 0.062, 0.096, 0.35, 3.4857893]


------------
sample 3
history: [6, 3, 2, 1],  turn:4
target; Float32[0.384, 0.092, 0.132, 0.108, 0.104, 0.18, -5.1887093]
------------
sample 4
history: [1],  turn:1
target; Float32[0.362, 0.038, 0.092, 0.152, 0.102, 0.254, 4.636553]


------------
sample 5
history: [5, 3, 2, 6, 5, 4, 1, 1, 2, 1],  turn:3
target; Float32[0.148, 0.38, 0.0, 0.134, 0.148, 0.19, -1.1168467]


------------
sample 6
history: [5, 3, 6, 4, 5, 1, 1, 2, 2, 2],  turn:2
target; Float32[0.052, 0.074, 0.516, 0.302, 0.0, 0.056, -2.2777739]


------------
sample 7
history: [6, 3, 2, 1],  turn:2
target; Float32[0.0, 0.032, 0.664, 0.156, 0.148, 0.0, -5.1887093]


------------
sample 8
history: [4, 6, 3, 4, 3, 1, 2, 2, 1, 1],  turn:1
target; Float32[0.194, 0.02, 0.136, 0.236, 0.18, 0.234, 1.4911596]


loss_average: 1.6558842215454206
it=3;


game1: [1]
child_visit: Vector{Float32}[[0.462, 0.08, 0.308, 0.092, 0.026, 0.032]]


game2: [1]
child_visit: Vector{Float32}[[0.608, 0.038, 0.206, 0.086, 0.026, 0.036]]


game3: [1]
child_visit: Vector{Float32}[[0.414, 0.058, 0.28, 0.136, 0.074, 0.038]]


game4: [1]
child_visit: Vector{Float32}[[0.404, 0.138, 0.286, 0.102, 0.036, 0.034]]


game5: [1]
child_visit: Vector{Float32}[[0.426, 0.054, 0.248, 0.124, 0.068, 0.08]]


game6: [1]
child_visit: Vector{Float32}[[0.402, 0.05, 0.346, 0.098, 0.068, 0.036]]


game7: [1]
child_visit: Vector{Float32}[[0.608, 0.038, 0.218, 0.082, 0.024, 0.03]]


game8: [1]
child_visit: Vector{Float32}[[0.362, 0.196, 0.242, 0.098, 0.034, 0.068]]


game9: [1]
child_visit: Vector{Float32}[[0.426, 0.086, 0.234, 0.19, 0.03, 0.034]]


game10: [1]
child_visit: Vector{Float32}[[0.522, 0.046, 0.24, 0.096, 0.028, 0.068]]
------------
sample 1
history: [1],  turn:1
target; Float32[0.362, 0.196, 0.242, 0.098, 0.034, 0.068, 4.636553]
------------
sample 2
history: [1],  turn:1
target; Float32[0.462, 0.08, 0.308, 0.092, 0.026, 0.032, 4.636553]
------------
sample 3
history: [1],  turn:1
target; Float32[0.404, 0.138, 0.286, 0.102, 0.036, 0.034, 4.636553]
------------
sample 4
history: [1],  turn:1
target; Float32[0.414, 0.058, 0.28, 0.136, 0.074, 0.038, 4.636553]
------------
sample 5
history: [1],  turn:1
target; Float32[0.608, 0.038, 0.206, 0.086, 0.026, 0.036, 4.636553]
------------
sample 6
history: [1],  turn:1
target; Float32[0.426, 0.086, 0.234, 0.19, 0.03, 0.034, 4.636553]
------------
sample 7
history: [1],  turn:1
target; Float32[0.522, 0.046, 0.24, 0.096, 0.028, 0.068, 4.636553]
------------
sample 8
history: [1],  turn:1
target; Float32[0.402, 0.05, 0.346, 0.098, 0.068, 0.036, 4.636553]


loss_average: 1.7908473565476015
it=4;


game1: [5, 6, 4, 5, 6, 2, 1, 2, 1]
child_visit: Vector{Float32}[[0.13, 0.228, 0.12, 0.12, 0.256, 0.146], [0.152, 0.146, 0.152, 0.242, 0.0, 0.308], [0.0, 0.184, 0.25, 0.382, 0.184, 0.0], [0.146, 0.202, 0.174, 0.0, 0.24, 0.238], [0.156, 0.152, 0.152, 0.204, 0.0, 0.336], [0.432, 0.568, 0.0, 0.0, 0.0, 0.0], [0.624, 0.376, 0.0, 0.0, 0.0, 0.0], [0.37, 0.63, 0.0, 0.0, 0.0, 0.0], [0.55, 0.45, 0.0, 0.0, 0.0, 0.0]]


game2: [5, 3, 2, 2, 1]
child_visit: Vector{Float32}[[0.124, 0.124, 0.12, 0.136, 0.274, 0.222], [0.146, 0.146, 0.404, 0.158, 0.0, 0.146], [0.146, 0.366, 0.0, 0.146, 0.146, 0.196], [0.12, 0.22, 0.124, 0.124, 0.2, 0.212], [0.356, 0.12, 0.122, 0.12, 0.156, 0.126]]


game3: [5, 6, 4, 2, 5, 4, 1, 1, 2, 1]
child_visit: Vector{Float32}[[0.15, 0.136, 0.214, 0.12, 0.238, 0.142], [0.268, 0.146, 0.156, 0.158, 0.0, 0.272], [0.0, 0.268, 0.188, 0.362, 0.182, 0.0], [0.146, 0.34, 0.156, 0.0, 0.208, 0.15], [0.122, 0.12, 0.218, 0.162, 0.254, 0.124], [0.15, 0.156, 0.146, 0.382, 0.0, 0.166], [0.624, 0.376, 0.0, 0.0, 0.0, 0.0], [0.582, 0.418, 0.0, 0.0, 0.0, 0.0], [0.482, 0.518, 0.0, 0.0, 0.0, 0.0], [0.63, 0.37, 0.0, 0.0, 0.0, 0.0]]


game4: [5, 6, 4, 5, 6, 1, 1, 2, 1]
child_visit: Vector{Float32}[[0.178, 0.142, 0.12, 0.146, 0.242, 0.172], [0.2, 0.148, 0.146, 0.158, 0.0, 0.348], [0.0, 0.194, 0.224, 0.324, 0.258, 0.0], [0.164, 0.166, 0.234, 0.0, 0.28, 0.156], [0.154, 0.166, 0.216, 0.146, 0.0, 0.318], [0.508, 0.492, 0.0, 0.0, 0.0, 0.0], [0.506, 0.494, 0.0, 0.0, 0.0, 0.0], [0.374, 0.626, 0.0, 0.0, 0.0, 0.0], [0.526, 0.474, 0.0, 0.0, 0.0, 0.0]]


game5: [3, 2, 4, 6, 3, 5, 1, 1, 1, 1]
child_visit: Vector{Float32}[[0.156, 0.168, 0.28, 0.12, 0.156, 0.12], [0.148, 0.404, 0.0, 0.15, 0.15, 0.148], [0.12, 0.192, 0.132, 0.314, 0.12, 0.122], [0.148, 0.152, 0.146, 0.0, 0.24, 0.314], [0.0, 0.232, 0.344, 0.224, 0.2, 0.0], [0.194, 0.176, 0.0, 0.21, 0.264, 0.156], [0.55, 0.45, 0.0, 0.0, 0.0, 0.0], [0.628, 0.372, 0.0, 0.0, 0.0, 0.0], [0.63, 0.37, 0.0, 0.0, 0.0, 0.0], [0.606, 0.394, 0.0, 0.0, 0.0, 0.0]]


game6: [1]
child_visit: Vector{Float32}[[0.306, 0.12, 0.12, 0.202, 0.132, 0.12]]


game7: [4, 2, 3, 5, 6, 4, 2, 1, 2, 1]
child_visit: Vector{Float32}[[0.148, 0.142, 0.122, 0.27, 0.132, 0.186], [0.188, 0.262, 0.152, 0.0, 0.176, 0.222], [0.122, 0.12, 0.292, 0.15, 0.126, 0.19], [0.186, 0.146, 0.0, 0.184, 0.338, 0.146], [0.196, 0.172, 0.18, 0.148, 0.0, 0.304], [0.0, 0.214, 0.192, 0.374, 0.22, 0.0], [0.37, 0.63, 0.0, 0.0, 0.0, 0.0], [0.522, 0.478, 0.0, 0.0, 0.0, 0.0], [0.4, 0.6, 0.0, 0.0, 0.0, 0.0], [0.6, 0.4, 0.0, 0.0, 0.0, 0.0]]


game8: [2]
child_visit: Vector{Float32}[[0.126, 0.31, 0.124, 0.12, 0.192, 0.128]]


game9: [5, 1, 1]
child_visit: Vector{Float32}[[0.122, 0.208, 0.12, 0.176, 0.254, 0.12], [0.406, 0.148, 0.15, 0.146, 0.0, 0.15], [0.234, 0.12, 0.232, 0.12, 0.152, 0.142]]


game10: [6, 4, 6, 4, 6, 3, 2, 2, 2, 1]
child_visit: Vector{Float32}[[0.122, 0.188, 0.122, 0.128, 0.166, 0.274], [0.0, 0.184, 0.228, 0.404, 0.184, 0.0], [0.17, 0.146, 0.194, 0.0, 0.146, 0.344], [0.0, 0.194, 0.188, 0.428, 0.19, 0.0], [0.146, 0.202, 0.146, 0.0, 0.166, 0.34], [0.0, 0.19, 0.344, 0.184, 0.282, 0.0], [0.376, 0.624, 0.0, 0.0, 0.0, 0.0], [0.386, 0.614, 0.0, 0.0, 0.0, 0.0], [0.464, 0.536, 0.0, 0.0, 0.0, 0.0], [0.546, 0.454, 0.0, 0.0, 0.0, 0.0]]
------------
sample 1
history: [6, 4, 6, 4, 6, 3, 2, 2, 2, 1],  turn:10
target; Float32[0.546, 0.454, 0.0, 0.0, 0.0, 0.0, 4.636553]


------------
sample 2
history: [2],  turn:1
target; Float32[0.126, 0.31, 0.124, 0.12, 0.192, 0.128, -1.389799]
------------
sample 3
history: [5, 1, 1],  turn:1
target; Float32[0.122, 0.208, 0.12, 0.176, 0.254, 0.12, 4.6365523]
------------
sample 4
history: [5, 6, 4, 2, 5, 4, 1, 1, 2, 1],  turn:5
target; Float32[0.122, 0.12, 0.218, 0.162, 0.254, 0.124, 4.636553]


------------
sample 5
history: [4, 2, 3, 5, 6, 4, 2, 1, 2, 1],  turn:4
target; Float32[0.186, 0.146, 0.0, 0.184, 0.338, 0.146, -3.0095108]


------------
sample 6
history: [5, 3, 2, 2, 1],  turn:2
target; Float32[0.146, 0.146, 0.404, 0.158, 0.0, 0.146, -2.4895284]


------------
sample 7
history: [3, 2, 4, 6, 3, 5, 1, 1, 1, 1],  turn:8
target; Float32[0.628, 0.372, 0.0, 0.0, 0.0, 0.0, -1.389799]


------------
sample 8
history: [5, 6, 4, 5, 6, 1, 1, 2, 1],  turn:1
target; Float32[0.178, 0.142, 0.12, 0.146, 0.242, 0.172, 1.7250766]


loss_average: 2.1745369115378708
it=5;


game1: [1]
child_visit: Vector{Float32}[[0.55, 0.036, 0.112, 0.176, 0.084, 0.042]]


game2: [1]
child_visit: Vector{Float32}[[0.572, 0.03, 0.034, 0.224, 0.066, 0.074]]


game3: [1]
child_visit: Vector{Float32}[[0.586, 0.03, 0.084, 0.15, 0.116, 0.034]]


game4: [1]
child_visit: Vector{Float32}[[0.544, 0.034, 0.044, 0.24, 0.104, 0.034]]


game5: [1]
child_visit: Vector{Float32}[[0.564, 0.058, 0.106, 0.158, 0.066, 0.048]]


game6: [1]
child_visit: Vector{Float32}[[0.542, 0.048, 0.062, 0.216, 0.096, 0.036]]


game7: [1]
child_visit: Vector{Float32}[[0.616, 0.034, 0.07, 0.166, 0.078, 0.036]]


game8: [1]
child_visit: Vector{Float32}[[0.718, 0.026, 0.032, 0.134, 0.058, 0.032]]


game9: [1]
child_visit: Vector{Float32}[[0.52, 0.032, 0.046, 0.214, 0.154, 0.034]]


game10: [1]
child_visit: Vector{Float32}[[0.56, 0.044, 0.134, 0.158, 0.07, 0.034]]
------------
sample 1
history: [1],  turn:1
target; Float32[0.718, 0.026, 0.032, 0.134, 0.058, 0.032, 4.636553]
------------
sample 2
history: [1],  turn:1
target; Float32[0.544, 0.034, 0.044, 0.24, 0.104, 0.034, 4.636553]
------------
sample 3
history: [1],  turn:1
target; Float32[0.55, 0.036, 0.112, 0.176, 0.084, 0.042, 4.636553]
------------
sample 4
history: [1],  turn:1
target; Float32[0.616, 0.034, 0.07, 0.166, 0.078, 0.036, 4.636553]
------------
sample 5
history: [1],  turn:1
target; Float32[0.572, 0.03, 0.034, 0.224, 0.066, 0.074, 4.636553]
------------
sample 6
history: [1],  turn:1
target; Float32[0.564, 0.058, 0.106, 0.158, 0.066, 0.048, 4.636553]
------------
sample 7
history: [1],  turn:1
target; Float32[0.52, 0.032, 0.046, 0.214, 0.154, 0.034, 4.636553]
------------
sample 8
history: [1],  turn:1
target; Float32[0.542, 0.048, 0.062, 0.216, 0.096, 0.036, 4.636553]


loss_average: 1.6948515672702342
it=6;


game1: [2]
child_visit: Vector{Float32}[[0.028, 0.358, 0.106, 0.234, 0.092, 0.182]]


game2: [6, 4, 1, 1]
child_visit: Vector{Float32}[[0.028, 0.098, 0.186, 0.182, 0.102, 0.404], [0.0, 0.184, 0.184, 0.45, 0.182, 0.0], [0.536, 0.084, 0.162, 0.0, 0.142, 0.076], [0.532, 0.16, 0.052, 0.106, 0.096, 0.054]]


game3: [6, 4, 1, 1]
child_visit: Vector{Float32}[[0.028, 0.3, 0.092, 0.128, 0.116, 0.336], [0.0, 0.184, 0.202, 0.432, 0.182, 0.0], [0.52, 0.08, 0.122, 0.0, 0.084, 0.194], [0.506, 0.22, 0.048, 0.048, 0.13, 0.048]]


game4: [3, 1, 1]
child_visit: Vector{Float32}[[0.032, 0.11, 0.442, 0.108, 0.108, 0.2], [0.548, 0.096, 0.0, 0.116, 0.122, 0.118], [0.512, 0.072, 0.066, 0.254, 0.048, 0.048]]


game5: [6, 4, 1, 1]
child_visit: Vector{Float32}[[0.036, 0.13, 0.186, 0.228, 0.13, 0.29], [0.0, 0.184, 0.202, 0.348, 0.266, 0.0], [0.514, 0.278, 0.088, 0.0, 0.054, 0.066], [0.568, 0.05, 0.05, 0.058, 0.05, 0.224]]


game6: [6, 2]
child_visit: Vector{Float32}[[0.034, 0.112, 0.114, 0.292, 0.122, 0.326], [0.0, 0.448, 0.184, 0.184, 0.184, 0.0]]


game7: [2]
child_visit: Vector{Float32}[[0.042, 0.222, 0.19, 0.182, 0.18, 0.184]]


game8: [4, 1, 1]
child_visit: Vector{Float32}[[0.03, 0.1, 0.114, 0.424, 0.226, 0.106], [0.772, 0.054, 0.058, 0.0, 0.062, 0.054], [0.53, 0.06, 0.11, 0.05, 0.05, 0.2]]


game9: [4, 1, 1]
child_visit: Vector{Float32}[[0.028, 0.112, 0.166, 0.25, 0.198, 0.246], [0.522, 0.15, 0.094, 0.0, 0.116, 0.118], [0.648, 0.07, 0.05, 0.108, 0.076, 0.048]]


game10: [5, 1, 1]
child_visit: Vector{Float32}[[0.034, 0.13, 0.12, 0.142, 0.436, 0.138], [0.518, 0.198, 0.13, 0.096, 0.0, 0.058], [0.48, 0.048, 0.048, 0.072, 0.052, 0.3]]
------------
sample 1
history: [6, 4, 1, 1],  turn:1
target; Float32[0.028, 0.098, 0.186, 0.182, 0.102, 0.404, 4.636553]
------------
sample 2
history: [6, 4, 1, 1],  turn:2
target; Float32[0.0, 0.184, 0.202, 0.348, 0.266, 0.0, 4.636553]
------------
sample 3
history: [5, 1, 1],  turn:2
target; Float32[0.518, 0.198, 0.13, 0.096, 0.0, 0.058, 4.6365523]
------------
sample 4
history: [4, 1, 1],  turn:1
target; Float32[0.03, 0.1, 0.114, 0.424, 0.226, 0.106, 4.636553]


------------
sample 5
history: [2],  turn:1
target; Float32[0.042, 0.222, 0.19, 0.182, 0.18, 0.184, -1.389799]
------------
sample 6
history: [4, 1, 1],  turn:1
target; Float32[0.028, 0.112, 0.166, 0.25, 0.198, 0.246, 4.636553]
------------
sample 7
history: [6, 4, 1, 1],  turn:4
target; Float32[0.506, 0.22, 0.048, 0.048, 0.13, 0.048, 4.636553]
------------
sample 8
history: [3, 1, 1],  turn:3
target; Float32[0.512, 0.072, 0.066, 0.254, 0.048, 0.048, 4.6365533]


loss_average: 1.6756239887326956
it=7;


game1: [5, 3, 4, 5, 1, 2, 2, 1, 2]
child_visit: Vector{Float32}[[0.088, 0.038, 0.04, 0.234, 0.508, 0.092], [0.088, 0.192, 0.308, 0.272, 0.0, 0.14], [0.11, 0.226, 0.0, 0.336, 0.258, 0.07], [0.04, 0.14, 0.268, 0.0, 0.312, 0.24], [0.648, 0.352, 0.0, 0.0, 0.0, 0.0], [0.408, 0.592, 0.0, 0.0, 0.0, 0.0], [0.472, 0.528, 0.0, 0.0, 0.0, 0.0], [0.648, 0.352, 0.0, 0.0, 0.0, 0.0], [0.392, 0.608, 0.0, 0.0, 0.0, 0.0]]


game2: [5, 6, 4, 6, 4, 2, 2, 1, 2]
child_visit: Vector{Float32}[[0.084, 0.046, 0.034, 0.256, 0.53, 0.05], [0.092, 0.202, 0.16, 0.24, 0.0, 0.306], [0.0, 0.2, 0.114, 0.444, 0.242, 0.0], [0.04, 0.158, 0.156, 0.0, 0.312, 0.334], [0.0, 0.246, 0.222, 0.31, 0.222, 0.0], [0.41, 0.59, 0.0, 0.0, 0.0, 0.0], [0.48, 0.52, 0.0, 0.0, 0.0, 0.0], [0.542, 0.458, 0.0, 0.0, 0.0, 0.0], [0.418, 0.582, 0.0, 0.0, 0.0, 0.0]]


game3: [4, 2, 4, 5, 3, 2, 2, 2, 2]
child_visit: Vector{Float32}[[0.106, 0.072, 0.038, 0.372, 0.352, 0.06], [0.04, 0.46, 0.07, 0.0, 0.178, 0.252], [0.094, 0.206, 0.048, 0.294, 0.124, 0.234], [0.05, 0.266, 0.07, 0.0, 0.32, 0.294], [0.046, 0.244, 0.292, 0.258, 0.0, 0.16], [0.36, 0.64, 0.0, 0.0, 0.0, 0.0], [0.214, 0.786, 0.0, 0.0, 0.0, 0.0], [0.266, 0.734, 0.0, 0.0, 0.0, 0.0], [0.226, 0.774, 0.0, 0.0, 0.0, 0.0]]


game4: [5, 4, 1, 1, 2]
child_visit: Vector{Float32}[[0.188, 0.026, 0.034, 0.238, 0.438, 0.076], [0.184, 0.164, 0.178, 0.242, 0.0, 0.232], [0.658, 0.07, 0.034, 0.0, 0.136, 0.102], [0.802, 0.034, 0.07, 0.028, 0.042, 0.024], [0.312, 0.486, 0.062, 0.044, 0.04, 0.056]]


game5: [5, 4, 1, 1, 1]
child_visit: Vector{Float32}[[0.276, 0.026, 0.062, 0.286, 0.296, 0.054], [0.122, 0.166, 0.21, 0.35, 0.0, 0.152], [0.582, 0.162, 0.054, 0.0, 0.094, 0.108], [0.784, 0.078, 0.036, 0.02, 0.036, 0.046], [0.398, 0.262, 0.06, 0.044, 0.08, 0.156]]


game6: [5, 4, 1, 1, 1]
child_visit: Vector{Float32}[[0.108, 0.05, 0.038, 0.258, 0.304, 0.242], [0.186, 0.166, 0.172, 0.244, 0.0, 0.232], [0.576, 0.11, 0.042, 0.0, 0.154, 0.118], [0.856, 0.044, 0.022, 0.018, 0.042, 0.018], [0.318, 0.308, 0.122, 0.162, 0.058, 0.032]]


game7: [4, 2, 4, 3, 6, 4, 2, 2, 2, 2]
child_visit: Vector{Float32}[[0.134, 0.026, 0.04, 0.302, 0.288, 0.21], [0.03, 0.386, 0.072, 0.0, 0.304, 0.208], [0.036, 0.192, 0.2, 0.314, 0.122, 0.136], [0.036, 0.264, 0.322, 0.0, 0.186, 0.192], [0.118, 0.208, 0.0, 0.23, 0.13, 0.314], [0.0, 0.244, 0.062, 0.534, 0.16, 0.0], [0.378, 0.622, 0.0, 0.0, 0.0, 0.0], [0.39, 0.61, 0.0, 0.0, 0.0, 0.0], [0.356, 0.644, 0.0, 0.0, 0.0, 0.0], [0.138, 0.862, 0.0, 0.0, 0.0, 0.0]]


game8: [5, 2, 2]
child_visit: Vector{Float32}[[0.12, 0.026, 0.036, 0.23, 0.462, 0.126], [0.118, 0.254, 0.242, 0.232, 0.0, 0.154], [0.036, 0.346, 0.092, 0.258, 0.22, 0.048]]


game9: [5, 6, 3, 5, 4, 1, 1, 2, 1, 1]
child_visit: Vector{Float32}[[0.15, 0.03, 0.046, 0.238, 0.282, 0.254], [0.074, 0.142, 0.16, 0.262, 0.0, 0.362], [0.0, 0.112, 0.364, 0.304, 0.22, 0.0], [0.044, 0.104, 0.0, 0.332, 0.41, 0.11], [0.272, 0.2, 0.122, 0.354, 0.0, 0.052], [0.648, 0.352, 0.0, 0.0, 0.0, 0.0], [0.578, 0.422, 0.0, 0.0, 0.0, 0.0], [0.42, 0.58, 0.0, 0.0, 0.0, 0.0], [0.646, 0.354, 0.0, 0.0, 0.0, 0.0], [0.648, 0.352, 0.0, 0.0, 0.0, 0.0]]


game10: [4, 2, 4, 3, 1, 6, 4, 2, 2, 2]
child_visit: Vector{Float32}[[0.174, 0.026, 0.036, 0.376, 0.304, 0.084], [0.112, 0.388, 0.102, 0.0, 0.188, 0.21], [0.06, 0.192, 0.128, 0.294, 0.122, 0.204], [0.036, 0.264, 0.304, 0.0, 0.2, 0.196], [0.25, 0.198, 0.0, 0.236, 0.144, 0.172], [0.036, 0.184, 0.046, 0.216, 0.122, 0.396], [0.0, 0.242, 0.26, 0.286, 0.212, 0.0], [0.33, 0.67, 0.0, 0.0, 0.0, 0.0], [0.308, 0.692, 0.0, 0.0, 0.0, 0.0], [0.156, 0.844, 0.0, 0.0, 0.0, 0.0]]
------------
sample 1
history: [5, 4, 1, 1, 2],  turn:2
target; Float32[0.184, 0.164, 0.178, 0.242, 0.0, 0.232, 4.636553]


------------
sample 2
history: [5, 6, 3, 5, 4, 1, 1, 2, 1, 1],  turn:4
target; Float32[0.044, 0.104, 0.0, 0.332, 0.41, 0.11, -6.181226]


------------
sample 3
history: [5, 3, 4, 5, 1, 2, 2, 1, 2],  turn:1
target; Float32[0.088, 0.038, 0.04, 0.234, 0.508, 0.092, -1.2736062]


------------
sample 4
history: [4, 2, 4, 3, 1, 6, 4, 2, 2, 2],  turn:9
target; Float32[0.308, 0.692, 0.0, 0.0, 0.0, 0.0, -5.077245]
------------
sample 5
history: [4, 2, 4, 5, 3, 2, 2, 2, 2],  turn:5
target; Float32[0.046, 0.244, 0.292, 0.258, 0.0, 0.16, 4.636553]


------------
sample 6
history: [4, 2, 4, 3, 6, 4, 2, 2, 2, 2],  turn:2
target; Float32[0.03, 0.386, 0.072, 0.0, 0.304, 0.208, 4.636553]
------------
sample 7
history: [5, 6, 4, 6, 4, 2, 2, 1, 2],  turn:4
target; Float32[0.04, 0.158, 0.156, 0.0, 0.312, 0.334, 4.636553]
------------
sample 8
history: [5, 4, 1, 1, 1],  turn:4
target; Float32[0.856, 0.044, 0.022, 0.018, 0.042, 0.018, 4.636553]


loss_average: 1.50326936354395
it=8;


game1: [5, 2, 5, 1, 1]
child_visit: Vector{Float32}[[0.03, 0.06, 0.034, 0.076, 0.676, 0.124], [0.062, 0.346, 0.076, 0.19, 0.0, 0.326], [0.044, 0.136, 0.03, 0.282, 0.432, 0.076], [0.474, 0.218, 0.046, 0.09, 0.0, 0.172], [0.7, 0.064, 0.022, 0.078, 0.102, 0.034]]


game2: [5, 2, 5, 1, 1]
child_visit: Vector{Float32}[[0.032, 0.03, 0.102, 0.062, 0.742, 0.032], [0.064, 0.368, 0.064, 0.208, 0.0, 0.296], [0.078, 0.048, 0.03, 0.392, 0.404, 0.048], [0.494, 0.088, 0.068, 0.174, 0.0, 0.176], [0.698, 0.028, 0.044, 0.062, 0.106, 0.062]]


game3: [5, 2, 5, 1, 1]
child_visit: Vector{Float32}[[0.05, 0.036, 0.108, 0.068, 0.706, 0.032], [0.064, 0.376, 0.134, 0.252, 0.0, 0.174], [0.022, 0.046, 0.03, 0.348, 0.438, 0.116], [0.558, 0.094, 0.1, 0.084, 0.0, 0.164], [0.704, 0.122, 0.02, 0.022, 0.104, 0.028]]


game4: [5, 2, 5, 1, 1]
child_visit: Vector{Float32}[[0.03, 0.026, 0.022, 0.292, 0.598, 0.032], [0.102, 0.368, 0.05, 0.194, 0.0, 0.286], [0.054, 0.048, 0.026, 0.302, 0.48, 0.09], [0.542, 0.086, 0.066, 0.128, 0.0, 0.178], [0.744, 0.028, 0.02, 0.038, 0.134, 0.036]]


game5: [5, 2, 5, 1, 1]
child_visit: Vector{Float32}[[0.032, 0.08, 0.024, 0.076, 0.74, 0.048], [0.058, 0.486, 0.064, 0.202, 0.0, 0.19], [0.02, 0.102, 0.03, 0.224, 0.534, 0.09], [0.608, 0.116, 0.046, 0.072, 0.0, 0.158], [0.76, 0.034, 0.058, 0.02, 0.1, 0.028]]


game6: [5, 2, 4, 1, 1]
child_visit: Vector{Float32}[[0.032, 0.034, 0.04, 0.258, 0.604, 0.032], [0.078, 0.408, 0.05, 0.184, 0.0, 0.28], [0.018, 0.042, 0.04, 0.534, 0.326, 0.04], [0.57, 0.046, 0.05, 0.0, 0.298, 0.036], [0.752, 0.032, 0.034, 0.026, 0.026, 0.13]]


game7: [5, 2, 4, 1, 1]
child_visit: Vector{Float32}[[0.082, 0.03, 0.03, 0.138, 0.684, 0.036], [0.094, 0.36, 0.092, 0.268, 0.0, 0.186], [0.02, 0.048, 0.102, 0.406, 0.382, 0.042], [0.486, 0.05, 0.094, 0.0, 0.32, 0.05], [0.792, 0.034, 0.038, 0.024, 0.026, 0.086]]


game8: [5, 2, 5, 1, 1]
child_visit: Vector{Float32}[[0.03, 0.028, 0.024, 0.212, 0.61, 0.096], [0.08, 0.478, 0.11, 0.166, 0.0, 0.166], [0.034, 0.09, 0.03, 0.332, 0.45, 0.064], [0.504, 0.23, 0.046, 0.088, 0.0, 0.132], [0.71, 0.03, 0.02, 0.022, 0.176, 0.042]]


game9: [5, 2, 5, 1, 1]
child_visit: Vector{Float32}[[0.11, 0.028, 0.022, 0.078, 0.73, 0.032], [0.062, 0.548, 0.094, 0.136, 0.0, 0.16], [0.02, 0.176, 0.024, 0.334, 0.402, 0.044], [0.54, 0.09, 0.102, 0.132, 0.0, 0.136], [0.724, 0.068, 0.05, 0.022, 0.102, 0.034]]


game10: [5, 2, 5, 1, 1]
child_visit: Vector{Float32}[[0.048, 0.08, 0.024, 0.066, 0.75, 0.032], [0.186, 0.38, 0.054, 0.194, 0.0, 0.186], [0.022, 0.048, 0.092, 0.228, 0.53, 0.08], [0.494, 0.088, 0.048, 0.128, 0.0, 0.242], [0.7, 0.028, 0.028, 0.022, 0.104, 0.118]]


------------
sample 1
history: [5, 2, 4, 1, 1],  turn:1
target; Float32[0.032, 0.034, 0.04, 0.258, 0.604, 0.032, 4.636553]


------------
sample 2
history: [5, 2, 5, 1, 1],  turn:3
target; Float32[0.022, 0.046, 0.03, 0.348, 0.438, 0.116, -2.9900105]


------------
sample 3
history: [5, 2, 5, 1, 1],  turn:1
target; Float32[0.03, 0.028, 0.024, 0.212, 0.61, 0.096, -2.9900105]


------------
sample 4
history: [5, 2, 5, 1, 1],  turn:2
target; Float32[0.064, 0.368, 0.064, 0.208, 0.0, 0.296, -2.9900105]


------------
sample 5
history: [5, 2, 5, 1, 1],  turn:1
target; Float32[0.11, 0.028, 0.022, 0.078, 0.73, 0.032, -2.9900105]


------------
sample 6
history: [5, 2, 5, 1, 1],  turn:3
target; Float32[0.022, 0.048, 0.092, 0.228, 0.53, 0.08, -2.9900105]


------------
sample 7
history: [5, 2, 5, 1, 1],  turn:4
target; Float32[0.474, 0.218, 0.046, 0.09, 0.0, 0.172, -2.9900105]


------------
sample 8
history: [5, 2, 5, 1, 1],  turn:4
target; Float32[0.608, 0.116, 0.046, 0.072, 0.0, 0.158, -2.9900105]


loss_average: 6.263262224383652
it=9;


game1: [5, 4, 6, 4, 5, 1, 1, 1, 1, 2]
child_visit: Vector{Float32}[[0.064, 0.012, 0.016, 0.112, 0.77, 0.026], [0.354, 0.034, 0.102, 0.432, 0.0, 0.078], [0.122, 0.134, 0.15, 0.0, 0.276, 0.318], [0.0, 0.08, 0.082, 0.474, 0.364, 0.0], [0.066, 0.066, 0.082, 0.0, 0.72, 0.066], [0.52, 0.48, 0.0, 0.0, 0.0, 0.0], [0.61, 0.39, 0.0, 0.0, 0.0, 0.0], [0.596, 0.404, 0.0, 0.0, 0.0, 0.0], [0.616, 0.384, 0.0, 0.0, 0.0, 0.0], [0.376, 0.624, 0.0, 0.0, 0.0, 0.0]]


game2: [5, 4, 5, 1, 4, 1, 1, 1, 1]
child_visit: Vector{Float32}[[0.088, 0.01, 0.01, 0.104, 0.778, 0.01], [0.132, 0.05, 0.094, 0.54, 0.0, 0.184], [0.122, 0.168, 0.262, 0.0, 0.276, 0.172], [0.45, 0.104, 0.068, 0.264, 0.0, 0.114], [0.324, 0.056, 0.054, 0.468, 0.042, 0.056], [0.678, 0.322, 0.0, 0.0, 0.0, 0.0], [0.908, 0.092, 0.0, 0.0, 0.0, 0.0], [0.882, 0.118, 0.0, 0.0, 0.0, 0.0], [0.662, 0.338, 0.0, 0.0, 0.0, 0.0]]


game3: [5, 4, 2, 4, 5, 1, 2, 1, 2]
child_visit: Vector{Float32}[[0.1, 0.01, 0.016, 0.07, 0.794, 0.01], [0.154, 0.036, 0.084, 0.59, 0.0, 0.136], [0.104, 0.442, 0.102, 0.0, 0.218, 0.134], [0.04, 0.052, 0.044, 0.482, 0.338, 0.044], [0.162, 0.148, 0.106, 0.0, 0.488, 0.096], [0.602, 0.398, 0.0, 0.0, 0.0, 0.0], [0.418, 0.582, 0.0, 0.0, 0.0, 0.0], [0.522, 0.478, 0.0, 0.0, 0.0, 0.0], [0.38, 0.62, 0.0, 0.0, 0.0, 0.0]]


game4: [5, 4, 1, 5, 4, 1, 2, 1, 2]
child_visit: Vector{Float32}[[0.08, 0.02, 0.016, 0.076, 0.798, 0.01], [0.148, 0.03, 0.262, 0.48, 0.0, 0.08], [0.338, 0.114, 0.104, 0.0, 0.232, 0.212], [0.034, 0.072, 0.098, 0.294, 0.334, 0.168], [0.124, 0.076, 0.21, 0.494, 0.0, 0.096], [0.518, 0.482, 0.0, 0.0, 0.0, 0.0], [0.434, 0.566, 0.0, 0.0, 0.0, 0.0], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0], [0.378, 0.622, 0.0, 0.0, 0.0, 0.0]]


game5: [5, 4, 2, 5, 4, 1, 2, 1, 2]
child_visit: Vector{Float32}[[0.064, 0.022, 0.026, 0.072, 0.8, 0.016], [0.11, 0.03, 0.298, 0.484, 0.0, 0.078], [0.168, 0.334, 0.178, 0.0, 0.212, 0.108], [0.126, 0.05, 0.052, 0.34, 0.374, 0.058], [0.16, 0.208, 0.076, 0.48, 0.0, 0.076], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0], [0.38, 0.62, 0.0, 0.0, 0.0, 0.0], [0.62, 0.38, 0.0, 0.0, 0.0, 0.0], [0.376, 0.624, 0.0, 0.0, 0.0, 0.0]]


game6: [5, 4, 2, 4, 5, 1, 2, 1, 2]
child_visit: Vector{Float32}[[0.102, 0.01, 0.024, 0.072, 0.782, 0.01], [0.344, 0.038, 0.078, 0.446, 0.0, 0.094], [0.122, 0.318, 0.238, 0.0, 0.214, 0.108], [0.032, 0.048, 0.15, 0.394, 0.332, 0.044], [0.08, 0.114, 0.068, 0.0, 0.514, 0.224], [0.624, 0.376, 0.0, 0.0, 0.0, 0.0], [0.378, 0.622, 0.0, 0.0, 0.0, 0.0], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0], [0.414, 0.586, 0.0, 0.0, 0.0, 0.0]]


game7: [5, 4, 6, 4, 5, 2, 2, 1, 2, 2]
child_visit: Vector{Float32}[[0.096, 0.012, 0.02, 0.078, 0.784, 0.01], [0.204, 0.04, 0.106, 0.458, 0.0, 0.192], [0.112, 0.136, 0.114, 0.0, 0.252, 0.386], [0.0, 0.05, 0.048, 0.526, 0.376, 0.0], [0.066, 0.246, 0.066, 0.0, 0.486, 0.136], [0.416, 0.584, 0.0, 0.0, 0.0, 0.0], [0.452, 0.548, 0.0, 0.0, 0.0, 0.0], [0.618, 0.382, 0.0, 0.0, 0.0, 0.0], [0.376, 0.624, 0.0, 0.0, 0.0, 0.0], [0.472, 0.528, 0.0, 0.0, 0.0, 0.0]]


game8: [5, 4, 6, 5, 4, 2, 1, 1, 1, 2]
child_visit: Vector{Float32}[[0.068, 0.056, 0.01, 0.074, 0.78, 0.012], [0.09, 0.032, 0.128, 0.672, 0.0, 0.078], [0.102, 0.1, 0.212, 0.0, 0.214, 0.372], [0.0, 0.104, 0.108, 0.304, 0.484, 0.0], [0.308, 0.078, 0.082, 0.456, 0.0, 0.076], [0.392, 0.608, 0.0, 0.0, 0.0, 0.0], [0.612, 0.388, 0.0, 0.0, 0.0, 0.0], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0], [0.376, 0.624, 0.0, 0.0, 0.0, 0.0]]


game9: [5, 4, 3, 4, 1, 1, 2, 2, 1]
child_visit: Vector{Float32}[[0.07, 0.012, 0.04, 0.072, 0.79, 0.016], [0.092, 0.08, 0.208, 0.474, 0.0, 0.146], [0.138, 0.1, 0.318, 0.0, 0.21, 0.234], [0.048, 0.052, 0.0, 0.384, 0.354, 0.162], [0.622, 0.378, 0.0, 0.0, 0.0, 0.0], [0.542, 0.458, 0.0, 0.0, 0.0, 0.0], [0.378, 0.622, 0.0, 0.0, 0.0, 0.0], [0.386, 0.614, 0.0, 0.0, 0.0, 0.0], [0.542, 0.458, 0.0, 0.0, 0.0, 0.0]]


game10: [5, 4, 2, 4, 5, 2, 1, 1, 1]
child_visit: Vector{Float32}[[0.064, 0.02, 0.01, 0.072, 0.78, 0.054], [0.084, 0.03, 0.13, 0.442, 0.0, 0.314], [0.138, 0.386, 0.128, 0.0, 0.23, 0.118], [0.05, 0.048, 0.048, 0.42, 0.344, 0.09], [0.098, 0.154, 0.176, 0.0, 0.488, 0.084], [0.434, 0.566, 0.0, 0.0, 0.0, 0.0], [0.616, 0.384, 0.0, 0.0, 0.0, 0.0], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0], [0.626, 0.374, 0.0, 0.0, 0.0, 0.0]]


------------
sample 1
history: [5, 4, 2, 5, 4, 1, 2, 1, 2],  turn:8
target; Float32[0.62, 0.38, 0.0, 0.0, 0.0, 0.0, -0.7951588]


------------
sample 2
history: [5, 4, 6, 5, 4, 2, 1, 1, 1, 2],  turn:7
target; Float32[0.612, 0.388, 0.0, 0.0, 0.0, 0.0, 0.46305296]


------------
sample 3
history: [5, 4, 2, 4, 5, 1, 2, 1, 2],  turn:2
target; Float32[0.154, 0.036, 0.084, 0.59, 0.0, 0.136, -0.7951517]


------------
sample 4
history: [5, 4, 6, 4, 5, 2, 2, 1, 2, 2],  turn:7
target; Float32[0.452, 0.548, 0.0, 0.0, 0.0, 0.0, 3.9266798]


------------
sample 5
history: [5, 4, 2, 4, 5, 2, 1, 1, 1],  turn:8
target; Float32[0.626, 0.374, 0.0, 0.0, 0.0, 0.0, -5.7758226]
------------
sample 6
history: [5, 4, 3, 4, 1, 1, 2, 2, 1],  turn:6
target; Float32[0.542, 0.458, 0.0, 0.0, 0.0, 0.0, 4.636553]


------------
sample 7
history: [5, 4, 2, 4, 5, 1, 2, 1, 2],  turn:1
target; Float32[0.102, 0.01, 0.024, 0.072, 0.782, 0.01, -0.7951517]


------------
sample 8
history: [5, 4, 1, 5, 4, 1, 2, 1, 2],  turn:8
target; Float32[0.626, 0.374, 0.0, 0.0, 0.0, 0.0, -5.4338403]


loss_average: 1.1165086092660204
it=10;


game1: [1]
child_visit: Vector{Float32}[[0.666, 0.3, 0.008, 0.018, 0.002, 0.006]]


game2: [1]
child_visit: Vector{Float32}[[0.572, 0.394, 0.008, 0.018, 0.002, 0.006]]


game3: [1]
child_visit: Vector{Float32}[[0.646, 0.312, 0.008, 0.018, 0.002, 0.014]]


game4: [1]
child_visit: Vector{Float32}[[0.54, 0.424, 0.008, 0.018, 0.004, 0.006]]


game5: [1]
child_visit: Vector{Float32}[[0.612, 0.352, 0.008, 0.018, 0.004, 0.006]]


game6: [1]
child_visit: Vector{Float32}[[0.644, 0.32, 0.008, 0.018, 0.004, 0.006]]


game7: [1]
child_visit: Vector{Float32}[[0.618, 0.348, 0.008, 0.018, 0.002, 0.006]]


game8: [1]
child_visit: Vector{Float32}[[0.54, 0.426, 0.008, 0.018, 0.002, 0.006]]


game9: [1]
child_visit: Vector{Float32}[[0.67, 0.29, 0.014, 0.018, 0.002, 0.006]]


game10: [1]
child_visit: Vector{Float32}[[0.632, 0.332, 0.008, 0.018, 0.004, 0.006]]
------------
sample 1
history: [1],  turn:1
target; Float32[0.67, 0.29, 0.014, 0.018, 0.002, 0.006, 4.636553]
------------
sample 2
history: [1],  turn:1
target; Float32[0.54, 0.424, 0.008, 0.018, 0.004, 0.006, 4.636553]
------------
sample 3
history: [1],  turn:1
target; Float32[0.632, 0.332, 0.008, 0.018, 0.004, 0.006, 4.636553]
------------
sample 4
history: [1],  turn:1
target; Float32[0.54, 0.426, 0.008, 0.018, 0.002, 0.006, 4.636553]
------------
sample 5
history: [1],  turn:1
target; Float32[0.646, 0.312, 0.008, 0.018, 0.002, 0.014, 4.636553]
------------
sample 6
history: [1],  turn:1
target; Float32[0.644, 0.32, 0.008, 0.018, 0.004, 0.006, 4.636553]
------------
sample 7
history: [1],  turn:1
target; Float32[0.572, 0.394, 0.008, 0.018, 0.002, 0.006, 4.636553]
------------
sample 8
history: [1],  turn:1
target; Float32[0.612, 0.352, 0.008, 0.018, 0.004, 0.006, 4.636553]


loss_average: 1.364205153658986


[2], score:-1.389799


[6, 3, 2, 1], score:-5.1887093


[2], score:-1.389799


[1], score:4.636553


[3, 6, 4, 5, 4, 2, 1, 1, 2, 1], score:-4.341542
