In [None]:
# In Julia, you want the features matrix to be nfeatures × nsamples (feature vectors are columns)

In [1]:
using Gobblet.TicTacToe

In [2]:
import Random
using Statistics
using LinearAlgebra: norm
using Flux
using Flux: onehot, onecold, crossentropy, throttle, @epochs, testmode!

┌ Info: Recompiling stale cache file /Users/jlauren1/.julia/compiled/v1.1/Flux/QdkVy.ji for Flux [587475ba-b771-5e3f-ad9e-33799f191a9c]
└ @ Base loading.jl:1184


In [3]:
# Eagerly concatenate an iterable of iterables into a single Vector.
flatten = collect ∘ Iterators.flatten

"""
    vectorize_board(board)

Encode `board` as a flat `Vector{Float32}`.

Layers `1:NUM_LAYERS` are visited in order (layer `l` is the slice `board[:, l]`), and
each cell `p` of a layer contributes three consecutive entries:
`isnothing(p)`, `p == Red`, `p == Blue` (each stored as `0f0` or `1f0`).
"""
function vectorize_board(board)
    encoding = Float32[]
    for l in 1:NUM_LAYERS, p in board[:, l]
        # push! converts the three Bool flags to Float32.
        push!(encoding, isnothing(p), p == Red, p == Blue)
    end
    return encoding
end

#vectorize_board(make_board())

vectorize_board (generic function with 1 method)

In [4]:
solution = solve()

Solution(Bool[true, false, true, true, false, true, true, false, true, true  …  true, true, true, true, true, true, true, true, true, true], false, 19683, 5)

In [6]:
"""
    State(board::Board; first_player=Red)

Build a game state around an existing `board`.

A fresh state is created for `first_player`, `board` is assigned to it and
`process_board_update!` is run. The state is flagged `finished` when it is stuck or when
either colour has won. `winner` is only set when exactly one colour has won; otherwise it
keeps the value given by the fresh state.
"""
function State(board::Board; first_player=Red)
    state = State(first_player=first_player)
    state.board = board
    process_board_update!(state)

    # All three predicates are evaluated up front, in this order.
    red_won = has_won(state, Red)
    blue_won = has_won(state, Blue)
    stuck = is_stuck(state)

    if stuck || red_won || blue_won
        state.finished = true
    end
    # Exactly one colour aligned: that colour is the winner.
    if red_won != blue_won
        state.winner = red_won ? Red : Blue
    end
    return state
end

"""
    State(code::Int)

Build the state whose board is obtained by decoding the integer `code` into a freshly
made board, with `Red` as first player.
"""
function State(code::Int)
    board = make_board()
    decode_board!(board, code)
    return State(board; first_player=Red)
end

State

In [7]:
# Labels used for the one-hot encoding of a position's value.
POSSIBLE_VALUES = [-1, 0, 1]

"""
    interesting_state(st)

Return `true` when `st` is not finished and the first component of `available(st, Red)`
exceeds the first component of `available(st, Blue)` by 0 or 1.

NOTE(review): `available(...)[1]` is presumably a count of remaining pieces — confirm
against the Gobblet.TicTacToe API.
"""
function interesting_state(st)
    st.finished && return false
    lead = available(st, Red)[1] - available(st, Blue)[1]
    return lead == 0 || lead == 1
end

"""
    value_dataset(solution)

Build the supervised dataset from every board code in `0:CARD_BOARDS-1` whose state
satisfies `interesting_state`.

Returns `(X, Y, codes)` where

- `X` has one column per kept board, holding `vectorize_board` of that board;
- `Y` has one column per kept board, holding the `Float32` one-hot encoding (over
  `POSSIBLE_VALUES`) of `value(status(solution, code))`;
- `codes` lists the kept board codes, in column order.
"""
function value_dataset(solution)
    features = Vector{Float32}[]
    targets = Vector{Float32}[]
    kept = Int[]
    for code in 0:(CARD_BOARDS - 1)
        state = State(code)
        interesting_state(state) || continue
        outcome = value(status(solution, code))
        push!(features, vectorize_board(state.board))
        push!(targets, Float32.(onehot(outcome, POSSIBLE_VALUES)))
        push!(kept, code)
    end
    # Stack the per-sample vectors as columns (nfeatures × nsamples).
    return reduce(hcat, features), reduce(hcat, targets), kept
end

X, Y, Codes = value_dataset(solution)
N = size(X, 2)

# Shuffle the samples. The same permutation is applied to Codes so that Codes[i] keeps
# naming the board stored in column i of X and Y.
perm = Random.randperm(N)
X, Y, Codes = X[:, perm], Y[:, perm], Codes[perm]

# Hold out the first tenth of the shuffled samples for testing.
Ntest = N ÷ 10
Xtest,  Ytest  = X[:, 1:Ntest], Y[:, 1:Ntest]
Xtrain, Ytrain = X[:, Ntest+1:end], Y[:, Ntest+1:end]

(Float32[1.0 0.0 … 0.0 1.0; 0.0 0.0 … 0.0 0.0; … ; 1.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], Float32[0.0 1.0 … 0.0 1.0; 0.0 0.0 … 0.0 0.0; 1.0 0.0 … 1.0 0.0])

In [8]:
# Count the samples in each value class: row i of the result is the number of samples
# whose one-hot label is POSSIBLE_VALUES[i].
# There are more winning than losing configurations for Red because it is about to play.
sum(Y, dims=2)

3×1 Array{Float32,2}:
  632.0
 1052.0
 2836.0

In [18]:
# Stochastic batches
"""
    StochasticBatches(X, Y; batch_size, num_epochs)

Finite iterator over random mini-batches of a dataset whose samples are stored as columns.

Each iteration step draws `batch_size` column indices uniformly at random, with
replacement, from `1:N` (`N` being the number of columns) and yields the tuple
`(X[:, indices], Y[:, indices])`. Despite its name, `num_epochs` is the total number of
batches produced, not a number of passes over the data.

# Throws
- `DimensionMismatch` if `X` and `Y` do not have the same number of columns.
"""
struct StochasticBatches{TX,TY}
    X::TX
    Y::TY
    N::Int
    batch_size::Int
    num_epochs::Int
    function StochasticBatches(X::TX, Y::TY; batch_size, num_epochs) where {TX,TY}
        N = size(X, 2)
        if size(Y, 2) != N
            throw(DimensionMismatch("X has $N columns but Y has $(size(Y, 2))"))
        end
        return new{TX,TY}(X, Y, N, batch_size, num_epochs)
    end
end

import Base.iterate

# The iteration state is the number of batches that remain to be produced.
function Base.iterate(B::StochasticBatches, remaining)
    remaining > 0 || return nothing
    indices = rand(1:B.N, B.batch_size)
    batch = (B.X[:, indices], B.Y[:, indices])
    return (batch, remaining - 1)
end

Base.iterate(B::StochasticBatches) = iterate(B, B.num_epochs)

# Needed by `collect` and other consumers that ask an iterator for its length.
Base.length(B::StochasticBatches) = max(B.num_epochs, 0)

iterate (generic function with 305 methods)

In [40]:
# Weight matrices of the Dense layers of NN, in layer order; other layers are skipped.
# NOTE(review): this cell is numbered In [40] while NN is built in cell In [43], so it
# only runs if NN already exists in the session.
Ws = [layer.W for layer in NN if layer isa Dense]

6-element Array{TrackedArray{…,Array{Float32,2}},1}:
 Float32[0.0923347 0.142067 … 0.205845 0.126846; -0.0593379 0.058034 … 0.168809 -0.125917; … ; -0.0892767 -0.215647 … -0.196134 -0.201507; 0.020898 0.215358 … -0.00047728 -0.201039] (tracked)           
 Float32[-0.140461 0.0312485 … -0.0266565 -0.000138215; -0.0903409 0.067695 … 0.0204294 -0.171848; … ; 0.162272 0.0928704 … 0.107399 -0.0647294; -0.00105898 -0.0619064 … -0.119091 -0.00957642] (tracked)
 Float32[0.0103104 -0.0839598 … 0.102463 0.169257; -0.159451 0.0848444 … -0.0517296 -0.0981979; … ; -0.116594 -0.1121 … -0.0380184 -0.0435465; 0.117266 -0.124329 … -0.0427423 0.0542367] (tracked)       
 Float32[0.105431 0.111935 … 0.120275 0.0892155; -0.171061 -0.0326176 … 0.070216 -0.0479025; … ; -0.0875467 0.157905 … 0.170374 -0.125937; -0.0836706 0.0929381 … -0.0910467 -0.0799067] (tracked)        
 Float32[0.0864833 0.118618 … 0.172547 -0.111816; -0.116719 -0.0917048 … 0.169437 -0.0514453; … ; 0.0936513 0.163863 … -0.165209 -0.029

In [43]:
# Layer sizes are read off the dataset so the network always matches the encoding.
INPUT_DIM = size(X, 1) # 3 * NUM_POSITIONS * NUM_LAYERS
OUTPUT_DIM = size(Y, 1) # length(POSSIBLE_VALUES)
HIDDEN_1 = 100
HIDDEN_2 = 100
DROPOUT_RATIO = 0.5

# Fully connected classifier: five relu layers (dropout after the 2nd, 3rd and 4th),
# followed by a linear layer and a softmax over the value classes.
NN = Chain(
    Dense(INPUT_DIM, HIDDEN_1, relu),
    Dense(HIDDEN_1, HIDDEN_2, relu),
    Dropout(DROPOUT_RATIO),
    Dense(HIDDEN_2, HIDDEN_2, relu),
    Dropout(DROPOUT_RATIO),
    Dense(HIDDEN_2, HIDDEN_2, relu),
    Dropout(DROPOUT_RATIO),
    Dense(HIDDEN_2, HIDDEN_2, relu),
    Dense(HIDDEN_2, OUTPUT_DIM),
    softmax)

# Training mode: dropout layers are active.
testmode!(NN, false)

λ = 0.01 # Regularization constant
Ws = [layer.W for layer in NN if isa(layer, Dense)]
# The small offset keeps the log inside crossentropy away from log(0). It is a Float32
# literal so that the Float32 network output is not promoted to Float64.
raw_loss(x, y) = crossentropy(NN(x) .+ 1f-7, y)
# The L2 penalty is currently disabled, so λ and Ws are unused by the loss.
loss(x, y) =  raw_loss(x, y) # + λ * sum(norm, Ws)
# Fraction of columns whose highest-scoring class matches the one-hot label.
accuracy(x, y) = mean(onecold(NN(x)) .== onecold(y))

#dataset = Iterators.repeated((Xtrain, Ytrain), 1000)
# num_epochs is the number of random batches drawn, i.e. the number of training steps.
dataset = StochasticBatches(Xtrain, Ytrain, batch_size=128, num_epochs=10000)

# Largest absolute value over all parameters of NN.
maximum_weight(NN) = maximum(p -> maximum(abs.(p)), params(NN))
    
# Monitoring callback: evaluates in test mode, stops training once the test accuracy
# exceeds 99% or the training loss drops below 1e-4, prints the current metrics, and
# switches back to training mode.
# NOTE(review): Flux.stop() interrupts train! from inside the callback, so when it fires
# the @show and the switch back to training mode below are presumably skipped — confirm
# against the Flux version in use.
function evalcb()
    testmode!(NN, true)
    if accuracy(Xtest, Ytest) > 1 - 1e-2 || raw_loss(Xtrain, Ytrain) < 1e-4
        Flux.stop()
    end
    @show (accuracy(Xtest, Ytest), loss(Xtrain, Ytrain), maximum_weight(NN))
    testmode!(NN, false)
end
opt = ADAM(1e-3)
# throttle limits the callback to at most one run every 10 seconds.
Flux.train!(loss, params(NN), dataset, opt, cb = throttle(evalcb, 10))

(accuracy(Xtest, Ytest), loss(Xtrain, Ytrain), maximum_weight(NN)) = (0.3274336283185841, 1.0972968501753413 (tracked), 0.24135657f0 (tracked))
(accuracy(Xtest, Ytest), loss(Xtrain, Ytrain), maximum_weight(NN)) = (0.831858407079646, 0.3479780687358608 (tracked), 0.4990248f0 (tracked))
(accuracy(Xtest, Ytest), loss(Xtrain, Ytrain), maximum_weight(NN)) = (0.8805309734513275, 0.12875844957272814 (tracked), 0.5630298f0 (tracked))
(accuracy(Xtest, Ytest), loss(Xtrain, Ytrain), maximum_weight(NN)) = (0.9092920353982301, 0.06522504575544066 (tracked), 0.5834315f0 (tracked))
(accuracy(Xtest, Ytest), loss(Xtrain, Ytrain), maximum_weight(NN)) = (0.911504424778761, 0.03229481679389611 (tracked), 0.62352896f0 (tracked))
(accuracy(Xtest, Ytest), loss(Xtrain, Ytrain), maximum_weight(NN)) = (0.9292035398230089, 0.021152689031502626 (tracked), 0.64473325f0 (tracked))
(accuracy(Xtest, Ytest), loss(Xtrain, Ytrain), maximum_weight(NN)) = (0.9247787610619469, 0.0108484872942248 (tracked), 0.68412566f0 (tr

In [None]:
# Put the network in test mode before predicting (the original `testnode!` was a typo
# for `testmode!` and raised an UndefVarError).
testmode!(NN, true)

"""
    predict(code)

Return the value predicted by `NN` for the board encoded by `code`: the entry of
`POSSIBLE_VALUES` whose network output is largest.
"""
function predict(code)
    x = vectorize_board(State(code).board)
    return onecold(NN(x), POSSIBLE_VALUES)
end

In [None]:
"""
    find_counterexamples()

Return the board codes in `Codes` whose state is not finished and for which
`predict(code)` differs from `value(solution, State(code))`.

The result has the same element type as `Codes` instead of being an untyped `Vector{Any}`.
"""
function find_counterexamples()
    return filter(Codes) do code
        st = State(code)
        !st.finished && predict(code) != value(solution, st)
    end
end
cexs = find_counterexamples()

In [None]:
# Show every misclassified board with the predicted and the true value,
# separated by a blank line.
for code in cexs
    state = State(code)
    print_board(state)
    println("Predicted value: ", predict(code))
    println("True value: ", value(solution, state))
    println()
end

In [None]:
#=
import Flux.Tracker
import Flux.Optimise

function train!(loss, ps, data, opt; cb = () -> ())
  ps = Tracker.Params(ps)
  for d in data
    #print("| any inf: ", any(p -> any(isinf, p), params(NN)))
    #print("| any nan: ", any(p -> any(isnan, p), params(NN)))
    #print("| maximum: ", maximum(p -> maximum(abs.(p)), params(NN)))
    #print("\n")
    gs = gradient(ps) do 
        loss(d...)
    end
    Flux.Optimise.update!(opt, ps, gs)
    cb()
  end
end

function winner_dataset(solution)
    x = Vector{Float32}[]
    y = Vector{Float32}[]
    for code in 0:CARD_BOARDS-1
        st = State(code)
        push!(x, vectorize_board(st.board))
        push!(y, Float32.(onehot(st.winner, [nothing, Red, Blue])))
    end
    return reduce(hcat, x), reduce(hcat, y)
end
=#