In [4]:
using Random
using LinearAlgebra
using Statistics
using StatsBase
using Plots
using Base.Threads
using Flux
using Base.Iterators: product
using Flux: Optimise
using Flux: ADAM, params, update!
using Serialization



In [5]:
include("permanents.jl")

# Variables are for reward_function.jl
# NOTE(review): `Lambda` is not referenced by any code visible in this notebook.
Lambda = 0.35 # Weight for regularizing the reward function to generate more ones (too high a lambda will result in higher odds of generating isosceles triangles)

# Variables are for bitwise_model.jl
# These are plain (non-const) globals; a later cell reassigns `percentile`.
n_actions = 2  # Number of actions that the agent can take. In this case, it is either 0 for excluding a point and 1 for including it
n_sessions = 2000  # Number of new sessions per iteration; select_elites and select_super_sessions read this global to cap how many sessions they keep
learning_rate = 0.001  # Learning rate, increase this to converge faster (train() defines its own local learning rate)
percentile = 90  # Top 100-x percentile the agent will learn from
super_percentile = 90  # Top 100-x percentile of that survives to the next generation

90

In [15]:
# Variables are for bitwise_model.jl
# This cell only overrides the two percentile globals; the commented-out lines
# below are the values left unchanged from the earlier configuration cell.
#n_actions = 2  # Number of actions that the agent can take. In this case, it is either 0 for excluding a point and 1 for including it
#n_sessions = 2000  # Number of new sessions per iteration
#learning_rate = 0.001  # Learning rate, increase this to converge faster
percentile = 80  # Top 100-x percentile the agent will learn from
super_percentile = 90  # Top 100-x percentile of that survives to the next generation

90

# Helper functions

In [7]:
"""
    board_to_string(board::Vector{Int64}, n)

Render a flat board of length `n^2` as a bracketed, multi-line string.

The vector is reshaped (column-major) into an `n`×`n` matrix and each matrix
column, i.e. each run of `n` consecutive entries of `board`, is printed on its
own line in Julia's vector notation. Lines after the first are indented by one
space so they align under the opening bracket.
"""
function board_to_string(board::Vector{Int64}, n)
    grid = reshape(convert(Vector{Int}, board), (n, n))

    buf = IOBuffer()
    print(buf, "[")
    # Every column except the last is followed by a newline and one space.
    for j in 1:n-1
        print(buf, grid[:, j], "\n ")
    end
    # The last column closes the outer bracket.
    print(buf, grid[:, n], "]")

    return String(take!(buf))
end

board_to_string (generic function with 1 method)

In [8]:
"""
    new_point_allowed(one_indices, new_point_index, n)

Return `true` if the cell `new_point_index` may be added to an `n`×`n` board
whose occupied cells are `one_indices`, and `false` if it would complete one of
the three forbidden configurations (labelled "312-pattern" in this file) with
some pair of occupied cells.

Linear indices are mapped to 1-based coordinates as
`row = ceil(index / n)` and `col = index % n` (with `0` replaced by `n`).
Pairs in which any two of the three points share a row or a column are ignored.
"""
function new_point_allowed(one_indices, new_point_index, n)
    # Linear index -> (row, col), both 1-based.
    function locate(idx)
        c = idx % n
        return ceil(Int, idx / n), (c == 0 ? n : c)
    end

    row, col = locate(new_point_index)
    placed = [locate(idx) for idx in one_indices]

    for a in 1:length(placed), b in a+1:length(placed)
        (r1, c1), (r2, c2) = placed[a], placed[b]

        # A pattern needs three points in pairwise distinct rows and columns.
        if row == r1 || row == r2 || r1 == r2 || col == c1 || col == c2 || c1 == c2
            continue
        end

        # Order the two existing points so that c1 < c2.
        if c2 < c1
            r1, r2 = r2, r1
            c1, c2 = c2, c1
        end

        # new point as 1 in a valid 312-pattern
        if (r2 < r1 < row) && (c1 < col < c2)
            return false
        end

        # new point as 2 in a valid 312-pattern
        if (r2 < row < r1) && (col < c1)
            return false
        end

        # new point as 3 in a valid 312-pattern
        if (row < r1 < r2) && (c2 < col)
            return false
        end
    end

    return true
end

new_point_allowed (generic function with 1 method)

In [9]:
"""
    add_point(input_state, action_vec, n)

Sample one admissible empty cell of the board and return the board with that
cell set to 1.

# Arguments
- `input_state`: flat board vector; non-zero entries are the occupied cells.
- `action_vec`: non-negative sampling weight for every cell (same length as the
  board). It is not modified.
- `n`: board side length, forwarded to `new_point_allowed`.

# Returns
`(new_state, action_index)`: a copy of `input_state` with one extra cell set to
1, and the linear index of that cell.

# Throws
An `ErrorException` when every cell has been rejected (or the weights do not
sum to a positive finite number), instead of looping forever.
"""
function add_point(input_state, action_vec, n)
    cur_state = copy(input_state)
    # Work on a private copy so rejected candidates can be zeroed without
    # touching the caller's vector.
    weights = copy(action_vec)
    # The occupied cells do not change while we search, so compute them once.
    one_indices = findall(!iszero, input_state)
    candidates = 1:length(weights)

    while true
        total = sum(weights)
        if !(total > 0 && isfinite(total))
            error("add_point: no admissible position left to sample")
        end

        action_index = StatsBase.sample(candidates, Weights(weights))

        if cur_state[action_index] == 0 && new_point_allowed(one_indices, action_index, n)
            cur_state[action_index] = 1
            return cur_state, action_index
        end

        # Rejected: make sure this cell cannot be drawn again.
        weights[action_index] = 0
    end
end

add_point (generic function with 1 method)

In [291]:
# Scratch cell: play a few sessions by hand on a small board and look at the
# final board of the first one.
# NOTE(review): this reassigns the global `n_sessions`, which select_elites and
# select_super_sessions read; `net` must already exist in the session (it is not
# defined at top level in this notebook).
n_sessions = 3
n = 4

i = 1

states = zeros(n_sessions, 4*n - 4 + 1, n^2)
actions = zeros(n_sessions, 4*n - 4 + 1)
scores = zeros(n_sessions)

for i in 1:n_sessions
    step_ = 0
    while step_ < 4*n - 4
        step_ += 1
        cur_state = states[i, step_, :]

        output = net(cur_state)
        next_state, action = add_point(cur_state, output, n)

        # `actions` is 2-D and `action` is a scalar, so index a single element
        # (same form as generate_session) rather than assigning to a slice.
        actions[i, step_] = action
        states[i, step_ + 1, :] = next_state
    end
    final_state = states[i, step_ + 1, :]
    final_state_matrix = reshape(final_state, (n,n))
    scores[i] = glynn(final_state_matrix)
end

# Final board of the first session, transposed for display.
M = reshape(states[1,4*n - 4 + 1,:], (n,n))'

4×4 adjoint(::Matrix{Float64}) with eltype Float64:
 1.0  1.0  0.0  0.0
 1.0  1.0  1.0  1.0
 0.0  1.0  1.0  1.0
 1.0  1.0  0.0  1.0

In [10]:
"""
    generate_session(agent, n_sessions, n)

Play `n_sessions` independent games on an `n`×`n` board, each consisting of
`4n - 4` calls to `add_point`.

`agent` is called with the current flat board and must return the per-cell
sampling weights passed on to `add_point`.

# Returns
- `states`: `n_sessions × (4n-3) × n^2` integer array; slice `[s, t, :]` is the
  board of session `s` before move `t` (the first slice is the empty board and
  the last slice is the final board).
- `actions`: `n_sessions × (4n-3)` integer matrix of chosen cell indices; the
  last column is never written and stays 0.
- `scores`: for each session, `glynn` applied to the final board reshaped to
  `n`×`n` (`glynn` is not defined in this file; presumably it comes from
  `permanents.jl`).
"""
function generate_session(agent, n_sessions, n)
    n_moves = 4*n - 4
    states = zeros(Int, n_sessions, n_moves + 1, n^2)
    actions = zeros(Int, n_sessions, n_moves + 1)
    scores = zeros(Int, n_sessions)
    board = zeros(Int, n^2)  # reused buffer holding the current board

    for session in 1:n_sessions
        for move in 1:n_moves
            board .= states[session, move, :]

            weights = agent(board)
            new_board, chosen = add_point(board, weights, n)

            actions[session, move] = chosen
            states[session, move + 1, :] = new_board
        end

        final_board = reshape(states[session, n_moves + 1, :], (n, n))
        scores[session] = glynn(final_board)
    end

    return states, actions, scores
end

generate_session (generic function with 1 method)

In [11]:
"""
    select_super_sessions(states_batch, actions_batch, rewards_batch, super_percentile;
                          n_sessions=n_sessions)

Pick the sessions that survive to the next generation.

A session is kept when its reward is at least the `super_percentile`-th
percentile of `rewards_batch` (with a `1e-6` tolerance). Sessions are scanned in
batch order and at most `n_sessions * (100 - super_percentile) / 100` of them
(rounded up) are kept.

# Arguments
- `states_batch`: `sessions × steps × cells` array.
- `actions_batch`: `sessions × steps` matrix.
- `rewards_batch`: one reward per session.
- `super_percentile`: percentile in `0..100`.

# Keywords
- `n_sessions`: session count used to size the quota. Defaults to the global
  `n_sessions`, which is what this function always used.

# Returns
`(super_states, super_actions, super_rewards)` for the kept sessions, in their
original batch order. Rewards are returned as `Vector{Int}`.
"""
function select_super_sessions(states_batch, actions_batch, rewards_batch, super_percentile;
                               n_sessions=n_sessions)
    counter = n_sessions * (100 - super_percentile) / 100
    reward_threshold = quantile(rewards_batch, super_percentile / 100)

    # Collect the indices first and slice once, instead of growing three
    # arrays with `cat` on every hit.
    keep = Int[]
    for i in 1:size(states_batch, 1)
        if rewards_batch[i] >= reward_threshold - 0.000001 && counter > 0
            push!(keep, i)
            counter -= 1
        end
    end

    state_type = promote_type(Int, eltype(states_batch))
    action_type = promote_type(Int, eltype(actions_batch))
    super_states = Array{state_type}(states_batch[keep, :, :])
    super_actions = Array{action_type}(actions_batch[keep, :])
    super_rewards = Vector{Int}(rewards_batch[keep])

    return super_states, super_actions, super_rewards
end

select_super_sessions (generic function with 1 method)

In [12]:
"""
    select_elites(states_batch, actions_batch, rewards_batch, percentile;
                  n_sessions=n_sessions)

Flatten the best sessions of a batch into training rows.

A session is elite when its reward is at least the `percentile`-th percentile
of `rewards_batch` (with a `1e-6` tolerance). Sessions are scanned in batch
order and at most `n_sessions * (100 - percentile) / 100` of them (rounded up)
are taken.

# Arguments
- `states_batch`: `sessions × steps × cells` array.
- `actions_batch`: `sessions × steps` matrix.
- `rewards_batch`: one reward per session.
- `percentile`: percentile in `0..100`.

# Keywords
- `n_sessions`: session count used to size the quota. Defaults to the global
  `n_sessions`, which is what this function always used.

# Returns
- `elite_states`: matrix with one row per (elite session, step), sessions in
  batch order and steps in order within each session.
- `elite_actions`: `Vector{Int}` with the matching entries of `actions_batch`,
  in the same order.
"""
function select_elites(states_batch, actions_batch, rewards_batch, percentile;
                       n_sessions=n_sessions)
    counter = n_sessions * (100 - percentile) / 100
    reward_threshold = quantile(rewards_batch, percentile / 100)

    chosen = Int[]
    for i in 1:size(states_batch, 1)
        if rewards_batch[i] >= reward_threshold - 0.000001 && counter > 0
            push!(chosen, i)
            counter -= 1
        end
    end

    # Allocate the outputs once and copy each session's block into place,
    # instead of re-allocating with `vcat` for every single row.
    n_steps = size(states_batch, 2)
    n_moves = size(actions_batch, 2)
    state_type = promote_type(Int, eltype(states_batch))
    elite_states = Matrix{state_type}(undef, length(chosen) * n_steps, size(states_batch, 3))
    elite_actions = Vector{Int}(undef, length(chosen) * n_moves)

    for (k, i) in enumerate(chosen)
        elite_states[(k - 1) * n_steps + 1:k * n_steps, :] = states_batch[i, :, :]
        elite_actions[(k - 1) * n_moves + 1:k * n_moves] = actions_batch[i, :]
    end

    return elite_states, elite_actions
end

select_elites (generic function with 1 method)

In [17]:
"""
    train(board_size, filename)

Run the generate / select / fit loop on a `board_size`×`board_size` board and
log every new best board.

Each generation:
1. plays fresh sessions with `generate_session` and appends the survivors of
   the previous generation;
2. picks training rows with `select_elites` and survivors with
   `select_super_sessions` (using the globals `percentile` and
   `super_percentile`);
3. takes one ADAM step on the binary cross-entropy between the network output
   for every elite state and the one-hot encoding of the cell that was chosen
   from that state;
4. sorts the survivors by reward (descending) and appends any new best board
   and reward to `Data/<filename>_best_board_timeline.txt` and
   `Data/<filename>_best_reward_timeline.txt`.

# Side effects
Overwrites the globals `super_states`, `super_actions` and `super_rewards`
every generation (so they can be inspected after an interrupt), prints
progress, and creates the `Data` directory if needed.

# Returns
`(net, cur_best_game)`: the network and the state sequence of the best session
found (`[]` if no session ever scored above 0).

NOTE(review): the local `n_sessions` below only controls how many sessions are
generated; `select_elites` / `select_super_sessions` take their quota from the
global `n_sessions` because it is not passed to them here.
"""
function train(board_size, filename)
    n_sessions = 3000
    learning_rate = 0.00005

    n = board_size
    n_cells = n^2
    last_step = 4*n - 4 + 1  # index of the final board within a session

    first_layer_neurons = 128
    second_layer_neurons = 64
    third_layers_neurons = 4

    # Define the neural network architecture (similar to PyTorch)
    model = Chain(
        Dense(n_cells, first_layer_neurons, relu),
        Dense(first_layer_neurons, second_layer_neurons, relu),
        Dense(second_layer_neurons, third_layers_neurons, relu),
        Dense(third_layers_neurons, n_cells, σ)
    )

    # Create an instance of the neural network
    net = model

    # Defining the loss function and optimizer (similar to PyTorch)
    criterion(y_pred, y_true) = Flux.binarycrossentropy(y_pred, y_true)
    optimizer = Optimise.ADAM(learning_rate)
    ps = params(model)

    # Global lists
    global super_states = Array{Int}(undef, 0, last_step, n_cells)
    global super_actions = Array{Int}(undef, 0, last_step)
    global super_rewards = Int[]

    cur_best_reward = 0
    cur_best_board = []
    cur_best_game = []
    # Initialised up front: a first generation whose best reward equals the
    # initial 0 reaches the tie branch before any best board was recorded.
    best_states_set = Set{String}()

    # Make a new folder if 'Data' folder does not exist
    if !isdir("Data")
        mkdir("Data")
    end
    board_log = joinpath("Data", filename * "_best_board_timeline.txt")
    reward_log = joinpath("Data", filename * "_best_reward_timeline.txt")

    # Append one board and its reward to the two timeline files.
    function record_best(board, reward)
        open(board_log, "a") do f
            write(f, board_to_string(board, n), '\n')
        end
        open(reward_log, "a") do f
            write(f, string(reward), '\n')
        end
    end

    for i in 1:20000#200000
        println("\n GENERATION $i")
        states_batch, actions_batch, rewards_batch = generate_session(net, n_sessions, n)

        # Carry the previous generation's survivors into this batch.
        if i > 1
            states_batch = cat(states_batch, super_states; dims=1)
            actions_batch = cat(actions_batch, super_actions; dims=1)
            rewards_batch = cat(rewards_batch, super_rewards; dims=1)
        end

        elite_states, elite_actions = select_elites(states_batch, actions_batch, rewards_batch, percentile)
        super_states, super_actions, super_rewards = select_super_sessions(states_batch, actions_batch, rewards_batch, super_percentile)

        # Optimisation step. The loss has to be evaluated inside the closure
        # passed to `gradient`; differentiating a precomputed number yields no
        # gradient and leaves the network unchanged.
        # Rows whose action is 0 are final boards (no move was made from them)
        # and carry no training signal.
        played = findall(!iszero, elite_actions)
        if !isempty(played)
            x = Float32.(permutedims(elite_states[played, :]))       # cells × samples
            y = Flux.onehotbatch(elite_actions[played], 1:n_cells)   # cells × samples
            grads = gradient(() -> criterion(model(x), y), ps)
            Optimise.update!(optimizer, ps, grads)
        end

        # Keep the survivors sorted by reward, best first (stable for ties).
        order = sortperm(super_rewards; rev=true)
        super_states = super_states[order, :, :]
        super_actions = super_actions[order, :]
        super_rewards = super_rewards[order]

        mean_best_reward = mean(super_rewards)

        println("\n$i. Best individuals: ", super_rewards)
        # Uncomment the line below to print out the mean best reward
        println("Mean best reward: $mean_best_reward")

        max_index = argmax(super_rewards)
        best_reward = super_rewards[max_index]

        if best_reward > cur_best_reward
            # Strictly better: restart the set of boards seen at this reward.
            cur_best_reward = best_reward
            cur_best_board = super_states[max_index, last_step, :] # best board as vector
            cur_best_game = super_states[max_index, :, :]

            best_states_set = Set{String}()
            push!(best_states_set, string(cur_best_board))

            record_best(cur_best_board, cur_best_reward)
            if cur_best_reward == 225
                open(reward_log, "a") do f
                    write(f, "GENERATION $i", '\n')
                end
            end
        elseif best_reward == cur_best_reward
            # Same reward: log the board only if it has not been seen before.
            cur_best_board = super_states[max_index, last_step, :] # best board as vector
            board_key = string(cur_best_board)
            if !in(board_key, best_states_set)
                push!(best_states_set, board_key)
                record_best(cur_best_board, cur_best_reward)
            end
        end
    end

    return net, cur_best_game
end

train (generic function with 1 method)

In [19]:
# Run training on a 10x10 board; `filename` is the prefix of the timeline files
# that train() appends to under the Data directory.
# NOTE: this assigns the global `n`; train() runs for a fixed 20000 generations
# unless interrupted.
#n_sessions = 40000#50000
n = 10
filename = "10x10"
best_net, best_game = train(n, filename)


 GENERATION 1

1. Best individuals: [240, 238, 226, 192, 190, 188, 179, 176, 165, 160, 156, 153, 152, 148, 144, 144, 140, 140, 139, 136, 134, 132, 128, 128, 126, 120, 118, 116, 116, 116, 116, 115, 114, 114, 113, 112, 112, 112, 110, 110, 109, 108, 105, 104, 104, 98, 97, 96, 96, 96, 96, 96, 94, 92, 92, 90, 90, 90, 88, 88, 88, 84, 84, 84, 84, 81, 81, 80, 80, 80, 76, 76, 72, 72, 72, 72, 72, 71, 70, 70, 70, 68, 68, 68, 64, 64, 64, 64, 64, 64, 64, 62, 62, 60, 60, 60, 60, 60, 58, 57, 56, 56, 56, 56, 56, 56, 56, 56, 54, 54, 52, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 45, 45, 44, 44, 44, 44, 44, 44, 44, 44, 42, 42, 42, 42, 42, 42, 42, 40, 40, 40, 40, 40, 40, 40, 38, 37, 37, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 35, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32]
Mean best reward: 72.29

 GENERATION 2

2. Best individuals: [268, 252, 220, 192, 190, 180, 170, 168, 168, 166, 164, 

Excessive output truncated after 524288 bytes.

[390, 368, 360, 352, 336, 333, 330, 328, 326, 324, 322, 320, 320, 316, 316, 316, 315, 312, 312, 308, 308, 308, 308, 308, 308, 304, 304, 304, 304, 300, 300, 298, 298, 296, 296, 296, 296, 296, 296, 296, 294, 294, 294, 294, 294, 293, 292, 292, 292, 292, 292, 292, 292, 291, 290, 290, 288, 288, 288, 264, 260, 248, 240, 238, 238, 222, 218, 216, 208, 204, 204, 201, 200, 194, 192, 192, 190, 180, 176, 176, 176, 174, 174, 172, 172, 168, 168, 168, 168, 166, 166, 164, 164, 164, 163, 156, 154, 152, 152, 152, 151, 148, 146, 144, 144, 137, 132, 131, 130, 128, 127, 126, 124, 124, 124, 124, 124, 124, 122, 120, 118, 116, 114, 114, 114, 112, 112, 112, 110, 110, 108, 108, 105, 104, 104, 104, 102, 100, 100, 100, 98, 96, 96, 96, 96, 96, 96, 96, 96, 94, 94, 93, 92, 92, 90, 89, 88, 88, 88, 86, 84, 84, 84, 84, 84, 82, 81, 

LoadError: InterruptException:

# Execute training

# Scratch