In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import seaborn as sns
import time
import os
from permanents import glynn

In [2]:
# Compute device handle used by the cells below (CPU).
device = torch.device("cpu")

# Module-level defaults. select_elites() and select_super_sessions() read
# `n_sessions` from here to size their selection budget; train() declares its
# own local `n_sessions` and `Learning_rate`, so the two must be kept in sync by hand.
n_sessions = 100 # number of sessions generated per training iteration
Learning_rate = 0.001 # optimizer step size; increase this to converge faster
percentile = 90 # reward percentile a session must reach to be used as training data
super_percentile = 95 # reward percentile a session must reach to be carried into the next iteration

# Helper functions

In [3]:
def convert_to_board(word, n):
    """Lay a flat array out row by row on an ``n`` x ``n`` float32 board.

    Args:
        word: numpy array of cell values; entry ``k`` lands on row ``k // n``,
            column ``k % n``.
        n: side length of the board.

    Returns:
        A new ``(n, n)`` float32 numpy array. Cells beyond ``len(word)`` stay 0.
    """
    cells = word.astype(np.float32)
    board = np.zeros((n, n), dtype=np.float32)
    for flat_index, value in enumerate(cells):
        row, col = divmod(flat_index, n)
        board[row, col] = value
    return board

In [4]:
# Helper that extracts the final board state from a recorded game
def final_board_state(game):
    """Return the last played state of a game.

    ``game`` is an indexable sequence of board states. Index 0 is skipped. The
    first state (from index 1 on) that is non-empty and immediately followed by
    an all-zero state is returned; if no such state exists the last state of
    the sequence is returned.

    Returns ``None`` implicitly when ``game`` holds fewer than two states.
    """
    last_index = len(game) - 1
    for idx in range(1, len(game)):
        if idx == last_index:
            return game[idx]
        if game[idx+1].sum() == 0 and game[idx].sum() != 0:
            return game[idx]

In [5]:
def new_point_allowed(one_indices, new_point_index, n):
    """Check whether a new point can be placed without completing a forbidden triple.

    Every unordered pair of already placed points is combined with the new
    point. Pairs in which any two of the three points share a row or a column
    are ignored. The remaining pair is ordered by column (``a`` left of ``b``)
    and the new point is rejected if one of these holds:

    * ``row_b < row_a < new_row`` and ``col_a < new_col < col_b``
    * ``row_b < new_row < row_a`` and ``new_col < col_a``
    * ``new_row < row_a < row_b`` and ``col_b < new_col``

    (The original comments describe these as the new point acting as the
    1, 2 and 3 of a 312-pattern respectively.)

    Args:
        one_indices: flat indices (``row * n + col``) of the points on the board.
        new_point_index: flat index of the candidate point.
        n: side length of the board.

    Returns:
        ``False`` as soon as one forbidden triple is found, ``True`` otherwise.
    """
    new_row, new_col = new_point_index // n, new_point_index % n
    placed = len(one_indices)

    for first in range(placed):
        for second in range(first + 1, placed):
            row_a, col_a = one_indices[first] // n, one_indices[first] % n
            row_b, col_b = one_indices[second] // n, one_indices[second] % n

            # Three points with a common row or column cannot form a pattern.
            shares_line = (
                new_row == row_a
                or new_row == row_b
                or row_a == row_b
                or new_col == col_a
                or new_col == col_b
                or col_a == col_b
            )
            if shares_line:
                continue

            # Normalise the pair so that `a` is the left-most of the two.
            if col_b < col_a:
                row_a, row_b = row_b, row_a
                col_a, col_b = col_b, col_a

            # New point lies between the pair horizontally and below both.
            if row_b < row_a < new_row and col_a < new_col < col_b:
                return False

            # New point lies left of the pair and between them vertically.
            if row_b < new_row < row_a and new_col < col_a:
                return False

            # New point lies right of the pair and above both.
            if new_row < row_a < row_b and col_b < new_col:
                return False

    return True

In [6]:
def add_point(input_state, action_vec, n):
    """Sample one legal move from ``action_vec`` and apply it to a copy of the board.

    A cell is drawn with ``torch.multinomial``. It is accepted when it is empty
    in ``input_state`` and ``new_point_allowed`` approves it; otherwise its
    weight is set to zero, the remaining weights are renormalised and another
    cell is drawn.

    Args:
        input_state: flat board tensor (length ``n * n``), non-zero where a
            point has already been placed. It is not modified.
        action_vec: non-negative sampling weights, one per cell (the policy
            network output). It is not modified.
        n: side length of the board.

    Returns:
        ``(new_state, action_taken)``: a clone of ``input_state`` with the
        chosen cell set to 1, and a one-hot tensor marking the chosen cell.

    Raises:
        RuntimeError: if every cell with a positive weight has been rejected,
            i.e. no legal move can be sampled.
    """
    cur_state = torch.clone(input_state)
    action_taken = torch.zeros([len(action_vec)])

    # Private, detached copy: rejected cells are zeroed in place below, which
    # must neither leak into the caller's tensor nor be tracked by autograd.
    weights = action_vec.detach().clone()

    # The board does not change until a move is accepted, so the occupied
    # cells are computed once, as plain Python ints.
    one_indices = torch.flatten(torch.nonzero(cur_state)).tolist()

    while True:
        action_index = torch.multinomial(weights, 1).item()

        # Cheap occupancy test first; the pairwise pattern check only runs
        # for cells that are actually free.
        if cur_state[action_index] == 0 and new_point_allowed(one_indices, action_index, n):
            cur_state[action_index] = 1
            action_taken[action_index] = 1
            return cur_state, action_taken

        weights[action_index] = 0
        remaining = torch.sum(weights)
        if not remaining > 0:
            # Without this guard the division below produces NaNs and the
            # next multinomial call fails with an unrelated-looking message.
            raise RuntimeError("add_point: no legal cell left to sample from")
        weights = weights / remaining

In [7]:
def generate_session(agent, n_sessions, n):
    """Play ``n_sessions`` games with ``agent`` and score each final board.

    Every game starts from an all-zero board and runs for ``4*n - 4`` moves.
    At each move the agent is called on the current flat board and
    ``add_point`` samples a legal cell from its output. The final board is
    reshaped to ``(n, n)`` and scored with ``glynn``.

    Args:
        agent: callable mapping a flat board tensor of length ``n * n`` to
            sampling weights of the same length.
        n_sessions: number of games to play.
        n: side length of the board.

    Returns:
        ``(states, actions, scores)`` where ``states`` and ``actions`` have
        shape ``(n_sessions, 4*n - 3, n*n)`` and ``scores`` has shape
        ``(n_sessions,)``. ``states[s, t]`` is the board before move ``t`` and
        ``actions[s, t]`` the one-hot move taken from it; the last row of
        ``actions`` is never written and stays all zero.
    """
    n_steps = 4*n - 4
    states = torch.zeros((n_sessions, n_steps + 1, n*n))
    actions = torch.zeros((n_sessions, n_steps + 1, n*n))
    scores = torch.zeros([n_sessions])

    # Rollouts are only sampled from, never differentiated, so the forward
    # passes do not need an autograd graph.
    with torch.no_grad():
        for session in range(n_sessions):
            for step in range(1, n_steps + 1):
                cur_state = states[session, step - 1, :]

                output = agent(cur_state)
                new_state, action = add_point(cur_state, output, n)

                actions[session, step - 1, :] = action
                states[session, step, :] = new_state

            final_state = states[session, n_steps, :]
            scores[session] = glynn(final_state.reshape(n, n).numpy())

    return states, actions, scores

In [8]:
def select_super_sessions(states_batch, actions_batch, rewards_batch, percentile=90):
    """Pick the top sessions that survive into the next generation.

    Sessions are scanned in batch order. A session is kept when its reward is
    at least the ``percentile``-th percentile of ``rewards_batch`` (with a
    0.001 tolerance). Scanning stops once
    ``n_sessions * (100 - percentile) / 100`` sessions have been kept, where
    ``n_sessions`` is the module-level constant.

    Returns:
        ``(super_states, super_actions, super_rewards)``: the kept sessions
        concatenated along dim 0. Each is an empty 1-D tensor when nothing
        was kept.
    """
    budget = n_sessions * (100 - percentile)/100
    cutoff = np.percentile(rewards_batch, percentile)

    # A leading empty tensor keeps the result identical to an incremental
    # concatenation that starts from torch.empty(0).
    kept_states = [torch.empty(0)]
    kept_actions = [torch.empty(0)]
    kept_rewards = [torch.empty(0)]

    for idx in range(len(states_batch)):
        if budget <= 0:
            break
        if not (rewards_batch[idx] >= cutoff - 0.001):
            continue

        kept_states.append(states_batch[idx].unsqueeze(0))
        kept_actions.append(actions_batch[idx].unsqueeze(0))
        kept_rewards.append(torch.tensor([rewards_batch[idx]]))
        budget -= 1

    return (
        torch.cat(kept_states, dim=0),
        torch.cat(kept_actions, dim=0),
        torch.cat(kept_rewards, dim=0),
    )

In [9]:
def select_elites(states_batch, actions_batch, rewards_batch, percentile=50):
    """Collect the (state, action) rows of the best sessions as training data.

    Sessions are scanned in batch order. A session qualifies when its reward is
    at least the ``percentile``-th percentile of ``rewards_batch`` (with a 0.01
    tolerance). Scanning stops once ``n_sessions * (100 - percentile) / 100``
    sessions have qualified, where ``n_sessions`` is the module-level constant.

    For a qualifying session the states are copied from the start up to, but
    not including, the first all-zero state after index 0. The same number of
    action rows is copied from the start of that session's actions.

    Returns:
        ``(elite_states, elite_actions)``: the copied rows concatenated along
        dim 0. Each is an empty 1-D tensor when no session qualified.
    """
    budget = n_sessions * (100 - percentile)/100
    cutoff = np.percentile(rewards_batch, percentile)

    # A leading empty tensor keeps the result identical to an incremental
    # concatenation that starts from torch.empty(0).
    state_rows = [torch.empty(0)]
    action_rows = [torch.empty(0)]

    for idx in range(len(states_batch)):
        if budget <= 0:
            break
        if not (rewards_batch[idx] >= cutoff - 0.01):
            continue

        played = 0
        for board in states_batch[idx]:
            # An empty board after the first entry marks the end of the game.
            if board.sum() == 0 and played != 0:
                break
            state_rows.append(board.unsqueeze(0))
            played += 1

        for position, move in enumerate(actions_batch[idx]):
            if position >= played:
                break
            action_rows.append(move.unsqueeze(0))

        budget -= 1

    return torch.cat(state_rows), torch.cat(action_rows)

# Model and training

In [21]:
def train(board_size, filename, n_iterations=10):
    """Train a policy network with the cross-entropy method and log the best boards.

    Each iteration generates ``n_sessions`` games with the current network,
    appends the survivors of the previous iteration, fits the network on the
    elite sessions (module-level ``percentile``) with one SGD step, and keeps
    the top sessions (module-level ``super_percentile``) sorted by reward as
    the next survivors.

    Whenever the best survivor beats the best reward seen so far, or ties it
    with a board not yet recorded for that reward, the board and the reward
    are appended to ``Data/<filename>_best_board_timeline.txt`` and
    ``Data/<filename>_best_reward_timeline.txt``.

    Side effects: rebinds the module-level ``super_states``, ``super_actions``
    and ``super_rewards``; prints a progress report per iteration and the best
    reward at the end; creates the ``Data`` directory when it first writes.

    Args:
        board_size: side length ``n`` of the board.
        filename: prefix of the log files written under ``Data``.
        n_iterations: number of training iterations to run.

    Returns:
        ``(net, cur_best_game)``: the trained network, whose forward pass
        returns sampling probabilities over the ``n * n`` cells, and the state
        sequence of the best game found (all zeros if none was recorded).
    """
    # Must match the module-level n_sessions: select_elites and
    # select_super_sessions size their selection budget from that global.
    n_sessions = 100 # number of sessions per iteration
    Learning_rate = 0.0001 # learning rate, increase this to converge faster

    n = board_size
    last_state_index = 4*n - 4 # index of the final board within a session

    input_space = n*n
    first_layer_neurons = 128
    second_layer_neurons = 64
    third_layer_neurons = 4
    last_layer_neurons = n*n

    # Defining the neural network architecture
    class MyNet(nn.Module):
        """Four-layer MLP mapping a flat board to a distribution over cells."""

        def __init__(self):
            super(MyNet, self).__init__()
            self.fc1 = nn.Linear(input_space, first_layer_neurons)
            self.fc2 = nn.Linear(first_layer_neurons, second_layer_neurons)
            self.fc3 = nn.Linear(second_layer_neurons, third_layer_neurons)
            self.fc4 = nn.Linear(third_layer_neurons, last_layer_neurons)
            self.relu = nn.ReLU()
            # Normalise over the cell dimension. dim=0 would normalise across
            # the batch whenever a (batch, n*n) input is passed in training.
            self.softmax = nn.Softmax(dim=-1)

        def logits(self, x):
            """Return the unnormalised scores for every cell."""
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            x = self.relu(self.fc3(x))
            return self.fc4(x)

        def forward(self, x):
            """Return sampling probabilities for every cell."""
            return self.softmax(self.logits(x))

    net = MyNet()

    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=Learning_rate)

    global super_states, super_actions, super_rewards
    super_states = torch.empty((0, n*n, n*n), dtype=torch.int)
    super_actions = torch.tensor([], dtype=torch.int)
    super_rewards = torch.tensor([])

    cur_best_reward = 0
    cur_best_game = torch.zeros([n*n, n*n])
    # Boards already logged for the current best reward. Created up front so
    # that a first best reward equal to the initial 0 cannot hit an unbound name.
    best_states_set = set()

    def _record_best(board, reward):
        """Append one best board and its reward to the timeline files."""
        os.makedirs('Data', exist_ok=True)
        with open(os.path.join('Data', str(filename)+'_best_board_timeline'+'.txt'), 'a') as f:
            f.write(str(convert_to_board(board, n)))
            f.write("\n")
        with open(os.path.join('Data', str(filename)+'_best_reward_timeline'+'.txt'), 'a') as f:
            f.write(str(reward))
            f.write("\n")

    for i in range(n_iterations):
        states_batch, actions_batch, rewards_batch = generate_session(net, n_sessions, n)

        # Let last iteration's survivors compete with the new sessions.
        if i > 0:
            states_batch = torch.cat((states_batch, super_states), dim=0)
            actions_batch = torch.cat((actions_batch, super_actions), dim=0)
            rewards_batch = torch.cat((rewards_batch, super_rewards), dim=0)

        elite_states, elite_actions = select_elites(states_batch, actions_batch, rewards_batch, percentile = percentile)

        kept_states, kept_actions, kept_rewards = select_super_sessions(states_batch, actions_batch, rewards_batch, percentile = super_percentile)
        ranked = sorted(zip(kept_states, kept_actions, kept_rewards), key=lambda session: session[2], reverse=True)

        optimizer.zero_grad()
        # CrossEntropyLoss applies log-softmax itself, so it is given the raw
        # logits rather than the probabilities returned by forward().
        loss = criterion(net.logits(elite_states), elite_actions.float())
        loss.backward()
        optimizer.step()

        super_states = torch.stack([session[0] for session in ranked])
        super_actions = torch.stack([session[1] for session in ranked])
        super_rewards = torch.stack([session[2] for session in ranked])

        mean_best_reward = torch.mean(super_rewards)

        print("\n" + str(i) +  ". Best individuals: " + str(super_rewards))
        print("Mean best reward: " + str(mean_best_reward))

        # Survivors are sorted by descending reward, so index 0 is the best.
        max_index = 0
        top_reward = super_rewards[max_index]
        top_board = super_states[max_index, last_state_index].numpy()

        if top_reward > cur_best_reward:
            cur_best_reward = top_reward
            cur_best_game = super_states[max_index]
            # A new record starts a fresh set of boards achieving it.
            best_states_set = set()

        if top_reward == cur_best_reward:
            board_key = str(top_board)
            if board_key not in best_states_set:
                best_states_set.add(board_key)
                _record_best(top_board, cur_best_reward)

    print(cur_best_reward)
    return net, cur_best_game

# Execute training

In [22]:
# Train on a 6x6 board. `n` is also read as a global by the plotting cells
# further down. The best boards and rewards found are appended to
# Data/<filename>_best_board_timeline.txt and Data/<filename>_best_reward_timeline.txt.
n = 6
filename = "6x6_test"
best_net, best_game = train(n, filename)


0. Best individuals: tensor([25., 23., 22., 22., 22.])
Mean best reward: tensor(22.8000)

1. Best individuals: tensor([28., 24., 24., 24., 24.])
Mean best reward: tensor(24.8000)

2. Best individuals: tensor([28., 25., 24., 24., 24.])
Mean best reward: tensor(25.)

3. Best individuals: tensor([32., 28., 28., 25., 25.])
Mean best reward: tensor(27.6000)

4. Best individuals: tensor([32., 28., 28., 27., 25.])
Mean best reward: tensor(28.)

5. Best individuals: tensor([32., 28., 28., 26., 25.])
Mean best reward: tensor(27.8000)

6. Best individuals: tensor([32., 26., 25., 25., 25.])
Mean best reward: tensor(26.6000)

7. Best individuals: tensor([32., 26., 26., 25., 25.])
Mean best reward: tensor(26.8000)

8. Best individuals: tensor([32., 26., 26., 26., 26.])
Mean best reward: tensor(27.2000)

9. Best individuals: tensor([32., 26., 26., 25., 25.])
Mean best reward: tensor(26.8000)
tensor(32.)


In [943]:
# Logging toggles. No active code in the visible notebook reads them.
# NOTE(review): they look like leftovers from an earlier logging scheme — confirm before removing.
write_all = False
write_best = True

# Previous point insertions

In [None]:
# helper function to add 3 of the 5 free points present in all 312-avoiding matrices
def add_free_points(input_state, n, step):
    """Place one of three fixed points on a copy of the board.

    The cell is chosen by ``step``: step 1 marks flat index 0, step 2 marks
    ``n**2 - n`` and any other step marks ``n**2 - 1``.

    Args:
        input_state: flat board tensor; it is not modified.
        n: side length of the board.
        step: which of the fixed points to place.

    Returns:
        ``(new_state, action_taken)``: a clone of ``input_state`` with the
        chosen cell set to 1, and a one-hot tensor marking the chosen cell.
    """
    action_index = 0 if step == 1 else (n**2-n if step == 2 else n**2-1)

    new_state = torch.clone(input_state)
    new_state[action_index] = 1

    one_hot = torch.zeros([len(input_state)])
    one_hot[action_index] = 1

    return new_state, one_hot

In [None]:
# helper function to add corners based on probability on the output of the corner_agent neural network
def add_corner(input_state, action_vec, n, row_boundary, col_boundary):
    """Sample a corner cell from ``action_vec`` and place it on a copy of the board.

    A sampled cell at ``(row, col)`` is rejected when any of these holds:

    * ``row == n-2`` and ``col != n-1``
    * ``col == 1`` and ``row != 0``
    * ``row_boundary == 0`` and ``row != 0``
    * it lies outside ``row_boundary <= row < n-1`` or ``col_boundary <= col``

    A rejected cell gets weight 0 (written into ``action_vec`` in place), the
    weights are renormalised and another cell is drawn.

    Returns:
        ``(new_state, action_taken, terminal, action_row, action_col)`` where
        ``new_state`` is a clone of ``input_state`` with the corner set to 1,
        ``action_taken`` is the one-hot move and ``terminal`` is True when the
        corner sits in the last column.
    """
    new_state = torch.clone(input_state)
    one_hot = torch.zeros([len(action_vec)])

    while True:
        action_index = torch.multinomial(action_vec, 1).item()
        action_row, action_col = divmod(action_index, n)

        blocked = (
            (action_row == n-2 and action_col != n-1)
            or (action_col == 1 and action_row != 0)
            or (row_boundary == 0 and action_row != 0)
        )
        if not blocked and row_boundary <= action_row < n-1 and col_boundary <= action_col:
            new_state[action_index] = 1
            one_hot[action_index] = 1
            break

        # Rule the cell out and redistribute its probability mass.
        action_vec[action_index] = 0
        action_vec = action_vec / torch.sum(action_vec)

    terminal = action_col == n-1

    return new_state, one_hot, terminal, action_row, action_col

In [None]:
# defines a helper function to add point, action_vec is the output, output = agent(cur_state), agent = net
def add_point(input_state, action_vec, forbidden_state, corners, n):
    """Sample a free, non-forbidden cell and update the forbidden mask.

    Note: this definition has the same name as the 3-argument ``add_point``
    defined earlier in the notebook; running this cell rebinds the name, while
    ``generate_session`` calls ``add_point`` with three arguments.

    A cell is drawn from ``action_vec`` until one is found that is empty in
    ``input_state`` and not marked in ``forbidden_state``. Rejected cells get
    weight 0 (written into ``action_vec`` in place) and the weights are
    renormalised. After the point is placed, for every corner strictly above
    and strictly right of it two blocks of still-empty cells are marked
    forbidden:

    * rows strictly between the corner and the point, columns left of the point
    * columns strictly between the point and the corner, rows below the point

    Args:
        input_state: flat board tensor; not modified.
        action_vec: sampling weights, one per cell.
        forbidden_state: flat mask with 1 on forbidden cells; not modified.
        corners: iterable of flat corner indices.
        n: side length of the board.

    Returns:
        ``(new_state, action_taken, new_forbidden)``.
    """
    new_state = torch.clone(input_state)
    new_forbidden = torch.clone(forbidden_state)
    one_hot = torch.zeros([len(action_vec)])

    # Keep drawing until a usable cell comes up.
    while True:
        action_index = torch.multinomial(action_vec, 1).item()
        if new_state[action_index] != 0 or new_forbidden[action_index] == 1:
            action_vec[action_index] = 0
            action_vec = action_vec / torch.sum(action_vec)
            continue
        break

    new_state[action_index] = 1
    one_hot[action_index] = 1
    point_row, point_col = divmod(action_index, n)

    for corner in corners:
        corner_row = corner//n
        corner_col = corner%n
        if not (corner_row < point_row and point_col < corner_col):
            continue

        left_block = [
            r*n + c
            for r in range(corner_row+1, point_row)
            for c in range(point_col)
        ]
        right_block = [
            r*n + c
            for c in range(point_col+1, corner_col)
            for r in range(point_row+1, n)
        ]
        for cell in left_block + right_block:
            if new_state[cell] == 0:
                new_forbidden[cell] = 1

    return new_state, one_hot, new_forbidden

# Individual testing

In [None]:
# Play a single game with the trained network. generate_session takes
# (agent, n_sessions, n); the board size must be the same `n` that is used for
# indexing and reshaping below.
states, actions, scores = generate_session(best_net, 1, n)

# Plot
step_through = 4*n - 4 # index of the final board of a session
state = states[0,step_through,:]
state_mtx = state.reshape(n,n)

# Last move of the game; pass action_mtx to plot_board to show it instead.
action = actions[0,step_through-1,:]
action_mtx = action.reshape(n,n)

# NOTE(review): plot_board is not defined in the visible part of this notebook.
plot_board(state_mtx)

print(glynn(state_mtx.numpy()))

In [None]:
# Plot
# Take the final board (index 4*n-4) of the best game returned by train(),
# show it and print its glynn() score.
# NOTE(review): plot_board is not defined in the visible part of this notebook.
state = best_game[4*n-4,:]
state_mtx = state.reshape(n,n)
plot_board(state_mtx)
print(glynn(state_mtx.numpy()))