In [None]:
import numpy as np
import os
import random
import time
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
!pip install kaggle_environments
from kaggle_environments import make, evaluate, utils
from kaggle_environments import agent as kagent

!pip install 'tensorflow==1.15.0'
import tensorflow as tf
tf.__version__

In [None]:
############### Model environment
import os
!apt-get update
!apt-get install -y cmake libopenmpi-dev python3-dev zlib1g-dev

!pip install "stable-baselines[mpi]==2.9.0"
from stable_baselines import PPO1 
from stable_baselines.a2c.utils import conv, linear, conv_to_fc
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.bench import Monitor 
from stable_baselines.common.vec_env import DummyVecEnv

In [None]:
def get_win_percentages(agent1, agent2, n_rounds=10, rtrn_outcomes=False):
    """Play two agents against each other and print a summary of the results.

    Roughly half of the ``n_rounds`` episodes are run with ``agent1`` listed
    first and the rest with ``agent2`` listed first; results of the second
    batch are swapped so every outcome reads ``[agent1_result, agent2_result]``.

    Args:
        agent1: First agent, passed straight to ``evaluate``.
        agent2: Second agent, passed straight to ``evaluate``.
        n_rounds: Total number of episodes across both batches.
        rtrn_outcomes: When truthy, the list of outcomes is returned.

    Returns:
        The list of ``[agent1_result, agent2_result]`` pairs when
        ``rtrn_outcomes`` is truthy, otherwise ``None``.
    """
    import numpy as np

    # Default Connect Four setup
    board_setup = {'rows': 6, 'columns': 7, 'inarow': 4}
    rounds_agent1_first = n_rounds // 2
    rounds_agent2_first = n_rounds - rounds_agent1_first

    # Batch 1: agent1 is listed first.
    outcomes = evaluate("connectx", [agent1, agent2], board_setup, [], rounds_agent1_first)
    # Batch 2: agent2 is listed first, so each pair is flipped back into agent1-first order.
    swapped_batch = evaluate("connectx", [agent2, agent1], board_setup, [], rounds_agent2_first)
    for agent2_result, agent1_result in swapped_batch:
        outcomes.append([agent1_result, agent2_result])

    total = len(outcomes)
    agent1_wins = outcomes.count([1, -1])
    agent2_wins = outcomes.count([-1, 1])
    print("Agent 1 Win Percentage:", np.round(agent1_wins/total, 2))
    print("Agent 2 Win Percentage:", np.round(agent2_wins/total, 2))
    print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))

    return outcomes if rtrn_outcomes else None

# Game Agents

### Heuristic Agents

##### Pseudocode

Minimax Pseudocode:
```
function minimax(node, depth, maximizingPlayer) is
    if depth = 0 or node is a terminal node then
        return the heuristic value of node
    if maximizingPlayer then
        value := −∞
        for each child of node do
            value := max(value, minimax(child, depth − 1, FALSE))
        return value
    else (* minimizing player *)
        value := +∞
        for each child of node do
            value := min(value, minimax(child, depth − 1, TRUE))
        return value 

### Initial call:        
### minimax(origin, depth, TRUE)
```
[minimax algorithm](https://en.wikipedia.org/wiki/Minimax#Pseudocode) 

##### Implementation

In [None]:
#@title Heuristic Agent
def heuristic(obs, config):
    """Pick a Connect-X column with plain (unpruned) minimax over a window-count heuristic.

    Args:
        obs: Observation exposing ``board`` (a flat sequence that is reshaped to
            ``rows x columns``; 0 marks an empty cell, 1 and 2 are the players'
            discs) and ``mark`` (the disc value this agent plays).
        config: Game configuration exposing ``rows``, ``columns`` and ``inarow``.

    Returns:
        A column whose top cell is empty and whose minimax score is the highest;
        ties are broken uniformly at random.
    """

    ################################
    # Imports and helper functions #
    ################################

    # Imported locally so the function does not depend on notebook-level imports.
    import numpy as np
    import random

    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    # Weights applied to the window counts in get_heuristic.
    A = 10       # own windows with three discs and one empty cell
    B = 1000     # own windows with four discs
    C = -1       # opponent windows with three discs and one empty cell
    D = -100     # opponent windows with four discs

    # lookahead depth:
    N_STEPS =  4#@param {type: "integer"}

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        # Scan from the last row up to row 0 (row 0 is the row used for the
        # "column is full" checks) and stop at the first empty cell.
        for row in range(ROWS-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Yields every CNCTX-long window of the grid as a list, in the order
    # horizontal, vertical, positive diagonal, negative diagonal.
    def iter_windows(grid):
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[row, col:col+CNCTX])
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                yield list(grid[row:row+CNCTX, col])
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        return sum(1 for window in iter_windows(grid) if check_window(window, num_discs, piece))

    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(grid, mark):
        opponent = mark%2+1
        num_threes = count_windows(grid, 3, mark) #A
        num_fours = count_windows(grid, 4, mark)  #B
        num_threes_opp = count_windows(grid, 3, opponent) #C
        num_fours_opp = count_windows(grid, 4, opponent)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        return score

    # Helper function for minimax: checks if agent or opponent has CNCTX in a row in the window
    def is_terminal_window(window):
        return window.count(1) == CNCTX or window.count(2) == CNCTX

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid):
        # Check for draw: no empty cell left in row 0
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        return any(is_terminal_window(window) for window in iter_windows(grid))

    # Plain minimax; scores are always computed from `mark`'s point of view.
    def minimax(node, depth, maximizingPlayer, mark):
        if depth == 0 or is_terminal_node(node):
            return get_heuristic(node, mark)

        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, minimax(child, depth-1, False, mark))
            return value

        else: #minimizing player: the opponent (mark%2+1) moves
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, minimax(child, depth-1, True, mark))
            return value

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        score = minimax(next_grid, nsteps-1, False, mark)
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves (columns whose first-row cell is empty)
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = {col: score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves}

    # Get a list of columns (moves) that maximize the heuristic; the maximum is
    # computed once instead of once per candidate column.
    best_score = max(scores.values(), default=-np.inf)
    max_cols = [col for col, score in scores.items() if score == best_score]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

### Alpha-Beta Pruning Agents

##### Pseudocode

AlphaBeta Pruner Pseudocode:

```
function alphabeta(node, depth, α, β, maximizingPlayer) is
    if depth = 0 or node is a terminal node then
        return the heuristic value of node
    if maximizingPlayer then
        value := −∞
        for each child of node do
            value := max(value, alphabeta(child, depth − 1, α, β, FALSE))
            α := max(α, value)
            if α ≥ β then
                break (* β cutoff *)
        return value
    else
        value := +∞
        for each child of node do
            value := min(value, alphabeta(child, depth − 1, α, β, TRUE))
            β := min(β, value)
            if β ≤ α then
                break (* α cutoff *)
        return value

### Initial call:        
### alphabeta(origin, depth, −∞, +∞, TRUE)
```
[alpha-beta pruning](https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning#Pseudocode) 

##### Implementation

In [None]:
#@title Current Pruning Agent "pruner" >>> my_agent
#def my_agent(obs, config):
def pruner(obs, config):
    """Pick a Connect-X column with alpha-beta pruned minimax plus score-based early exits.

    Args:
        obs: Observation exposing ``board`` (a flat sequence that is reshaped to
            ``rows x columns``; 0 marks an empty cell, 1 and 2 are the players'
            discs) and ``mark`` (the disc value this agent plays).
        config: Game configuration exposing ``rows``, ``columns`` and ``inarow``.

    Returns:
        A column whose top cell is empty and whose search score is the highest;
        ties are broken uniformly at random.
    """

    ################################
    # Imports and helper functions #
    ################################

    # Imported locally so the function does not depend on notebook-level imports.
    import numpy as np
    import random

    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    # Weights applied to the window counts in get_score.
    A = 10       # own windows with three discs and one empty cell
    B = 1000     # own windows with four discs
    C = -1       # opponent windows with three discs and one empty cell
    D = -100     # opponent windows with four discs

    # lookahead depth ("integer" to match the other agents' form fields):
    N_STEPS = 4  #@param {type: "integer"}

    # Extra early exits on top of the alpha/beta test: sibling moves are no
    # longer explored once a value this good (for the maximizer) or this bad
    # (for the minimizer) has been found.
    # NOTE(review): presumably chosen just below B and just above D — confirm.
    WIN_CUTOFF = 800
    LOSS_CUTOFF = -80

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        # Scan from the last row up to row 0 (row 0 is the row used for the
        # "column is full" checks) and stop at the first empty cell.
        for row in range(ROWS-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Yields every CNCTX-long window of the grid as a list, in the order
    # horizontal, vertical, positive diagonal, negative diagonal.
    def iter_windows(grid):
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[row, col:col+CNCTX])
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                yield list(grid[row:row+CNCTX, col])
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])

    # Helper function for get_score: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_score: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        return sum(1 for window in iter_windows(grid) if check_window(window, num_discs, piece))

    # Returns (heuristic score, is_terminal) for the grid from `mark`'s point of view.
    def get_score(grid, mark):
        opponent = mark%2+1
        num_threes = count_windows(grid, 3, mark) #A
        num_fours = count_windows(grid, 4, mark)  #B
        num_threes_opp = count_windows(grid, 3, opponent) #C
        num_fours_opp = count_windows(grid, 4, opponent)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        # Terminal when either side has a four-disc window or row 0 has no empty cell.
        is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
        return score, is_terminal

    # Alpha Beta Pruning of MiniMax algorithm:
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
        node_score, is_terminal = get_score(node, mark)
        if depth == 0 or is_terminal:
            return node_score

        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
                alpha = max(alpha, value)
                if alpha >= beta or value >= WIN_CUTOFF:
                    break
            return value

        else: #minimizing player: the opponent (mark%2+1) moves
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
                beta = min(beta, value)
                if alpha >= beta or value <= LOSS_CUTOFF:
                    break
            return value

    # Uses alphabeta to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        score = alphabeta(next_grid, nsteps-1, -np.inf, np.inf, False, mark)
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves (columns whose first-row cell is empty)
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = {col: score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves}

    # Get a list of columns (moves) that maximize the heuristic; the maximum is
    # computed once instead of once per candidate column.
    best_score = max(scores.values(), default=-np.inf)
    max_cols = [col for col, score in scores.items() if score == best_score]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

### Trained Agent (my_agent)

##### Training

In [None]:
#from kaggle_environments import make, evaluate, agent
from gym import spaces

class ConnectFourGym:
    """Gym-style wrapper around the Kaggle ``connectx`` training environment.

    The learning agent occupies the first slot of ``ks_env.train`` (the
    ``None`` entry); ``agent2`` fills the second slot. Observations are the
    board reshaped to ``(rows, columns, 1)``.
    """

    def __init__(self, agent2="random"):
        """Create the underlying environment with ``agent2`` as the opponent."""
        ks_env = make("connectx", debug=True)
        self.env = ks_env.train([None, agent2])
        self.rows = ks_env.configuration.rows
        self.columns = ks_env.configuration.columns
        # Learn about spaces here: http://gym.openai.com/docs/#spaces
        self.action_space = spaces.Discrete(self.columns)
        # Builtin int replaces np.int: the alias was deprecated in NumPy 1.20
        # and removed in 1.24, and it was only ever another name for int.
        self.observation_space = spaces.Box(low=0, high=2,
                                            shape=(self.rows, self.columns, 1), dtype=int)
        # Tuple corresponding to the min and max possible rewards
        self.reward_range = (-10, 1)
        # StableBaselines throws error if these are not defined
        self.spec = None
        self.metadata = None

    def _board_state(self):
        """Return the current board as a ``(rows, columns, 1)`` array."""
        return np.array(self.obs['board']).reshape(self.rows, self.columns, 1)

    def reset(self):
        """Reset the wrapped environment and return the initial board state."""
        self.obs = self.env.reset()
        return self._board_state()

    def change_reward(self, old_reward, done):
        """Map the environment's reward onto the training reward.

        Returns 1 when ``old_reward`` is 1, -1 for any other finished game,
        and ``1 / (rows * columns)`` for a move that does not end the game.
        """
        if old_reward == 1: # The agent won the game
            return 1
        elif done: # The game ended without a win for the agent
            return -1
        else: # Small per-move reward (1/42 on a 6x7 board)
            return 1/(self.rows*self.columns)

    def step(self, action):
        """Play ``action`` (a column index) and return ``(state, reward, done, info)``.

        A move into a column whose top cell is occupied is not forwarded to the
        environment: the episode ends immediately with reward -10 and an empty
        info dict, and the returned state is the unchanged board.
        """
        column = int(action)
        # Check if agent's move is valid
        is_valid = (self.obs['board'][column] == 0)
        if is_valid: # Play the move
            self.obs, old_reward, done, info = self.env.step(column)
            reward = self.change_reward(old_reward, done)
        else: # End the game and penalize agent
            reward, done, info = -10, True, {}
        return self._board_state(), reward, done, info

##### Create ConnectFour environment -- define training opponent here!
# `agent2` is the opponent the model is trained against.
env = ConnectFourGym(agent2="negamax")  ################### <--- agent goes here
########################################################

# Create directory for logging training information
log_dir = "ppo/"
os.makedirs(log_dir, exist_ok=True)

# Logging progress: Monitor wraps the environment and logs into log_dir
# (the training cell reads "monitor.csv" back from this directory).
monitor_env = Monitor(env, log_dir, allow_early_resets=True)

# Create a vectorized environment containing the single monitored environment
vec_env = DummyVecEnv([lambda: monitor_env])

# Neural network for predicting action values
def modified_cnn(scaled_images, **kwargs):
    """Feature extractor: two 3x3 ReLU conv layers followed by a 512-unit ReLU dense layer.

    Args:
        scaled_images: Input passed to the first convolution.
        **kwargs: Forwarded to both ``conv`` calls (not to ``linear``).

    Returns:
        Output of the ReLU-activated ``'fc1'`` layer.
    """
    gain = np.sqrt(2)
    conv_settings = dict(filter_size=3, stride=1, init_scale=gain)

    conv_1 = tf.nn.relu(conv(scaled_images, 'c1', n_filters=32, **conv_settings, **kwargs))
    conv_2 = tf.nn.relu(conv(conv_1, 'c2', n_filters=64, **conv_settings, **kwargs))
    flattened = conv_to_fc(conv_2)
    dense = linear(flattened, 'fc1', n_hidden=512, init_scale=gain)
    return tf.nn.relu(dense)

class CustomCnnPolicy(CnnPolicy):
    """``CnnPolicy`` variant that always uses ``modified_cnn`` as its CNN extractor."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``CnnPolicy`` and set ``cnn_extractor``."""
        super().__init__(*args, cnn_extractor=modified_cnn, **kwargs)

In [None]:
# Initialize agent: a PPO1 model using the custom CNN policy on the
# vectorized, monitored ConnectFour environment.
model = PPO1(CustomCnnPolicy, vec_env, verbose=0)

In [None]:
# Train agent (short run) and time it
start = time.time()
train_steps = 100

model.learn(total_timesteps=train_steps)

print (train_steps,"steps took",time.time()-start,"seconds.")

# Plot the rolling mean of the 'r' column from the monitor log
with open(os.path.join(log_dir, "monitor.csv"), 'rt') as fh:    
    # The first line of the log is expected to start with '#' and is consumed
    # here so that read_csv starts at the next line.
    firstline = fh.readline()
    assert firstline[0] == '#'
    df = pd.read_csv(fh, index_col=None)['r']
# NOTE(review): the window is derived from train_steps, not from the number of
# logged rows — confirm this is the intended smoothing.
df.rolling(window=train_steps//10).mean().plot()
plt.show()

In [None]:
# Train agent (longer run): continues calling learn() on the same `model`
# object as the previous cell, and reports the wall-clock time.
start = time.time()
train_steps = 20000
model.learn(total_timesteps=train_steps)
print (train_steps,"steps took",time.time()-start,"seconds.")

In [None]:
#model.save('./trained_depth4_50000')
#trained_model = PPO1.load('/content/trained_negamax_100000.zip', env=env)

In [None]:
# NOTE(review): this rebinds `env`, which the training cell bound to a
# ConnectFourGym instance; after this cell it is the object returned by
# make("connectx").
env = make("connectx")

##### Finally, we specify the trained agent in the format required for the competition.

In [None]:
def my_agent(obs, config):
    """Choose a column with the saved PPO1 model, falling back to a random valid column.

    Args:
        obs: Observation exposing the flat board under ``obs['board']``
            (0 marks an empty cell).
        config: Game configuration exposing ``rows`` and ``columns``.

    Returns:
        int: the model's predicted column when its top cell is empty,
        otherwise a random column whose top cell is empty.
    """
    # Imported locally so the function does not depend on notebook-level imports.
    import random
    import numpy as np
    from stable_baselines import PPO1

    # Saved model trained on best heuristic agent
    MODEL_PATH = '/content/trained_negamax_100000.zip'

    # Load the model once and reuse it: the agent is called for every move,
    # and re-reading the file each time dominates the time per move.
    model_cache = globals().setdefault('_MY_AGENT_MODEL_CACHE', {})
    if MODEL_PATH not in model_cache:
        model_cache[MODEL_PATH] = PPO1.load(MODEL_PATH)
    trained_model = model_cache[MODEL_PATH]

    board = obs['board']

    # Use the trained model to select a column; the board is shaped
    # (rows, columns, 1), the same layout ConnectFourGym returns.
    col, _ = trained_model.predict(np.array(board).reshape(config.rows, config.columns, 1))
    col = int(col)

    # The move is valid when the top cell of the selected column is empty.
    if board[col] == 0:
        return col

    # If not valid, select a random move among the non-full columns.
    return random.choice([c for c in range(config.columns) if board[c] == 0])

# Tests

In [None]:
#agent1
#930//60

15

In [None]:
# Play `n_rounds` games between the two selected agents, print the win/invalid
# summary, then report the total and per-round wall-clock time.
start_time = time.time()
n_rounds =   1#@param {type:"integer"}
agent1 = my_agent #@param
agent2 = "random" #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.0
Agent 2 Win Percentage: 0.0
Number of Invalid Plays by Agent 1: 1
Number of Invalid Plays by Agent 2: 0
Total time taken: 0.0 seconds (per round: 0.037 seconds)


#### Animated game

In [None]:
# Create the game environment
env = make("connectx")

# `no_config` plays one game against the `heuristic` agent
# NOTE(review): `no_config` is not defined in any cell visible here — confirm
# it exists before running this cell on a fresh kernel.
env.run([no_config, heuristic])

# Show the game
env.render(mode="ipython")

#env.play([None,no_config])

# Experimental

In [None]:
#@title Time and Score Comparisons
# Play `n_rounds` games between the two selected agents and time them.
# NOTE(review): `pruneX` is defined in a later cell ("Experimental Pruning
# Agent"), so this cell fails on a fresh kernel unless that cell is run first.
start_time = time.time()
n_rounds =   1#@param {type:"integer"}
agent1 = pruneX #@param
agent2 = pruneX #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.0
Agent 2 Win Percentage: 1.0
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 1.5 seconds (per round: 1.546 seconds)


In [None]:
# Hand-written 6x7 board used by the experimental cells below
# (0 = empty cell, 1 and 2 = the two players' discs).
grid = np.asarray([ [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 2, 1, 0, 2],
                    [0, 1, 0, 2, 1, 0, 2],
                    [0, 1, 1, 2, 1, 1, 2] ])


In [None]:
# Import saved model trained on best heuristic agent
# NOTE(review): `env` here is the object returned by make("connectx"), not the
# ConnectFourGym wrapper used for training (see the commented-out line) —
# confirm that PPO1.load accepts it.
env = make("connectx")
#env = ConnectFourGym(agent2="negamax")
trained_model = PPO1.load('/content/trained_negamax_100000.zip', env=env)


In [None]:
# Use the trained model to select a column. The 2D grid gets a trailing
# channel axis, the same (rows, columns, 1) layout ConnectFourGym returns.
n_rows, n_cols = grid.shape
col, _ = trained_model.predict(grid.reshape(n_rows, n_cols, 1))
col = int(col)

# Check if selected column is valid: its top cell (row 0) must be empty.
# Indexing grid[col] alone would select a whole row of the 2D array.
is_valid = (grid[0, col] == 0)

# If not valid, select random move among the columns whose top cell is empty.
if is_valid:
    print ("Prediction",int(col))
else:
    print ("Guess:",random.choice([c for c in range(n_cols) if grid[0, c] == 0]))

In [None]:
# @title Experimental Pruning Agent "pruneX"
def pruneX(obs, config):
    """Pick a Connect-X column with alpha-beta pruned minimax, stopping at the first winning column.

    Args:
        obs: Observation exposing ``board`` (a flat sequence of 42 cells that is
            reshaped to 6 x 7; 0 marks an empty cell, 1 and 2 are the players'
            discs) and ``mark`` (the disc value this agent plays).
        config: Game configuration; not read, the board size is fixed below.

    Returns:
        A column whose top cell is empty and whose search score is the highest
        among the columns that were scored; ties are broken uniformly at random.
    """

    ################################
    # Imports and helper functions #
    ################################

    # Imported locally so the function does not depend on notebook-level imports.
    import numpy as np
    import random
    ########################### Regular pruner ################
    # constants (fixed to the default 6x7, four-in-a-row game)
    ROWS = 6
    COLUMNS = 7
    CNCTX = 4
    ## coefficients (weights applied to the window counts in get_score)
    A = 10       # own windows with three discs and one empty cell
    B = 1000     # own windows with four discs
    C = -1       # opponent windows with three discs and one empty cell
    D = -100     # opponent windows with four discs

    # lookahead depth:
    N_STEPS = 4#@param {type:"integer"}

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        # Scan from the last row up to row 0 (row 0 is the row used for the
        # "column is full" checks) and stop at the first empty cell.
        for row in range(ROWS-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Yields every CNCTX-long window of the grid as a list, in the order
    # horizontal, vertical, positive diagonal, negative diagonal.
    def iter_windows(grid):
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[row, col:col+CNCTX])
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                yield list(grid[row:row+CNCTX, col])
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])

    # Helper function for get_score: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_score: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        return sum(1 for window in iter_windows(grid) if check_window(window, num_discs, piece))

    # Returns (heuristic score, is_terminal) for the grid from `mark`'s point of view.
    def get_score(grid, mark):
        opponent = mark%2+1
        num_threes = count_windows(grid, 3, mark) #A
        num_fours = count_windows(grid, 4, mark)  #B
        num_threes_opp = count_windows(grid, 3, opponent) #C
        num_fours_opp = count_windows(grid, 4, opponent)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        # Terminal when either side has a four-disc window or row 0 has no empty cell.
        is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
        return score, is_terminal

    # Alpha-beta pruned minimax; scores are always from `mark`'s point of view.
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
        node_score, is_terminal = get_score(node, mark)
        if depth == 0 or is_terminal:
            return node_score

        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]

        if maximizingPlayer:
            # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value

        else: #minimizing player: the opponent (mark%2+1) moves
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
                beta = min(beta, value)
                if alpha >= beta:
                    break
            return value

    # Uses alphabeta to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        score = alphabeta(next_grid, nsteps-1, -np.inf, np.inf, False, mark)
        return score
    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves (columns whose first-row cell is empty)
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Score the candidate columns in order, stopping as soon as one reaches the
    # weight of a four-disc window (B); later columns are then left unscored.
    returning_scores = []
    for col in valid_moves:
        # obs.mark, not a bare `mark`: no such name exists in this function.
        col_score = score_move(grid, col, obs.mark, N_STEPS)
        returning_scores.append(col_score)
        if col_score >= B:# or col_score <= D:
            break
    # zip stops at the shorter list, so only the scored columns are kept.
    scores = dict(zip(valid_moves, returning_scores))

    # Get a list of columns (moves) that maximize the heuristic; the maximum is
    # computed once instead of once per candidate column.
    best_score = max(scores.values(), default=-np.inf)
    max_cols = [col for col, score in scores.items() if score == best_score]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

In [None]:
#@title Regular pruner 
import numpy as np
import random
import pandas as pd


########################### Regular pruner ################
# Module-level copy of the pruner's helpers, instrumented with print
# statements so that a search can be traced step by step.
# constants (fixed to the default 6x7, four-in-a-row game)
ROWS = 6
COLUMNS = 7
CNCTX = 4
## coefficients (weights applied to the window counts in get_heuristic)
A = 10       # own windows with three discs and one empty cell
B = 1000     # own windows with four discs
C = -1      # opponent windows with three discs and one empty cell
D = -100     # opponent windows with four discs

# lookahead depth:
N_STEPS = 4

# Gets board at next step if agent drops piece in selected column
def drop_piece(grid, col, mark):
    """Return a copy of ``grid`` with ``mark`` placed in ``col``, and log the drop.

    The disc goes into the first empty cell found scanning from the last row
    up to row 0; when the column has no empty cell, row 0 is overwritten.
    The input grid is left untouched.
    """
    next_grid = grid.copy()
    bottom_up = reversed(range(ROWS))
    # Falls back to row 0 when no cell in the column is empty.
    row = next((r for r in bottom_up if next_grid[r][col] == 0), 0)
    next_grid[row][col] = mark
    print ("Dropped", mark,"into column", col, ", row:", row)
    return next_grid

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window(window, num_discs, piece):
    """Tell whether ``window`` holds exactly ``num_discs`` of ``piece`` and is otherwise empty.

    "Otherwise empty" means the number of zeros equals ``CNCTX - num_discs``.
    """
    if window.count(piece) != num_discs:
        return False
    empties_needed = CNCTX - num_discs
    return window.count(0) == empties_needed

# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows(grid, num_discs, piece):
    """Count the ``CNCTX``-long windows of ``grid`` accepted by ``check_window``.

    Windows are taken along rows, columns and both diagonal directions.
    """
    span = CNCTX - 1
    windows = []
    # horizontal
    windows += [list(grid[r, c:c+CNCTX])
                for r in range(ROWS) for c in range(COLUMNS-span)]
    # vertical
    windows += [list(grid[r:r+CNCTX, c])
                for r in range(ROWS-span) for c in range(COLUMNS)]
    # positive diagonal
    windows += [list(grid[range(r, r+CNCTX), range(c, c+CNCTX)])
                for r in range(ROWS-span) for c in range(COLUMNS-span)]
    # negative diagonal
    windows += [list(grid[range(r, r-CNCTX, -1), range(c, c+CNCTX)])
                for r in range(span, ROWS) for c in range(COLUMNS-span)]
    return sum(1 for window in windows if check_window(window, num_discs, piece))

# Helper function for minimax: calculates value of heuristic for grid
def get_heuristic(grid, mark):
    """Score ``grid`` from ``mark``'s point of view.

    The score is the weighted sum ``A*threes + B*fours + C*opp_threes +
    D*opp_fours``, where each term counts windows via ``count_windows`` and
    the opponent's disc value is ``mark%2+1``.
    """
    opponent = mark%2+1
    own_threes = count_windows(grid, 3, mark)
    own_fours = count_windows(grid, 4, mark)
    opp_threes = count_windows(grid, 3, opponent)
    opp_fours = count_windows(grid, 4, opponent)
    return A*own_threes + B*own_fours + C*opp_threes + D*opp_fours

# Helper function for minimax: checks if agent or opponent has four in a row in the window
def is_terminal_window(window):
    """Tell whether ``window`` contains ``CNCTX`` discs of player 1 or of player 2."""
    return any(window.count(player) == CNCTX for player in (1, 2))

# Helper function for minimax: checks if game has ended
def is_terminal_node(grid):
    """Tell whether the game on ``grid`` has ended.

    The game counts as ended when row 0 has no empty cell (draw check) or when
    any ``CNCTX``-long window satisfies ``is_terminal_window``.
    """
    # Check for draw
    top_row = list(grid[0, :])
    if 0 not in top_row:
        return True

    span = CNCTX - 1

    def windows():
        # horizontal
        for r in range(ROWS):
            for c in range(COLUMNS-span):
                yield list(grid[r, c:c+CNCTX])
        # vertical
        for r in range(ROWS-span):
            for c in range(COLUMNS):
                yield list(grid[r:r+CNCTX, c])
        # positive diagonal
        for r in range(ROWS-span):
            for c in range(COLUMNS-span):
                yield list(grid[range(r, r+CNCTX), range(c, c+CNCTX)])
        # negative diagonal
        for r in range(span, ROWS):
            for c in range(COLUMNS-span):
                yield list(grid[range(r, r-CNCTX, -1), range(c, c+CNCTX)])

    # Check for win: horizontal, vertical, or diagonal
    for window in windows():
        if is_terminal_window(window):
            return True
    return False

# Minimax with alpha-beta pruning (verbose: traces every node it visits)
def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
    """Return the alpha-beta value of `node`, scored from `mark`'s point of view.

    Parameters
    ----------
    node : grid to evaluate (indexed as node[row][col]; 0 marks an empty cell).
    depth : remaining number of plies to look ahead.
    alpha, beta : current lower / upper bounds of the search window.
    maximizingPlayer : True when the next disc is dropped by `mark`,
        False when it is dropped by the other player (`mark%2+1`).
    mark : the player whose heuristic (get_heuristic) is being maximised.

    The recursion stops at depth 0 or when is_terminal_node(node) is true;
    in both cases get_heuristic(node, mark) is returned.
    """
    print ("~ Entering Pruner Loop ~")
    print ("depth: {} \talpha: {} \tbeta: {}".format(depth,
                                                    round(alpha,2),
                                                    round(beta,2)) )
    if depth == 0:
        node_score = get_heuristic(node, mark)
        print ("\n~~ Depth terminal grid ~~\n", node,"\nnode score:", node_score,"\n")
        return node_score
    if is_terminal_node(node):
        node_score = get_heuristic(node, mark)
        print ("Winning terminal grid\n", node,"\nterminal score:", node_score,"\n")
        return node_score

    # A column is playable while its row-0 cell is empty.
    valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]

    if maximizingPlayer:
        # np.inf (lower case) is the spelling that exists in every NumPy
        # release; the np.Inf alias was removed in NumPy 2.0.
        value = -np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark)
            value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
            alpha = max(alpha, value)
            if alpha >= beta:
                print ("\t\t<<<< BETA cut-off >>>>")
                print ("\t\t<< alpha:",alpha,">= beta:",beta,">>")
                break
        return value

    else: #minimizing player
        value = np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1)
            value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
            beta = min(beta, value)
            if alpha >= beta:
                print ("\t\t<<<< ALPHA cut-off >>>>")
                print ("\t\t<< alpha:",alpha,">= beta:",beta,">>")
                break
        return value

# Uses minimax to calculate value of dropping piece in selected column
def score_move(grid, col, mark, nsteps):
    """Return the alpha-beta value of `mark` dropping a disc into column `col`.

    The disc is placed with drop_piece, then alphabeta searches the resulting
    grid for `nsteps-1` further plies with the other player to move
    (maximizingPlayer=False) and a fully open (-inf, +inf) window.
    """
    print("\nv v v v v v v v v v v v v v v v v v\nGet best score for column:",col)
    next_grid = drop_piece(grid, col, mark)
    # np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
    score = alphabeta(next_grid, nsteps-1, -np.inf, np.inf, False, mark)
    print("Return score:",score, "for column", col)
    print("\n^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ \n")
    return score

# ----------------------------------------------------------------------
# Demo run: let the agent choose a column for a hand-built position.
# ----------------------------------------------------------------------
start_time = time.time()   # wall-clock start, reported at the end of the cell
mark = 1                   # value passed as `mark` to score_move
# (Replace the literal below with np.zeros((ROWS,COLUMNS)) to start from an empty board.)
grid = np.asarray([
    [0, 0, 1, 0, 0, 0, 0],
    [0, 0, 2, 0, 0, 0, 0],
    [0, 0, 2, 0, 0, 0, 2],
    [0, 1, 1, 0, 0, 0, 2],
    [0, 1, 1, 1, 0, 2, 2],
    [0, 1, 2, 2, 2, 1, 1],
])

# A column is treated as playable while its row-0 cell is empty
# (row 0 is also the row is_terminal_node inspects for a draw).
valid_moves = [c for c in range(COLUMNS) if grid[0, c] == 0]

print("start")

# Score the board reached by each playable column, in column order.
scores = {col: score_move(grid, col, mark, N_STEPS) for col in valid_moves}

# Keep every column whose score ties for the maximum, then pick one at random.
max_cols = [key for key, val in scores.items() if val == max(scores.values())]
max_choice = random.choice(max_cols)
print ("\n++++++++++++\nFinal scores:", list(scores.values()))
print ("max col =", max_choice, "\tvalue:", scores[max_choice])
print ("++++++++++++\nstop")

run_time = time.time() - start_time
print ("Total time taken: {} minutes and {} seconds".format(run_time//60,
                                                           round(run_time%60,3)))

In [None]:
#@title Minimalist  pruner 
import numpy as np
import random
import pandas as pd


##########################


# Board geometry and win length (given by the game configuration).
ROWS = 6
COLUMNS = 7
CNCTX = 4
## Heuristic weights, combined in get_score as
##   score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
A = 10       # own windows with three discs + one empty cell (original note: "2 threes" — presumably an earlier value)
B = 1000     # own windows with four discs (original note: "10 fours" — presumably an earlier value)
C = -1       # opponent windows with three discs + one empty cell
D = -100     # opponent windows with four discs (original note: "-10 opp-fours" — presumably an earlier value)

# Lookahead depth: passed as `nsteps` to score_move by the driver code in this cell.
N_STEPS = 4

# Gets board at next step if agent drops piece in selected column
def drop_piece(grid, col, mark):
    """Return a copy of `grid` with `mark` placed in the lowest empty cell of `col`.

    The input grid is not modified.  Rows are scanned from the last row (the
    bottom of the board) up to row 0, so the disc lands on top of whatever
    the column already holds.

    Raises
    ------
    ValueError
        If the column has no empty (0) cell.  Previously the scan simply ran
        off the top and overwrote the disc in row 0.
    """
    next_grid = grid.copy()
    # Use the grid's own height so the helper does not depend on a global.
    for row in range(len(next_grid)-1, -1, -1):
        if next_grid[row][col] == 0:
            break
    else:
        # Loop finished without finding an empty cell: the column is full.
        raise ValueError("cannot drop a piece into full column {}".format(col))
    next_grid[row][col] = mark
    print ("Dropped", mark,"into column", col, ", row:", row)
    return next_grid

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window(window, num_discs, piece):
    """Return True when `window` holds exactly `num_discs` discs of `piece`
    and exactly CNCTX-num_discs empty (0) cells."""
    if window.count(piece) != num_discs:
        return False
    return window.count(0) == CNCTX - num_discs

# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows(grid, num_discs, piece):
    """Count the CNCTX-cell windows of `grid` for which check_window is true.

    Windows are taken in four directions (horizontal, vertical, positive
    diagonal, negative diagonal); every window that lies completely inside
    the ROWS x COLUMNS board is tested once.
    """
    # (row step, column step) for each scan direction.
    directions = ((0, 1), (1, 0), (1, 1), (-1, 1))
    span = CNCTX - 1
    total = 0
    for d_row, d_col in directions:
        for row in range(ROWS):
            # Skip start rows from which the window would leave the board.
            if not 0 <= row + span*d_row < ROWS:
                continue
            for col in range(COLUMNS - span*d_col):
                cells = [grid[row + k*d_row, col + k*d_col] for k in range(CNCTX)]
                if check_window(cells, num_discs, piece):
                    total += 1
    return total

# Helper function for minimax: heuristic value of a grid plus a game-over flag
def get_score(grid, mark):
    """Return `(score, is_terminal)` for `grid` from `mark`'s point of view.

    score       : A*own threes + B*own fours + C*opponent threes
                  + D*opponent fours, with the tallies from count_windows.
    is_terminal : True when either player has at least one four-window or
                  when row 0 has no empty (0) cell left.
    """
    opponent = mark%2 + 1
    own_threes = count_windows(grid, 3, mark)
    own_fours = count_windows(grid, 4, mark)
    opp_threes = count_windows(grid, 3, opponent)
    opp_fours = count_windows(grid, 4, opponent)

    score = A*own_threes + B*own_fours + C*opp_threes + D*opp_fours

    someone_won = own_fours != 0 or opp_fours != 0
    top_row_full = not any(cell == 0 for cell in grid[0, :])
    return score, (someone_won or top_row_full)

# Minimax with alpha-beta pruning plus a score-threshold early exit (verbose)
def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
    """Return the pruned search value of `node`, scored from `mark`'s point of view.

    Parameters
    ----------
    node : grid to evaluate (indexed as node[row][col]; 0 marks an empty cell).
    depth : remaining number of plies to look ahead.
    alpha, beta : current lower / upper bounds of the search window.
    maximizingPlayer : True when the next disc is dropped by `mark`,
        False when it is dropped by the other player (`mark%2+1`).
    mark : the player whose score (get_score) is being maximised.

    The recursion stops at depth 0 or when get_score reports a terminal grid,
    returning that grid's score.  Besides the usual alpha >= beta cut-off,
    the child loop also stops once the running value reaches B + 2*C
    (maximising) or drops to D + 2*A (minimising).
    NOTE(review): that extra exit is not part of textbook alpha-beta, so the
    returned value may be a bound rather than the exact minimax value —
    confirm this is intended.
    """
    print ("~ Entering Pruner Loop ~")
    print ("depth: {} \talpha: {} \tbeta: {}".format(depth,
                                                round(alpha,2),
                                                round(beta,2)) )

    node_score, is_terminal = get_score(node, mark)
    if depth == 0 or is_terminal:
        print ("\n~~ Terminal grid ~~\n", node,"\nnode score:", node_score,"\n")
        return node_score

    # A column is playable while its row-0 cell is empty.
    valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]

    if maximizingPlayer:
        # np.inf (lower case) exists in every NumPy release; the np.Inf
        # alias was removed in NumPy 2.0.
        value = -np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark)
            value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
            alpha = max(alpha, value)
            if alpha >= beta or (value >= (B + 2*C)):
                print ("\t\t<<<< BETA cut-off >>>>")
                print ("\t\t<< alpha:",alpha,">= beta:",beta,">>")
                print ("\t\t<<< value:",value,">=",B + 2*C," >>>")
                break
        return value

    else: #minimizing player
        value = np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1)
            value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
            beta = min(beta, value)
            if alpha >= beta or (value <= (D + 2*A)):
                print ("\t\t<<<< ALPHA cut-off >>>>")
                print ("\t\t<< alpha:",alpha,">= beta:",beta,">>")
                print ("\t\t<<< value:",value,"<=",D + 2*A," >>>")
                break
        return value

# Uses minimax to calculate value of dropping piece in selected column
def score_move(grid, col, mark, nsteps):
    """Return the search value of `mark` dropping a disc into column `col`.

    The disc is placed with drop_piece, then alphabeta searches the resulting
    grid for `nsteps-1` further plies with the other player to move
    (maximizingPlayer=False) and a fully open (-inf, +inf) window.
    """
    print("\nv v v v v v v v v v v v v v v v v v\nGet best score for column:",col)
    next_grid = drop_piece(grid, col, mark)
    # np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
    score = alphabeta(next_grid, nsteps-1, -np.inf, np.inf, False, mark)
    print("Return score:",score, "for column", col)
    print("\n^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ \n")
    return score

# ----------------------------------------------------------------------
# Demo run: let the agent choose a column for a hand-built position.
# ----------------------------------------------------------------------
start_time = time.time()   # wall-clock start, reported at the end of the cell
mark = 1                   # value passed as `mark` to score_move
# (Replace the literal below with np.zeros((ROWS,COLUMNS)) to start from an empty board.)
grid = np.asarray([
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 2, 1, 0, 2],
    [0, 1, 0, 2, 1, 0, 2],
    [0, 1, 1, 2, 1, 1, 2],
])

# Row 0 is the top of the board (drop_piece fills from row ROWS-1 upwards),
# so a column is playable while its row-0 cell is still empty.
valid_moves = [c for c in range(COLUMNS) if grid[0, c] == 0]

print("start")

# Score columns left to right, stopping at the first column whose score
# reaches A; columns after that one are never evaluated.
returning_scores = []
for col in valid_moves:
    col_score = score_move(grid, col, mark, N_STEPS)
    returning_scores.append(col_score)
    if col_score >= A:
        break
# zip() stops at the shorter sequence, so only evaluated columns get an entry.
scores = {move: move_score for move, move_score in zip(valid_moves, returning_scores)}

# Keep every evaluated column whose score ties for the maximum, then pick one at random.
max_cols = [key for key, val in scores.items() if val == max(scores.values())]
max_choice = random.choice(max_cols)
print ("\n++++++++++++\nFinal scores:", list(scores.values()))
print ("max col =", max_choice, "\tvalue:", scores[max_choice])
print ("++++++++++++\nstop")

run_time = time.time() - start_time
print ("Total time taken: {} minutes and {} seconds".format(run_time//60,
                                                           round(run_time%60,3)))

In [None]:
# @title Experimental Pruning Agent "pruneX"
#def pruneTTT(obs, config):
    #config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}
    # obs.board is last move of opponent, obs.mark is current player
    # return column that max's next grid's score

    ################################
    # Imports and helper functions #
    ################################

import numpy as np
import random

########################### Regular pruner ################
# Board geometry and win length (given by the game configuration).
ROWS = 6
COLUMNS = 7
CNCTX = 4
## Heuristic weights, combined in get_score as
##   score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
A = 10       # own windows with three discs + one empty cell (original note: "2 threes" — presumably an earlier value)
B = 1000     # own windows with four discs (original note: "10 fours" — presumably an earlier value)
C = -1      # opponent windows with three discs + one empty cell
D = -100     # opponent windows with four discs (original note: "-10 opp-fours" — presumably an earlier value)

# Lookahead depth: passed as `nsteps` to score_move by the driver code in this cell.
N_STEPS = 4#@ param {type:"integer"}

# Gets board at next step if agent drops piece in selected column
def drop_piece(grid, col, mark):
    """Return a copy of `grid` with `mark` placed in the lowest empty cell of `col`.

    The input grid is not modified.  Rows are scanned from the last row (the
    bottom of the board) up to row 0, so the disc lands on top of whatever
    the column already holds.

    Raises
    ------
    ValueError
        If the column has no empty (0) cell.  Previously the scan simply ran
        off the top and overwrote the disc in row 0.
    """
    next_grid = grid.copy()
    # Use the grid's own height so the helper does not depend on a global.
    for row in range(len(next_grid)-1, -1, -1):
        if next_grid[row][col] == 0:
            break
    else:
        # Loop finished without finding an empty cell: the column is full.
        raise ValueError("cannot drop a piece into full column {}".format(col))
    next_grid[row][col] = mark
    return next_grid

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window(window, num_discs, piece):
    """Return True when `window` holds exactly `num_discs` discs of `piece`
    and exactly CNCTX-num_discs empty (0) cells."""
    if window.count(piece) != num_discs:
        return False
    return window.count(0) == CNCTX - num_discs

# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows(grid, num_discs, piece):
    """Count the CNCTX-cell windows of `grid` for which check_window is true.

    Windows are taken in four directions (horizontal, vertical, positive
    diagonal, negative diagonal); every window that lies completely inside
    the ROWS x COLUMNS board is tested once.
    """
    # (row step, column step) for each scan direction.
    directions = ((0, 1), (1, 0), (1, 1), (-1, 1))
    span = CNCTX - 1
    total = 0
    for d_row, d_col in directions:
        for row in range(ROWS):
            # Skip start rows from which the window would leave the board.
            if not 0 <= row + span*d_row < ROWS:
                continue
            for col in range(COLUMNS - span*d_col):
                cells = [grid[row + k*d_row, col + k*d_col] for k in range(CNCTX)]
                if check_window(cells, num_discs, piece):
                    total += 1
    return total

# Helper function for minimax: heuristic value of a grid plus a game-over flag
def get_score(grid, mark):
    """Return `(score, is_terminal)` for `grid` from `mark`'s point of view.

    score       : A*own threes + B*own fours + C*opponent threes
                  + D*opponent fours, with the tallies from count_windows.
    is_terminal : True when either player has at least one four-window or
                  when row 0 has no empty (0) cell left.
    """
    opponent = mark%2 + 1
    own_threes = count_windows(grid, 3, mark)
    own_fours = count_windows(grid, 4, mark)
    opp_threes = count_windows(grid, 3, opponent)
    opp_fours = count_windows(grid, 4, opponent)

    score = A*own_threes + B*own_fours + C*opp_threes + D*opp_fours

    someone_won = own_fours != 0 or opp_fours != 0
    top_row_full = not any(cell == 0 for cell in grid[0, :])
    return score, (someone_won or top_row_full)

# Minimax with alpha-beta pruning plus a score-threshold early exit
def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
    """Return the pruned search value of `node`, scored from `mark`'s point of view.

    Parameters
    ----------
    node : grid to evaluate (indexed as node[row][col]; 0 marks an empty cell).
    depth : remaining number of plies to look ahead.
    alpha, beta : current lower / upper bounds of the search window.
    maximizingPlayer : True when the next disc is dropped by `mark`,
        False when it is dropped by the other player (`mark%2+1`).
    mark : the player whose score (get_score) is being maximised.

    The recursion stops at depth 0 or when get_score reports a terminal grid,
    returning that grid's score.  Besides the usual alpha >= beta cut-off,
    the child loop also stops once the running value reaches B + 2*C
    (maximising) or drops to D + 2*A (minimising).
    NOTE(review): that extra exit is not part of textbook alpha-beta, so the
    returned value may be a bound rather than the exact minimax value —
    confirm this is intended.
    """
    print ("~ Entering Pruner Loop ~")
    print ("depth: {} \talpha: {} \tbeta: {}".format(depth,
                                            round(alpha,2),
                                            round(beta,2)) )

    node_score, is_terminal = get_score(node, mark)
    if depth == 0 or is_terminal:
        print ("\n~~ Terminal grid ~~\n", node,"\nnode score:", node_score,"\n")
        return node_score

    # A column is playable while its row-0 cell is empty.
    valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]

    if maximizingPlayer:
        # np.inf (lower case) exists in every NumPy release; the np.Inf
        # alias was removed in NumPy 2.0.
        value = -np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark)
            value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
            alpha = max(alpha, value)
            if alpha >= beta or (value >= (B + 2*C)):
                break
        return value

    else: #minimizing player
        value = np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1)
            value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
            beta = min(beta, value)
            if alpha >= beta or (value <= (D + 2*A)):
                break
        return value

# Uses minimax to calculate value of dropping piece in selected column
def score_move(grid, col, mark, nsteps):
    """Return the search value of `mark` dropping a disc into column `col`.

    The disc is placed with drop_piece, then alphabeta searches the resulting
    grid for `nsteps-1` further plies with the other player to move
    (maximizingPlayer=False) and a fully open (-inf, +inf) window.
    """
    next_grid = drop_piece(grid, col, mark)
    # np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
    score = alphabeta(next_grid, nsteps-1, -np.inf, np.inf, False, mark)
    return score

# ----------------------------------------------------------------------
# Demo run: let the agent choose a column for a hand-built position.
# ----------------------------------------------------------------------
start_time = time.time()   # wall-clock start, reported at the end of the cell
mark = 1                   # value passed as `mark` to score_move
# (Replace the literal below with np.zeros((ROWS,COLUMNS)) to start from an empty board.)
grid = np.asarray([
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 2, 1, 0, 2],
    [0, 1, 0, 2, 1, 0, 2],
    [0, 1, 1, 2, 1, 1, 2],
])

# Row 0 is the top of the board (drop_piece fills from row ROWS-1 upwards),
# so a column is playable while its row-0 cell is still empty.
valid_moves = [c for c in range(COLUMNS) if grid[0, c] == 0]

print("start")

# Score columns left to right, stopping at the first column whose score
# reaches A; columns after that one are never evaluated.
returning_scores = []
for col in valid_moves:
    col_score = score_move(grid, col, mark, N_STEPS)
    returning_scores.append(col_score)
    if col_score >= A:
        break
# zip() stops at the shorter sequence, so only evaluated columns get an entry.
scores = {move: move_score for move, move_score in zip(valid_moves, returning_scores)}

# Keep every evaluated column whose score ties for the maximum, then pick one at random.
max_cols = [key for key, val in scores.items() if val == max(scores.values())]
max_choice = random.choice(max_cols)
print ("\n++++++++++++\nFinal scores:", list(scores.values()))
print ("max col =", max_choice, "\tvalue:", scores[max_choice])
print ("++++++++++++\nstop")

run_time = time.time() - start_time
print ("Total time taken: {} minutes and {} seconds".format(run_time//60,
                                                           round(run_time%60,3)))