## Testing the Deep Reinforcement Implementation

Please take a look at the game played below. I tried to pick a game that is as representative of the model's capabilities as possible. While the model is not very good (not even close to the skill of the adversarial search), it did develop some game sense in its play.

As in the "trying deep reinforcement" notebook, you can skip the next block; it just contains all the implementation needed to run the game.

In [24]:
import numpy as np

# The board is an 8x8 numpy array of integer cell codes:
#   0 -> no piece
#   1 -> X piece
#   2 -> O piece

blank, X, O = 0, 1, 2

# Character printed for each cell code, indexed by the code itself
symbol = [' ', 'X', 'O']

# Side length of the (square) board
N = 8

def getEmptyBoard():
    """Return a fresh N x N integer numpy array filled with zeros (all cells blank)."""
    shape = (N, N)
    return np.zeros(shape, dtype=int)

# Sentinel used to indicate an error when you try to make a move in a column that is already full

ERROR = -1

# Check for error: use this function ONLY, since comparing a numpy board with == produces an
# element-wise array instead of a single True/False.

def isError(B):
    """Return True only when B is the integer ERROR sentinel.

    B may be a board (numpy array) or an integer status code. Anything that is
    not an integer -- in particular a board -- is never an error. Integer
    subclasses and numpy integer scalars are accepted as well, so a sentinel
    that passed through numpy is still recognised.
    """
    # isinstance (rather than an exact type comparison) keeps arrays out while
    # accepting every integer flavour; bool() normalises numpy's np.bool_ result.
    return isinstance(B, (int, np.integer)) and bool(B == ERROR)

# Print out a human-readable version of the board, can indent if want to trace through the recursion

def printBoard(B,ind=0):
    """Print board B in human-readable form, prefixed by `ind` tab characters.

    If B is the ERROR sentinel, an overflow message is printed instead of a board.
    """
    indent = '\t'*ind
    if isError(B):
        print(indent,"ERROR: Overflow in column.")
        return

    border = '-------------------'
    print(indent,'  0 1 2 3 4 5 6 7')
    print(indent,border)
    for row in range(N):
        # One space before every symbol, then the closing wall
        cells = ''.join(' '+ symbol[B[row][col]] for col in range(N))
        print(indent,'|' + cells + ' |')
    print(indent,border)
    


# This function should make the indicated move on the input board, and return that board, or ERROR (-1)
# if there is no room in the column of the move.  Note that you are changing the original board
# IN PLACE, but also returning it, so you can indicate the error by returning ERROR (-1).
# Do NOT make a copy, as that is very inefficient!

# player is 1 (X) or 2 (O); 0 <= move <= 7; board is 8x8 numpy array as shown in first code cell.
# If move is illegal (either outside range 0..7) or there is no room in that column, return ERROR

def illegalMove(m):
    """Return True when column index m lies outside the range 0..7."""
    in_range = 0 <= m <= 7
    return not in_range

def noRoomInColumn(move,board):
    """Return True when the top cell (row 0) of column `move` is not blank."""
    top_cell = board[0][move]
    return top_cell != blank

def dropPiece(player, move, board):
    """Drop `player`'s piece into column `move` of `board`, in place, and return the board.

    The piece lands in the lowest blank cell of the column (row 7 is the bottom).
    When the move is out of range or the column is full, the board is returned
    unchanged.

    NOTE(review): the comment block above this function says ERROR should be
    returned for an illegal move, but the implementation returns the untouched
    board instead -- confirm which behaviour callers expect.
    """
    if illegalMove(move) or noRoomInColumn(move, board):
        return board

    # Scan upwards from the bottom row; the top cell is known to be blank here,
    # so a free cell is always found.
    for row in range(7, -1, -1):
        if board[row][move] == blank:
            board[row][move] = player
            break
    return board



import numpy as np

def checkWin(player, board):
    """Return `player` if they have four in a row on the 8x8 numpy board, otherwise 0.

    Horizontal, vertical and both diagonal directions are checked.
    """
    target = np.full(4, player)
    steps = np.arange(4)

    # (row step, column step, start rows, start columns): the start ranges are
    # chosen so that a window of four cells always stays on the board.
    directions = (
        (0, 1, range(8), range(5)),      # horizontal
        (1, 0, range(5), range(8)),      # vertical
        (1, 1, range(5), range(5)),      # diagonal (down-right)
        (-1, 1, range(3, 8), range(5)),  # diagonal (up-right)
    )

    for row_step, col_step, start_rows, start_cols in directions:
        for row in start_rows:
            for col in start_cols:
                window = board[row + row_step * steps, col + col_step * steps]
                if np.array_equal(window, target):
                    return player

    return 0

import sys


# Return evaluation of the board from `player`'s point of view

def evaluate(board, player):
    """Return `player`'s score minus half of the opponent's score.

    The opponent is the other piece code (3 - player, so 1 <-> 2).
    """
    return score(board, player) - 0.5 * score(board, 3 - player)

def score(board, player):
    """Sum eval_dir over every scan line of the 8x8 board for `player`.

    Each scan line is described by a start cell (x = column, y = row) and a
    step (dx, dy); the lines start on the board's edges.
    """
    size = 8
    last = size - 1

    lines = []
    # Rows, scanned left to right
    lines += [(0, y, 1, 0) for y in range(size)]
    # Columns, scanned from row 0 onwards
    lines += [(x, 0, 0, 1) for x in range(size)]
    # Diagonals stepping (+1, +1), starting on row 0 and then on column 0
    lines += [(x, 0, 1, 1) for x in range(size)]
    lines += [(0, y, 1, 1) for y in range(1, size)]
    # Diagonals stepping (+1, -1), starting on the last row and then on column 0
    lines += [(x, last, 1, -1) for x in range(size)]
    lines += [(0, y, 1, -1) for y in range(last - 1, -1, -1)]

    return sum(eval_dir(board, sx, sy, dx, dy, player) for sx, sy, dx, dy in lines)

def eval_dir(board, start_x, start_y, dx, dy, player):
    """Score the run of `player` pieces that begins at (start_x, start_y).

    Starting at column start_x / row start_y and stepping by (dx, dy), count
    how many consecutive cells hold `player`. Counting stops at the first cell
    that is empty, holds the other player's piece, or lies off the board.

    Returns 0 for no pieces, 1 / 10 / 1000 for runs of one / two / three, and
    sys.maxsize for any run of four or more. A longer run still contains a
    winning four, so it must not fall through to 0.
    """
    height = len(board)
    width = len(board[0])

    consecutive = 0
    x, y = start_x, start_y
    # No line on the board is longer than its larger dimension
    for _ in range(max(width, height)):
        # x indexes columns and y indexes rows, so each is checked against its own dimension
        if not (0 <= x < width and 0 <= y < height):
            break
        if board[y][x] != player:
            break
        consecutive += 1
        x += dx
        y += dy

    if consecutive >= 4:
        return sys.maxsize  # a completed four (or more) is a win

    run_scores = {1: 1, 2: 10, 3: 1000}
    return run_scores.get(consecutive, 0)

maxDepth = 3

def minMax(board, player, depth, alpha, beta):
    """Alpha-beta minimax search; returns a (value, move) tuple.

    O is always the maximising player and X the minimising player, so every
    value is expressed from O's point of view: +inf means O can win at once,
    -inf means X can win at once.

    Parameters:
        board  -- 8x8 board; moves are played and undone in place
        player -- piece code of the side to move
        depth  -- current search depth (the search stops at maxDepth)
        alpha  -- best value the maximiser can already guarantee
        beta   -- best value the minimiser can already guarantee

    The returned move is None at leaf positions (depth limit reached, game
    already won, or no legal move left).
    """
    opponent = (player % 2) + 1

    # Leaves are always scored for O. Scoring them for the side to move would
    # flip the sign whenever X is to move and mislead the maximiser.
    if depth >= maxDepth or checkWin(player, board) or checkWin(opponent, board):
        return (evaluate(board, O), None)

    moves = getAvailableMoves(board)
    if not moves:
        # Full board without a winner: score it like any other leaf rather
        # than reporting it as a forced loss.
        return (evaluate(board, O), None)

    maximizing = player == O
    best_val = -np.inf if maximizing else np.inf
    best_move = None

    for move in moves:
        row = get_row(board, move)
        if row == -1: continue
        board[row][move] = player
        if checkWin(player, board):
            board[row][move] = blank  # undo move
            # Immediate win for the side to move, no need to consider other moves
            return (np.inf if maximizing else -np.inf, move)
        val, _ = minMax(board, opponent, depth + 1, alpha, beta)
        board[row][move] = blank  # undo move

        # Always keep the first legal move so a move is returned even when
        # every option evaluates to the worst possible value.
        if best_move is None or (val > best_val if maximizing else val < best_val):
            best_val = val
            best_move = move

        if maximizing:
            alpha = max(alpha, val)
        else:
            beta = min(beta, val)
        if beta <= alpha:
            break  # the other side already has a better alternative elsewhere

    return best_val, best_move


    
def get_row(board,col):
    """Return the row where a piece dropped into column `col` would land.

    That is the row just above the topmost occupied cell: 7 for an empty
    column and -1 when the column is full.
    """
    occupied_rows = (r for r in range(0, 8) if board[r][col] != 0)
    # 8 acts as a virtual floor below the board when the column is empty
    topmost = next(occupied_rows, 8)
    return topmost - 1

def getAvailableMoves(board):
    """Return the column indices (0..7, ascending) that can still take a piece."""
    open_columns = []
    for col in range(8):
        if get_row(board, col) != -1:
            open_columns.append(col)
    return open_columns

def getRandomBoard():
    """Build a random position by playing random, non-winning moves on an empty board.

    An even target number of moves (0..60) is drawn, then moves are chosen at
    random, starting with player 1. A move that would complete four in a row is
    never placed on the real board. Returns the resulting board.

    Relies on the module-level names `random` and `sys` being imported before
    this function is called.
    """
    board = getEmptyBoard()
    # randint is inclusive on both ends: 1..61 here, and odd counts are rounded down to even
    moves_played = random.randint(0, 60) + 1
    if moves_played % 2 == 1:
        moves_played -= 1
    player = 1
    move_history = []  # columns of the pieces currently on the board, oldest first

    for _ in range(moves_played):
        while True:
            move = random.choice(getAvailableMoves(board))
            board_temp = dropPiece(player, move, board.copy())  # use a temp board
            if not checkWin(player, board_temp):
                # If the move doesn't cause a win, make it on the real board
                board = dropPiece(player, move, board)
                move_history.append(move)
                player = (player % 2) + 1
                break
            elif len(move_history) > 0 or abs(minMax(board,player,0,-sys.maxsize,sys.maxsize)[0]) == sys.maxsize: 
                # The drawn move would win: remove the most recently placed piece from the
                # real board and draw again.
                # NOTE(review): `player` is not switched back after the undo, so the same
                # player draws again and the piece counts drift apart -- confirm intended.
                # NOTE(review): if move_history were empty here, pop() would raise IndexError.
                undoLastMove(board, move_history.pop())
            else:
                # Winning move drawn with no recorded moves to undo: just draw a different move
                continue

    return board

def isBoardFull(board):
    """Return True when no cell of the board equals `blank`."""
    blank_cells = np.count_nonzero(board == blank)
    return blank_cells == 0

def undoLastMove(board, move):
    """Blank out the topmost piece in column `move`, in place; do nothing if the column is empty."""
    occupied_rows = (row for row in range(N) if board[row][move] != blank)
    top = next(occupied_rows, None)
    if top is not None:
        board[top][move] = blank

In [25]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import random

# Load the saved Keras model (.h5 file) from the Kaggle input directory
model = tf.keras.models.load_model('/kaggle/input/reinforcement-nn/reinforcement_cnn.h5')

In [26]:
# Load the model (repeated here so this cell can be re-run on its own)
model = tf.keras.models.load_model('/kaggle/input/reinforcement-nn/reinforcement_cnn.h5')

# Initialize the board with a random position in which nobody has won yet
board = getRandomBoard()

# Player 'X' (AI) goes first, player 'O' (human) goes second
player = X

while True:
    # Print the current board
    printBoard(board)

    # Without this check a full board would leave the AI searching forever
    # for a legal column (and the human with no valid answer).
    if isBoardFull(board):
        print("It's a draw!")
        break

    if player == X:
        # AI's turn: the model scores every column for the current position
        # (assumes the first output row holds one score per column -- TODO confirm)
        move_probabilities = model.predict(np.array(board).reshape(-1, 8, 8))
        scores = move_probabilities[0]

        # Pick the best-scoring column among the legal ones only. Overwriting
        # illegal entries with 0 and re-running argmax can loop forever when
        # the remaining scores are zero or negative.
        legal_moves = [c for c in range(len(scores))
                       if not (illegalMove(c) or noRoomInColumn(c, board))]
        move = max(legal_moves, key=lambda c: scores[c])

        print(f"AI's move: {move}")

        # Apply the move and get the new board
        board = dropPiece(X, move, board)

        # Check if the AI has won
        if checkWin(X, board):
            printBoard(board)
            print("AI has won!")
            break

    else:
        # Human's turn: keep asking until a legal column number is entered
        while True:
            try:
                move = int(input("Enter your move (0-7): "))
            except ValueError:
                # Non-numeric input is treated like any other invalid move
                print("Invalid move.")
                continue
            if illegalMove(move) or noRoomInColumn(move, board):
                print("Invalid move.")
                continue
            break

        # Apply the move and get the new board
        board = dropPiece(O, move, board)

        # Check if the human has won
        if checkWin(O, board):
            printBoard(board)
            print("You have won!")
            break

    # Switch players
    player = O if player == X else X

   0 1 2 3 4 5 6 7
 -------------------
 |                 |
 |         O       |
 |   X     O       |
 |   X     X       |
 | X O     O       |
 | O O     O     O |
 | X X     X     O |
 | X O X X X O   O |
 -------------------
AI's move: 7
   0 1 2 3 4 5 6 7
 -------------------
 |                 |
 |         O       |
 |   X     O       |
 |   X     X       |
 | X O     O     X |
 | O O     O     O |
 | X X     X     O |
 | X O X X X O   O |
 -------------------


Enter your move (0-7):  6


   0 1 2 3 4 5 6 7
 -------------------
 |                 |
 |         O       |
 |   X     O       |
 |   X     X       |
 | X O     O     X |
 | O O     O     O |
 | X X     X     O |
 | X O X X X O O O |
 -------------------
AI's move: 7
   0 1 2 3 4 5 6 7
 -------------------
 |                 |
 |         O       |
 |   X     O       |
 |   X     X     X |
 | X O     O     X |
 | O O     O     O |
 | X X     X     O |
 | X O X X X O O O |
 -------------------


Enter your move (0-7):  6


   0 1 2 3 4 5 6 7
 -------------------
 |                 |
 |         O       |
 |   X     O       |
 |   X     X     X |
 | X O     O     X |
 | O O     O     O |
 | X X     X   O O |
 | X O X X X O O O |
 -------------------
AI's move: 7
   0 1 2 3 4 5 6 7
 -------------------
 |                 |
 |         O       |
 |   X     O     X |
 |   X     X     X |
 | X O     O     X |
 | O O     O     O |
 | X X     X   O O |
 | X O X X X O O O |
 -------------------


Enter your move (0-7):  7


   0 1 2 3 4 5 6 7
 -------------------
 |                 |
 |         O     O |
 |   X     O     X |
 |   X     X     X |
 | X O     O     X |
 | O O     O     O |
 | X X     X   O O |
 | X O X X X O O O |
 -------------------
AI's move: 7
   0 1 2 3 4 5 6 7
 -------------------
 |               X |
 |         O     O |
 |   X     O     X |
 |   X     X     X |
 | X O     O     X |
 | O O     O     O |
 | X X     X   O O |
 | X O X X X O O O |
 -------------------


Enter your move (0-7):  5


   0 1 2 3 4 5 6 7
 -------------------
 |               X |
 |         O     O |
 |   X     O     X |
 |   X     X     X |
 | X O     O     X |
 | O O     O     O |
 | X X     X O O O |
 | X O X X X O O O |
 -------------------
AI's move: 6
   0 1 2 3 4 5 6 7
 -------------------
 |               X |
 |         O     O |
 |   X     O     X |
 |   X     X     X |
 | X O     O     X |
 | O O     O   X O |
 | X X     X O O O |
 | X O X X X O O O |
 -------------------


Enter your move (0-7):  5


   0 1 2 3 4 5 6 7
 -------------------
 |               X |
 |         O     O |
 |   X     O     X |
 |   X     X     X |
 | X O     O     X |
 | O O     O O X O |
 | X X     X O O O |
 | X O X X X O O O |
 -------------------
You have won!
