In [6]:
# we want to code connect4
import numpy as np
import random
import time
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

ModuleNotFoundError: No module named 'torch'

In [7]:
class Connect4:
    def __init__(self):
        self.board = np.array([[0 for i in range(7)] for j in range(6)])
        self.player = 1
        self.winner = 0
        self.last_move = None

    def __str__(self):
        s = ""
        for i in range(6):
            for j in range(7):
                s += str(self.board[i][j]) + " "
            s += "\n"
        return s

    def play(self, col):
        if self.winner != 0:
            print("Game is over")
            return
        if col < 0 or col > 6:
            print("Invalid column")
            return
        if self.board[0][col] != 0:
            print("Column is full")
            return
        for i in range(5, -1, -1):
            if self.board[i][col] == 0:
                self.board[i][col] = self.player
                break
        self.check_winner()
        if self.player == 1:
            self.last_move = col
            self.player = 2
        else:
            self.last_move = col
            self.player = 1

    def check_winner(self):
        for i in range(6):
            for j in range(7):
                if self.board[i][j] != 0:
                    if self.check_horizontal(i, j) or self.check_vertical(i, j) or self.check_diagonal(i, j):
                        self.winner = self.board[i][j]
                        return

    def check_horizontal(self, i, j):
        if j > 3:
            return False
        for k in range(4):
            if self.board[i][j + k] != self.board[i][j]:
                return False
        return True

    def check_vertical(self, i, j):
        if i > 2:
            return False
        for k in range(4):
            if self.board[i + k][j] != self.board[i][j]:
                return False
        return True

    def check_diagonal(self, i, j):
        if i > 2 or j > 3:
            return False
        for k in range(4):
            if self.board[i + k][j + k] != self.board[i][j]:
                return False
        return True

    def is_over(self):
        return self.winner != 0

    def get_winner(self):
        return self.winner

In [36]:
def play_game(bot1, bot2, print_game=False, player=0):
    game = Connect4()
    if not player:
        player = random.randint(1, 2)

    while not game.is_over():
        if print_game:
            print(game)
        if player == 1:
            game.play(bot1(game.board, game.player, game.last_move))
        else:
            game.play(bot2(game.board, game.player, game.last_move))
        if game.is_over():
            break
        player = 3 - player
        if 0 not in game.board[0]:
            player = 0
            break
    return player

In [None]:
# I want to play connect4 using AI

# we have coded connect4 above, so we need to implement the AI
# it should take in the board, the player and the last move as input and return the action it wants to take in the form of a list of 7 probabilities
# the probabilities reflect how much the AI wants to play in each column
# the AI should be able to play against itself
# We want to train the AI by letting it play against itself and then update the probabilities based on the outcome of the game

class Connect4AI:
    def __init__(self, player, learning_rate=0.01):
        self.player = player
        self.learning_rate = learning_rate
        self.model = nn.Sequential(
            nn.Linear(42, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 7),
            nn.Softmax(dim=1)
        )
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.loss_function = nn.MSELoss()

    def get_action(self, board):
        board = np.array(board)
        board = board.reshape(1, 42)
        board = torch.from_numpy(board).float()
        output = self.model(board)
        output = output.detach().numpy()
        output = output[0]

        j = 0
        while True:
            action = np.argmax(output)
            if j > 7:
                action = random.randint(0, 6)
            if board[0][action] == 0:
                return action
            else:
                output[action] = 0
            j += 1

    def train(self, board, action, reward):
        board = np.array(board)
        board = board.reshape(1, 42)
        board = torch.from_numpy(board).float()
        
        # Create a tensor for the target output
        target_output = torch.zeros((1, 7))
        target_output[0][action] = 1.0  # One-hot encode the target action
        
        # Forward pass
        output = self.model(board)
        
        # Compute the CrossEntropyLoss
        self.optimizer.zero_grad()
        loss = nn.CrossEntropyLoss()(output, torch.tensor([action]))
        
        # Backward pass and optimization step
        loss.backward()
        self.optimizer.step()


def traingame(ai1, ai2, random_chance = 0.1, print_board = False):
    # we want to play a game of connect4 between two AIs
    # we want to train the AIs based on the outcome of the game
    # we save all moves both AI's make and the outcome of the game
    # the AI that wins the game gets a reward of 1, and the AI that loses the game gets a reward of -1
    # we then train both AIs based on the moves they made and the outcome of the game

    game = Connect4()
    ai1.player = 1
    ai2.player = 2
    ai1_moves = []
    ai2_moves = []
    player = random.randint(1, 2)
    while not game.is_over():
        if print_board:
            print(game)
        if player == 1:
            if random.random() < random_chance:
                action = random.randint(0, 6)
                while game.board[0][action] != 0:
                    action = random.randint(0, 6)
            else:
                action = ai1.get_action(game.board)
            ai1_moves.append((game.board, action))
            game.play(action)
        else:
            if random.random() < random_chance:
                action = random.randint(0, 6)
                while game.board[0][action] != 0:
                    action = random.randint(0, 6)
            else:
                action = ai2.get_action(game.board)
            ai2_moves.append((game.board, action))
            game.play(action)
                
        if game.is_over():
            break
        player = 3 - player
        if 0 not in game.board[0]:
            return player

    if player == 1:
        reward1 = 1
        reward2 = -1
    elif player == 2:
        reward1 = -1
        reward2 = 1

    for board, action in ai1_moves:
        ai1.train(board, action, reward1)
    for board, action in ai2_moves:
        ai2.train(board, action, reward2)

    return player

def traingame2(ai1, challenge_bot, random_chance = 0.1):
    # we want to play a game of connect4 between two AIs
    # we want to train the AIs based on the outcome of the game
    # we save all moves both AI's make and the outcome of the game
    # the AI that wins the game gets a reward of 1, and the AI that loses the game gets a reward of -1
    # we then train both AIs based on the moves they made and the outcome of the game

    game = Connect4()
    ai1.player = 1
    challenge_bot.player = 2
    ai1_moves = []
    challenge_bot_moves = []
    player = random.randint(1, 2)
    while not game.is_over():
        if player == 1:
            # we add a random chance to just do a random move
            if random.random() < random_chance:
                action = random.randint(0, 6)
                while game.board[0][action] != 0:
                    action = random.randint(0, 6)
            else:
                action = ai1.get_action(game.board)
            ai1_moves.append((game.board, action))
            game.play(action)
        else:
            action = challenge_bot(game.board, game.player, game.last_move)
            challenge_bot_moves.append((game.board, action))
            game.play(action)
                
        if game.is_over():
            break
        player = 3 - player
        if 0 not in game.board[0]:
            return player

    if player == 1:
        reward1 = 1
        reward2 = -1
    elif player == 2:
        reward1 = -1
        reward2 = 1

    for board, action in ai1_moves:
        ai1.train(board, action, reward1)
    for board, action in challenge_bot_moves:
        ai1.train(board, action, reward2)

    return player

In [5]:
# training

ai1 = Connect4AI(1)
ai2 = Connect4AI(2)
N = 100000
u = 0
t = time.time()
for i in range(N):
    if traingame(ai1, ai2)==1:
        u += 1
    if i % (N//100) == 0:
        print(f"{i}/{N} games played in {round(time.time()-t,2)} seconds")
print(u/N)

NameError: name 'Connect4AI' is not defined

In [37]:
def plot_board(board):
    # reduce size of image
    plt.figure(figsize=(3, 2))
    board = np.array(board)

    img = np.zeros((6, 7, 3))
    img[board == 1, 0] = 1
    img[board == 2, 2] = 1
    img[board == 0, :] = 1

    # plot the image
    plt.imshow(img)
    plt.show()

def visual_game(bot1, bot2, player = 0):
    game = Connect4()

    if not player:
        player = random.randint(1, 2)
    print("Bot", player, "is red, and starts")
    print("Bot", 3-player, "is blue")

    while not game.is_over():
        if player == 1:
            game.play(bot1(game.board, game.player, game.last_move))
        else:
            game.play(bot2(game.board, game.player, game.last_move))
        if game.is_over():
            break
        player = 3 - player
        if 0 not in game.board[0]:
            player = 0
            break
        plot_board(game.board)
    plot_board(game.board)
    print("Winner is bot", player)

# visual_game(bot_ai, challenge_bot3)

In [39]:
# collection of challenge bots that have fairly simple logic to them

def check_winner(board, player):
    # check if the player has won the game
    # check horizontal
    for i in range(6):
        for j in range(4):
            if board[i][j] == player and board[i][j + 1] == player and board[i][j + 2] == player and board[i][j + 3] == player:
                return True
    # check vertical
    for i in range(3):
        for j in range(7):
            if board[i][j] == player and board[i + 1][j] == player and board[i + 2][j] == player and board[i + 3][j] == player:
                return True
    # check diagonal
    for i in range(3):
        for j in range(4):
            if board[i][j] == player and board[i + 1][j + 1] == player and board[i + 2][j + 2] == player and board[i + 3][j + 3] == player:
                return True
    # check other diagonal
    for i in range(3):
        for j in range(3, 7):
            if board[i][j] == player and board[i + 1][j - 1] == player and board[i + 2][j - 2] == player and board[i + 3][j - 3] == player:
                return True
    return False

def random_bot(board, player, last_move):
    while True:
        action = random.randint(0, 6)
        if board[0][action] == 0:
            return action

def cat(board, player, last_move):
    # do the same move as last time, and if that is not possible, do a random move
    if last_move and board[0][last_move] == 0:
        return last_move
    else:
        while True:
            action = random.randint(0, 6)
            if board[0][action] == 0:
                return action
            
def gravity(board, player, last_move):
    # always fill the lowest column, randomize between the lowest columns if there are multiple
    # first find the lowest columns
    # keep in mind that the board is upside down
    lowest = 0

    for i in range(7):
        for j in range(6):
            if board[j][i] == 0:
                if j > lowest:
                    lowest = j
    lowest_columns = []
    # we know that lowest is the lowest row that is empty

    for i in range(7):
        if board[lowest][i] == 0:
            lowest_columns.append(i)
    return random.choice(lowest_columns)

def gotcha(board, player, last_move):
    # check if a move can be made that will win the game
    # if not, check if a move can be made that will prevent the opponent from winning the game
    # if not, do a random move

    # check if a move can be made that will win the game
    for action in range(7):
        if board[0][action] != 0:
            continue

        board_copy = board.copy()
        for i in range(5, -1, -1):
            if board_copy[i][action] == 0:
                board_copy[i][action] = player
                break
        if check_winner(board_copy, player):
            return action

    # check if a move can be made that will prevent the opponent from winning the game
    bad_actions = []
    for action in range(7):
        if board[0][action] != 0:
            continue

        board_copy = board.copy()
        for i in range(5, -1, -1):
            if board_copy[i][action] == 0:
                board_copy[i][action] = player
                break

        # now we have played the action on the copy of the board, and check if the opponent can win
        for challenge_action in range(7):
            if board_copy[0][challenge_action] != 0:
                continue

            board_copy2 = board_copy.copy()
            for i in range(5, -1, -1):
                if board_copy2[i][challenge_action] == 0:
                    board_copy2[i][challenge_action] = 3 - player
                    break

            if check_winner(board_copy2, 3 - player):
                bad_actions.append(action)
                break

    good_actions = []
    for i in range(7):
        if i not in bad_actions and board[0][i] == 0:
            good_actions.append(i)

    if len(good_actions) > 0:
        return random.choice(good_actions)
    else:
        while True:
            action = random.randint(0, 6)
            if board[0][action] == 0:
                return action


 

def bot_ai1(board, player, last_move):
    return ai1.get_action(board)

def bot_ai2(board, player, last_move):
    return ai2.get_action(board)

In [10]:
# train against a challenge bot

def train_against_challenge_bot(ai1, challenge_bot, N = 1000):
    a = 0
    c = 0
    d = 0
    t = time.time()
    for i in range(N):
        g = traingame2(ai1, challenge_bot)
        if g == 1:
            a += 1
        if g == 2:
            c += 1
        if g == 0:
            d += 1
        if i % (N//10) == 0:
            print(f"{i}/{N} games played in {round(time.time()-t,2)} seconds")
    print(f"AI won {round(a/N*100,2)}% of the games, the challenge bot won {round(c/N*100,2)}% of the games, and {round(d/N*100,2)}% of the games were draws")

train_against_challenge_bot(ai1, challenge_bot1, N = 1000)

NameError: name 'ai1' is not defined

In [11]:
def duel_bot(bot1, bot2, N = 1000):
    b1 = 0
    b2 = 0
    d = 0
    t = time.time()
    for i in range(N):
        g = play_game(bot1, bot2)
        if g == 1:
            b1 += 1
        if g == 2:
            b2 += 1
        if g == 0:
            d += 1      
        if i % (N//10) == 0:
            print(f"{i}/{N} games played in {round(time.time()-t,2)} seconds")
    print(f"bot 1 won {round(b1/N*100,2)}% of the games, bot 2 won {round(b2/N*100,2)}% of the games, and {round(d/N*100,2)}% of the games were draws")

# bots available:
# bot_ai1 - AI
# bot_ai2 - AI
# random_bot - always do a random legal move
# cat - always play the same move as last time, if that is not possible, do a random move
# gravity - always fill the lowest column, randomize between the lowest columns if there are multiple
# gotcha - checks if a winning move or a move that prevents the opponent from winning can be made, if not, do a random move
# treebeard - searches for the best move by looking ahead some moves and calculating a sum

duel_bot(treebeard, gotcha, 10)

Column is full
Column is full
Column is full
Column is full
Column is full
Column is full
Column is full
Column is full
0/10 games played in 1.85 seconds
1/10 games played in 3.28 seconds
Column is full
Column is full
2/10 games played in 4.88 seconds
3/10 games played in 6.64 seconds
Column is full
Column is full
4/10 games played in 8.25 seconds
5/10 games played in 9.44 seconds
6/10 games played in 10.78 seconds
7/10 games played in 12.74 seconds
Column is full
8/10 games played in 14.36 seconds
Column is full
9/10 games played in 15.79 seconds
bot 1 won 50.0% of the games, bot 2 won 40.0% of the games, and 10.0% of the games were draws


In [13]:
class Memory:
    def __init__(self, current):
        self.current = current


memory = Memory(0)
def tetris_bot(board, player, last_move):
    priority = [3, 4, 2, 1, 5, 6, 0]
    for i in range(len(priority)):
        if board[0][priority[i]] == 0:
            return priority[i]
        
def nokia(board, player, last_move):
    liste = [0,1,2,3,4,5,6,5,4,3,2,1]
    while True:
        if board[0][liste[memory.current]] == 0:
            a = memory.current
            memory.current+=1
            memory.current%=12
            return liste[a]
        else:
            memory.current+=1
            memory.current%=12
        
def moist_bot(board, player, last_move):
    a = random.randint(0,6)
    try:
        if board[0][last_move] == 0 and a<4:
            return last_move
        else:
            while True:
                a = random.randint(0,6)
                if board[0][a] == 0:
                    return a
    except:
        return random.randint(2,4)

def anish_giri(board, player, last_move):
    dict = {
    0:6,
    1:5,
    2:4,
    3:3,
    6:0,
    5:1,
    4:2
    }

    try:
        if board[0][dict[last_move]] == 0:
            return dict[last_move]
        else:
            while True:
                a = random.randint(0,6)
                if board[0][a] == 0:
                    return a
    except:
        while True:
            a = random.randint(0,6)
            if board[0][a] == 0:
                return a    


def zig(board, player, last_move):
    a = random.randint(-1,1)
    try:
        if board[0][last_move+a] == 0:
            if last_move+a == -1:
                return 6
            return last_move+a
        
        else:
            while True:
                a = random.randint(0,6)
                if board[0][a] == 0:
                    return a
    except:
        while True:
            a = random.randint(0,6)
            if board[0][a] == 0:
                return a

In [40]:
def treebeard(board, player, last_move, depth = 3):
    # looks ahead a certain number of moves and then chooses the move that will give the best outcome
    # for each of the 7 possible moves, we look ahead a certain number of moves
    # if we win, we add 2 to the score
    # if we lose, we subtract 2 from the score
    # if we draw, we add -0.5 to the score
    # if there is no winner, we add 0.5
    # overall, we want to maximize the score

    def search(board, player, depth):
        # is a recursive function that looks ahead a certain number of moves
        # each time it is called, it looks ahead one move less
        # when it terminates, the score of the board multiplied with (depth+1), so that the winning moves that happens later are less important (since depth decreases each time)
        # it may terminate if
        # 1) it is called with depth = 0
        # 2) we win
        # 3) we we draw
        # we don't need to check if we lose, since we are always looking at the board from our perspective
        # if it does not terminate, it calls itself with depth-1 for each possible move
        # we want to maximize the score, but since we are swapping players each time, always return the negative of the score

        if depth == 0:
            return 0.5 * (depth + 1)
        
        scores = []
        for action in range(7):
            if board[0][action] != 0:
                continue
            board_copy = board.copy()
            for i in range(5, -1, -1):
                if board_copy[i][action] == 0:
                    board_copy[i][action] = player
                    break
            
            if check_winner(board_copy, player):
                return 2 * (depth + 1)
            if 0 not in board_copy[0]:
                return 0.5 * (depth + 1)
            scores.append(-search(board_copy, 3 - player, depth - 1))
        
        # sum up the scored and return the negative of it
        if len(scores) == 0:
            return 0
        return sum(scores) / len(scores)
    
    scores = []
    for first_move in range(7):
        if board[0][first_move] != 0:
            continue
        board_copy = board.copy()
        for i in range(5, -1, -1):
            if board_copy[i][first_move] == 0:
                board_copy[i][first_move] = player
                break
            
        if check_winner(board_copy, player):
            return first_move
        
        scores.append(-search(board_copy, 3 - player, depth))
    
    # sort the indexes 0-7 based on the scores
    indexes = np.argsort(scores)
    for i in range(7):
        if board[0][indexes[i]] == 0:
            return indexes[i]   

duel_bot(moist_bot, treebeard, 100)

# visual_game(tetris_bot, treebeard)

0/100 games played in 0.87 seconds


IndexError: index 2 is out of bounds for axis 0 with size 2