In [None]:
from stable_baselines3 import PPO
import numpy as np
import time
from scipy.ndimage import convolve

In [None]:
def make_horizontal_win_board(lenght_to_win):
    """Build the kernel that matches a horizontal winning line.

    Args:
        lenght_to_win: number of marks in a row needed to win.

    Returns:
        A nested list with a single row containing ``lenght_to_win`` ones.
    """
    return [[1] * lenght_to_win]

def make_vertical_win_board(lenght_to_win):
    """Build the kernel that matches a vertical winning line.

    Args:
        lenght_to_win: number of marks in a column needed to win.

    Returns:
        A nested list of ``lenght_to_win`` rows, each being ``[1]``.
    """
    # A comprehension (not list multiplication) so every row is its own list.
    return [[1] for _ in range(lenght_to_win)]

def make_left_up_diagonal_win_board(lenght_to_win):
    """Build the square kernel with ones on the main diagonal.

    Args:
        lenght_to_win: side length of the kernel, i.e. marks needed to win.

    Returns:
        A ``lenght_to_win`` x ``lenght_to_win`` nested list holding 1 where
        the row index equals the column index and 0 everywhere else.
    """
    side = range(lenght_to_win)
    return [[int(row == col) for col in side] for row in side]

def make_right_up_diagonal_win_board(lenght_to_win):
    """Build the square kernel with ones on the anti-diagonal.

    The main-diagonal kernel is flipped top-to-bottom, so unlike the other
    kernel builders this one returns a NumPy array rather than a nested list.

    Args:
        lenght_to_win: side length of the kernel, i.e. marks needed to win.

    Returns:
        ``numpy.ndarray`` with ones running from bottom-left to top-right.
    """
    return np.flipud(make_left_up_diagonal_win_board(lenght_to_win))


def flat_to_poss_in2d_array(n,size,player):
    """Convert a flat cell index into a ``[player, y, x]`` board position.

    Uses exact integer arithmetic (``divmod``) instead of float division, so
    the row/column split cannot be affected by floating-point rounding.

    Args:
        n: flat, row-major cell index (``0 .. size*size - 1``); any value
            accepted by ``int()`` works, e.g. a NumPy integer scalar.
        size: number of columns in one board row.
        player: index of the player plane the move belongs to; passed through
            unchanged as the first element of the result.

    Returns:
        ``[player, y, x]`` where ``y`` is the row and ``x`` the column.
    """
    y, x = divmod(int(n), size)
    return [player, y, x]

def check_for_win(board,lenght_to_win):
    """Report whether ``board`` contains a full winning line.

    The board is convolved with four line-shaped kernels (vertical,
    horizontal and both diagonals). A convolution output equal to
    ``lenght_to_win`` means every cell under the kernel was 1.

    Args:
        board: 2-D grid of one player's marks (1 = occupied, 0 = empty).
        lenght_to_win: number of aligned marks that counts as a win.

    Returns:
        ``True`` if any of the four line patterns is fully matched,
        otherwise ``False``.
    """
    kernels = (
        make_vertical_win_board(lenght_to_win),
        make_horizontal_win_board(lenght_to_win),
        make_left_up_diagonal_win_board(lenght_to_win),
        make_right_up_diagonal_win_board(lenght_to_win),
    )
    # mode="constant" pads the area outside the board with the default
    # fill value (0), so lines cannot wrap around the edges.
    return any(
        bool(np.any(convolve(board, kernel, mode="constant") == lenght_to_win))
        for kernel in kernels
    )



def is_draw(playing_boards,board_size):
    """Tell whether the board is completely filled.

    Args:
        playing_boards: array-like holding both players' mark grids; all of
            its entries are summed together.
        board_size: side length of the square board.

    Returns:
        ``True`` when the total number of marks equals ``board_size ** 2``,
        otherwise ``False``. Wins are not checked here.
    """
    total_cells = board_size**2
    return bool(np.sum(playing_boards) == total_cells)
    

def all_posible_moves(player,boards):
    """List every empty cell as a move for ``player``.

    Args:
        player: value stored as the first element of each returned move.
        boards: pair of 2-D grids; a cell is free only when it is 0 in both
            ``boards[0]`` and ``boards[1]``.

    Returns:
        List of ``[player, y, x]`` moves in row-major order.
    """
    def is_free(row, col):
        return boards[0][row][col] == 0 and boards[1][row][col] == 0

    return [
        [player, row, col]
        for row, cells in enumerate(boards[0])
        for col in range(len(cells))
        if is_free(row, col)
    ]


def best_move_by_minimax(board, lenght_to_win=None):
    """Pick the move for player 0 that has the best minimax score.

    Each free cell is tried in turn on ``board`` (the mark is placed, scored
    and removed again), so ``board`` is unchanged when the function returns.

    Args:
        board: mutable pair of 2-D grids; ``board[0]`` holds player 0's marks
            and ``board[1]`` the opponent's (1 = occupied, 0 = empty).
        lenght_to_win: number of aligned marks needed to win. ``None``
            (default) uses the board side length, as the search always did.

    Returns:
        ``[0, y, x]`` for the chosen cell, or ``None`` when no cell is free.
    """
    best_score = -1000  # lower than any score minmax reports for a finished game
    bestmove = None
    for move in all_posible_moves(0, board):
        _, y, x = move
        board[0][y][x] = 1
        score = minmax(board, False, lenght_to_win=lenght_to_win)
        board[0][y][x] = 0
        if score == 1:
            # A forced win cannot be improved on; stop searching.
            return move
        if score > best_score:
            best_score = score
            bestmove = move

    return bestmove

def minmax(boards,is_maximazing, depth = 0, lenght_to_win=None):
    """Score a position by exhaustive minimax search.

    Args:
        boards: mutable pair of 2-D grids (player 0 in ``boards[0]``, the
            opponent in ``boards[1]``). Marks are placed and removed during
            the search; the grids are restored before returning.
        is_maximazing: ``True`` when player 0 is to move, ``False`` when the
            opponent is to move.
        depth: recursion depth; only passed along to recursive calls.
        lenght_to_win: number of aligned marks needed to win. ``None``
            (default) uses the board side length.

    Returns:
        ``1`` if player 0 wins with best play, ``-1`` if the opponent wins,
        ``0`` for a draw.
    """
    board_size = len(boards[0])
    if lenght_to_win is None:
        lenght_to_win = board_size

    # Terminal positions: the opponent's win is checked first, then player 0's,
    # then a full board.
    if check_for_win(boards[1], lenght_to_win):
        return -1
    if check_for_win(boards[0], lenght_to_win):
        return 1
    if is_draw(boards, board_size):
        return 0

    if is_maximazing:
        plane, decisive, pick, best_score = 0, 1, max, -1000
    else:
        plane, decisive, pick, best_score = 1, -1, min, 1000

    for _, y, x in all_posible_moves(0, boards):
        boards[plane][y][x] = 1
        score = minmax(boards, not is_maximazing, depth + 1, lenght_to_win)
        boards[plane][y][x] = 0

        if score == decisive:
            # The side to move has a forced win; no other move can beat it.
            return decisive

        best_score = pick(score, best_score)

    return best_score



In [None]:
# Load the saved PPO model; the path is relative to the current working directory.
model = PPO.load(r"best_model_2000000.zip")


In [None]:
lenght_to_win = 3
board_size = 3


def render_board(boards):
    """Collapse both player planes into one printable grid.

    Cells marked in ``boards[1]`` (the AI) are shown as 2; every other cell
    keeps its value from ``boards[0]`` (1 = minmax player, 0 = empty).
    """
    return np.where(boards[1] == 1, 2, boards[0])


# board[0] holds the minmax player's marks, board[1] the AI's marks.
board = np.zeros((2, board_size, board_size), dtype=int)

# NOTE(review): best_move_by_minimax(board) is called without a win length, so
# the search treats the board side length as the win length. That matches this
# cell only while lenght_to_win == board_size.

done = False
while not done:
    print("current_board:")
    print(render_board(board))

    print("AI turn: ")
    # The observation passed to the model has the AI's own plane first.
    rewersed_boards = np.array([board[1], board[0]])
    # predict returns an (action, state) tuple; only the action is needed.
    prediction = model.predict(rewersed_boards)[0]
    # NOTE(review): argmax assumes the action is a vector with one score per
    # cell - TODO confirm against the model's action space (on a scalar action
    # argmax is always 0).
    prediction = prediction.argmax()
    prediction = flat_to_poss_in2d_array(prediction, board_size, 1)
    _, ai_y, ai_x = prediction
    if board[0][ai_y][ai_x] == 1 or board[1][ai_y][ai_x] == 1:
        # Writing the mark anyway would leave a cell owned by both players (or
        # waste the turn) and break the full-board draw check, so stop here.
        print("AI chose an occupied cell, game aborted!")
        break
    board[1][ai_y][ai_x] = 1

    print("current_board:")
    print(render_board(board))

    # The AI may just have won or filled the last cell. The minmax player only
    # moves when the game is still open; otherwise best_move_by_minimax would
    # return None on a full board, or a finished game would be played on.
    oponent_win = check_for_win(board[1], lenght_to_win)
    player_win = False
    if not oponent_win and not is_draw(board, board_size):
        print("minmax move:")
        best_move = best_move_by_minimax(board)
        board[0][best_move[1]][best_move[2]] = 1
        player_win = check_for_win(board[0], lenght_to_win)

    draw = is_draw(board, board_size)
    done = player_win or oponent_win or draw

    if player_win:
        print("minmax wins!")
    elif oponent_win:
        print("AI wins!")

    if draw and not(player_win or oponent_win):
        print("its a draw!")

# Show the position the game ended on (the loop only prints before each move).
print("final board:")
print(render_board(board))


