In [4]:
import sys
import os
import numpy as np
import time

# 현재 파일(main.ipynb)이 있는 디렉토리의 부모 디렉토리를 sys.path에 추가
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

In [5]:
STATE_SHAPE = (9,9) # 9,9
N_ACTIONS = STATE_SHAPE[0]*STATE_SHAPE[1]
STATE_DIM = 3 # ( my_actions, enemy_actions, first player ) 
BOARD_SHAPE = (STATE_DIM, *STATE_SHAPE)
WINNING_CONDITION = 5

In [27]:
class State1:
    def __init__(self, my_actions=None, enemy_actions=None, state_shape=STATE_SHAPE):
        # mine, enemy's action
        self.my_actions = [] if my_actions is None else my_actions
        self.enemy_actions = [] if enemy_actions is None else enemy_actions

        self.state_shape = state_shape

        # state info about action space
        self.action_space = range(self.state_shape[0]*self.state_shape[1])
        self.n_actions = len(self.action_space)

        # create board
        self.board = self._create_board(self.my_actions, self.enemy_actions)

        # calculate legal actions
        self.legal_actions = self._get_legal_actions()
        self.winning_condition = WINNING_CONDITION

        self.done_condition = None

    def next(self, action):
        my_actions = self.my_actions.copy()
        my_actions.append(action)
        return State1(self.enemy_actions, my_actions)

    def _create_board(self, my_actions, enemy_actions):
        # 전체 state
        total_board = np.zeros(shape=(STATE_DIM, *self.state_shape))

        # 내 말과 상대방 말이 놓인 보드를 원핫인코딩으로 표현
        my_board, enemy_board = np.zeros(self.n_actions), np.zeros(self.n_actions)

        my_board[my_actions] = 1
        enemy_board[enemy_actions] = 1

        total_board[0] = my_board.reshape(self.state_shape)
        total_board[1] = enemy_board.reshape(self.state_shape)
        total_board[2] = np.full(self.state_shape, fill_value=1) if not self.is_first_player() else np.zeros(self.state_shape)

        return total_board

    def _get_legal_actions(self):
        my_actions_set = set(self.my_actions)
        enemy_actions_set = set(self.enemy_actions)

        return list(set(self.action_space) - my_actions_set - enemy_actions_set)

    def _check_winning_condition(self, board):
        def _check_row_consecutive(single_arr):
            for i in range(len(single_arr) - self.winning_condition + 1):
                if all(single_arr[i:i+self.winning_condition]):    
                    return True
            return False
        
        board = board.astype(bool)
        combined_board = np.concatenate([board,
                                         board.T])

        is_row = np.any(np.apply_along_axis(lambda x: _check_row_consecutive(x), axis=1, arr=combined_board))

        if is_row:
            return True
        
        # 세로, 대각 조건이 맞는지 
        indices = np.arange(N_ACTIONS)[board.reshape(-1)] 

        for index in indices:
            diag_lst = list(range(index, index+(WINNING_CONDITION-1)*(STATE_SHAPE[1]+1)+1, STATE_SHAPE[1]+1))
            anti_diag_lst = list(range(index, index+(WINNING_CONDITION-1)*(STATE_SHAPE[1]-1)+1, STATE_SHAPE[1]-1))
            # col_lst = list(range(index, index+(WINNING_CONDITION-1)*STATE_SHAPE[1]+1, STATE_SHAPE[1]))

            is_diag = all(element in indices for element in diag_lst)
            is_anti_diag = all(element in indices for element in anti_diag_lst)
            # is_col = all(element in indices for element in col_lst)

            if is_diag or is_anti_diag:
                return True 
                
        return False


    def is_win(self):
        my_state = self.board[0]
        condition = self._check_winning_condition(my_state)
        self.done_condition = condition if self.done_condition is not True else self.done_condition
        return condition

    def is_draw(self):
        condition = (np.sum(self.board[0]) + np.sum(self.board[1])) >= self.n_actions
        self.done_condition = condition if self.done_condition is not True else self.done_condition
        return condition

    def is_lose(self):
        enemy_state = self.board[1]
        condition = self._check_winning_condition(enemy_state)
        self.done_condition = condition if self.done_condition is not True else self.done_condition
        return condition

    def is_done(self):
        if self.done_condition is not None:
            return self.done_condition
        else:
            return self.is_win() or self.is_draw() or self.is_lose()

    def is_first_player(self):
        return (len(self.my_actions) + len(self.enemy_actions)) % 2 == 1

    def _render_board_to_str(self):
        
        board = self.board[0] + self.board[1] * -1 if self.is_first_player() else self.board[0] * -1 + self.board[1]

        mapping = {0: '.', 1: '●', -1 : '○'}

        # Create column legend (header)
        col_legend = '  ' + ' '.join(map(str, range(board.shape[1])))

        # Create rows with row legend (A, B, C, ...)
        row_legend = []

        for i, row in enumerate(board):
            row_label = chr(65 + i)  # Convert row index to A, B, C, ...
            row_str = ' '.join(mapping[val] for val in row)
            row_legend.append(f"{row_label} {row_str}")

        # Combine the column legend and rows
        return '\n'.join([col_legend] + row_legend)

    def __call__(self):
        return self.board[:2]

    def __str__(self):
        return self._render_board_to_str()

In [7]:
class State2:
    def __init__(self, my_actions=None, enemy_actions=None, state_shape=STATE_SHAPE):
        # mine, enemy's action
        self.my_actions = [] if my_actions is None else my_actions
        self.enemy_actions = [] if enemy_actions is None else enemy_actions

        self.state_shape = state_shape

        # state info about action space
        self.action_space = range(self.state_shape[0]*self.state_shape[1])
        self.n_actions = len(self.action_space)

        # create board
        self.board = self._create_board(self.my_actions, self.enemy_actions)

        # calculate legal actions
        self.legal_actions = self._get_legal_actions()
        self.winning_condition = WINNING_CONDITION

        self.done_condition = None

    def next(self, action):
        my_actions = self.my_actions.copy()
        my_actions.append(action)
        return State2(self.enemy_actions, my_actions)

    def _create_board(self, my_actions, enemy_actions):
        # 전체 state
        total_board = np.zeros(shape=(STATE_DIM, *self.state_shape))

        # 내 말과 상대방 말이 놓인 보드를 원핫인코딩으로 표현
        my_board, enemy_board = np.zeros(self.n_actions), np.zeros(self.n_actions)

        my_board[my_actions] = 1
        enemy_board[enemy_actions] = 1

        total_board[0] = my_board.reshape(self.state_shape)
        total_board[1] = enemy_board.reshape(self.state_shape)
        total_board[2] = np.full(self.state_shape, fill_value=1) if not self.is_first_player() else np.zeros(self.state_shape)

        return total_board

    def _get_legal_actions(self):
        my_actions_set = set(self.my_actions)
        enemy_actions_set = set(self.enemy_actions)

        return list(set(self.action_space) - my_actions_set - enemy_actions_set)

    def _check_winning_condition(self, board):
        def _check_consecutive(single_arr):
            consecutive_count = 0

            for value in single_arr:
                if value: 
                    consecutive_count += 1
                    if consecutive_count == self.winning_condition:
                        return True

                else:  
                    consecutive_count = 0  # reset counter

            return False
        
        def _check_diag_consecutive(board):

            indices = np.arange(N_ACTIONS)[board.astype(bool).reshape(-1)] 

            for index in indices:
                diag_lst = list(range(index, index+(WINNING_CONDITION-1)*(STATE_SHAPE[1]+1)+1, STATE_SHAPE[1]+1))
                anti_diag_lst = list(range(index, index+(WINNING_CONDITION-1)*(STATE_SHAPE[1]-1)+1, STATE_SHAPE[1]-1))

                is_diag = all(element in indices for element in diag_lst)
                is_anti_diag = all(element in indices for element in anti_diag_lst)

                if is_diag or is_anti_diag:
                    return True 
                
            return False

        combined_board = np.concatenate([board, 
                                         board.T])
        
        result = np.any(np.apply_along_axis(lambda x: _check_consecutive(x), axis=1, arr=combined_board))

        if result is True:
            return result
        
        else:
            result = _check_diag_consecutive(board)

        return result


    def is_win(self):
        my_state = self.board[0]
        condition = self._check_winning_condition(my_state)
        self.done_condition = condition if self.done_condition is not True else self.done_condition
        return condition

    def is_draw(self):
        condition = (np.sum(self.board[0]) + np.sum(self.board[1])) >= self.n_actions
        self.done_condition = condition if self.done_condition is not True else self.done_condition
        return condition

    def is_lose(self):
        enemy_state = self.board[1]
        condition = self._check_winning_condition(enemy_state)
        self.done_condition = condition if self.done_condition is not True else self.done_condition
        return condition

    def is_done(self):
        if self.done_condition is not None:
            return self.done_condition
        else:
            return self.is_win() or self.is_draw() or self.is_lose()

    def is_first_player(self):
        return (len(self.my_actions) + len(self.enemy_actions)) % 2 == 1

    def _render_board_to_str(self):
        
        board = self.board[0] + self.board[1] * -1 if self.is_first_player() else self.board[0] * -1 + self.board[1]

        mapping = {0: '.', 1: '●', -1 : '○'}

        # Create column legend (header)
        col_legend = '  ' + ' '.join(map(str, range(board.shape[1])))

        # Create rows with row legend (A, B, C, ...)
        row_legend = []

        for i, row in enumerate(board):
            row_label = chr(65 + i)  # Convert row index to A, B, C, ...
            row_str = ' '.join(mapping[val] for val in row)
            row_legend.append(f"{row_label} {row_str}")

        # Combine the column legend and rows
        return '\n'.join([col_legend] + row_legend)

    def __call__(self):
        return self.board[:2]

    def __str__(self):
        return self._render_board_to_str()

In [34]:
my_list = list(range(23, 55, 8)) + list(range(5,12))
enemy_list = [1,2,3,4] + list(range(70,77))

In [35]:
start = time.time()

s = State1(my_list, enemy_list)
print(s)
print()
print(f"is first player? : {s.is_first_player()}")
print(f"is win : {s.is_win()}")
print(f"is draw : {s.is_draw()}")
print(f"is lose : {s.is_lose()}")
print(f"is done : {s.is_done()}")

print(time.time() - start)

  0 1 2 3 4 5 6 7 8
A . ● ● ● ● ○ ○ ○ ○
B ○ ○ ○ . . . . . .
C . . . . . ○ . . .
D . . . . ○ . . . .
E . . . ○ . . . . .
F . . ○ . . . . . .
G . . . . . . . . .
H . . . . . . . ● ●
I ● ● ● ● ● . . . .

is first player? : False
is win : False
is draw : False
is lose : True
is done : True
0.001032114028930664


In [36]:
start = time.time()

s = State2(my_list, enemy_list)
print(s)
print()
print(f"is first player? : {s.is_first_player()}")
print(f"is win : {s.is_win()}")
print(f"is draw : {s.is_draw()}")
print(f"is lose : {s.is_lose()}")
print(f"is done : {s.is_done()}")

print(time.time() - start)

  0 1 2 3 4 5 6 7 8
A . ● ● ● ● ○ ○ ○ ○
B ○ ○ ○ . . . . . .
C . . . . . ○ . . .
D . . . . ○ . . . .
E . . . ○ . . . . .
F . . ○ . . . . . .
G . . . . . . . . .
H . . . . . . . ● ●
I ● ● ● ● ● . . . .

is first player? : False
is win : False
is draw : False
is lose : False
is done : False
0.0011241436004638672


In [13]:
class State:
    def __init__(self, my_actions=None, enemy_actions=None, state_shape=STATE_SHAPE):
        # mine, enemy's action
        self.my_actions = [] if my_actions is None else my_actions
        self.enemy_actions = [] if enemy_actions is None else enemy_actions

        self.state_shape = state_shape

        # state info about action space
        self.action_space = range(self.state_shape[0]*self.state_shape[1])
        self.n_actions = len(self.action_space)

        # create board
        self.board = self._create_board(self.my_actions, self.enemy_actions)

        # calculate legal actions
        self.legal_actions = self._get_legal_actions()
        self.winning_condition = WINNING_CONDITION

        self.done_condition = [None] * 3 # win, draw, lose 

    def next(self, action):
        my_actions = self.my_actions.copy()
        my_actions.append(action)
        return State(self.enemy_actions, my_actions)

    def _create_board(self, my_actions, enemy_actions):
        # 전체 state
        total_board = np.zeros(shape=(STATE_DIM, *self.state_shape))

        # 내 말과 상대방 말이 놓인 보드를 원핫인코딩으로 표현
        my_board, enemy_board = np.zeros(self.n_actions), np.zeros(self.n_actions)

        my_board[my_actions] = 1
        enemy_board[enemy_actions] = 1

        total_board[0] = my_board.reshape(self.state_shape)
        total_board[1] = enemy_board.reshape(self.state_shape)
        total_board[2] = np.full(self.state_shape, fill_value=1) if not self.is_first_player() else np.zeros(self.state_shape)

        return total_board

    def _get_legal_actions(self):
        my_actions_set = set(self.my_actions)
        enemy_actions_set = set(self.enemy_actions)

        return list(set(self.action_space) - my_actions_set - enemy_actions_set)

    def _check_winning_condition(self, board):
        
        indices = np.arange(N_ACTIONS)[board.astype(bool).reshape(-1)] 

        for index in indices:
            _, col = divmod(index, STATE_SHAPE[1]) 

            if col < STATE_SHAPE[1] - WINNING_CONDITION:
                row_lst = list(range(index, index+WINNING_CONDITION))
                is_row = all(element in indices for element in row_lst)
                
                if is_row:
                    return True
                
            diag_lst = list(range(index, index+(WINNING_CONDITION-1)*(STATE_SHAPE[1]+1)+1, STATE_SHAPE[1]+1))
            anti_diag_lst = list(range(index, index+(WINNING_CONDITION-1)*(STATE_SHAPE[1]-1)+1, STATE_SHAPE[1]-1))
            col_lst = list(range(index, index+(WINNING_CONDITION-1)*STATE_SHAPE[1]+1, STATE_SHAPE[1]))

            is_diag = all(element in indices for element in diag_lst)
            is_anti_diag = all(element in indices for element in anti_diag_lst)
            is_col = all(element in indices for element in col_lst)

            if is_diag or is_anti_diag or is_col:
                return True 
                
        return False


    def is_win(self):
        my_state = self.board[0]
        condition = self._check_winning_condition(my_state)
        self.done_condition[0] = condition
        return condition

    def is_draw(self):
        condition = (np.sum(self.board[0]) + np.sum(self.board[1])) >= self.n_actions
        self.done_condition[1] = condition
        return condition

    def is_lose(self):
        enemy_state = self.board[1]
        condition = self._check_winning_condition(enemy_state)
        self.done_condition[2] = condition
        return condition

    def is_done(self):
        if None in self.done_condition:
            return self.is_win() or self.is_draw() or self.is_lose()
        else:
            return any(self.done_condition)
            

    def is_first_player(self):
        return (len(self.my_actions) + len(self.enemy_actions)) % 2 == 1

    def _render_board_to_str(self):
        
        board = self.board[0] + self.board[1] * -1 if self.is_first_player() else self.board[0] * -1 + self.board[1]

        mapping = {0: '.', 1: '●', -1 : '○'}

        # Create column legend (header)
        col_legend = '  ' + ' '.join(map(str, range(board.shape[1])))

        # Create rows with row legend (A, B, C, ...)
        row_legend = []

        for i, row in enumerate(board):
            row_label = chr(65 + i)  # Convert row index to A, B, C, ...
            row_str = ' '.join(mapping[val] for val in row)
            row_legend.append(f"{row_label} {row_str}")

        # Combine the column legend and rows
        return '\n'.join([col_legend] + row_legend)

    def __call__(self):
        return self.board[:2]

    def __str__(self):
        return self._render_board_to_str()

In [9]:
my_list = list(range(23, 55, 8)) + list(range(5,12))
enemy_list = [1,2,3,4,13] + list(range(70,77))

In [14]:
start = time.time()

s = State(my_list, enemy_list)
print(s)
print()
print(f"is first player? : {s.is_first_player()}")
print(f"is win : {s.is_win()}")
print(f"is draw : {s.is_draw()}")
print(f"is lose : {s.is_lose()}")
print(f"is done : {s.is_done()}")

print(time.time() - start)

  0 1 2 3 4 5 6 7 8
A . ○ ○ ○ ○ ● ● ● ●
B ● ● ● . ○ . . . .
C . . . . . ● . . .
D . . . . ● . . . .
E . . . ● . . . . .
F . . ● . . . . . .
G . . . . . . . . .
H . . . . . . . ○ ○
I ○ ○ ○ ○ ○ . . . .

is first player? : True
is win : False
is draw : False
is lose : True
is done : True
0.0011029243469238281


In [47]:
l = [True, False, False]
any(l)

True

In [26]:
class State:
    def __init__(self, my_actions=None, enemy_actions=None, state_shape=STATE_SHAPE):
        # mine, enemy's action
        self.my_actions = [] if my_actions is None else my_actions
        self.enemy_actions = [] if enemy_actions is None else enemy_actions

        self.state_shape = state_shape

        # state info about action space
        self.action_space = range(self.state_shape[0]*self.state_shape[1])
        self.n_actions = len(self.action_space)

        # create board
        self.board = self._create_board(self.my_actions, self.enemy_actions)

        # calculate legal actions
        self.legal_actions = self._get_legal_actions()
        self.winning_condition = WINNING_CONDITION

        self.done_condition = [None] * 3 # win, draw, lose 

        # mask & unavailble idx list 
        self.diag_mask = self._make_diag_mask()
        self.anti_diag_mask = self.diag_mask[:,::-1]

        self.diag_idx_list = list(np.arange(N_ACTIONS).reshape(STATE_SHAPE)[self.diag_mask])
        self.anti_diag_idx_list = list(np.arange(N_ACTIONS).reshape(STATE_SHAPE)[self.anti_diag_mask])

    def _make_diag_mask(self):
        mask = np.zeros(shape=STATE_SHAPE, dtype=bool)
        mask[:, -(WINNING_CONDITION-1):] = 1
        return mask

    def next(self, action):
        my_actions = self.my_actions.copy()
        my_actions.append(action)
        return State(self.enemy_actions, my_actions)

    def _create_board(self, my_actions, enemy_actions):
        # 전체 state
        total_board = np.zeros(shape=(STATE_DIM, *self.state_shape))

        # 내 말과 상대방 말이 놓인 보드를 원핫인코딩으로 표현
        my_board, enemy_board = np.zeros(self.n_actions), np.zeros(self.n_actions)

        my_board[my_actions] = 1
        enemy_board[enemy_actions] = 1

        total_board[0] = my_board.reshape(self.state_shape)
        total_board[1] = enemy_board.reshape(self.state_shape)
        total_board[2] = np.full(self.state_shape, fill_value=1) if not self.is_first_player() else np.zeros(self.state_shape)

        return total_board

    def _get_legal_actions(self):
        my_actions_set = set(self.my_actions)
        enemy_actions_set = set(self.enemy_actions)

        return list(set(self.action_space) - my_actions_set - enemy_actions_set)

    def _check_winning_condition(self, board):
        def _check_row_consecutive(single_arr):
            for i in range(len(single_arr) - self.winning_condition + 1):
                if all(single_arr[i:i+self.winning_condition]):    
                    return True
            return False
        
        board = board.astype(bool)

        is_row = np.any(np.apply_along_axis(lambda x: _check_row_consecutive(x), axis=1, arr=board))

        if is_row:
            return True
        
        # 세로, 대각 조건이 맞는지 
        indices = np.arange(N_ACTIONS)[board.reshape(-1)] 

        for index in indices:
            if index not in self.anti_diag_idx_list:
                anti_diag_lst = list(range(index, index+(WINNING_CONDITION-1)*(STATE_SHAPE[1]-1)+1, STATE_SHAPE[1]-1))
                is_anti_diag = all(element in indices for element in anti_diag_lst)

                if is_anti_diag:
                    return True 
                
            if index not in self.diag_idx_list:
                diag_lst = list(range(index, index+(WINNING_CONDITION-1)*(STATE_SHAPE[1]+1)+1, STATE_SHAPE[1]+1))
                is_diag = all(element in indices for element in diag_lst)

                if is_diag:
                    return True 
                
            
            col_lst = list(range(index, index+(WINNING_CONDITION-1)*STATE_SHAPE[1]+1, STATE_SHAPE[1]))
            is_col = all(element in indices for element in col_lst)

            if is_col:
                return True 
                
        return False


    def is_win(self):
        my_state = self.board[0]
        condition = self._check_winning_condition(my_state)
        self.done_condition[0] = condition
        return condition

    def is_draw(self):
        condition = (np.sum(self.board[0]) + np.sum(self.board[1])) >= self.n_actions
        self.done_condition[1] = condition
        return condition

    def is_lose(self):
        enemy_state = self.board[1]
        condition = self._check_winning_condition(enemy_state)
        self.done_condition[2] = condition
        return condition

    def is_done(self):
        if None in self.done_condition:
            return self.is_win() or self.is_draw() or self.is_lose()
        else:
            return any(self.done_condition)
            

    def is_first_player(self):
        return (len(self.my_actions) + len(self.enemy_actions)) % 2 == 0

    def _render_board_to_str(self):
        
        board = self.board[0] + self.board[1] * -1 if self.is_first_player() else self.board[0] * -1 + self.board[1]
        mapping = {0: '.', 1: '●', -1 : '○'}

        # Create column legend (header)
        col_legend = '  ' + ' '.join(map(str, range(board.shape[1])))

        # Create rows with row legend (A, B, C, ...)
        row_legend = []

        for i, row in enumerate(board):
            row_label = chr(65 + i)  # Convert row index to A, B, C, ...
            row_str = ' '.join(mapping[val] for val in row)
            row_legend.append(f"{row_label} {row_str}")

        # Combine the column legend and rows
        return '\n'.join([col_legend] + row_legend)
    
    def __call__(self):
        return self.board[:2]

    def __str__(self):
        return self._render_board_to_str()

In [14]:
it = iter(range(81))

In [30]:
# my_list = list(range(2, 35, 8)) 
i = next(it)
my_list = list(range(i,i+4*10+1,10))
# my_list = list(range(i,i+4*8+1,8))
# my_list = list(range(i,i+4*9+1,9))
# my_list = list(range(i,i+5))
enemy_list = list(range(70,75))

start = time.time()

s = State(my_list, enemy_list)
print(s)
print()
print(f"is first player? : {s.is_first_player()}")
print(f"is win : {s.is_win()}")
print(f"is draw : {s.is_draw()}")
print(f"is lose : {s.is_lose()}")
print(f"is done : {s.is_done()}")

print(time.time() - start)

  0 1 2 3 4 5 6 7 8
A . . . . . . . . .
B ● . . . . . . . .
C . ● . . . . . . .
D . . ● . . . . . .
E . . . ● . . . . .
F . . . . ● . . . .
G . . . . . . . . .
H . . . . . . . ○ ○
I ○ ○ ○ . . . . . .

is first player? : True
is win : True
is draw : False
is lose : False
is done : True
0.0012767314910888672


In [73]:
np.arange(81).reshape(9,9)[mask1[:,::-1]]

array([ 0,  1,  2,  3,  9, 10, 11, 18, 19, 27])

In [31]:
class State1:
    def __init__(self, board_size, pieces=None, enemy_pieces=None):
        self.board_size = board_size
        self.pieces = np.zeros(board_size * board_size, dtype=int) if pieces is None else np.array(pieces)
        self.enemy_pieces = np.zeros(board_size * board_size, dtype=int) if enemy_pieces is None else np.array(enemy_pieces)

    def piece_count(self, pieces):
        return np.sum(pieces)

    def check_five_in_a_row(self, board):
        # 보드의 모든 가로, 세로, 대각선에서 연속된 5개의 돌이 있는지 확인
        def check_line(line):
            count = 0
            for cell in line:
                if cell == 1:
                    count += 1
                    if count == 5:  # 연속된 5개를 찾으면 True 반환
                        return True
                else:
                    count = 0
            return False

        # 가로, 세로 확인
        for i in range(self.board_size):
            if check_line(board[i, :]) or check_line(board[:, i]):
                return True

        # 대각선 확인
        for offset in range(-self.board_size + 1, self.board_size):
            if check_line(np.diag(board, k=offset)) or check_line(np.diag(np.fliplr(board), k=offset)):
                return True

        return False

    def is_lose(self):
        board = self.enemy_pieces.reshape(self.board_size, self.board_size)
        return self.check_five_in_a_row(board)

    def is_draw(self):
        return np.sum(self.pieces + self.enemy_pieces) == self.board_size * self.board_size

    def is_done(self):
        # 종료 조건: 상대방이 승리 or 무승부
        return self.is_lose() or self.is_draw()

    def next(self, action):
        # 현재 상태에서 주어진 action(칸)에 말을 놓은 후 다음 상태 반환
        pieces = self.pieces.copy()
        pieces[action] = 1

        # 턴을 교체해서 다음 상태 반환 (플레이어 교체)
        return State1(self.board_size, self.enemy_pieces, pieces)

    def legal_actions(self):
        # 가능한 행동(빈 칸의 위치)을 반환
        return np.where((self.pieces + self.enemy_pieces) == 0)[0]

    def to_feature(self):
        # 상태를 신경망 입력 형태로 변환 (2, board_size, board_size)
        return np.stack([
            self.pieces.reshape(self.board_size, self.board_size),
            self.enemy_pieces.reshape(self.board_size, self.board_size)
        ], axis=0).astype(np.float32)
    
    def __str__(self):
        """
        ●: 검은 돌 (플레이어 1), ○: 흰 돌 (플레이어 2), ·: 빈 칸
        """
        ox = ('●', '○') if np.sum(self.pieces) == np.sum(self.enemy_pieces) else ('○', '●')
        mapping = {0: '·', 1: ox[0], -1: ox[1]}

        board = np.zeros((self.board_size, self.board_size), dtype=int)
        board += self.pieces.reshape(self.board_size, self.board_size) * 1  # 검은 돌 (1)
        board -= self.enemy_pieces.reshape(self.board_size, self.board_size) * 1  # 흰 돌 (-1)

        return '\n'.join([' '.join(f"{mapping[val]:<2}" for val in row) for row in board])


In [33]:
# 보드 크기 (5x5)
board_size = 9
st = s().astype(int)

# 초기 상태: 빈 보드
state = State1(board_size,st[1], st[0])

# 상태 출력
print("초기 상태:")
print(state)
print(state.is_done())
print(state.is_draw())
print(state.is_lose())

초기 상태:
·  ·  ·  ·  ·  ·  ·  ·  · 
○  ·  ·  ·  ·  ·  ·  ·  · 
·  ○  ·  ·  ·  ·  ·  ·  · 
·  ·  ○  ·  ·  ·  ·  ·  · 
·  ·  ·  ○  ·  ·  ·  ·  · 
·  ·  ·  ·  ○  ·  ·  ·  · 
·  ·  ·  ·  ·  ·  ·  ·  · 
·  ·  ·  ·  ·  ·  ·  ●  ● 
●  ●  ●  ·  ·  ·  ·  ·  · 
True
False
True
