In [None]:
class TicTacToe:
    """N x N tic-tac-toe environment with an exhaustive outcome predictor.

    The board is stored in ``self.map`` as a list of rows whose cells are
    'E' (empty), 'X' (first player) or 'O' (second player).  Actions are
    integers in ``range(N * N)``, numbered row by row from the top-left.
    """

    def __init__(self, n=3):
        """Create an empty ``n`` x ``n`` board (``n`` defaults to the classic 3).

        Raises:
            ValueError: if ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError("board size must be at least 1")
        self.N = n
        self.map = [['E' for _ in range(self.N)] for _ in range(self.N)]    # E: empty cell
        self.map_index_description = [h*self.N + w for h in range(self.N) for w in range(self.N)]
        self.player_types = ('X', 'O')  # X moves first, O moves second
        self.global_step = 0

        self.win_reward = 1.0
        self.defeat_reward = -1.0
        self.draw_reward = 0.0
        self.player_result = {'X': self.draw_reward, 'O': self.draw_reward}

        self.done = False

    def reset(self):
        """Clear the board, step counter and results; return the new board."""
        self.map = [['E' for _ in range(self.N)] for _ in range(self.N)]
        self.global_step = 0
        self.player_result = {'X': self.draw_reward, 'O': self.draw_reward}
        self.done = False

        return self.map

    def step(self, action):
        """Play ``action`` for the player whose turn it is.

        The player to move is derived from the parity of ``global_step``
        (even: 'X', odd: 'O').  Choosing an occupied cell is an immediate
        loss for the player who chose it.

        Args:
            action: cell index in ``range(N * N)``.

        Returns:
            ``(map, player_result, done)`` -- the live board, the live
            per-player reward dict and the game-over flag.

        Raises:
            IndexError: if ``action`` is outside ``range(N * N)``.
        """
        # Reject negatives explicitly: divmod() would turn them into a
        # negative row index, which Python lists silently wrap around.
        if not 0 <= action < self.N * self.N:
            raise IndexError("action {} is outside the board".format(action))

        row, col = self.transform_action(action)
        current_idx = self.global_step % 2
        current_player_type = self.player_types[current_idx]
        other_player_type = self.player_types[1 - current_idx]

        if self.map[row][col] == 'E':
            self.map[row][col] = current_player_type
            if self.is_win(current_player_type):    # current player wins
                self.player_result[current_player_type] = self.win_reward
                self.player_result[other_player_type] = self.defeat_reward
                self.done = True
            elif self.is_full():    # draw: rewards keep their draw default
                self.done = True
        else:   # occupied cell: current player loses
            self.player_result[current_player_type] = self.defeat_reward
            self.player_result[other_player_type] = self.win_reward
            self.done = True
        self.global_step += 1

        return self.map, self.player_result, self.done

    def transform_action(self, action):
        """Convert a flat action index into ``(row, column)``."""
        return divmod(action, self.N)

    def _lines(self):
        """Yield every row, every column and both diagonals as cell lists."""
        size = self.N
        for h in range(size):
            yield [self.map[h][w] for w in range(size)]
        for w in range(size):
            yield [self.map[h][w] for h in range(size)]
        yield [self.map[i][i] for i in range(size)]
        yield [self.map[i][size - 1 - i] for i in range(size)]

    def is_win(self, current_player_type):
        """Return True if some line is filled entirely with ``current_player_type``."""
        return any(
            all(cell == current_player_type for cell in line)
            for line in self._lines()
        )

    def is_full(self):
        """Return True when no empty ('E') cell is left on the board."""
        return all(cell != 'E' for row in self.map for cell in row)

    def print_description(self):
        """Print the current board followed by the action-index layout."""
        print("** Initial NxN Tic-tac-toe Map **")
        self.print_current_map()

        print("** Action Indexes **")
        for idx, des in enumerate(self.map_index_description):
            print(des, end=' ')
            if (idx + 1) % self.N == 0:
                print('\n', end='')

    def print_current_map(self):
        """Print the board one row per line, followed by a blank line."""
        for row in self.map:
            for cell in row:
                print(cell, end=' ')
            print('\n', end='')
        print()

    def checkwin(self):
        """Return True if any line is completely filled by one non-empty mark."""
        return any(
            line[0] != 'E' and all(cell == line[0] for cell in line)
            for line in self._lines()
        )

    def go(self, now, cnt):
        """Exhaustively search the game tree from the current position.

        Args:
            now: mark ('X' or 'O') of the player about to move.
            cnt: number of marks already on the board.

        Returns:
            1 if ``now`` can force a win, 0 if the best achievable result
            is a draw, -1 if ``now`` loses against best play.

        The board is modified during the search and restored before
        returning.  The search is exhaustive, so the work grows very
        quickly with the number of empty cells.
        """
        if self.checkwin():
            # The line was completed by the previous mover, so ``now`` lost.
            return -1
        if cnt == self.N * self.N:
            return 0
        nx = 'X'
        if now == 'X':
            nx = 'O'

        opponent_best = 1   # lowest value seen so far from the opponent's view
        for i in range(self.N):
            for j in range(self.N):
                if self.map[i][j] != 'E':
                    continue
                self.map[i][j] = now
                v = self.go(nx, cnt + 1)
                self.map[i][j] = 'E'
                opponent_best = min(opponent_best, v)
                if opponent_best == -1:
                    # A forced win was found; no other move can do better.
                    return 1
        # The opponent's best outcome, negated, is this player's outcome.
        return -opponent_best

    def match_prediction(self):
        """Print and return the best result the player to move can force.

        Returns:
            1 (win), 0 (draw) or -1 (defeat) for the player whose turn it is.
        """
        mover = self.player_types[self.global_step % 2]
        # Every legal step places exactly one mark, so the step counter is
        # also the number of occupied cells.
        value = self.go(mover, self.global_step)

        print("'" + mover + "'", end='')

        if value == 1:
            print("의 최대 결과는 '승리'입니다.")
        elif value == 0:
            print("의 최대 결과는 '무승부'입니다.")
        else:
            print("의 최대 결과는 '패배'입니다.")

        return value
            

if __name__ == '__main__':
    # Interactive two-player session on the console: before every move the
    # best outcome the player to move can still force is printed.
    game = TicTacToe()
    game.print_description()

    game.reset()
    done = False
    while not done:
        print()

        # Show the prediction for the player about to move.
        game.match_prediction()

        # Non-numeric input is treated like an out-of-range number instead
        # of crashing with an uncaught ValueError.
        try:
            action = int(input('Select action please: '))
        except ValueError:
            action = None
        lowest = game.map_index_description[0]
        highest = game.map_index_description[-1]
        if action is None or not (lowest <= action <= highest):
            print("Error: You entered the wrong number.")
            break   # an invalid entry ends the session

        _, player_result, done = game.step(action)
        game.print_current_map()
        if done:
            # Build a separate dict of labels so the game's own reward
            # dict keeps its numeric values.
            labels = {}
            for player, result in player_result.items():
                if result == game.win_reward:
                    labels[player] = 'win'
                elif result == game.defeat_reward:
                    labels[player] = 'defeat'
                else:
                    labels[player] = 'draw'
            print(labels)

** Initial NxN Tic-tac-toe Map **
E E E 
E E E 
E E E 

** Action Indexes **
0 1 2 
3 4 5 
6 7 8 

'X'의 최대 결과는 '무승부'입니다.
Select action please: 1
E X E 
E E E 
E E E 


'O'의 최대 결과는 '무승부'입니다.
Select action please: 2
E X O 
E E E 
E E E 


'X'의 최대 결과는 '무승부'입니다.
Select action please: 7
E X O 
E E E 
E X E 


'O'의 최대 결과는 '승리'입니다.
Select action please: 4
E X O 
E O E 
E X E 


'X'의 최대 결과는 '패배'입니다.
