In [1]:
import numpy as np
import matplotlib.pyplot as plt
import copy
import math
from tqdm import tqdm

### Tic Tac Toe 환경 정의

In [2]:
class Environment():
    """4x4 Tic-Tac-Toe board that two players take turns marking.

    The board is a flat array of 16 cells, indexed row by row:

         0  1  2  3
         4  5  6  7
         8  9 10 11
        12 13 14 15

    A cell holds 0 (empty), 1 (player 1, printed as O) or -1 (player 2,
    printed as X).
    """

    # Every line of four cells that wins the game: rows, columns, diagonals.
    WIN_LINES = (
        (0, 1, 2, 3), (4, 5, 6, 7), (8, 9, 10, 11), (12, 13, 14, 15),
        (0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15),
        (0, 5, 10, 15), (3, 6, 9, 12),
    )

    def __init__(self):
        # The board starts as 16 zeros (all cells empty).
        self.board_a = np.zeros(16)
        # done becomes True once the game is won or drawn.
        self.done = False
        # reward is the winning player's id (1 or -1), or 0 for a draw / game in progress.
        self.reward = 0
        # winner is initialised here but not updated by any method of this class.
        self.winner = 0
        # When True, move() prints the player id and the board after each move.
        self.print = False

    def move(self, p1, p2, player):
        """Let ``player`` place one mark and report the resulting game state.

        Args:
            p1: object with ``select_action(env, player)``; moves when ``player == 1``.
            p2: object with ``select_action(env, player)``; moves otherwise.
            player: 1 for p1, -1 for p2.

        Returns:
            Tuple ``(reward, done)`` as set by ``end_check``.
        """
        mover = p1 if player == 1 else p2
        # Hand the player this very environment rather than relying on a
        # module-level variable that happens to be called ``env``.
        pos = mover.select_action(self, player)

        # Mark the chosen cell for this player.
        self.board_a[pos] = player
        if self.print:
            print(player)
            self.print_board()

        # Decide whether the game has ended.
        self.end_check(player)
        return self.reward, self.done

    def get_action(self):
        """Return the list of indices of the cells that are still empty."""
        return [i for i, cell in enumerate(self.board_a) if cell == 0]

    def end_check(self, player):
        """Update ``done``/``reward`` after ``player`` has just moved.

        A win is four equal non-zero marks along any line in ``WIN_LINES``;
        the reward is then set to ``player``. If nobody has won and no empty
        cell is left, the game is a draw with reward 0.
        """
        for line in self.WIN_LINES:
            first = self.board_a[line[0]]
            if first != 0 and all(self.board_a[i] == first for i in line[1:]):
                self.done = True
                self.reward = player
                return

        # Draw: no winning line and no empty cell left.
        if not self.get_action():
            self.done = True
            self.reward = 0

    def print_board(self):
        """Print the board; player 1 is shown as O and player 2 as X."""
        print("+----+----+----+----+")
        for i in range(4):
            for j in range(4):
                cell = self.board_a[4 * i + j]
                if cell == 1:
                    print("|  O", end=" ")
                elif cell == -1:
                    print("|  X", end=" ")
                else:
                    print("|   ", end=" ")
            print("|")
            print("+----+----+----+----+")

### Human player

In [3]:
class Human_player():
    """Player whose moves are typed in on the keyboard."""

    def __init__(self):
        self.name = "Human player"

    def select_action(self, env, player):
        """Prompt until the user enters the index of an empty cell.

        Args:
            env: environment exposing ``get_action()`` (list of free cells).
            player: id of the player to move; not used by this method.

        Returns:
            The chosen cell index as an int.
        """
        while True:
            # Look up and show the moves that are currently possible.
            available_action = env.get_action()
            print("possible actions = {}".format(available_action))

            # Show the index of every cell.
            print("+----+----+----+----+")
            print("+  0 +  1 +  2 +  3 +")
            print("+----+----+----+----+")
            print("+  4 +  5 +  6 +  7 +")
            print("+----+----+----+----+")
            print("+  8 +  9 + 10 + 11 +")
            print("+----+----+----+----+")
            print("+ 12 + 13 + 14 + 15 +")
            print("+----+----+----+----+")

            # Read the move from the keyboard.
            raw = input("Select action(human) : ")
            try:
                action = int(raw)
            except ValueError:
                # Non-numeric input is just another wrong move: ask again
                # instead of aborting the whole game.
                print("You selected wrong action")
                continue

            # Accept the move only if the cell is free; otherwise ask again.
            if action in available_action:
                return action
            print("You selected wrong action")

### 랜덤 플레이어

In [4]:
class Random_player():
    """Player that picks one of the free cells uniformly at random."""

    def __init__(self):
        self.print = False
        self.name = "Random player"

    def select_action(self, env, player):
        """Return a randomly chosen entry of ``env.get_action()``.

        ``player`` is accepted for interface compatibility and is not used.
        """
        # Ask the environment which cells are still free ...
        choices = env.get_action()
        # ... and draw the position of one of them with NumPy's global RNG.
        pick = np.random.randint(len(choices))
        return choices[pick]

### Actor-Critic 플레이어

In [5]:
class Actor_Critic_player():
    """Tabular actor-critic player.

    ``critic_network`` maps a state key to an estimated state value and
    ``actor_network`` maps a state key to a dict ``{action: preference}``.
    Actions are drawn from a softmax over the preferences, mixed with
    epsilon-random exploration.
    """

    def __init__(self):
        self.name = "Actor_Critic_player"
        self.actor_lr = 0.01  # learning rate of the actor (policy)
        self.critic_lr = 0.1  # learning rate of the critic (value)
        self.gamma = 0.9  # discount factor
        self.epsilon = 1.0  # exploration probability
        self.epsilon_decay = 0.995  # multiplicative decay applied per selected action
        self.epsilon_min = 0.01  # floor for the decayed exploration probability

        # Tabular "networks": plain dicts keyed by the board state.
        self.actor_network = {}  # state key -> {action: preference}
        self.critic_network = {}  # state key -> value
        self.print = False

    def get_state_key(self, board):
        """Convert a board (any iterable of cells) into a hashable dict key."""
        return tuple(board)

    def get_action_probs(self, state_key, available_actions):
        """Return softmax probabilities over ``available_actions``.

        An unseen state is first given equal preferences for every available
        action. Actions missing from the stored preferences get probability 0.
        """
        if state_key not in self.actor_network:
            # New state: start from equal preferences.
            self.actor_network[state_key] = {
                action: 1.0 / len(available_actions) for action in available_actions
            }

        # Gibbs softmax: exponentiate the preferences, then normalise.
        prefs = self.actor_network[state_key]
        probs = np.zeros(len(available_actions))
        for i, action in enumerate(available_actions):
            if action in prefs:
                probs[i] = np.exp(prefs[action])
        return probs / np.sum(probs)

    def get_state_value(self, state_key):
        """Return the critic's value for ``state_key``, creating it as 0.0 if unseen."""
        if state_key not in self.critic_network:
            self.critic_network[state_key] = 0.0
        return self.critic_network[state_key]

    def select_action(self, env, player):
        """Choose a free cell of ``env`` for this player.

        With probability ``epsilon`` a uniformly random free cell is taken;
        otherwise one is sampled from the softmax policy. ``player`` is not
        used by this method.
        """
        available_actions = env.get_action()
        state_key = self.get_state_key(env.board_a)

        if np.random.random() < self.epsilon:
            action_idx = np.random.randint(len(available_actions))
        else:
            probs = self.get_action_probs(state_key, available_actions)
            action_idx = np.random.choice(len(available_actions), p=probs)

        # Decay exploration down to the floor, but never push epsilon *up*:
        # a caller that sets epsilon = 0 for evaluation must keep getting 0.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        return available_actions[action_idx]

    def learn(self, state, action, reward, next_state, done):
        """Apply one TD(0) actor-critic update for a single transition.

        Args:
            state: board before the action.
            action: cell index that was played.
            reward: reward received for the transition.
            next_state: board used for bootstrapping when ``done`` is False.
            done: True if the transition ended the game (no bootstrapping).
        """
        state_key = self.get_state_key(state)

        # TD target and error, computed from the value *before* this update.
        current_value = self.get_state_value(state_key)
        if done:
            target = reward
        else:
            next_value = self.get_state_value(self.get_state_key(next_state))
            target = reward + self.gamma * next_value
        td_error = target - current_value

        # Critic update, applied exactly once per transition.
        self.critic_network[state_key] += self.critic_lr * td_error

        # Actor update. Preferences exist only for states that have gone
        # through get_action_probs(); other states are left untouched here.
        prefs = self.actor_network.get(state_key)
        if prefs is None or action not in prefs:
            return
        prefs[action] += self.actor_lr * td_error

        # Shift the preferences so that none of them is negative.
        lowest = min(prefs.values())
        if lowest < 0:
            for a in prefs:
                prefs[a] -= lowest

        # Rescale the preferences to sum to 1; skip when they are all zero so
        # we never divide by zero.
        total = sum(prefs.values())
        if total > 0:
            for a in prefs:
                prefs[a] /= total

### Actor-Critic 플레이어 훈련

In [None]:

# Self-play training: two actor-critic players learn against each other.
# P1 plays mark 1 and always moves first; P2 plays mark -1.
p1_AC = Actor_Critic_player()
p2_AC = Actor_Critic_player()

p1_score = 0
p2_score = 0
draw_score = 0

max_episodes = 100000

for episode in tqdm(range(max_episodes)):
    env = Environment()
    # P2's most recent (state, action); None until P2 has moved in this
    # episode, so nothing from a previous episode can leak into this one.
    p2_last_state = None
    p2_last_action = None

    while not env.done:
        # P1's turn
        current_state = np.copy(env.board_a)
        action = p1_AC.select_action(env, 1)
        env.board_a[action] = 1
        env.end_check(1)

        if env.done:
            if env.reward == 1:  # P1 wins
                p1_AC.learn(current_state, action, 1, env.board_a, True)
                if p2_last_state is not None:
                    p2_AC.learn(p2_last_state, p2_last_action, -1, env.board_a, True)
                p1_score += 1
            elif env.reward == 0:  # draw
                p1_AC.learn(current_state, action, 0, env.board_a, True)
                if p2_last_state is not None:
                    p2_AC.learn(p2_last_state, p2_last_action, 0, env.board_a, True)
                draw_score += 1
            break

        # The game goes on and it is P2's turn again, so P2's previous move
        # now has its successor state: the board P2 is about to act on.
        # Bootstrapping from this board (instead of the board right after
        # P2's own move, for which P2 never stores a value) mirrors how P1's
        # transitions are formed below.
        if p2_last_state is not None:
            p2_AC.learn(p2_last_state, p2_last_action, -0.01, env.board_a, False)

        # P2's turn
        p2_last_state = np.copy(env.board_a)
        p2_last_action = p2_AC.select_action(env, -1)
        env.board_a[p2_last_action] = -1
        env.end_check(-1)

        if env.done:
            if env.reward == -1:  # P2 wins
                p1_AC.learn(current_state, action, -1, env.board_a, True)
                p2_AC.learn(p2_last_state, p2_last_action, 1, env.board_a, True)
                p2_score += 1
            elif env.reward == 0:  # draw
                p1_AC.learn(current_state, action, 0, env.board_a, True)
                p2_AC.learn(p2_last_state, p2_last_action, 0, env.board_a, True)
                draw_score += 1
            break

        # The game goes on: P1's successor state is the board after P2's
        # reply; a small step penalty is used as the reward.
        p1_AC.learn(current_state, action, -0.01, env.board_a, False)

    # Report progress (and decay exploration once more) every 1000 episodes.
    if episode % 1000 == 0:
        print(f"Episode {episode}: P1 wins = {p1_score}, P2 wins = {p2_score}, Draws = {draw_score}")
        p1_AC.epsilon = max(p1_AC.epsilon_min, p1_AC.epsilon * p1_AC.epsilon_decay)
        p2_AC.epsilon = max(p2_AC.epsilon_min, p2_AC.epsilon * p2_AC.epsilon_decay)

print(f"Final scores - P1: {p1_score}, P2: {p2_score}, Draws: {draw_score}")

  0%|▍                                                                                                                                  | 323/100000 [00:00<01:46, 938.89it/s]

Episode 0: P1 wins = 0, P2 wins = 0, Draws = 1


  1%|█▌                                                                                                                                | 1159/100000 [00:01<01:48, 908.53it/s]

Episode 1000: P1 wins = 324, P2 wins = 264, Draws = 413


  2%|██▊                                                                                                                               | 2170/100000 [00:02<01:48, 899.03it/s]

Episode 2000: P1 wins = 655, P2 wins = 513, Draws = 833


  3%|████                                                                                                                              | 3166/100000 [00:03<01:48, 890.84it/s]

Episode 3000: P1 wins = 966, P2 wins = 770, Draws = 1265


  4%|█████▍                                                                                                                            | 4167/100000 [00:04<01:45, 911.15it/s]

Episode 4000: P1 wins = 1288, P2 wins = 1030, Draws = 1683


  5%|██████▋                                                                                                                           | 5177/100000 [00:05<01:45, 897.21it/s]

Episode 5000: P1 wins = 1575, P2 wins = 1317, Draws = 2109


  6%|███████▉                                                                                                                          | 6102/100000 [00:06<01:46, 883.33it/s]

Episode 6000: P1 wins = 1917, P2 wins = 1577, Draws = 2507


  7%|█████████▎                                                                                                                        | 7165/100000 [00:07<01:47, 860.66it/s]

Episode 7000: P1 wins = 2237, P2 wins = 1850, Draws = 2914


  8%|██████████▌                                                                                                                       | 8134/100000 [00:09<01:45, 870.46it/s]

Episode 8000: P1 wins = 2536, P2 wins = 2117, Draws = 3348


  9%|███████████▊                                                                                                                      | 9099/100000 [00:10<01:44, 873.18it/s]

Episode 9000: P1 wins = 2861, P2 wins = 2370, Draws = 3770


 10%|█████████████▏                                                                                                                   | 10244/100000 [00:11<01:43, 867.64it/s]

Episode 10000: P1 wins = 3165, P2 wins = 2622, Draws = 4214


 11%|██████████████▎                                                                                                                  | 11134/100000 [00:12<01:41, 872.36it/s]

Episode 11000: P1 wins = 3474, P2 wins = 2872, Draws = 4655


 12%|███████████████▌                                                                                                                 | 12101/100000 [00:13<01:41, 870.08it/s]

Episode 12000: P1 wins = 3794, P2 wins = 3127, Draws = 5080


 13%|█████████████████                                                                                                                | 13235/100000 [00:14<01:40, 866.52it/s]

Episode 13000: P1 wins = 4099, P2 wins = 3374, Draws = 5528


 14%|██████████████████▎                                                                                                              | 14192/100000 [00:16<01:39, 860.72it/s]

Episode 14000: P1 wins = 4440, P2 wins = 3616, Draws = 5945


 15%|███████████████████▋                                                                                                             | 15226/100000 [00:17<01:39, 855.95it/s]

Episode 15000: P1 wins = 4749, P2 wins = 3899, Draws = 6353


 16%|████████████████████▊                                                                                                            | 16090/100000 [00:18<01:38, 849.42it/s]

Episode 16000: P1 wins = 5046, P2 wins = 4170, Draws = 6785


 17%|██████████████████████                                                                                                           | 17139/100000 [00:19<01:35, 868.47it/s]

Episode 17000: P1 wins = 5348, P2 wins = 4437, Draws = 7216


 18%|███████████████████████▎                                                                                                         | 18098/100000 [00:20<01:36, 851.07it/s]

Episode 18000: P1 wins = 5671, P2 wins = 4702, Draws = 7628


 19%|████████████████████████▋                                                                                                        | 19127/100000 [00:21<01:34, 852.17it/s]

Episode 19000: P1 wins = 6000, P2 wins = 4950, Draws = 8051


 20%|██████████████████████████                                                                                                       | 20161/100000 [00:23<01:35, 838.69it/s]

Episode 20000: P1 wins = 6338, P2 wins = 5195, Draws = 8468


 21%|███████████████████████████▎                                                                                                     | 21171/100000 [00:24<01:35, 826.78it/s]

Episode 21000: P1 wins = 6661, P2 wins = 5451, Draws = 8889


 22%|████████████████████████████▌                                                                                                    | 22094/100000 [00:25<01:34, 822.47it/s]

Episode 22000: P1 wins = 6983, P2 wins = 5703, Draws = 9315


 23%|██████████████████████████████                                                                                                   | 23276/100000 [00:26<01:33, 823.94it/s]

Episode 23000: P1 wins = 7307, P2 wins = 5968, Draws = 9726


 24%|███████████████████████████████▎                                                                                                 | 24282/100000 [00:28<01:31, 823.30it/s]

Episode 24000: P1 wins = 7646, P2 wins = 6223, Draws = 10132


 25%|████████████████████████████████▍                                                                                                | 25112/100000 [00:29<01:31, 818.40it/s]

Episode 25000: P1 wins = 7978, P2 wins = 6490, Draws = 10533


 26%|█████████████████████████████████▋                                                                                               | 26117/100000 [00:30<01:28, 838.94it/s]

Episode 26000: P1 wins = 8297, P2 wins = 6727, Draws = 10977


 27%|███████████████████████████████████                                                                                              | 27134/100000 [00:31<01:26, 846.30it/s]

Episode 27000: P1 wins = 8624, P2 wins = 6977, Draws = 11400


 28%|████████████████████████████████████▎                                                                                            | 28149/100000 [00:32<01:26, 828.14it/s]

Episode 28000: P1 wins = 8932, P2 wins = 7239, Draws = 11830


 29%|█████████████████████████████████████▌                                                                                           | 29113/100000 [00:33<01:24, 841.11it/s]

Episode 29000: P1 wins = 9217, P2 wins = 7506, Draws = 12278


 30%|██████████████████████████████████████▉                                                                                          | 30141/100000 [00:35<01:23, 833.31it/s]

Episode 30000: P1 wins = 9554, P2 wins = 7765, Draws = 12682


 31%|████████████████████████████████████████▏                                                                                        | 31156/100000 [00:36<01:22, 835.41it/s]

Episode 31000: P1 wins = 9849, P2 wins = 8027, Draws = 13125


 32%|█████████████████████████████████████████▍                                                                                       | 32166/100000 [00:37<01:22, 825.00it/s]

Episode 32000: P1 wins = 10171, P2 wins = 8284, Draws = 13546


 33%|██████████████████████████████████████████▊                                                                                      | 33161/100000 [00:38<01:22, 805.94it/s]

Episode 33000: P1 wins = 10513, P2 wins = 8541, Draws = 13947


 34%|████████████████████████████████████████████                                                                                     | 34148/100000 [00:39<01:21, 806.37it/s]

Episode 34000: P1 wins = 10814, P2 wins = 8815, Draws = 14372


 35%|█████████████████████████████████████████████▎                                                                                   | 35131/100000 [00:41<01:20, 805.55it/s]

Episode 35000: P1 wins = 11122, P2 wins = 9088, Draws = 14791


 36%|██████████████████████████████████████████████▌                                                                                  | 36100/100000 [00:42<01:20, 795.10it/s]

Episode 36000: P1 wins = 11437, P2 wins = 9367, Draws = 15197


 37%|███████████████████████████████████████████████▉                                                                                 | 37137/100000 [00:43<01:20, 784.27it/s]

Episode 37000: P1 wins = 11740, P2 wins = 9639, Draws = 15622


 38%|█████████████████████████████████████████████████▏                                                                               | 38094/100000 [00:45<01:20, 770.49it/s]

Episode 38000: P1 wins = 12066, P2 wins = 9922, Draws = 16013


 39%|██████████████████████████████████████████████████▌                                                                              | 39227/100000 [00:46<01:24, 722.54it/s]

Episode 39000: P1 wins = 12353, P2 wins = 10212, Draws = 16436


 40%|███████████████████████████████████████████████████▊                                                                             | 40208/100000 [00:48<01:27, 685.10it/s]

Episode 40000: P1 wins = 12654, P2 wins = 10492, Draws = 16855


 41%|█████████████████████████████████████████████████████                                                                            | 41112/100000 [00:50<01:34, 624.70it/s]

Episode 41000: P1 wins = 12951, P2 wins = 10766, Draws = 17284


 42%|██████████████████████████████████████████████████████▎                                                                          | 42086/100000 [00:51<01:17, 751.04it/s]

Episode 42000: P1 wins = 13264, P2 wins = 11035, Draws = 17702


 43%|███████████████████████████████████████████████████████▌                                                                         | 43098/100000 [00:52<01:14, 765.51it/s]

Episode 43000: P1 wins = 13601, P2 wins = 11314, Draws = 18086


 44%|█████████████████████████████████████████████████████████                                                                        | 44256/100000 [00:54<01:14, 745.09it/s]

Episode 44000: P1 wins = 13908, P2 wins = 11588, Draws = 18505


 45%|██████████████████████████████████████████████████████████▏                                                                      | 45094/100000 [00:55<01:11, 763.24it/s]

Episode 45000: P1 wins = 14222, P2 wins = 11846, Draws = 18933


 46%|███████████████████████████████████████████████████████████▍                                                                     | 46112/100000 [00:56<01:08, 781.13it/s]

Episode 46000: P1 wins = 14551, P2 wins = 12096, Draws = 19354


 47%|████████████████████████████████████████████████████████████▊                                                                    | 47103/100000 [00:58<01:11, 735.81it/s]

Episode 47000: P1 wins = 14875, P2 wins = 12354, Draws = 19772


 48%|██████████████████████████████████████████████████████████████                                                                   | 48110/100000 [00:59<01:08, 760.43it/s]

Episode 48000: P1 wins = 15217, P2 wins = 12624, Draws = 20160


 49%|███████████████████████████████████████████████████████████████▍                                                                 | 49194/100000 [01:01<01:08, 746.58it/s]

Episode 49000: P1 wins = 15503, P2 wins = 12887, Draws = 20611


 50%|████████████████████████████████████████████████████████████████▌                                                                | 50083/100000 [01:02<01:26, 578.04it/s]

Episode 50000: P1 wins = 15799, P2 wins = 13172, Draws = 21030


 51%|█████████████████████████████████████████████████████████████████▉                                                               | 51085/100000 [01:03<01:06, 737.56it/s]

Episode 51000: P1 wins = 16108, P2 wins = 13435, Draws = 21458


 52%|███████████████████████████████████████████████████████████████████▏                                                             | 52092/100000 [01:05<01:01, 775.85it/s]

Episode 52000: P1 wins = 16435, P2 wins = 13688, Draws = 21878


 53%|████████████████████████████████████████████████████████████████████▌                                                            | 53113/100000 [01:06<01:01, 765.85it/s]

Episode 53000: P1 wins = 16762, P2 wins = 13961, Draws = 22278


 54%|█████████████████████████████████████████████████████████████████████▊                                                           | 54122/100000 [01:07<01:00, 761.32it/s]

Episode 54000: P1 wins = 17079, P2 wins = 14247, Draws = 22675


 55%|███████████████████████████████████████████████████████████████████████▏                                                         | 55138/100000 [01:09<00:58, 771.81it/s]

Episode 55000: P1 wins = 17375, P2 wins = 14525, Draws = 23101


 56%|████████████████████████████████████████████████████████████████████████▍                                                        | 56165/100000 [01:10<01:12, 608.14it/s]

Episode 56000: P1 wins = 17700, P2 wins = 14805, Draws = 23496


 57%|█████████████████████████████████████████████████████████████████████████▋                                                       | 57082/100000 [01:12<01:01, 698.73it/s]

Episode 57000: P1 wins = 18007, P2 wins = 15103, Draws = 23891


 58%|██████████████████████████████████████████████████████████████████████████▉                                                      | 58114/100000 [01:13<01:08, 610.96it/s]

Episode 58000: P1 wins = 18329, P2 wins = 15361, Draws = 24311


 59%|████████████████████████████████████████████████████████████████████████████▏                                                    | 59082/100000 [01:15<00:58, 693.64it/s]

Episode 59000: P1 wins = 18608, P2 wins = 15655, Draws = 24738


 60%|█████████████████████████████████████████████████████████████████████████████▌                                                   | 60104/100000 [01:16<00:54, 726.47it/s]

Episode 60000: P1 wins = 18913, P2 wins = 15939, Draws = 25149


 61%|██████████████████████████████████████████████████████████████████████████████▉                                                  | 61191/100000 [01:18<01:04, 599.74it/s]

Episode 61000: P1 wins = 19259, P2 wins = 16181, Draws = 25561


 62%|████████████████████████████████████████████████████████████████████████████████                                                 | 62106/100000 [01:19<00:59, 634.66it/s]

Episode 62000: P1 wins = 19584, P2 wins = 16456, Draws = 25961


 63%|█████████████████████████████████████████████████████████████████████████████████▍                                               | 63099/100000 [01:21<01:01, 598.54it/s]

Episode 63000: P1 wins = 19907, P2 wins = 16713, Draws = 26381


 64%|███████████████████████████████████████████████████████████████████████████████████                                              | 64426/100000 [01:23<00:49, 715.99it/s]

Episode 64000: P1 wins = 20247, P2 wins = 16961, Draws = 26793


 65%|███████████████████████████████████████████████████████████████████████████████████▉                                             | 65078/100000 [01:24<00:49, 705.02it/s]

Episode 65000: P1 wins = 20599, P2 wins = 17235, Draws = 27167


 66%|█████████████████████████████████████████████████████████████████████████████████████▎                                           | 66093/100000 [01:25<00:47, 716.11it/s]

Episode 66000: P1 wins = 20923, P2 wins = 17500, Draws = 27578


 67%|██████████████████████████████████████████████████████████████████████████████████████▌                                          | 67141/100000 [01:27<00:43, 752.55it/s]

Episode 67000: P1 wins = 21234, P2 wins = 17764, Draws = 28003


 68%|███████████████████████████████████████████████████████████████████████████████████████▊                                         | 68117/100000 [01:28<00:43, 733.06it/s]

### 게임 진행 함수

In [None]:
# Play games between two players, either interactively (one game at a time)
# or automatically for a fixed number of games.
np.random.seed(0)

# Pick the two players. Alternatives are kept commented out; the
# Monte-Carlo, Q-learning and DQN players are not defined in this section.
p1 = Human_player()
# p2 = Human_player()

# p1 = Random_player()
# p2 = Random_player()

# p1 = Monte_Carlo_player()
# p1.num_playout = 100
# p2 = Monte_Carlo_player()
# p2.num_playout = 1000

# p1 = p1_Qplayer
# p1.epsilon = 0

# p2 = p2_Qplayer
# p2.epsilon = 0

# Use the trained actor-critic player as p2, with exploration switched off.
p2 = p2_AC
p2.epsilon = 0

# p1 = p1_DQN
# p1.epsilon = 0

# auto = True  : play `games` games back to back without prompting
# auto = False : play one game at a time and ask whether to continue
auto = False

# Number of games played in auto mode.
games = 100

print("p1 player : {}".format(p1.name))
print("p2 player : {}".format(p2.name))

# Win / draw counters.
p1_score = 0
p2_score = 0
draw_score = 0


if auto:
    # Automatic mode
    for j in tqdm(range(games)):

        np.random.seed(j)
        env = Environment()

        for i in range(10000):
            # The players alternate: (-1)**i is 1 for p1 on even turns and
            # -1 for p2 on odd turns.
            reward, done = env.move(p1, p2, (-1) ** i)
            # Stop as soon as the game has ended and record the result.
            if done:
                if reward == 1:
                    p1_score += 1
                elif reward == -1:
                    p2_score += 1
                else:
                    draw_score += 1
                break

else:
    # Manual mode: one game at a time.
    np.random.seed(1)
    while True:

        env = Environment()
        for i in range(10000):
            reward, done = env.move(p1, p2, (-1) ** i)
            env.print_board()
            if done:
                if reward == 1:
                    print("winner is p1({})".format(p1.name))
                    p1_score += 1
                elif reward == -1:
                    print("winner is p2({})".format(p2.name))
                    p2_score += 1
                else:
                    print("draw")
                    draw_score += 1
                break

        # Show the final board of this game.
        print("final result")
        env.print_board()

        # Ask whether to play another game; only 'n' stops.
        answer = input("More Game? (y/n)")

        if answer == 'n':
            break

print("p1({}) = {} p2({}) = {} draw = {}".format(p1.name, p1_score, p2.name, p2_score, draw_score))
                