# Bot performance comparison

In [2]:
import random
import sys
import numpy as np
from tqdm.notebook import trange

sys.path.append('..')
from env import BlockadeEnv
from blockade import Blockade
from players.ReinforcementLearningBot import ReinforcementLearningBot
from players.OptimizedBot import OptimizedBot
from players.HeuristicBot import HeuristicBot
from players.RandomBot import RandomBot

In [3]:
def make_bot_comparison(bot1, bot2, arena_size=15, total_games=1000, starting_seed=0):
    """Play a series of seeded, windowless games between two bots and tally the outcomes.

    One game is played for every seed in
    ``range(starting_seed, starting_seed + total_games)``. Python's ``random``
    module is re-seeded with that value before each game. The same ``bot1`` and
    ``bot2`` objects are handed to every game (they are not re-created).

    A summary with absolute counts and percentages is printed once all games
    have finished.

    Args:
        bot1: Player passed to ``Blockade`` as ``player1``. Wins and losses are
            counted from this player's point of view.
        bot2: Player passed to ``Blockade`` as ``player2``.
        arena_size: Forwarded to ``Blockade`` as ``arena_size``.
        total_games: Number of games to play. With ``0`` no game is played and
            every percentage is reported as 0.0.
        starting_seed: Seed used for the first game; each following game uses
            the next integer.

    Returns:
        Tuple ``(wins, draws, losses)``. An outcome of ``1`` from
        ``run_windowless()`` is counted as a win, ``0`` as a draw and any other
        value as a loss.
    """
    win_counter = 0
    draw_counter = 0

    for seed in trange(starting_seed, starting_seed + total_games):
        # NOTE(review): only Python's `random` is seeded here. If a bot draws
        # randomness from another source (e.g. numpy), runs may not be fully
        # reproducible -- confirm against the bot implementations.
        random.seed(seed)
        game = Blockade(player1=bot1,
                        player2=bot2,
                        arena_size=arena_size,
                        verbose=False)
        outcome = game.run_windowless()
        if outcome == 1:
            win_counter += 1
        elif outcome == 0:
            draw_counter += 1

    # Everything that is neither a win nor a draw counts as a loss.
    lost_games = total_games - win_counter - draw_counter

    def percentage(count):
        # An empty series has no meaningful ratio; report 0.0 instead of
        # failing with ZeroDivisionError.
        if total_games == 0:
            return np.round(0.0, 2)
        return np.round(count / total_games * 100.0, 2)

    print(f'{bot1} against {bot2} results (arena_size={arena_size}):')
    print(f'{win_counter}/{total_games} games won ({percentage(win_counter)}%)')
    print(f'{draw_counter}/{total_games} draws ({percentage(draw_counter)}%)')
    print(f'{lost_games}/{total_games} games lost ({percentage(lost_games)}%)')

    return win_counter, draw_counter, lost_games

In [13]:
# Line-up for the round-robin: every bot meets every bot (itself included),
# once in the player1 seat and once in the player2 seat.
bots = [
    RandomBot(verbose=False),
    HeuristicBot(verbose=False),
    OptimizedBot(verbose=False),
    ReinforcementLearningBot(verbose=False, model_name='../players/A2C15v2'),
]

# Per-pairing tallies keyed by (player1 name, player2 name), plus running
# per-bot totals accumulated over both seats.
results = {}
bot_total_wins = dict.fromkeys(map(str, bots), 0)
bot_total_draws = dict.fromkeys(map(str, bots), 0)
bot_total_loses = dict.fromkeys(map(str, bots), 0)

pairings = [(first, second) for first in bots for second in bots]

for counter, (p1, p2) in enumerate(pairings, start=1):
    print(f'{counter}/{len(bots)**2} player1={p1}; player2={p2}')
    won, drawn, lost = make_bot_comparison(p1, p2, arena_size=15, total_games=1000)

    name1, name2 = str(p1), str(p2)
    results[(name1, name2)] = (won, drawn, lost)

    # A win for player1 is a loss for player2 and vice versa; a draw is
    # credited to both seats.
    for name, seat_wins, seat_losses in ((name1, won, lost), (name2, lost, won)):
        bot_total_wins[name] += seat_wins
        bot_total_draws[name] += drawn
        bot_total_loses[name] += seat_losses

    print('\n')

print(results)
print('Total wins:', bot_total_wins)
print('Total draws:', bot_total_draws)
print('Total loses:', bot_total_loses)

1/16 player1=RandomBot; player2=RandomBot


  0%|          | 0/1000 [00:00<?, ?it/s]

RandomBot against RandomBot results (arena_size=15):
480/1000 games won (48.0%)
66/1000 draws (6.6%)
454/1000 games lost (45.4%)


2/16 player1=RandomBot; player2=HeuristicBot


  0%|          | 0/1000 [00:00<?, ?it/s]

RandomBot against HeuristicBot results (arena_size=15):
43/1000 games won (4.3%)
85/1000 draws (8.5%)
872/1000 games lost (87.2%)


3/16 player1=RandomBot; player2=OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)


  0%|          | 0/1000 [00:00<?, ?it/s]

RandomBot against OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) results (arena_size=15):
55/1000 games won (5.5%)
137/1000 draws (13.7%)
808/1000 games lost (80.8%)


4/16 player1=RandomBot; player2=ReinforcementLearningBot (A2C15v2)


  0%|          | 0/1000 [00:00<?, ?it/s]

RandomBot against ReinforcementLearningBot (A2C15v2) results (arena_size=15):
329/1000 games won (32.9%)
126/1000 draws (12.6%)
545/1000 games lost (54.5%)


5/16 player1=HeuristicBot; player2=RandomBot


  0%|          | 0/1000 [00:00<?, ?it/s]

HeuristicBot against RandomBot results (arena_size=15):
834/1000 games won (83.4%)
129/1000 draws (12.9%)
37/1000 games lost (3.7%)


6/16 player1=HeuristicBot; player2=HeuristicBot


  0%|          | 0/1000 [00:00<?, ?it/s]

HeuristicBot against HeuristicBot results (arena_size=15):
344/1000 games won (34.4%)
166/1000 draws (16.6%)
490/1000 games lost (49.0%)


7/16 player1=HeuristicBot; player2=OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)


  0%|          | 0/1000 [00:00<?, ?it/s]

HeuristicBot against OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) results (arena_size=15):
272/1000 games won (27.2%)
354/1000 draws (35.4%)
374/1000 games lost (37.4%)


8/16 player1=HeuristicBot; player2=ReinforcementLearningBot (A2C15v2)


  0%|          | 0/1000 [00:00<?, ?it/s]

HeuristicBot against ReinforcementLearningBot (A2C15v2) results (arena_size=15):
829/1000 games won (82.9%)
100/1000 draws (10.0%)
71/1000 games lost (7.1%)


9/16 player1=OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525); player2=RandomBot


  0%|          | 0/1000 [00:00<?, ?it/s]

OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) against RandomBot results (arena_size=15):
849/1000 games won (84.9%)
128/1000 draws (12.8%)
23/1000 games lost (2.3%)


10/16 player1=OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525); player2=HeuristicBot


  0%|          | 0/1000 [00:00<?, ?it/s]

OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) against HeuristicBot results (arena_size=15):
512/1000 games won (51.2%)
88/1000 draws (8.8%)
400/1000 games lost (40.0%)


11/16 player1=OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525); player2=OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)


  0%|          | 0/1000 [00:00<?, ?it/s]

OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) against OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) results (arena_size=15):
385/1000 games won (38.5%)
221/1000 draws (22.1%)
394/1000 games lost (39.4%)


12/16 player1=OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525); player2=ReinforcementLearningBot (A2C15v2)


  0%|          | 0/1000 [00:00<?, ?it/s]

OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) against ReinforcementLearningBot (A2C15v2) results (arena_size=15):
0/1000 games won (0.0%)
1000/1000 draws (100.0%)
0/1000 games lost (0.0%)


13/16 player1=ReinforcementLearningBot (A2C15v2); player2=RandomBot


  0%|          | 0/1000 [00:00<?, ?it/s]

ReinforcementLearningBot (A2C15v2) against RandomBot results (arena_size=15):
560/1000 games won (56.0%)
94/1000 draws (9.4%)
346/1000 games lost (34.6%)


14/16 player1=ReinforcementLearningBot (A2C15v2); player2=HeuristicBot


  0%|          | 0/1000 [00:00<?, ?it/s]

ReinforcementLearningBot (A2C15v2) against HeuristicBot results (arena_size=15):
24/1000 games won (2.4%)
516/1000 draws (51.6%)
460/1000 games lost (46.0%)


15/16 player1=ReinforcementLearningBot (A2C15v2); player2=OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)


  0%|          | 0/1000 [00:00<?, ?it/s]

ReinforcementLearningBot (A2C15v2) against OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) results (arena_size=15):
1/1000 games won (0.1%)
931/1000 draws (93.1%)
68/1000 games lost (6.8%)


16/16 player1=ReinforcementLearningBot (A2C15v2); player2=ReinforcementLearningBot (A2C15v2)


  0%|          | 0/1000 [00:00<?, ?it/s]

ReinforcementLearningBot (A2C15v2) against ReinforcementLearningBot (A2C15v2) results (arena_size=15):
386/1000 games won (38.6%)
371/1000 draws (37.1%)
243/1000 games lost (24.3%)


{('RandomBot', 'RandomBot'): (480, 66, 454), ('RandomBot', 'HeuristicBot'): (43, 85, 872), ('RandomBot', 'OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)'): (55, 137, 808), ('RandomBot', 'ReinforcementLearningBot (A2C15v2)'): (329, 126, 545), ('HeuristicBot', 'RandomBot'): (834, 129, 37), ('HeuristicBot', 'HeuristicBot'): (344, 166, 490), ('HeuristicBot', 'OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)'): (272, 354, 374), ('HeuristicBot', 'ReinforcementLearningBot (A2C15v2)'): (829, 100, 71), ('OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)', 'RandomBot'): (849, 128, 23), ('OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)', 'HeuristicBot'): (512, 88, 400), ('OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525)', 'OptimizedBot (0.41529055, 0.

## Verify RL vs Optimized bot draw anomaly

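The round-robin above produced 93–100% draws whenever ReinforcementLearningBot played OptimizedBot (100% with OptimizedBot as player 1, 93.1% with ReinforcementLearningBot as player 1). Let's verify this with a different starting seed and ten times as many games.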

In [4]:
# Re-run the RL-vs-Optimized pairing on a disjoint seed range with ten times
# as many games, to check that the draw rate is not an artefact of seeds 0-999.
make_bot_comparison(
    bot1=ReinforcementLearningBot(verbose=False, model_name='../players/A2C15v2'),
    bot2=OptimizedBot(verbose=False),
    arena_size=15,
    total_games=10000,
    starting_seed=987654321,
)

  0%|          | 0/10000 [00:00<?, ?it/s]

ReinforcementLearningBot (A2C15v2) against OptimizedBot (0.41529055, 0.12742814, 0.38834967, 0.00099525) results (arena_size=15):
6/10000 games won (0.06%)
9307/10000 draws (93.07%)
687/10000 games lost (6.87%)


(6, 9307, 687)