## Testing more specialised MCTS implementations against the best so far, mctsTimed(3, 0.08), at 5 seconds per search ##

-----

# Part 1 - setup and classes #

In [48]:
### IMPORTS

from mctsNode import mctsNode
import mctsSearch_Basic as mcts
import mctsSearch_Basic_timelimited as mctstimed
import mctsSearch_Basic_timelimited_full as mctsfull
import mctsSearch_Basic_timelimited_smart as mctssmart

from time import monotonic
from copy import deepcopy

• Time aspect - needs special changes to mcts, with the time budget controlled by the agent for each search <br>
• Breadth - a dynamic version needs implementing in mcts, plus some easy external way to adjust it <br>
• C - can be tested straight away <br>
• Need a book of opening moves <br>

set up agent 1 args <br>
set up agent 2 args 


For each of N iterations: <br>
    play the two agents against each other

In [84]:
### SIMPLE MCTS AGENTS

# Global parameter for local testing: the iteration budget that simplemctsAgent
# passes to mcts.mctsSearch on every move. It is read at call time, so changing
# it affects all simplemctsAgent instances; the time-limited agents do not use it.
iterations = 1000

# Simple Agent - static
# Can be used to optimise **playouts**, and **c**

class simplemctsAgent:
    """Static MCTS agent whose search budget is a fixed number of iterations.

    The budget is not stored on the agent: it is taken from the module-level
    ``iterations`` each time a move is requested, so every instance shares it.
    """

    def __init__(self, _playouts, _c):
        # Both values are forwarded unchanged to mcts.mctsSearch on each move.
        # (c is presumably the exploration constant - confirm in mctsSearch_Basic.)
        self.playouts, self.c = _playouts, _c

    def makeMove(self, board):
        """Run one search rooted at ``board`` and return the chosen node's move."""
        search_root = mctsNode(None, board, None)
        chosen = mcts.mctsSearch(search_root, iterations, self.playouts, self.c)
        return chosen.getMove()

    def restart(self):
        """Nothing to reset between games; present so Game can call it uniformly."""
        return None

    
# same but fixed time allocated

class simple_mcts_timelimited_Agent:
    """MCTS agent that gives every search the same fixed time allowance.

    Identical in shape to the iteration-based agent, except that the budget
    handed to ``mctstimed.mctsSearch`` is the per-agent ``time`` value.
    """

    def __init__(self, _playouts, _c, _time):
        # Search settings, stored as given and reused for every move.
        self.playouts = _playouts
        self.c = _c
        # Budget passed as the second argument of mctstimed.mctsSearch
        # (presumably seconds - confirm in mctsSearch_Basic_timelimited).
        self.time = _time

    def makeMove(self, board):
        """Search from ``board`` within the time budget and return the picked move."""
        tree_root = mctsNode(None, board, None)
        picked = mctstimed.mctsSearch(tree_root, self.time, self.playouts, self.c)
        chosen_move = picked.getMove()
        return chosen_move

    def restart(self):
        """The agent keeps no per-game state, so there is nothing to reset."""
        return None
        
class simple_mcts_timelimited_full:
    """Time-limited MCTS agent backed by the ``mctsfull`` search module.

    Same constructor and interface as ``simple_mcts_timelimited_Agent``; the
    only difference is which search implementation handles each move.
    """

    def __init__(self, _playouts, _c, _time):
        # Search settings, forwarded unchanged to mctsfull.mctsSearch.
        self.playouts = _playouts
        self.c = _c
        self.time = _time

    def makeMove(self, board):
        """Search from ``board`` with the "full" implementation and return its move."""
        root = mctsNode(None, board, None)
        # Fix: this used to call mctstimed.mctsSearch, which made the class an
        # exact duplicate of simple_mcts_timelimited_Agent and left the mctsfull
        # import unused.
        # NOTE(review): assumes mctsfull.mctsSearch takes the same
        # (root, time, playouts, c) arguments as the other search modules - confirm.
        child = mctsfull.mctsSearch(root, self.time, self.playouts, self.c)
        move = child.getMove()
        return move

    def restart(self):
        """No per-game state to reset; present so Game can call it uniformly."""
        pass
    
class mcts_smart_breadth:
    """Fixed-time MCTS agent that searches with the ``mctssmart`` module."""

    def __init__(self, _playouts, _c, _time):
        # Keep the three search settings exactly as supplied by the caller.
        self.playouts, self.c, self.time = _playouts, _c, _time

    def makeMove(self, board):
        """Build a fresh root for ``board``, run the smart search, return its move."""
        start_node = mctsNode(None, board, None)
        return mctssmart.mctsSearch(
            start_node, self.time, self.playouts, self.c
        ).getMove()

    def restart(self):
        """Stateless between games, so restarting is a no-op."""
        return None
    
class mcts_smart:
    """MCTS agent that spends a different, pre-planned time budget on each move.

    ``breadth`` selects the search implementation: truthy uses ``mctssmart``,
    falsy uses ``mctstimed``. ``plays`` counts the moves made in the current
    game and indexes into the ``times`` schedule; ``restart`` resets it.
    """

    def __init__(self, _playouts, _c, breadth):
        # Search settings, forwarded unchanged to the chosen mctsSearch.
        self.playouts = _playouts
        self.c = _c
        self.breadthmode = breadth

        # Number of moves this agent has made in the current game.
        self.plays = 0

        # Schedules tried earlier (budget per move, linearly interpolated):
        #   1) 8 -> 6 over moves 1-30, 5 -> 2 over moves 31-45, 2 -> 1 over 46-60
        #   2) 7 -> 6 over moves 1-30, 5 -> 2 over moves 31-60
        # Current (third) schedule, 60 entries in total:
        #   moves 1-3   : flat 5
        #   moves 4-30  : 7 -> 6
        #   moves 31-60 : 5.2 -> 2.1
        # (values are presumably seconds - confirm against the search modules)
        opening = [5, 5, 5]
        early = self._linear_schedule(7, 6, 27)
        late = self._linear_schedule(5.2, 2.1, 30)
        self.times = opening + early + late

    @staticmethod
    def _linear_schedule(start, end, count):
        """Return ``count`` values going evenly from ``start`` to ``end``, rounded to 3 dp."""
        step = (start - end) / (count - 1)
        return [round(start - i * step, 3) for i in range(count)]

    def _current_time(self):
        """Return the budget for the upcoming move.

        The schedule only has 60 entries, but one side can be asked for more
        moves than that on a 121-cell board; any move past the end reuses the
        last budget instead of raising IndexError.
        """
        return self.times[min(self.plays, len(self.times) - 1)]

    def makeMove(self, board):
        """Search from ``board`` with this move's budget and return the chosen move."""
        root = mctsNode(None, board, None)
        search = mctssmart if self.breadthmode else mctstimed
        child = search.mctsSearch(root, self._current_time(), self.playouts, self.c)
        move = child.getMove()
        # Advance only after a move was actually produced.
        self.plays += 1
        return move

    def restart(self):
        """Reset the move counter so the next game starts at the top of the schedule."""
        self.plays = 0

In [59]:
### BASIC GAME CLASS IMPLEMENTATION

class Game:
    """Plays a series of games between two agents and records agent 1's win rate.

    Both agents must provide ``makeMove(board)`` returning a ``(row, col)``
    style index pair, and ``restart()``. Each agent always sees the board from
    its own point of view: its own stones are ``1``.
    """

    # Side length of the square board every game starts from.
    BOARD_SIZE = 11

    def __init__(self, _agent1, _agent2, _reps):
        self.agent1 = _agent1
        self.agent2 = _agent2
        self.reps = _reps
        # Fraction of games won by agent 1; None until run() has played a game.
        self.stat = None

    def run(self):
        """Play ``reps`` games, printing agent 1's running win count after each.

        The starting side alternates: ``_play`` receives ``i % 2``, so agent 2
        opens the even-numbered games and agent 1 the odd-numbered ones.
        """
        wins = 0
        for i in range(self.reps):
            wins += self._play(i % 2)

            # print midway wins
            print(wins)

            self.agent1.restart()
            self.agent2.restart()

        # With no games played there is no win rate; avoid dividing by zero.
        self.stat = wins / self.reps if self.reps > 0 else None

    def win_rate(self):
        """Return agent 1's win rate from the last ``run`` (None if no games were played)."""
        return self.stat

    def _play(self, first):
        """Play one game and return 1 if agent 1 wins, 0 if agent 2 wins.

        ``first`` is truthy when agent 1 makes the opening move. The loop only
        ends when ``mcts._winner`` reports a win for the side that just moved.
        """
        size = self.BOARD_SIZE
        board = [[0] * size for _ in range(size)]
        player1 = first
        while True:
            if player1:
                move = self.agent1.makeMove(board)
                board[move[0]][move[1]] = 1
                if mcts._winner(board):
                    return 1
            else:
                # Agent 2 plays on the flipped board so that it also moves as `1`;
                # the board is flipped back afterwards for agent 1.
                board = self._opposite_board(board)
                move = self.agent2.makeMove(board)
                board[move[0]][move[1]] = 1
                if mcts._winner(board):
                    return 0
                board = self._opposite_board(board)

            player1 = not player1

    def _opposite_board(self, board):
        """Return a new board seen from the other side; ``board`` is not modified.

        The board is mirrored along its main diagonal (rows become columns)
        and every stone changes owner (1 <-> -1). Works for any board size.
        """
        return [[-cell for cell in column] for column in zip(*board)]
        
            

-----------

# Part 2 - Basic test example #

In [56]:
### IGNORE
### JUST A CHECK THAT MY CODE WORKS

# Wall-clock timer around the whole series of games.
start = monotonic()
# NOTE(review): the next line is stale - it describes an earlier benchmark,
# not the matchup that is configured below.
# benchmark test - 1 vs 20 default playouts, c = 0.5, 1 sec per mcts
# Earlier configurations, kept for reference:
# simple_mcts_timelimited_Agent(1, 0.08, 1)
# simple_mcts_timelimited_Agent(3, 0.1, 5)
# game = Game(simple_mcts_timelimited_Agent(3, 0.08, 5), simple_mcts_timelimited_full(3, 0.05, 5), 10) #-> 0.8

# Agent 1: fixed-time agent (playouts=3, c=0.08, time=5).
# Agent 2: scheduled-time agent (playouts=3, c=0.08, breadth=1 -> mctssmart search).
# 10 games; the printed win rate is agent 1's.
game = Game(simple_mcts_timelimited_Agent(3, 0.08, 5), mcts_smart(3, 0.08, 1), 10)

game.run()
print(game.win_rate())
end = monotonic()
print("Time elapsed during matches:", end - start)


# NOTE(review): the figures below are from an earlier sanity run that pitted
# two identical agents against each other; they are not the result of the
# matchup configured above.
#===============
#  0.6
#  Time elapsed during matches: 340.4184336870003
#===============
# Result for same agents was 0.6 proving that my testing method is all correct
# (I was afraid it might somehow be bugging and only return 1 or 0)
#===============

0
1
2
2
2
2
2
2
3
4
0.4
Time elapsed during matches: 2512.8691186649994


In [66]:
### IGNORE
### JUST A CHECK THAT MY CODE WORKS

# Wall-clock timer around the whole series of games.
start = monotonic()
# NOTE(review): the next line is stale - it describes an earlier benchmark,
# not the matchup that is configured below.
# benchmark test - 1 vs 20 default playouts, c = 0.5, 1 sec per mcts
# Earlier configurations, kept for reference:
# simple_mcts_timelimited_Agent(1, 0.08, 1)
# simple_mcts_timelimited_Agent(3, 0.1, 5)
# game = Game(simple_mcts_timelimited_Agent(3, 0.08, 5), simple_mcts_timelimited_full(3, 0.05, 5), 10) #-> 0.8


# Two mcts_smart_breadth agents (playouts=1, time=1) that differ only in c:
# 0.06 for agent 1 vs 0.07 for agent 2. 10 games; the printed win rate is agent 1's.
game = Game(mcts_smart_breadth(1, 0.06, 1), mcts_smart_breadth(1, 0.07, 1), 10)

game.run()
print(game.win_rate())
end = monotonic()
print("Time elapsed during matches:", end - start)


# NOTE(review): the figures below appear to be copied from the earlier
# identical-agents sanity run; they are not the result of the matchup above.
#===============
#  0.6
#  Time elapsed during matches: 340.4184336870003
#===============
# Result for same agents was 0.6 proving that my testing method is all correct
# (I was afraid it might somehow be bugging and only return 1 or 0)
#===============

0
1
2
3
3
3
4
4
5
5
0.5
Time elapsed during matches: 656.6836823869999


In [None]:
# 0.6 for second less extreme time strategy
# now trying third, finally
start = monotonic()

# Three independent 5-game series of the same matchup, each with freshly
# constructed agents: scheduled-time mcts_smart (mctstimed search, since
# breadth=0) as agent 1 against the fixed 5-per-search agent as agent 2.
# Agent 1's win rate is printed after every series.
for _series in range(3):
    game = Game(mcts_smart(3, 0.1, 0), simple_mcts_timelimited_Agent(3, 0.1, 5), 5)
    game.run()
    print(game.win_rate())

end = monotonic()
print("Time elapsed during matches:", end - start)

#0.8, 0.4,     ~0.6


1
2
3
3
4
0.8
0
1
1
2
2
0.4
0
