Source: https://jeffbradberry.com/posts/2015/09/intro-to-monte-carlo-tree-search/

In [1]:
from Game import *
from Game.minimax import *

Version:  0.2.5


In [2]:
def initial_state():
    """Return the starting position of the game: a pile of 21 sticks."""
    starting_sticks = 21
    return starting_sticks

def valid_moves(state,player):
    """Return the list of stick counts that may be taken from `state`.

    With one stick left only 1 may be taken, with two sticks 1 or 2, and in
    every other case 1, 2 or 3.  `player` is accepted but not used: both
    players have the same options.
    """
    # Guard clauses for the two short piles; everything else falls through.
    if state == 1:
        return [1]
    if state == 2:
        return [1, 2]
    return [1, 2, 3]
        
def show_state(state):
    """Print how many sticks remain in the pile."""
    pieces = ("There are ", state, " sticks left.")
    # print joins its arguments with a single space, exactly as before.
    print(*pieces)

def update_state(state,player,move):
    """Return the pile size after `move` sticks are removed from `state`.

    `player` is accepted but not used.
    """
    return state - move

def win_status(state,player):
    """Classify the position `state` as 'win', 'lose' or undecided (None).

    Returns 'win' when exactly one stick is left, 'lose' when none are left,
    and None otherwise.  The simulation code in this file calls it right
    after `player` has moved, so the result is from that player's point of
    view.  `player` itself is not inspected.
    """
    if state == 1:
        return 'win'
    if state == 0:
        return 'lose'
    return None

def random_move(state,player):
    """Return one of the legal moves for `player`, chosen by `random_choice`."""
    return random_choice(valid_moves(state, player))


def human_move(state,player):
    """Ask the human player how many sticks to take and return it as an int.

    On Python 3 `input` returns text, so the reply is converted to an integer
    before it is returned; a string could never equal one of the integer
    moves produced by `valid_moves`, nor be subtracted from the pile in
    `update_state`.  The prompt is repeated until the reply parses as an
    integer.  Whether the number is a legal move is not checked here.

    `state` and `player` are accepted but not used.
    """
    while True:
        reply = input('Take 1, 2 or 3 sticks ')
        try:
            return int(reply)
        except (TypeError, ValueError):
            # Not a whole number: tell the user and ask again instead of
            # handing an unusable value back to the game loop.
            print('Please enter a whole number.')


def minimax_move(state,player):
    """Choose a move for `player` from the results of `minimax_values`.

    The values and moves returned by `minimax_values` are handed to
    `top_choice`, which makes the final selection.
    """
    scores, candidates = minimax_values(state, player)
    return top_choice(candidates, scores)
    
    
# Wrap each move function in an Agent (presumably provided by the Game
# package's star import).  Agents are created in the same order as before:
# minimax, random, human.
minimax_agent, random_agent, human_agent = (
    Agent(move_function)
    for move_function in (minimax_move, random_move, human_move)
)


In [3]:
from copy import deepcopy
import datetime

def mcts_values(current_state,player,seconds=30,max_moves=100):
    """Score every legal move for `player` using Monte Carlo tree search.

    Simulated games are played from `current_state` with
    `mcts_run_simulation` until `seconds` have elapsed.  Each legal move is
    then scored by wins / plays recorded for the state it leads to, i.e. the
    fraction of recorded simulations through that state won by `player`.

    Parameters:
        current_state: position to search from.  It is only ever passed on
            as-is or deep-copied before `update_state` is applied.
        player: the player whose move is being chosen.
        seconds: time budget for running simulations.
        max_moves: maximum number of moves played in one simulated game.

    Returns:
        (values, moves): the scores and their moves as ordered by `mysort`
        with reverse=True (best score first).  When only one move is legal,
        no simulation is run and the single move is returned with the
        placeholder score 0.0, the same score an unsimulated move receives.
    """
    # Function-scope import: a monotonic clock is unaffected by wall-clock
    # adjustments, which matters when measuring a time budget.
    import time

    moves=valid_moves(current_state,player)
    if len(moves)==1:
        # Forced move: keep the (values, moves) shape so callers can always
        # unpack the result the same way.
        return [0.0],moves

    T=Table()

    deadline=time.monotonic()+seconds
    while time.monotonic()<deadline:
        mcts_run_simulation(current_state,player,max_moves,T)

    values=[]
    for move in moves:
        S=update_state(deepcopy(current_state),player,move)
        # A child that was never expanded (e.g. with a very small time
        # budget) has no entry; treat it as 0 wins out of 1 play.
        stats=T[(S,player)] if (S,player) in T else {}
        values.append(stats.get('wins',0)/stats.get('plays',1))

    # sort by value, best first
    values,moves=mysort(values,moves,reverse=True)

    return values,moves
    
def mcts_move(state,player):
    """Choose a move for `player` using Monte Carlo tree search.

    When only one move is legal it is returned immediately.  Handling the
    forced move here means this function does not depend on what
    `mcts_values` returns when there is nothing to choose between.
    Otherwise the values and moves from `mcts_values` are handed to
    `top_choice`, which makes the final selection.
    """
    moves=valid_moves(state,player)
    if len(moves)==1:
        return moves[0]

    values,moves=mcts_values(state,player)
    return top_choice(moves,values)
    
    
from math import log, sqrt
    
def mcts_run_simulation(state,player,max_moves,T,C=1.4):
    """Play one simulated game from `state` and record its outcome in `T`.

    `T` maps (state, player) to {'plays': n, 'wins': n}, where the state is
    the position *after* the move made by that player.  On each turn:

    * if every state reachable by the player to move already has an entry
      in `T`, the move is picked by the UCB1 score
      wins/plays + C*sqrt(log(total plays)/plays);
    * otherwise the move is picked uniformly at random.

    At most one new entry is added to `T` per simulation (the first visited
    state that is not yet in the table), so the table grows by one node per
    simulated game.  After the game ends, or `max_moves` moves have been
    played, every visited (state, player) pair that has an entry gets its
    'plays' incremented, and its 'wins' incremented if that player won.

    Parameters:
        state: position to start from; it is deep-copied before each move.
        player: the player to move first (1 or 2).
        max_moves: maximum number of moves to play in this simulation.
        T: statistics table, updated in place.
        C: exploration constant of the UCB1 formula; larger values favour
            rarely tried moves over moves with a high win rate.

    Returns:
        None.
    """
    # Function-scope import: the file never imports `random` itself.
    import random

    visited_state_player=[]

    other_player=2 if player==1 else 1

    # Stays None if no move is played (max_moves == 0) or the game is still
    # undecided when the move limit is reached.
    status=None
    expanded=False
    for _ in range(max_moves):
        state=deepcopy(state)

        moves=valid_moves(state,player)
        if not moves:
            # Nothing to play from here; stop rather than take log(0) below.
            break
        available_states=[update_state(deepcopy(state),player,move)
                                        for move in moves]

        if all((S,player) in T for S in available_states):
            # Statistics exist for every option: choose by UCB1.
            plays=[T[(S,player)]['plays'] for S in available_states]
            wins=[T[(S,player)]['wins'] for S in available_states]

            log_total=log(sum(plays))
            values=[w/p+C*sqrt(log_total/p) for w,p in zip(wins,plays)]
            values,moves=mysort(values,moves,reverse=True)

            move=top_choice(moves,values)
        else:
            move=random.choice(moves)

        state=update_state(state,player,move)
        status=win_status(state,player)

        # note - this is the state *after* the move by player
        visited_state_player.append((state,player))
        if not expanded and (state,player) not in T:
            # Expansion step: only the first unseen state of a simulation is
            # added to the table.
            T[(state,player)]={'plays':0,'wins':0}
            expanded=True

        if status is not None:  # end game
            break

        player,other_player=other_player,player

    # `player` is still the one who made the final move when the loop ended
    # through `break` on a decided game.
    if status=='win':
        winner=player
    elif status=='lose':
        winner=other_player
    else:
        winner=None

    # Record the result for every visited pair that has a table entry.
    for visited_state,visited_player in visited_state_player:
        if (visited_state,visited_player) not in T:
            continue

        T[(visited_state,visited_player)]['plays']+=1
        if visited_player==winner:
            T[(visited_state,visited_player)]['wins']+=1
            
    
    

In [4]:
# Score player 1's options with 6 sticks left, searching for 5 seconds.
mcts_values(current_state=6, player=1, seconds=5)

([0.9979108252240365, 0.3157894736842105, 0.047619047619047616], 18151, 18189, 1)
([0.9979108252240365, 0.3157894736842105, 0.047619047619047616], 12, 38, 2)
([0.9979108252240365, 0.3157894736842105, 0.047619047619047616], 1, 21, 3)


([0.9979108252240365, 0.3157894736842105, 0.047619047619047616], [1, 3, 2])