In [9]:
from Game import *

def initial_state():
    """Return the number of sticks a new game starts with.

    The count is drawn with ``randint(15,25)``.  ``randint`` is supplied by
    the ``Game`` star-import, so whether the upper bound is inclusive
    depends on that implementation (TODO confirm).
    """
    sticks=randint(15,25)
    return sticks

def valid_moves(state,player):
    """List the stick counts that may be taken when `state` sticks remain.

    With one stick left only 1 may be taken, with two left 1 or 2; every
    other state yields [1, 2, 3].  `player` is accepted for the interface
    but is not used.
    """
    if state==1:
        return [1]
    if state==2:
        return [1,2]
    return [1,2,3]
        
def show_state(state):
    """Print how many sticks remain.

    The double spaces around the number are intentional: they reproduce
    the separator that print() inserts between comma-separated arguments.
    """
    message="There are  {}  sticks left.".format(state)
    print(message)

def update_state(state,player,move):
    """Return the stick count left after `move` sticks are removed.

    `player` is part of the interface but does not affect the result.
    """
    return state-move

def win_status(state,player):
    """Classify the position `state` for `player`.

    Returns 'win' when exactly one stick is left, 'lose' when none are
    left, and None otherwise (game still in progress).  Going by the sample
    run in this notebook, the check applies to the player who has just
    moved: leaving one stick wins, taking the last stick loses.
    """
    if state==0:
        return 'lose'
    if state==1:
        return 'win'
    return None


def human_move(state,player):
    """Ask the human player for a move and return it as an integer.

    Parameters:
        state: current number of sticks (not used here).
        player: the player being prompted (not used here).

    Returns:
        int: the number of sticks the user typed.

    Raises:
        ValueError: if the typed text is not an integer.
    """
    # input() returns text in Python 3; convert it so that the arithmetic
    # in update_state (state-move) works on a number instead of a string.
    move=int(input('Take 1, 2 or 3 sticks '))
    return move


def perfect_move(state,player):
    """Return the move that leaves a stick count of the form 4k+1.

    (state-1)%4 is the number of sticks to remove to reach such a count.
    When that is zero the position is already 4k+1, and a single stick is
    taken instead.
    """
    surplus=(state-1)%4
    return 1 if surplus==0 else surplus

def random_move(state,player):
    """Return a move picked by random_choice from the currently valid moves."""
    options=valid_moves(state,player)
    return random_choice(options)


# Wrap each move function in an Agent (class provided by the Game
# star-import) so it can be passed to Game.run as a player.
human_agent=Agent(human_move)
random_agent=Agent(random_move)
perfect_agent=Agent(perfect_move)


In [13]:
def Q_move(state,player,info):
    """Pick a move for the Q-learning agent and back up the previous one.

    `info` carries the agent's learning state: the table `Q`, the previous
    `last_state`/`last_action`, and the hyperparameters `alpha`, `gamma`
    and `epsilon`.  A state seen for the first time gets a row with every
    valid move valued at 0.0.  With probability `epsilon` the move comes
    from random_choice, otherwise from top_choice (presumably the
    highest-valued entry -- confirm against Game).  If a previous move
    exists, its value is shifted by `alpha` toward gamma times the largest
    value in the current state's row; the reward for a mid-game move is 0.

    Returns the chosen action.
    """
    table=info.Q
    prev_action=info.last_action
    prev_state=info.last_state

    step=info.alpha       # learning rate
    discount=info.gamma   # weight on the best value of the current state
    explore=info.epsilon  # probability of doing random move

    # First visit to this state: create its row, all valid moves at zero.
    if state not in table:
        table[state]=Table()
        for candidate in valid_moves(state,player):
            table[state][candidate]=0.0

    # Decide which selector to use, then apply it to this state's row.
    chooser=random_choice if random.random()<explore else top_choice
    action=chooser(table[state])

    # Back up the previous move; nothing to do on the first move.
    if prev_action is not None:
        reward=0.0
        best_here=max([table[state][a] for a in table[state]])
        table[prev_state][prev_action]+=step*(reward +
            discount*best_here -
            table[prev_state][prev_action] )

    return action

def Q_post(status,player,info):
    """Apply the end-of-game update to the agent's last move.

    Parameters:
        status: 'win' gives reward +1.0, 'lose' gives -1.0, anything else 0.0.
        player: accepted for the interface; not used.
        info: object carrying `Q` (the table), `last_state`, `last_action`
            and `alpha` (learning rate).  Only these four attributes are
            read; `gamma` and `epsilon` are not needed because there is no
            successor state to discount and no move to choose.

    Returns:
        None.  `info.Q[last_state][last_action]` is updated in place by
        moving it a fraction `alpha` of the way toward the reward.
    """
    Q=info.Q
    last_action=info.last_action
    last_state=info.last_state

    alpha=info.alpha  # learning rate

    if status=='lose':
        r=-1.0
    elif status=='win':
        r=1.0
    else:
        r=0.0

    # Skip the update when the agent has not made any move yet.
    if last_action is not None:
        Q[last_state][last_action]+=alpha*(r -
            Q[last_state][last_action] )
        

In [14]:
# Build the learning agent: Q_move chooses moves; Q_post is attached as the
# agent's `post` hook (presumably called by Game once a game has ended --
# confirm against the Game module).
Q_agent=Agent(Q_move)
Q_agent.post=Q_post

# Q-table backed by Q_data.dat (assumes Remember(filename=...) loads a
# previously saved table or starts an empty one -- TODO confirm).
Q_agent.Q=Remember(filename='Q_data.dat')
Q_agent.alpha=0.3  # learning rate
Q_agent.gamma=0.9  # discount applied to the best next-state value in Q_move
Q_agent.epsilon=0.1  # chance of making a random move

In [15]:
# Play a single game with the moves displayed: Q_agent is passed first,
# perfect_agent second.
g=Game()
g.run(Q_agent,perfect_agent)
g.report()

====
Game  1
There are  20  sticks left.
Player 1 moves 3
There are  17  sticks left.
Player 2 moves 1
There are  16  sticks left.
Player 1 moves 3
There are  13  sticks left.
Player 2 moves 1
There are  12  sticks left.
Player 1 moves 3
There are  9  sticks left.
Player 2 moves 1
There are  8  sticks left.
Player 1 moves 3
There are  5  sticks left.
Player 2 moves 1
There are  4  sticks left.
Player 1 moves 3
There are  1  sticks left.
Player  1 won.
Total number of games:  1
Winning 100.00 percent
Losing 0.00 percent
Tie 0.00 percent


In [16]:
# Inspect the table after one game: only the final move (3 from state 4)
# has a non-zero value so far.
Q_agent.Q

{4: {1: 0.0, 2: 0.0, 3: 0.3},
 8: {1: 0.0, 2: 0.0, 3: 0.0},
 12: {1: 0.0, 2: 0.0, 3: 0.0},
 16: {1: 0.0, 2: 0.0, 3: 0.0},
 20: {1: 0.0, 2: 0.0, 3: 0.0}}

While learning, set epsilon to 0.1 so that the agent makes a random (exploratory) move 10% of the time.

In [17]:
# Training run: keep a 10% chance of a random move.
Q_agent.epsilon=0.1

g=Game(number_of_games=1000)
g.display=False  # no per-move printout for the 1000 games
g.run(Q_agent,perfect_agent)
# Persist the updated table (assumes Remember(obj, filename=...) writes it
# to the file -- TODO confirm against Game).
Remember(Q_agent.Q,filename='Q_data.dat')
g.report()

Total number of games:  1000
Winning 49.50 percent
Losing 50.50 percent
Tie 0.00 percent


In [18]:
# Inspect the table after the 1000 training games.
Q_agent.Q

{4: {1: -0.9802267325699999, 2: -0.9976736948601279, 3: 0.9999999999999999},
 5: {1: -0.9999999999999999, 2: -0.9999999999999999, 3: -0.9999999999999999},
 8: {1: -0.8999458497115415, 2: -0.8996447914918484, 3: 0.8999999999999998},
 9: {1: -0.8999999999999998, 2: -0.8999999999999998, 3: -0.8999999999999998},
 12: {1: -0.8095456441527977, 2: -0.8095410356342704, 3: 0.8099999999999997},
 13: {1: -0.8099999999999997, 2: -0.8099999999999997, 3: -0.8099999999999997},
 15: {1: -0.2687603840722708, 2: 0.7289999999996157, 3: -0.6357260094845962},
 16: {1: -0.7275125008615304, 2: -0.7281594284287439, 3: 0.7289999999999996},
 17: {1: -0.7289999999999976, 2: -0.7289999999999981, 3: -0.728999999999998},
 18: {1: 0.656099999999967, 2: -0.5563359303749515, 3: -0.548915855099521},
 19: {1: -0.19655038985347068,
  2: 0.6560999999998212,
  3: -0.029878794000000004},
 20: {1: -0.6122984463304509, 2: -0.6454440910244558, 3: 0.6560999999999996},
 21: {1: -0.656099999483206, 2: -0.6560999994351752, 3: -0.6

To see how good the trained agent really is, set epsilon to 0 so that it makes no random moves.

In [19]:
# Evaluation run: epsilon=0 means Q_move never takes the random branch.
# alpha is left unchanged, so the table is still being updated during
# these games.
Q_agent.epsilon=0.0

g=Game(number_of_games=1000)
g.display=False  # no per-move printout for the 1000 games
g.run(Q_agent,perfect_agent)
# Persist the updated table (assumes Remember(obj, filename=...) writes it
# to the file -- TODO confirm against Game).
Remember(Q_agent.Q,filename='Q_data.dat')
g.report()

Total number of games:  1000
Winning 72.00 percent
Losing 28.00 percent
Tie 0.00 percent


## Can a Q-agent play against another?

In [20]:
def make_Q_agent(filename,alpha=0.3,gamma=0.9,epsilon=0.1):
    """Build a Q-learning agent whose table is backed by `filename`.

    Parameters:
        filename: data file handed to Remember for the agent's Q-table.
        alpha: learning rate.
        gamma: discount applied to the best next-state value in Q_move.
        epsilon: chance of making a random move.

    Returns:
        An Agent using Q_move for moves and Q_post as its `post` hook.
    """
    agent=Agent(Q_move)
    agent.post=Q_post

    agent.Q=Remember(filename=filename)
    agent.alpha=alpha
    agent.gamma=gamma
    agent.epsilon=epsilon
    return agent


# Two independent learners with separate tables, built in the same order
# as before (Q1 first, then Q2).
Q1_agent=make_Q_agent('Q1_data.dat')
Q2_agent=make_Q_agent('Q2_data.dat')


Resetting the database Q1_data.dat
Resetting the database Q2_data.dat


In [21]:
# Self-play training: both learners keep a 10% chance of a random move.
Q1_agent.epsilon=0.1
Q2_agent.epsilon=0.1

g=Game(number_of_games=1000)
g.display=False  # no per-move printout for the 1000 games
g.run(Q1_agent,Q2_agent)
# Persist both tables (assumes Remember(obj, filename=...) writes the
# object to the file -- TODO confirm against Game).
Remember(Q1_agent.Q,filename='Q1_data.dat')
Remember(Q2_agent.Q,filename='Q2_data.dat')
g.report()

Total number of games:  1000
Winning 60.20 percent
Losing 39.80 percent
Tie 0.00 percent


In [22]:
# Freeze both learners for evaluation: epsilon=0 removes random moves, and
# alpha=0 makes every Q-value update a no-op (the increment is multiplied
# by alpha).  Only Q1_agent plays in this cell; Q2_agent's settings are
# changed here but it is not used until the next run.
Q1_agent.epsilon=0.0
Q2_agent.epsilon=0.0
Q1_agent.alpha=0.0
Q2_agent.alpha=0.0

g=Game(number_of_games=1000)
g.display=False  # no per-move printout for the 1000 games
g.run(Q1_agent,perfect_agent)
g.report()

Total number of games:  1000
Winning 70.70 percent
Losing 29.30 percent
Tie 0.00 percent


In [23]:
# Same frozen settings as the previous cell (epsilon=0, alpha=0), now
# evaluating Q2_agent.
Q1_agent.epsilon=0.0
Q2_agent.epsilon=0.0
Q1_agent.alpha=0.0
Q2_agent.alpha=0.0

g=Game(number_of_games=1000)
g.display=False  # no per-move printout for the 1000 games
# NOTE(review): perfect_agent is passed first here.  In the single-game
# output earlier, the first argument was "Player 1" and the report matched
# that player's result, so the percentages below presumably describe
# perfect_agent rather than Q2_agent -- confirm.
g.run(perfect_agent,Q2_agent)
g.report()

Total number of games:  1000
Winning 72.80 percent
Losing 27.20 percent
Tie 0.00 percent
