In [1]:
from Game import *

Version:  0.2.6


In [2]:
def initial_state():
    """Return the number of sticks a new game starts with.

    The count is drawn with ``randint(15,25)``; ``randint`` arrives through
    ``from Game import *``, so whether the upper bound is inclusive depends
    on that implementation.
    """
    fewest,most=15,25
    return randint(fewest,most)

def valid_moves(state,player):
    """List how many sticks may be taken when ``state`` sticks remain.

    With one stick left only 1 may be taken, with two left 1 or 2, and in
    every other case 1, 2 or 3.  ``player`` is accepted but not used.
    """
    if state==1:
        return [1]
    if state==2:
        return [1,2]
    return [1,2,3]
        
def show_state(state):
    """Print how many sticks are left.

    The three pieces are printed with print's default single-space
    separator, so the number ends up surrounded by two spaces on each side.
    """
    pieces=("There are ",state," sticks left.")
    print(*pieces)

def update_state(state,player,move):
    """Return the stick count after ``move`` sticks are removed from ``state``.

    ``player`` is accepted but does not affect the result.
    """
    return state-move

def win_status(state,player):
    """Classify the position by the number of sticks left.

    Returns 'win' when exactly one stick remains, 'lose' when none remain,
    and None otherwise (game still in progress).  ``player`` is accepted
    but not used.
    """
    if state==1:
        return 'win'
    if state==0:
        return 'lose'
    return None


def human_move(state,player):
    """Ask the person at the keyboard for a move and return it as an int.

    ``input`` returns text in Python 3, so the reply is converted before it
    is returned; otherwise ``update_state`` would try to subtract a string
    from the stick count.  Non-numeric replies trigger a re-prompt.
    ``state`` and ``player`` are accepted but not used here; the legality
    of the number is not checked in this function.
    """
    while True:
        reply=input('Take 1, 2 or 3 sticks ')
        try:
            return int(reply)
        except ValueError:
            # Not a whole number: ask again instead of crashing the game.
            print('Please type a whole number.')


def perfect_move(state,player):
    """Return the move that leaves a stick count of the form 4k+1.

    ``(state-1)%4`` is the number of sticks to take to reach such a count.
    When that is 0 the position is already of that form, no move achieves
    the goal, and 1 stick is taken instead.  ``player`` is not used.
    """
    remainder=(state-1)%4
    return 1 if remainder==0 else remainder

def random_move(state,player):
    """Pick a move via ``random_choice`` from ``valid_moves(state,player)``."""
    options=valid_moves(state,player)
    return random_choice(options)


# Wrap each move function in an Agent so it can be passed to Game.run.
human_agent=Agent(human_move)      # moves typed in at the prompt
random_agent=Agent(random_move)    # moves drawn from valid_moves
perfect_agent=Agent(perfect_move)  # moves from the (state-1)%4 rule


In [3]:
def Q_move(state,player,info):
    """Choose a move with an epsilon-greedy policy and update the Q table.

    ``info`` supplies the table (``info.Q``), the previous state/action
    (``info.last_state`` / ``info.last_action``) and the parameters
    ``alpha``, ``gamma`` and ``epsilon``.

    A state seen for the first time gets a ``Table`` with every valid move
    initialised to 0.0.  With probability ``epsilon`` the action comes from
    ``random_choice``; otherwise from ``top_choice``.  Unless this is the
    first move, the previous state/action entry is moved towards
    ``gamma`` times the best value available in the current state, using a
    reward of 0.0 because the game is still in progress.
    """
    table=info.Q
    prev_action=info.last_action
    prev_state=info.last_state

    learning_rate=info.alpha   # step size of each update
    discount=info.gamma        # weight given to the best next-state value
    explore_prob=info.epsilon  # probability of a random move

    # Register a state the first time it is encountered.
    if state not in table:
        table[state]=Table()
        for candidate in valid_moves(state,player):
            table[state][candidate]=0.0

    # Explore or exploit.
    explore=random.random()<explore_prob
    action=random_choice(table[state]) if explore else top_choice(table[state])

    # Update the entry for the move made on the previous turn, if any.
    if prev_action is not None:
        reward=0.0
        best_here=max(table[state][a] for a in table[state])
        previous=table[prev_state][prev_action]
        table[prev_state][prev_action]=previous+learning_rate*(
            reward+discount*best_here-previous)

    return action

def Q_post(status,player,info):
    """Apply the final Q update for a game, using its outcome as the reward.

    Parameters
    ----------
    status : 'win', 'lose' or anything else
        Outcome for this agent; maps to a reward of +1.0, -1.0 or 0.0.
    player : unused
    info : object exposing ``Q``, ``last_state``, ``last_action`` and
        ``alpha``.  ``gamma`` and ``epsilon`` are not needed here, because
        there is no following state to discount and no move to choose, so
        they are no longer read.

    The entry ``Q[last_state][last_action]`` is moved towards the reward by
    a fraction ``alpha``.  Nothing happens when ``last_action`` is None
    (the agent never moved).  Returns None.
    """
    last_action=info.last_action
    if last_action is None:  # no move was made, so there is nothing to update
        return

    if status=='lose':
        r=-1.0
    elif status=='win':
        r=1.0
    else:
        r=0.0

    Q=info.Q
    last_state=info.last_state
    Q[last_state][last_action]+=info.alpha*(r-Q[last_state][last_action])
        

In [4]:
# Learning agent: Q_move chooses each move; Q_post is attached as the
# agent's `post` hook (presumably called by Game when a game ends -- confirm).
Q_agent=Agent(Q_move)
Q_agent.post=Q_post

# Q table; presumably restored from Q_data.json when the file exists --
# confirm against Game.Remember.
Q_agent.Q=Remember(filename='Q_data.json')
Q_agent.alpha=0.3  # learning rate
Q_agent.gamma=0.9  # discount applied to the best value of the next state
Q_agent.epsilon=0.1  # chance of making a random move

In [6]:
# Play one game with the moves displayed: Q_agent is passed first,
# perfect_agent second.
g=Game()
g.run(Q_agent,perfect_agent)
g.report()

====
Game  1
There are  15  sticks left.
Player 1 moves 3
There are  12  sticks left.
Player 2 moves 3
There are  9  sticks left.
Player 1 moves 3
There are  6  sticks left.
Player 2 moves 1
There are  5  sticks left.
Player 1 moves 2
There are  3  sticks left.
Player 2 moves 2
There are  1  sticks left.
Player  2 won.
Total number of games:  1
Winning 0.00 percent
Losing 100.00 percent
Tie 0.00 percent


In [7]:
# Display the Q table: one entry per visited state, one value per move.
Q_agent.Q

{5: {1: 0.0, 2: -0.3, 3: -0.3},
 9: {1: 0.0, 2: 0.0, 3: 0.0},
 13: {1: 0.0, 2: 0.0, 3: 0.0},
 15: {1: 0.0, 2: 0.0, 3: 0.0},
 18: {1: 0.0, 2: 0.0, 3: 0.0}}

While learning, set epsilon to 0.1

In [9]:
# Training run: keep 10% random moves so the agent continues to explore.
Q_agent.epsilon=0.1

g=Game(number_of_games=1000)
g.display=False  # do not print every move of 1000 games
g.run(Q_agent,perfect_agent)
# Presumably writes the table to Q_data.json for later sessions --
# confirm against Game.Remember.
Remember(Q_agent.Q,filename='Q_data.json')
g.report()

Total number of games:  1000
Winning 52.60 percent
Losing 47.40 percent
Tie 0.00 percent


In [10]:
# Display the Q table again after the 1000 training games.
Q_agent.Q

{4: {1: -0.9999987006518856, 2: -0.9999889557232575, 3: 0.9999999999999999},
 5: {1: -0.9999999999999999, 2: -0.9999999999999999, 3: -0.9999999999999999},
 8: {1: -0.899997584229671, 2: -0.8999993573327671, 3: 0.8999999999999998},
 9: {1: -0.8999999999999998, 2: -0.8999999999999998, 3: -0.8999999999999998},
 12: {1: -0.8099903460229622, 2: -0.8099989455410559, 3: 0.8099999999999997},
 13: {1: -0.8099999999999997, 2: -0.8099999999999997, 3: -0.8099999999999997},
 15: {1: -0.6971149344558585, 2: 0.7289999999999996, 3: -0.7838892613793348},
 16: {1: -0.7289874345941785, 2: -0.7289998866946924, 3: 0.7289999999999996},
 17: {1: -0.7289999999999996, 2: -0.7289999999999996, 3: -0.7289999999999996},
 18: {1: 0.6560999999999996, 2: -0.5452048208018773, 3: -0.6751207186925269},
 19: {1: -0.5487382389206135, 2: 0.6560999999999996, 3: -0.6650656245835707},
 20: {1: -0.6560353979861371, 2: -0.6560888375914812, 3: 0.6560999999999996},
 21: {1: -0.6560999999999996, 2: -0.6560999999999996, 3: -0.65609

When we want to see how good it really is, turn off epsilon (no random moves)

In [11]:
# Evaluation run: no random moves, always take the top-valued action.
# NOTE(review): Q_agent.alpha is still 0.3 here, so Q_move/Q_post keep
# updating the table during these games and the result is saved below.
# The Q1/Q2 evaluation cells set alpha to 0.0 first -- confirm which is intended.
Q_agent.epsilon=0.0

g=Game(number_of_games=1000)
g.display=False  # do not print every move of 1000 games
g.run(Q_agent,perfect_agent)
Remember(Q_agent.Q,filename='Q_data.json')
g.report()

Total number of games:  1000
Winning 71.20 percent
Losing 28.80 percent
Tie 0.00 percent


## Can a Q-agent play against another?

In [12]:
def make_Q_agent(filename,alpha=0.3,gamma=0.9,epsilon=0.1):
    """Build a Q-learning agent whose table is tied to ``filename``.

    Parameters
    ----------
    filename : str
        Passed to ``Remember(filename=...)`` to obtain the agent's Q table.
    alpha : float
        Learning rate used by Q_move and Q_post.
    gamma : float
        Discount applied by Q_move to the best value of the next state.
    epsilon : float
        Chance of making a random move.

    Returns the configured Agent, with Q_move as its move function and
    Q_post as its ``post`` hook.
    """
    agent=Agent(Q_move)
    agent.post=Q_post

    agent.Q=Remember(filename=filename)
    agent.alpha=alpha
    agent.gamma=gamma
    agent.epsilon=epsilon
    return agent


# Two independent learners, each with its own table file.
Q1_agent=make_Q_agent('Q1_data.json')
Q2_agent=make_Q_agent('Q2_data.json')


In [13]:
# Training run: both learners explore with 10% random moves.
Q1_agent.epsilon=0.1
Q2_agent.epsilon=0.1

g=Game(number_of_games=1000)
g.display=False  # do not print every move of 1000 games
g.run(Q1_agent,Q2_agent)
# Each table goes to its own file (presumably saved by Remember -- confirm).
Remember(Q1_agent.Q,filename='Q1_data.json')
Remember(Q2_agent.Q,filename='Q2_data.json')
g.report()

Total number of games:  1000
Winning 60.60 percent
Losing 39.40 percent
Tie 0.00 percent


In [14]:
# Freeze both learners: epsilon=0 removes random moves, and alpha=0 makes
# every update in Q_move/Q_post add zero, so the tables stop changing.
Q1_agent.epsilon=0.0
Q2_agent.epsilon=0.0
Q1_agent.alpha=0.0
Q2_agent.alpha=0.0

# Q1_agent is passed first, perfect_agent second.
g=Game(number_of_games=1000)
g.display=False  # do not print every move of 1000 games
g.run(Q1_agent,perfect_agent)
g.report()

Total number of games:  1000
Winning 73.20 percent
Losing 26.80 percent
Tie 0.00 percent


In [15]:
# Same frozen settings as the previous cell, repeated so this cell also
# works when run on its own.
Q1_agent.epsilon=0.0
Q2_agent.epsilon=0.0
Q1_agent.alpha=0.0
Q2_agent.alpha=0.0

# NOTE(review): perfect_agent is passed FIRST here and Q2_agent second.
# In the single displayed game the report showed "Losing 100.00 percent"
# when player 2 won, so the percentages appear to describe the first
# agent -- here perfect_agent, not Q2_agent. Confirm against Game.report.
g=Game(number_of_games=1000)
g.display=False  # do not print every move of 1000 games
g.run(perfect_agent,Q2_agent)
g.report()

Total number of games:  1000
Winning 71.80 percent
Losing 28.20 percent
Tie 0.00 percent


## Things you should do

1. Run this more times and explain the structure of the JSON files.
2. Run this with tic-tac-toe.