In [2]:
from Game import *
from Game.minimax import *
from tqdm import tqdm

Version:  0.3.02


# Connect 3

In [6]:
def initial_state():
    """Return the starting position for Connect 3: a new ``Board(4,5)``."""
    n_rows,n_cols=4,5
    return Board(n_rows,n_cols)

def valid_moves(state,player):
    """List the columns (0-4) that can still accept a piece.

    Cells 0-4 form the top row of the board, so a column is playable
    exactly when its top cell still holds 0.  ``player`` is part of the
    signature but is not used.
    """
    return [column for column in range(5) if state[column]==0]

def show_state(state,player):
    """Print the board.  ``player`` is accepted but not used."""
    print(state)

def update_state(state,player,move):
    """Drop ``player``'s piece into column ``move`` and return the board.

    Cells are numbered row by row, five per row: 0-4 is the top row and
    15-19 the bottom row.  The piece lands in the lowest cell of the column
    that still holds 0.  ``state`` itself is modified and the same object
    is returned.

    Raises:
        TypeError: ``move`` is not an int.  (This path used to fall through
            to ``return new_state`` with the name unbound.)
        ValueError: ``move`` is outside 0-4, or the column has no empty
            cell.  (The search used to run past the top of the board into
            negative indices.)
    """
    if not isinstance(move,int):
        raise TypeError("move must be an int column index, got %r" % (move,))
    if not 0<=move<5:
        raise ValueError("move must be a column in 0-4, got %r" % (move,))

    new_state=state

    i=move+15  # bottom cell of the chosen column

    while i>=0 and new_state[i]!=0:
        i-=5  # occupied: step up one row

    if i<0:
        raise ValueError("column %d is full" % move)

    new_state[i]=player

    return new_state

def win_status(state,player):
    """Classify the position from ``player``'s point of view.

    Returns:
        "win" if any entry produced by ``state.rows(3)``, ``state.cols(3)``
        or ``state.diags(3)`` equals ``[player,player,player]``;
        "stalemate" if the other player has no valid move;
        None otherwise.
    """
    # Cell numbering of the board:
    #    0  1  2  3  4
    #    5  6  7  8  9
    #   10 11 12 13 14
    #   15 16 17 18 19
    opponent=2 if player==1 else 1
    three_in_a_row=[player,player,player]

    # rows, then columns, then diagonals -- same order as before
    for windows in (state.rows,state.cols,state.diags):
        for line in windows(3):
            if line==three_in_a_row:
                return "win"

    return None if valid_moves(state,opponent) else "stalemate"

## Agents

In [10]:
def random_move(state,player):
    """Return one of the currently valid moves, chosen with ``random.choice``."""
    return random.choice(valid_moves(state,player))

# Agent whose every move comes from random_move.
random_agent=Agent(random_move)

def human_move(state,player):
    """Prompt on the console until the user enters a legal move; return it.

    Input that is not a whole number is rejected with a message and the
    prompt is repeated (previously ``int()`` raised ValueError and aborted
    the game).  A number that is not in ``valid_moves(state,player)`` is
    reported as illegal and the prompt is repeated as well.
    """
    while True:
        try:
            move=int(input("What is your move? "))
        except ValueError:
            # non-numeric text: ask again instead of crashing the game loop
            print("Please enter a whole number.")
            continue

        if move in valid_moves(state,player):
            return move

        print("Illegal move.")

# Agent that asks the person at the keyboard for each move.
human_agent=Agent(human_move)

In [37]:
def minimax_move(state,player):
    """Pick a move from a depth-limited minimax search.

    Calls ``minimax_values`` with ``maxdepth=5`` and ``display=False`` and
    passes the returned actions and values to ``top_choice``.
    """
    search_values,candidate_actions=minimax_values(state,player,maxdepth=5,display=False)
    return top_choice(candidate_actions,search_values)

# Agent that plays minimax_move.
minimax_agent=Agent(minimax_move)

In [16]:
def first_move_heuristic(state,player):
    """Play column 2 while its bottom cell (17) is empty, otherwise play randomly.

    Cell 17 is where a piece dropped into column 2 lands on an empty board
    (``update_state`` starts its search at ``move+15``).
    """
    if state[17]!=0:
        # centre already taken: fall back to any valid move
        return random.choice(valid_moves(state,player))
    return 2

# Agent that takes the bottom-centre cell when it is free and otherwise moves randomly.
smarter_agent=Agent(first_move_heuristic)

### Skittles Agent

In [21]:
def skittles_move(state,player,info):
    """Choose a move from the skittles (bead-count) table, adding rows as needed.

    ``info.S`` maps a state to a table of ``action -> skittle count``.  A
    state that is not yet in the table gets a row with 3 skittles for each
    valid move; the move is then picked by ``weighted_choice`` from that row.

    When ``weighted_choice`` returns None (no skittles left in the row), the
    previous ``(info.last_state, info.last_action)`` entry loses one skittle
    (never going below 0) and a random valid move is played instead.

    Progress is traced with tab-prefixed prints when ``info.verbose`` is
    true.  ``info.learning`` is not read by this function.
    """
    table=info.S
    prev_state=info.last_state
    prev_action=info.last_action
    chatty=info.verbose

    def say(*parts):
        # verbose-only trace; every message starts with a tab
        if chatty:
            print("\t",*parts)

    say("Player ",player," is thinking...")
    say("State: ",state)
    say("Table:",table)
    say("Last state,action: ",prev_state,prev_action)

    # make/adjust the table
    if state not in table:
        # new row: every valid move starts with the same number of skittles
        table[state]=Table()
        for action in valid_moves(state,player):
            table[state][action]=3

        say("State ",state,"unknown...added to table")
        say("Table",table)

    move=weighted_choice(table[state])
    say("Choosing from S[",state,"]",table[state],"....Move: ",move)

    if move is not None:
        return move

    # No skittles left in this row: penalise the move that led here.
    if prev_state:
        say("No possible moves!")
        say("Modifying the table: removing one skittle from (state,action) ",prev_state,prev_action)
        table[prev_state][prev_action]-=1
        say("Table:",table)

        if table[prev_state][prev_action]<0:
            table[prev_state][prev_action]=0
            say("Negative skittles...fixing.")
            say("Table:",table)
    else:
        say("Started in a bad state ",state,"with no moves and no last state.  Won't modify table.")

    return random_move(state,player)

def skittles_after(status,player,info):
    """End-of-game adjustment of the skittles table.

    When ``status`` is 'lose' and a previous state was recorded, one skittle
    is removed from ``info.S[info.last_state][info.last_action]``, never
    going below 0.  Any other status leaves the table untouched.  Traces
    are printed (tab-prefixed) when ``info.verbose`` is true.
    """
    table=info.S
    prev_state=info.last_state
    prev_action=info.last_action
    chatty=info.verbose

    def say(*parts):
        # verbose-only trace; every message starts with a tab
        if chatty:
            print("\t",*parts)

    say("End of Game adjustments")
    say("Player ",player," is thinking...")
    say("Win Status: ",status)
    say("Table:",table)
    say("Last state,action: ",prev_state,prev_action)

    if status!='lose':
        say("No adjustments needed.")
    elif not prev_state:
        say("No last state, so nothing to learn.")
    else:
        # take one skittle away from the losing move, floored at zero
        table[prev_state][prev_action]=max(table[prev_state][prev_action]-1,0)
        say("Modifying the table: removing one skittle from (state,action) ",prev_state,prev_action)
        say("Table:",table)

    # Open question: does this double-count the learning if you lose on your own turn?

In [24]:
# Two independent skittles learners.  Each gets its own empty table S,
# skittles_after attached as the `post` hook (presumably invoked by the
# framework when a game ends -- confirm), and tracing switched off.
skittles_agent1=Agent(skittles_move)
skittles_agent1.S=Table()
skittles_agent1.post=skittles_after
skittles_agent1.verbose=False

skittles_agent2=Agent(skittles_move)
skittles_agent2.S=Table()
skittles_agent2.post=skittles_after
skittles_agent2.verbose=False

### Q-Agent

In [60]:
def Q_move(state,player,info):
    """Pick a move from the Q table and, when learning, update the previous entry.

    ``info.Q`` maps a state to a table of ``action -> value``; a state not
    yet present gets a row of zeros, one per valid move.  While
    ``info.learning`` is true a random move is taken with probability
    ``info.ϵ``; otherwise (and whenever learning is off) the move is
    ``top_choice`` of the row.

    If a previous action exists and learning is on, the previous entry is
    moved toward ``γ * max(Q[state])`` at rate ``α`` (the in-game reward is 0).

    The Greek attribute names can be typed in Jupyter with
    ``\\alpha``, ``\\epsilon``, ``\\gamma`` followed by <tab>.
    """
    Q=info.Q
    last_state=info.last_state
    last_action=info.last_action
    learning=info.learning

    learning_rate=info.α  # learning rate
    explore_prob=info.ϵ   # how often to take a random move
    discount=info.γ       # memory constant -- how quickly the table updates back in time

    if state not in Q:
        actions=valid_moves(state,player)
        Q[state]=Table()
        for action in actions:
            Q[state][action]=0  # initial value of table

    # explore occasionally while learning, otherwise exploit the table
    if learning and random.random()<explore_prob:
        move=random_move(state,player)
    else:
        move=top_choice(Q[state])

    if learning and last_action is not None:  # not the first move
        reward=0
        best_next=max(Q[state][a] for a in Q[state])
        Q[last_state][last_action]+=learning_rate*(reward+
                    discount*best_next-Q[last_state][last_action])

    return move

In [62]:
def Q_after(status,player,info):
    """End-of-game Q update for the last (state, action) the agent played.

    The final reward is -1 for 'lose', 1 for 'win', 0.5 for 'stalemate'
    (valued a little closer to a win) and 0 for anything else.  When
    ``info.learning`` is true the last entry is moved toward that reward at
    rate ``info.α``.

    Nothing is updated when learning is off or when no move was recorded
    (``info.last_state`` / ``info.last_action`` is None); the latter case
    previously failed on the table lookup.
    """
    Q=info.Q
    last_state=info.last_state
    last_action=info.last_action
    learning=info.learning

    α=info.α  # learning rate

    final_rewards={'lose':-1,'win':1,'stalemate':.5}
    reward=final_rewards.get(status,0)

    if not learning:
        return

    if last_state is None or last_action is None:
        # the game ended before this agent made a move: nothing to learn from
        return

    Q[last_state][last_action]+=α*(reward - Q[last_state][last_action])

In [64]:
# First Q-learning agent: moves come from Q_move, and Q_after is attached as
# the `post` hook (presumably invoked by the framework when a game ends -- confirm).
Q1_agent=Agent(Q_move)
Q1_agent.post=Q_after
Q1_agent.Q=Table()  # makes an empty table
Q1_agent.learning=True

Q1_agent.α=0.8  # learning rate
Q1_agent.ϵ=0.5  # how often to take a random move
Q1_agent.γ=0.9  # memory constant -- how quickly does the table update back in time (earlier in the game)

In [67]:
# Second Q-learning agent, configured with the same hyper-parameters as
# Q1_agent but with its own separate table.
Q2_agent=Agent(Q_move)
Q2_agent.post=Q_after
Q2_agent.Q=Table()  # makes an empty table
Q2_agent.learning=True

Q2_agent.α=0.8  # learning rate
Q2_agent.ϵ=0.5  # how often to take a random move
Q2_agent.γ=0.9  # memory constant -- how quickly does the table update back in time (earlier in the game)

## Load the Previous Tables

In [28]:
# Replace the empty skittles tables with previously saved ones.
# NOTE(review): these JSON files must already exist next to the notebook;
# no cell shown here creates them.
skittles_agent1.S=LoadTable("connect3_skittles1.json")
skittles_agent2.S=LoadTable("connect3_skittles2.json")

In [90]:
# Replace the empty Q tables with previously saved ones.
# NOTE(review): file naming differs from the skittles tables
# ("Connect3 Q1.json" vs "connect3_skittles1.json"); the files must already exist.
Q1_agent.Q=LoadTable("Connect3 Q1.json")
Q2_agent.Q=LoadTable("Connect3 Q2.json")

## Testing Learning Agents and Minimax

In [93]:
# Evaluation mode: with learning off, Q_move always plays top_choice (no random
# exploration) and neither Q_move nor Q_after changes existing Q values.
Q1_agent.learning=False
Q2_agent.learning=False

In [94]:
# 1000 games with display off: minimax_agent moves first, Q2_agent second.
# The report percentages are presumably from the first player's perspective -- confirm.
g=Game(number_of_games=1000)
g.display=False
result=g.run(minimax_agent,Q2_agent)
g.report()

Total number of games:  1000
Winning 58.90 percent
Losing 41.10 percent
Tie 0.00 percent


In [97]:
# 1000 games with display off: random_agent moves first, minimax_agent second.
g=Game(number_of_games=1000)
g.display=False
result=g.run(random_agent,minimax_agent)
g.report()

Total number of games:  1000
Winning 18.60 percent
Losing 81.40 percent
Tie 0.00 percent


In [98]:
# 1000 games with display off: random_agent moves first, Q2_agent second.
g=Game(number_of_games=1000)
g.display=False
result=g.run(random_agent,Q2_agent)
g.report()

Total number of games:  1000
Winning 5.40 percent
Losing 94.60 percent
Tie 0.00 percent


In [79]:
# NOTE(review): skittles_move and skittles_after never read info.learning, so
# these flags appear to have no effect -- the skittles tables can still change
# during the evaluation games below.  Confirm whether that is intended.
skittles_agent1.learning=False
skittles_agent2.learning=False

In [80]:
# 1000 games with display off: minimax_agent moves first, skittles_agent2 second.
g=Game(number_of_games=1000)
g.display=False
result=g.run(minimax_agent,skittles_agent2)
g.report()

Total number of games:  1000
Winning 68.10 percent
Losing 31.90 percent
Tie 0.00 percent


In [81]:
# 1000 games with display off: random_agent moves first, minimax_agent second
# (same matchup as an earlier cell; repeated here as a baseline).
g=Game(number_of_games=1000)
g.display=False
result=g.run(random_agent,minimax_agent)
g.report()

Total number of games:  1000
Winning 19.50 percent
Losing 80.50 percent
Tie 0.00 percent


In [82]:
# 1000 games with display off: random_agent moves first, skittles_agent2 second.
g=Game(number_of_games=1000)
g.display=False
result=g.run(random_agent,skittles_agent2)
g.report()

Total number of games:  1000
Winning 39.50 percent
Losing 60.50 percent
Tie 0.00 percent


In [110]:
# Interactive game: skittles_agent1 moves first, the human (prompted on the console) second.
g=Game()
g.run(skittles_agent1,human_agent)

====
Game  1
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 

Player 1 moves 1
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 0  1  0  0  0 



What is your move?  2


Player 2 moves 2
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 0  1  2  0  0 

Player 1 moves 3
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 0  1  2  1  0 



What is your move?  2


Player 2 moves 2
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  2  0  0 
 0  1  2  1  0 

Player 1 moves 2
 0  0  0  0  0 
 0  0  1  0  0 
 0  0  2  0  0 
 0  1  2  1  0 



What is your move?  1


Player 2 moves 1
 0  0  0  0  0 
 0  0  1  0  0 
 0  2  2  0  0 
 0  1  2  1  0 

Player 1 moves 3
 0  0  0  0  0 
 0  0  1  0  0 
 0  2  2  1  0 
 0  1  2  1  0 



What is your move?  3


Player 2 moves 3
 0  0  0  0  0 
 0  0  1  2  0 
 0  2  2  1  0 
 0  1  2  1  0 

Player 1 moves 4
 0  0  0  0  0 
 0  0  1  2  0 
 0  2  2  1  0 
 0  1  2  1  1 

Player  1 won.


[1]

In [115]:
# Interactive game: the human (prompted on the console) moves first, Q2_agent second.
g=Game()
g.run(human_agent,Q2_agent)

====
Game  1
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 



What is your move?  2


Player 1 moves 2
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  1  0  0 

Player 2 moves 3
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  1  2  0 



What is your move?  1


Player 1 moves 1
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 0  1  1  2  0 

Player 2 moves 0
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  0  0  0 
 2  1  1  2  0 



What is your move?  2


Player 1 moves 2
 0  0  0  0  0 
 0  0  0  0  0 
 0  0  1  0  0 
 2  1  1  2  0 

Player 2 moves 0
 0  0  0  0  0 
 0  0  0  0  0 
 2  0  1  0  0 
 2  1  1  2  0 



What is your move?  0


Player 1 moves 0
 0  0  0  0  0 
 1  0  0  0  0 
 2  0  1  0  0 
 2  1  1  2  0 

Player 2 moves 1
 0  0  0  0  0 
 1  0  0  0  0 
 2  2  1  0  0 
 2  1  1  2  0 



What is your move?  2


Player 1 moves 2
 0  0  0  0  0 
 1  0  1  0  0 
 2  2  1  0  0 
 2  1  1  2  0 

Player  1 won.


[1]