In [1]:
from Game import *

Version:  0.2.27


## Rules of the Game

In [2]:
def initial_state():
    """Build the starting position: a 3x3 ``Board``.

    The board's ``pieces`` list is set to ``['.', 'X', 'O']``; the printed
    games in this notebook show '.' for an empty square, 'X' for player 1
    and 'O' for player 2.
    """
    empty_board=Board(3,3)
    empty_board.pieces=['.','X','O']
    return empty_board

def show_state(state):
    """Print the textual form of ``state`` to standard output."""
    rendered=str(state)
    print(rendered)
    
def valid_moves(state,player):
    """Return the list of legal moves for ``state``.

    A move is the index (0-8) of a square whose value is 0, i.e. an empty
    square.  ``player`` is accepted for interface compatibility but is not
    used: the legal squares do not depend on whose turn it is.
    """
    return [square for square in range(9) if state[square]==0]
    
def update_state(state,player,move):
    """Place ``player``'s mark on square ``move`` and return the board.

    NOTE: the board is modified in place and the very same object is
    returned -- no copy is made here.  Callers that need the previous
    position must copy it before calling (presumably the Game framework
    does so -- TODO confirm).
    """
    state[move]=player
    return state

def win_status(state,player):
    """Report the game status from ``player``'s point of view.

    Returns
        "win"        if ``player`` holds three squares in a line,
        "stalemate"  if nobody can move any more,
        None         if the game continues.

    This implementation never returns "lose"; it only inspects the lines
    owned by ``player``.
    """
    # Square numbering:
    #   0 1 2
    #   3 4 5
    #   6 7 8
    winning_lines=(
        (0,1,2),(3,4,5),(6,7,8),   # rows
        (0,3,6),(1,4,7),(2,5,8),   # columns
        (0,4,8),(6,4,2),           # diagonals
    )

    for line in winning_lines:
        if all(state[square]==player for square in line):
            return "win"

    other_player=2 if player==1 else 1

    # No win: the game goes on as long as the opponent still has a move.
    if valid_moves(state,other_player):
        return None

    return "stalemate"
    

## Agents

In [3]:
def human_move(state,player):
    """Ask the person at the keyboard for a move and return it as an int.

    The square-numbering map is printed once, then the user is prompted
    until the entry parses as an integer.  A non-numeric entry (including
    an empty line) no longer aborts the game with ValueError; the prompt is
    simply repeated.

    ``state`` and ``player`` are part of the agent interface but are not
    used here.  Whether the chosen square is actually free is not checked
    in this function.
    """
    print("""
     0 1 2
     3 4 5
     6 7 8
    """)

    while True:
        answer=input("What move?")
        try:
            return int(answer)
        except ValueError:
            # Typo or empty line: ask again instead of crashing the game.
            print("That is not a whole number - please enter one of the squares shown above.")

# Agent whose moves are typed in by a person.
human_agent = Agent(human_move)

In [4]:
def random_move(state,player):
    """Return one of the currently valid moves, picked with ``random.choice``."""
    return random.choice(valid_moves(state,player))


# Agent that plays a random legal move every turn.
random_agent = Agent(random_move)

In [5]:
from Game.minimax import *
def minimax_move(state,player):
    """Choose a move using the library's minimax search.

    ``minimax_values`` is called with ``display=True`` (presumably the
    source of the "Choice Time" lines in the game output -- TODO confirm)
    and ``top_choice`` picks the move to play from its result.
    """
    scores,candidates=minimax_values(state,player,display=True)
    best=top_choice(candidates,scores)
    return best


# Agent that plays the minimax-selected move.
minimax_agent = Agent(minimax_move)

In [6]:
def skittles_move(state,player,info):
    """Pick a move by drawing from the agent's "skittles" table.

    ``info.S`` maps a state to a table of per-action counts.  A state seen
    for the first time gets a count of 3 for each of its valid moves.  The
    move is then drawn with ``weighted_choice`` over that state's counts.

    If ``weighted_choice`` yields None (per the original note: there are no
    skittles left for this state), the previous move that led here is
    penalised by one skittle (never below 0) and a random valid move is
    played instead.
    """
    table=info.S
    prev_action=info.last_action
    prev_state=info.last_state

    # First visit to this state: give every legal action 3 skittles.
    if state not in table:
        legal=valid_moves(state,player)

        table[state]=Table()
        for action in legal:
            table[state][action]=3

    move=weighted_choice(table[state])  # weighted across actions

    if move is not None:
        return move

    # Nothing left to draw here: learn a little bit by discouraging the
    # move that brought us to this state, if there was one.
    if prev_state:
        table[prev_state][prev_action]=max(0,table[prev_state][prev_action]-1)

    return random_move(state,player)

def skittles_after(status,player,info):
    """End-of-game learning step: punish the last move after a loss.

    When ``status`` is 'lose', one skittle is removed from the entry for
    (``info.last_state``, ``info.last_action``) in ``info.S``; the count
    never drops below 0.  Any other status leaves the table untouched.

    If no previous state is recorded (``info.last_state`` is None) there is
    nothing to adjust and the function returns without touching the table,
    rather than failing on the ``S[None]`` lookup.

    ``player`` is part of the callback interface and is not used.
    """
    S=info.S
    last_action=info.last_action
    last_state=info.last_state

    if last_state is None:
        return

    if status=='lose':
        # learn a little bit
        S[last_state][last_action]=max(0,S[last_state][last_action]-1)
        
    


# Two independent learning agents: each has its own (initially empty)
# skittles table and uses skittles_after as its post-game callback.
skittles_agent = Agent(skittles_move)
skittles_agent.S = Table()
skittles_agent.post = skittles_after

skittles_agent2 = Agent(skittles_move)
skittles_agent2.S = Table()
skittles_agent2.post = skittles_after

In [7]:
# One displayed game: minimax moves first, the skittles agent second.
g = Game()
g.run(minimax_agent, skittles_agent)

====
Game  1
 .  .  . 
 .  .  . 
 .  .  . 

  Choice Time: 2.3205368518829346 seconds 
Player 1 moves 5
 .  .  . 
 .  .  X 
 .  .  . 

Player 2 moves 8
 .  .  . 
 .  .  X 
 .  .  O 

  Choice Time: 0.17641425132751465 seconds 
Player 1 moves 0
 X  .  . 
 .  .  X 
 .  .  O 

Player 2 moves 2
 X  .  O 
 .  .  X 
 .  .  O 

  Choice Time: 0.012498855590820312 seconds 
Player 1 moves 3
 X  .  O 
 X  .  X 
 .  .  O 

Player 2 moves 6
 X  .  O 
 X  .  X 
 O  .  O 

  Choice Time: 0.0004279613494873047 seconds 
Player 1 moves 4
 X  .  O 
 X  .  X 
 O  .  O 

Player  1 won.


[1]

In [9]:
# Show the table learned so far (bare last expression, so the notebook displays it).
skittles_agent.S

{(0, 0, 0, 1, 0, 0, 0, 0, 0): {0: 3, 1: 3, 2: 3, 4: 3, 5: 3, 6: 3, 7: 3, 8: 3},
 (0, 0, 0, 1, 0, 2, 1, 0, 0): {0: 3, 1: 3, 2: 3, 4: 3, 7: 3, 8: 2}}

In [12]:
# 100 silent games between the two skittles agents, then a summary.
g = Game(100)
g.display = False
result = g.run(skittles_agent, skittles_agent2)
g.report()

Total number of games:  100
Winning 64.00 percent
Losing 26.00 percent
Tie 10.00 percent


In [14]:
# Persist both learned tables to JSON files in the working directory.
SaveTable(skittles_agent.S, "ttt player 1 skittles.json")
SaveTable(skittles_agent2.S, "ttt player 2 skittles.json")

In [None]:
# Start learning from scratch: empty table and a zeroed game counter.
# The training-loop cell below accumulates into total_number_of_games,
# so this cell must run before it.
skittles_agent.S = Table()
total_number_of_games = 0

In [28]:
# Train the skittles agent against the random agent in batches and log
# the per-batch outcome counts.  total_number_of_games is NOT initialised
# here: it must already exist (see the reset cell), and re-running this
# cell keeps adding to it.
number_of_batches = 100
wins, losses, ties = [], [], []

for i in range(number_of_batches):
    N = 1000
    g = Game(N)
    g.display = False
    result = g.run(skittles_agent, random_agent)

    # Tally the batch: entries equal to 1 are counted as wins, 2 as
    # losses and 0 as ties (skittles_agent is player 1 here).
    wins.append(sum(r == 1 for r in result))
    losses.append(sum(r == 2 for r in result))
    ties.append(sum(r == 0 for r in result))

    total_number_of_games += N
    print("total games ", total_number_of_games, "W L T", wins[-1], losses[-1], ties[-1])

total games  101000 W L T 837 2 161
total games  102000 W L T 839 9 152
total games  103000 W L T 840 3 157
total games  104000 W L T 838 3 159
total games  105000 W L T 844 2 154
total games  106000 W L T 826 2 172
total games  107000 W L T 828 3 169
total games  108000 W L T 817 2 181
total games  109000 W L T 825 3 172
total games  110000 W L T 837 2 161
total games  111000 W L T 837 2 161
total games  112000 W L T 837 2 161
total games  113000 W L T 838 5 157
total games  114000 W L T 835 4 161
total games  115000 W L T 825 2 173
total games  116000 W L T 843 4 153
total games  117000 W L T 819 3 178
total games  118000 W L T 833 2 165
total games  119000 W L T 841 3 156
total games  120000 W L T 849 2 149
total games  121000 W L T 840 1 159
total games  122000 W L T 833 1 166
total games  123000 W L T 842 6 152
total games  124000 W L T 841 3 156
total games  125000 W L T 823 3 174
total games  126000 W L T 809 4 187
total games  127000 W L T 841 0 159
total games  128000 W L T 85

# Can we modify the code so that the agent is also rewarded when it wins?

In [29]:
def skittles_move(state,player,info):
    """Pick a move by drawing from the agent's "skittles" table.

    Note: this repeats the ``skittles_move`` defined earlier in the
    notebook with the same logic; once this cell runs, this later
    definition is the one in effect.

    ``info.S`` maps a state to a table of per-action counts.  A state seen
    for the first time gets a count of 3 for each of its valid moves.  The
    move is then drawn with ``weighted_choice`` over that state's counts.

    If ``weighted_choice`` yields None (per the original note: there are no
    skittles left for this state), the previous move that led here is
    penalised by one skittle (never below 0) and a random valid move is
    played instead.
    """
    table=info.S
    prev_action=info.last_action
    prev_state=info.last_state

    # First visit to this state: give every legal action 3 skittles.
    if state not in table:
        legal=valid_moves(state,player)

        table[state]=Table()
        for action in legal:
            table[state][action]=3

    move=weighted_choice(table[state])  # weighted across actions

    if move is not None:
        return move

    # Nothing left to draw here: learn a little bit by discouraging the
    # move that brought us to this state, if there was one.
    if prev_state:
        table[prev_state][prev_action]=max(0,table[prev_state][prev_action]-1)

    return random_move(state,player)

def skittles_after(status,player,info):
    """End-of-game learning step: punish a loss, reward a win.

    The entry for (``info.last_state``, ``info.last_action``) in ``info.S``
    is adjusted according to ``status``:

        'lose' -> one skittle removed, never dropping below 0
        'win'  -> one skittle added
        other  -> unchanged

    The win branch needs no lower bound: counts are never stored below 0,
    so adding one cannot produce a negative value.

    If no previous state is recorded (``info.last_state`` is None) there is
    nothing to adjust and the function returns without touching the table,
    rather than failing on the ``S[None]`` lookup.

    ``player`` is part of the callback interface and is not used.
    """
    S=info.S
    last_action=info.last_action
    last_state=info.last_state

    if last_state is None:
        return

    if status=='lose':
        # learn a little bit: discourage the move that lost the game
        S[last_state][last_action]=max(0,S[last_state][last_action]-1)
    elif status=='win':
        # learn a little bit: encourage the move that won the game
        S[last_state][last_action]=S[last_state][last_action]+1
        
    


# Rebuild both learning agents so they use the functions defined in this
# cell; each gets its own empty skittles table.
skittles_agent = Agent(skittles_move)
skittles_agent.S = Table()
skittles_agent.post = skittles_after

skittles_agent2 = Agent(skittles_move)
skittles_agent2.S = Table()
skittles_agent2.post = skittles_after

In [30]:
# Start learning from scratch: empty table and a zeroed game counter.
# The training-loop cells below accumulate into total_number_of_games.
skittles_agent.S = Table()
total_number_of_games = 0

In [31]:
# Train the skittles agent against the random agent in batches and log
# the per-batch outcome counts.  total_number_of_games is NOT initialised
# here: it must already exist, and re-running this cell keeps adding to it.
number_of_batches = 100
wins, losses, ties = [], [], []

for i in range(number_of_batches):
    N = 1000
    g = Game(N)
    g.display = False
    result = g.run(skittles_agent, random_agent)

    # Tally the batch: entries equal to 1 are counted as wins, 2 as
    # losses and 0 as ties (skittles_agent is player 1 here).
    wins.append(sum(r == 1 for r in result))
    losses.append(sum(r == 2 for r in result))
    ties.append(sum(r == 0 for r in result))

    total_number_of_games += N
    print("total games ", total_number_of_games, "W L T", wins[-1], losses[-1], ties[-1])

total games  1000 W L T 580 282 138
total games  2000 W L T 599 270 131
total games  3000 W L T 616 259 125
total games  4000 W L T 634 254 112
total games  5000 W L T 603 279 118
total games  6000 W L T 624 243 133
total games  7000 W L T 640 242 118
total games  8000 W L T 650 229 121
total games  9000 W L T 683 196 121
total games  10000 W L T 672 216 112
total games  11000 W L T 682 196 122
total games  12000 W L T 705 186 109
total games  13000 W L T 684 193 123
total games  14000 W L T 698 175 127
total games  15000 W L T 723 178 99
total games  16000 W L T 717 171 112
total games  17000 W L T 705 182 113
total games  18000 W L T 753 141 106
total games  19000 W L T 726 172 102
total games  20000 W L T 732 148 120
total games  21000 W L T 746 147 107
total games  22000 W L T 771 133 96
total games  23000 W L T 745 145 110
total games  24000 W L T 749 141 110
total games  25000 W L T 769 126 105
total games  26000 W L T 768 130 102
total games  27000 W L T 777 115 108
total games 

In [32]:
# Continue training the skittles agent against the random agent for
# another set of batches.  The per-batch lists are restarted, while
# total_number_of_games carries on from its current value.
number_of_batches = 100
wins, losses, ties = [], [], []

for i in range(number_of_batches):
    N = 1000
    g = Game(N)
    g.display = False
    result = g.run(skittles_agent, random_agent)

    # Tally the batch: entries equal to 1 are counted as wins, 2 as
    # losses and 0 as ties (skittles_agent is player 1 here).
    wins.append(sum(r == 1 for r in result))
    losses.append(sum(r == 2 for r in result))
    ties.append(sum(r == 0 for r in result))

    total_number_of_games += N
    print("total games ", total_number_of_games, "W L T", wins[-1], losses[-1], ties[-1])

total games  101000 W L T 898 5 97
total games  102000 W L T 894 4 102
total games  103000 W L T 905 6 89
total games  104000 W L T 882 11 107
total games  105000 W L T 890 10 100
total games  106000 W L T 898 3 99
total games  107000 W L T 890 6 104
total games  108000 W L T 891 9 100
total games  109000 W L T 884 3 113
total games  110000 W L T 896 6 98
total games  111000 W L T 877 8 115
total games  112000 W L T 880 4 116
total games  113000 W L T 880 10 110
total games  114000 W L T 877 8 115
total games  115000 W L T 907 6 87
total games  116000 W L T 908 8 84
total games  117000 W L T 896 4 100
total games  118000 W L T 899 3 98
total games  119000 W L T 896 5 99
total games  120000 W L T 882 7 111
total games  121000 W L T 907 2 91
total games  122000 W L T 895 4 101
total games  123000 W L T 909 5 86
total games  124000 W L T 885 2 113
total games  125000 W L T 906 8 86
total games  126000 W L T 888 6 106
total games  127000 W L T 884 4 112
total games  128000 W L T 883 7 110
