In [1]:
from Game import *

Version:  0.2.21


## Rules of the Game

In [2]:
def initial_state():
    """Build the starting position: a fresh 3x3 Board.

    The board's `pieces` attribute is set to ['.','X','O'] -- presumably the
    symbols Board uses when displaying cell values 0, 1 and 2 (confirm
    against the Game package).
    """
    start=Board(3,3)
    start.pieces=['.','X','O']
    return start

def show_state(state):
    """Print `state` using its own string representation."""
    print(state)
    
def valid_moves(state,player):
    """Return the indices (0-8) of every cell whose value is 0, in increasing order.

    `player` is part of the signature but is not consulted: the same cells
    are returned regardless of whose turn it is.
    """
    return [cell for cell in range(9) if state[cell]==0]
    
def update_state(state,player,move):
    """Return a copy of `state` with `player` written at index `move`.

    The caller's `state` is left unmodified. A plain `new_state=state` would
    only bind a second name to the same object, so writing the move would
    silently alter the board the caller still holds (for example a stored
    "last state" or a search-tree node).

    Parameters
    ----------
    state : indexable board (supports `state[i]=value`)
    player : value stored in the chosen cell
    move : index of the cell to fill

    Returns
    -------
    A new, independent board of the same type as `state`.
    """
    import copy  # local import keeps this cell self-contained

    # deepcopy rather than a shallow copy: the board may be an object that
    # wraps its cell storage, which a shallow copy would still share.
    new_state=copy.deepcopy(state)
    new_state[move]=player

    return new_state

def win_status(state,player):
    """Classify the position from `player`'s point of view.

    Returns
    -------
    "win"        if `player` occupies all three cells of any row, column
                 or diagonal
    "stalemate"  if nobody has such a line for `player` and no empty cell
                 remains
    None         if the game continues

    This function never returns "lose" itself.
    """
    # Cell numbering:
    # 0 1 2
    # 3 4 5
    # 6 7 8
    winning_lines=(
        (0,1,2),(3,4,5),(6,7,8),   # rows
        (0,3,6),(1,4,7),(2,5,8),   # columns
        (0,4,8),(6,4,2),           # diagonals
    )

    for line in winning_lines:
        if all(state[cell]==player for cell in line):
            return "win"

    opponent=2 if player==1 else 1

    # No line for `player`: the game goes on only while a move is available.
    if valid_moves(state,opponent):
        return None

    return "stalemate"
    

## Agents

In [3]:
def human_move(state,player):
    """Show the cell-numbering key and read a move from the keyboard.

    Neither `state` nor `player` is inspected. The typed text is converted
    with int(), so non-numeric input raises ValueError; no check is made
    here that the chosen cell is empty.
    """
    board_key="""
     0 1 2
     3 4 5
     6 7 8
    """
    print(board_key)

    return int(input("What move?"))

# Agent whose move function is human_move, i.e. moves are typed at the keyboard.
human_agent=Agent(human_move)

In [4]:
def random_move(state,player):
    """Return one of valid_moves(state,player), chosen by random.choice.

    random.choice raises IndexError on an empty list, so this must only be
    called while at least one move is available.
    """
    return random.choice(valid_moves(state,player))


# Agent whose move function is random_move.
random_agent=Agent(random_move)

In [12]:
from Game.minimax import *
def minimax_move(state,player):
    """Choose a move from the minimax evaluation of `state`.

    minimax_values (star-imported from Game.minimax) is called with its
    display switched off and yields a (values, moves) pair; top_choice is
    then given the moves together with their values and its result is
    returned.
    """
    scores,candidates=minimax_values(state,player,display=False)
    best=top_choice(candidates,scores)
    return best


# Agent whose move function is minimax_move.
minimax_agent=Agent(minimax_move)

In [6]:
def skittles_move(state,player,info):
    """Pick a move for the "skittles" learner and penalise dead ends.

    `info.S` maps state -> Table of action -> skittle count. A state seen
    for the first time gets a count of 3 for every valid move. The move is
    drawn with weighted_choice over the counts for `state`.

    If weighted_choice yields None (no skittles left for this state), the
    count for the previous (state, action) pair is reduced by one, never
    below zero, provided `info.last_state` is truthy; a random valid move is
    then played instead.
    """
    skittles=info.S
    prev_action=info.last_action
    prev_state=info.last_state

    # First visit: seed every legal action with the same starting count.
    if state not in skittles:
        legal=valid_moves(state,player)

        skittles[state]=Table()
        for action in legal:
            skittles[state][action]=3

    move=weighted_choice(skittles[state])  # weighted across actions
    if move is not None:
        return move

    # Nothing left to draw from here: blame the move that led to this state.
    if prev_state:
        skittles[prev_state][prev_action]=max(skittles[prev_state][prev_action]-1,0)

    return random_move(state,player)

def skittles_after(status,player,info):
    """End-of-game hook for the skittles learner.

    When `status` is 'lose', one skittle is removed from the entry
    info.S[info.last_state][info.last_action], with the count clamped at
    zero. Any other status leaves the table untouched. `player` is unused.
    """
    skittles=info.S
    prev_action=info.last_action
    prev_state=info.last_state

    if status!='lose':
        return

    # Take one skittle away from the final move, never going negative.
    skittles[prev_state][prev_action]=max(skittles[prev_state][prev_action]-1,0)
        
    


# First skittles learner: starts from an empty table S and uses
# skittles_after as its `post` hook.
skittles_agent=Agent(skittles_move)
skittles_agent.S=Table()
skittles_agent.post=skittles_after


# Second skittles learner with its own, separate table, configured the same way.
skittles_agent2=Agent(skittles_move)
skittles_agent2.S=Table()
skittles_agent2.post=skittles_after

In [7]:
def Q_move(state,player,info):
    """Choose a move epsilon-greedily and update the previous Q-value.

    Reads from `info`:
      Q            table: state -> Table of action -> value
      last_state,
      last_action  the agent's previous state/action (last_action is None
                   on the agent's first move of a game)
      α            learning rate
      γ            discount factor
      ϵ            probability of playing a random move

    A state not yet in Q gets value 0 for each valid move. With probability
    ϵ the move comes from random_move, otherwise from top_choice(Q[state]).
    Unless this is the first move, Q[last_state][last_action] is then moved
    towards  reward + γ*max_a Q[state][a]  with reward fixed at 0 (mid-game
    steps carry no reward).
    """
    table=info.Q
    prev_action=info.last_action
    prev_state=info.last_state

    learning_rate=info.α
    discount=info.γ
    explore_prob=info.ϵ

    # First visit: every legal action starts with a neutral value.
    if state not in table:
        legal=valid_moves(state,player)

        table[state]=Table()
        for action in legal:
            table[state][action]=0

    # Explore with probability ϵ, otherwise exploit the current estimates.
    explore=random.random()<explore_prob
    move=random_move(state,player) if explore else top_choice(table[state])

    if prev_action is None:  # first move: nothing to learn from yet
        return move

    reward=0
    best_next=max([table[state][a] for a in table[state]])

    # Bellman update for the previous (state, action) pair.
    table[prev_state][prev_action] += learning_rate*(reward+
                                        discount*best_next-
                                        table[prev_state][prev_action])

    return move

def Q_after(status,player,info):
    """End-of-game hook: apply the terminal reward to the last move.

    The final (state, action) pair has no successor state, so its value is
    moved straight towards the terminal reward:

        Q[last_state][last_action] += α*(reward - Q[last_state][last_action])

    Rewards by `status`: 'win' -> 1, 'lose' -> -1, 'stalemate' -> 0.5,
    anything else -> 0.

    Only `info.Q`, `info.last_state`, `info.last_action` and `info.α` are
    read. The discount factor and exploration rate play no part in the
    terminal update, so they are not required to be set on `info`.
    `player` is unused.
    """
    Q=info.Q
    last_action=info.last_action
    last_state=info.last_state

    α=info.α

    terminal_rewards={'win':1,'lose':-1,'stalemate':0.5}
    reward=terminal_rewards.get(status,0)

    # learn a little bit
    Q[last_state][last_action] += α*(reward-Q[last_state][last_action])
        


In [8]:
# First Q-learning agent: Q-table loaded from disk, Q_after as its `post` hook.
# NOTE(review): LoadTable is given a file that the training cell later writes
# with SaveTable -- confirm how LoadTable behaves on a fresh checkout where
# the JSON file does not exist yet.
Q1_agent=Agent(Q_move)
Q1_agent.Q=LoadTable('Q1_TTT_data.json')
Q1_agent.post=Q_after

Q1_agent.α=0.3  # learning rate
Q1_agent.γ=0.9  # memory constant, discount factor
Q1_agent.ϵ=0.1  # probability of a random move during learning

# Second Q-learning agent, configured identically but with its own table file.
Q2_agent=Agent(Q_move)
Q2_agent.Q=LoadTable('Q2_TTT_data.json')
Q2_agent.post=Q_after

Q2_agent.α=0.3  # learning rate
Q2_agent.γ=0.9  # memory constant, discount factor
Q2_agent.ϵ=0.1  # probability of a random move during learning

In [9]:
# Self-play schedule: each epoch plays a short training run (learning and
# exploration on), then a short testing run (both off) whose results are
# reported, and finally checkpoints both Q-tables to disk.
number_training_games=10
number_of_testing_games=10

total_number_of_games=0
for epoch in range(100):

    #=================
    # training cycle
    Q1_agent.α=0.3  # learning rate
    Q1_agent.ϵ=0.1  # probability of a random move during learning
    Q2_agent.α=0.3  # learning rate
    Q2_agent.ϵ=0.1  # probability of a random move during learning

    g=Game(number_training_games)
    g.display=False
    g.run(Q1_agent,Q2_agent)

    #=================
    # testing cycle: α=0 freezes the Q-values (every update is scaled by α)
    # and ϵ=0 disables random moves.
    Q1_agent.α=0.0  # learning rate
    Q1_agent.ϵ=0.0  # probability of a random move during learning
    Q2_agent.α=0.0  # learning rate
    Q2_agent.ϵ=0.0  # probability of a random move during learning

    g=Game(number_of_testing_games)
    g.display=False
    result=g.run(Q1_agent,Q2_agent)

    # Only training games count towards the running total.
    total_number_of_games+=number_training_games

    # `result` holds one entry per *testing* game, so the percentages must be
    # taken over number_of_testing_games (not the training count).
    # NOTE(review): entries are assumed to be 1 when Q1_agent wins, 2 when
    # Q2_agent wins and 0 for a tie, as the variable names imply -- confirm
    # against Game.run.
    win_percentage=sum(r==1 for r in result)/number_of_testing_games*100
    loss_percentage=sum(r==2 for r in result)/number_of_testing_games*100
    tie_percentage=sum(r==0 for r in result)/number_of_testing_games*100

    print(total_number_of_games,":",win_percentage," ",end="")

    # Checkpoint after every epoch so an interrupted run keeps its progress.
    SaveTable(Q1_agent.Q,'Q1_TTT_data.json')
    SaveTable(Q2_agent.Q,'Q2_TTT_data.json')
    

10 : 60.0  20 : 60.0  30 : 70.0  40 : 70.0  50 : 70.0  60 : 50.0  70 : 40.0  80 : 60.0  90 : 50.0  100 : 60.0  110 : 50.0  120 : 70.0  130 : 40.0  140 : 60.0  150 : 60.0  160 : 90.0  170 : 60.0  180 : 20.0  190 : 60.0  200 : 60.0  210 : 50.0  220 : 40.0  230 : 30.0  240 : 20.0  250 : 20.0  260 : 0.0  270 : 0.0  280 : 0.0  290 : 50.0  300 : 0.0  310 : 0.0  320 : 0.0  330 : 0.0  340 : 0.0  350 : 0.0  360 : 0.0  370 : 0.0  380 : 0.0  390 : 0.0  400 : 0.0  410 : 0.0  420 : 0.0  430 : 0.0  440 : 0.0  450 : 0.0  460 : 0.0  470 : 0.0  480 : 0.0  490 : 0.0  500 : 0.0  510 : 0.0  520 : 0.0  530 : 0.0  540 : 0.0  550 : 0.0  560 : 0.0  570 : 0.0  580 : 0.0  590 : 0.0  600 : 0.0  610 : 0.0  620 : 0.0  630 : 0.0  640 : 0.0  650 : 0.0  660 : 0.0  670 : 0.0  680 : 0.0  690 : 0.0  700 : 0.0  710 : 0.0  720 : 0.0  730 : 0.0  740 : 0.0  750 : 0.0  760 : 0.0  770 : 0.0  780 : 0.0  790 : 0.0  800 : 0.0  810 : 0.0  820 : 0.0  830 : 0.0  840 : 0.0  850 : 0.0  860 : 0.0  870 : 0.0  880 : 0.0  890 : 0.0  900 

In [10]:
# Evaluate the trained Q2 agent against minimax: minimax_agent is passed
# first, Q2_agent second. number_of_testing_games comes from the training
# cell above, which also leaves Q2_agent with α=0 and ϵ=0 (its testing-cycle
# settings), so no learning or random moves happen here.
g=Game(number_of_testing_games)
g.display=False
result=g.run(minimax_agent,Q2_agent)

  Choice Time: 2.572774887084961 seconds 
  Choice Time: 0.16410183906555176 seconds 
  Choice Time: 0.006684303283691406 seconds 
  Choice Time: 0.0007719993591308594 seconds 
  Choice Time: 7.081031799316406e-05 seconds 
  Choice Time: 2.532865047454834 seconds 
  Choice Time: 0.14731168746948242 seconds 
  Choice Time: 0.010020017623901367 seconds 
  Choice Time: 2.549088954925537 seconds 
  Choice Time: 0.17021489143371582 seconds 
  Choice Time: 0.007784128189086914 seconds 
  Choice Time: 0.0006473064422607422 seconds 
  Choice Time: 2.530078411102295 seconds 
  Choice Time: 0.1370072364807129 seconds 
  Choice Time: 0.0028641223907470703 seconds 
  Choice Time: 2.5515668392181396 seconds 
  Choice Time: 0.14461898803710938 seconds 
  Choice Time: 0.007224082946777344 seconds 
  Choice Time: 0.0006101131439208984 seconds 
  Choice Time: 2.5883829593658447 seconds 
  Choice Time: 0.1966710090637207 seconds 
  Choice Time: 0.00590205192565918 seconds 
  Choice Time: 2.5538289546966

In [11]:
# Summary (game count and win/lose/tie percentages) for the games run just above.
g.report()

Total number of games:  10
Winning 80.00 percent
Losing 0.00 percent
Tie 20.00 percent


In [13]:
# Same evaluation with the roles swapped: Q1_agent is passed first,
# minimax_agent second. Q1_agent still has α=0 and ϵ=0 from the last
# testing cycle of the training cell.
g=Game(number_of_testing_games)
g.display=False
result=g.run(Q1_agent,minimax_agent)

In [14]:
# Summary for the Q1-vs-minimax games run just above.
g.report()

Total number of games:  10
Winning 0.00 percent
Losing 0.00 percent
Tie 100.00 percent
