## mini checkers

In [None]:
from mplturtle import *
from random import randint, choice, random
from Game import *

# Checkers

In [4]:
def initial_state():
    """Build the opening position on a 6x6 board.

    Player 1 men (value 1) fill squares 1, 3, 5, 6, 8, 10 and player 2 men
    (value 2) fill squares 25, 27, 29, 30, 32, 34.  The `pieces` tuple is the
    set of display glyphs handed to the Board (presumably indexed by cell
    value 0-4 -- confirm against the Board class).
    """
    board = Board(6, 6)

    player1_squares = (1, 3, 5, 6, 8, 10)
    player2_squares = (34, 32, 30, 29, 27, 25)

    for square in player1_squares:
        board[square] = 1
    for square in player2_squares:
        board[square] = 2

    board.pieces = ('.', 'o', '●', '◯', '⚫')
    return board

In [5]:
def show_state(state, player):
    """Print the board's string form; `player` is accepted but not used."""
    print(state)

In [6]:
def P1diagonals(start):
    """Return the squares a player-1 man on `start` can step to.

    Squares are numbered row-major 0-35 on the 6x6 board.  Player-1 men
    advance toward higher indices: one row down and one column left
    (`start + 5`) or right (`start + 7`).

    Parameters
    ----------
    start : int
        Square index of the piece.

    Returns
    -------
    tuple of int
        Destination squares, left diagonal first.  The tuple is empty when
        `start` is on the last row, is not a playable square, or is off the
        board, so callers can always iterate the result (previously those
        cases fell through and returned None).
    """
    size = 6
    if not 0 <= start < size * size:
        return ()

    row, col = divmod(start, size)
    # Pieces only sit on squares where row + col is odd, and nothing can
    # advance past the last row.
    if (row + col) % 2 == 0 or row == size - 1:
        return ()

    steps = []
    if col > 0:             # room for the down-left diagonal
        steps.append(start + size - 1)
    if col < size - 1:      # room for the down-right diagonal
        steps.append(start + size + 1)
    return tuple(steps)

In [7]:
def P2diagonals(start):
    """Return the squares a player-2 man on `start` can step to.

    Squares are numbered row-major 0-35 on the 6x6 board.  Player-2 men
    advance toward lower indices: one row up and one column left
    (`start - 7`) or right (`start - 5`).

    Parameters
    ----------
    start : int
        Square index of the piece.

    Returns
    -------
    tuple of int
        Destination squares, left diagonal first.  The tuple is empty when
        `start` is on the first row, is not a playable square, or is off the
        board.  (The earlier table returned negative, off-board indices for
        squares 1 and 3 and None for square 5; negative indices would wrap
        around to the far end of the board when used to index the state.)
    """
    size = 6
    if not 0 <= start < size * size:
        return ()

    row, col = divmod(start, size)
    # Pieces only sit on squares where row + col is odd, and nothing can
    # advance past the first row.
    if (row + col) % 2 == 0 or row == 0:
        return ()

    steps = []
    if col > 0:             # room for the up-left diagonal
        steps.append(start - size - 1)
    if col < size - 1:      # room for the up-right diagonal
        steps.append(start - size + 1)
    return tuple(steps)

In [8]:
def Kdiagonals(start):
    """Return the squares a king on `start` can step to, in any direction.

    Each entry below pairs a group of squares with the index offsets that
    stay on the 6x6 board from those squares (-7/-5 move up a row, +5/+7
    move down a row).  Squares that appear in no group yield None.
    """
    offset_groups = (
        ((8, 10, 13, 15, 20, 22, 25, 27), (-7, -5, 5, 7)),  # interior
        ((32, 34), (-7, -5)),                               # bottom row
        ((1, 3), (5, 7)),                                   # top row
        ((17, 29), (-7, 5)),                                # right edge
        ((6, 18), (-5, 7)),                                 # left edge
        ((5,), (5,)),                                       # top-right corner
        ((30,), (-5,)),                                     # bottom-left corner
    )
    for squares, offsets in offset_groups:
        if start in squares:
            return tuple(start + step for step in offsets)
    return None

In [9]:
def jump(state, start, player):
    """List the capturing jumps available to `player`'s piece on `start`.

    A jump runs along a diagonal triple (a, b, c): the mover sits on one end,
    an opposing man or king sits on the middle square b, and the far end is
    empty.  Both orientations of every triple are checked, so the capture
    direction is not restricted by piece type.

    Fixes over the previous version:
    * kings (values 3 and 4) are now recognised as movers, so they can
      capture; before, only values 1 and 2 ever matched;
    * the "start is the c end" case for player 1 tested `state[c] == 0`
      (the mover's own square) and so could never fire; the landing square
      `a` is tested instead.

    Parameters
    ----------
    state : Board
        Current position; cell values 0 empty, 1/3 player-1 man/king,
        2/4 player-2 man/king.
    start : int
        Square of the piece to move.
    player : int
        1 or 2.

    Returns
    -------
    list of (int, int)
        (start, landing) pairs; empty if `start` does not hold one of
        `player`'s pieces or no capture exists.
    """
    if player == 1:
        own, enemy = (1, 3), (2, 4)
    elif player == 2:
        own, enemy = (2, 4), (1, 3)
    else:
        return []

    if state[start] not in own:
        return []

    # A helper board whose cells hold their own indices, so the diagonal
    # triples come back as square numbers.
    # (assumes diags(3) yields every 3-long diagonal run -- confirm in Board)
    board_locations = Board(6, 6)
    board_locations.board = list(range(36))

    jumps = []
    for a, b, c in board_locations.diags(3):
        if state[b] not in enemy:
            continue
        if start == a and state[c] == 0:
            jumps.append((a, c))
        elif start == c and state[a] == 0:
            jumps.append((c, a))

    return jumps



In [10]:
def valid_moves(state, player):
    """Return every legal move for `player` as a list of [start, end] lists.

    For each of the player's pieces, every adjacent diagonal square it may
    step to is examined: an empty square gives a simple move, and an
    opposing piece there triggers a lookup of that piece's capturing jumps.

    Changes from the previous version:
    * the jump list for a piece is added at most once (it used to be added
      once per adjacent opponent, producing duplicate moves);
    * a diagonal helper returning None is treated as "no squares" instead
      of raising TypeError;
    * the four copy-pasted branches are driven by one rule table.

    Parameters
    ----------
    state : Board
        Current position; cell values 0 empty, 1/3 player-1 man/king,
        2/4 player-2 man/king.
    player : int
        1 or 2.

    Returns
    -------
    list of [int, int]
        Moves in board-scan order; empty when the player cannot move.
    """
    # piece value -> (owner, step-square function, opposing piece values)
    rules = {
        1: (1, P1diagonals, (2, 4)),
        2: (2, P2diagonals, (1, 3)),
        3: (1, Kdiagonals, (2, 4)),
        4: (2, Kdiagonals, (1, 3)),
    }

    moves = []
    for start in range(36):
        rule = rules.get(state[start])
        if rule is None or rule[0] != player:
            continue
        _, diagonals, enemies = rule

        jumps_added = False
        for end in diagonals(start) or ():
            if state[end] == 0:
                moves.append([start, end])
            elif state[end] in enemies and not jumps_added:
                # jump() reports all captures for this piece in one call
                moves.extend(list(pair) for pair in jump(state, start, player))
                jumps_added = True

    return moves

In [11]:
def update_state(state, player, move):
    """Apply `move` (a start/end pair) to `state` in place and return it.

    A player-1 man landing on 30, 32 or 34 becomes a king (3); a player-2
    man landing on 1, 3 or 5 becomes a king (4); any other piece keeps its
    value.  When the move spans more than 7 index steps it is a jump, and
    the square midway between start and end is cleared.  `player` is unused.
    """
    origin, target = move
    piece = state[origin]

    # Promotion check
    if piece == 1 and target in (30, 32, 34):
        placed = 3
    elif piece == 2 and target in (1, 3, 5):
        placed = 4
    else:
        placed = piece

    state[target] = placed
    state[origin] = 0

    # Jumps travel 10 or 14 index steps; remove the captured piece.
    if abs(target - origin) > 7:
        state[int((target + origin) / 2)] = 0

    return state

In [12]:
def win_status(state, player):
    """Return 'win' when the opponent of `player` has no legal move.

    Returns None otherwise (including for a player other than 1 or 2).
    """
    opponent = {1: 2, 2: 1}.get(player)
    if opponent is not None and not valid_moves(state, opponent):
        return('win')


# Agents

In [14]:
def human_move(state,player):
    """Prompt on stdin until the user enters a legal move, then return it.

    The move is typed as ``start,end``.  `valid_moves` returns each move as
    a list, so the typed move is built as a list too; the earlier tuple
    never compared equal to any legal move and the prompt looped forever.
    Input that is not two comma-separated integers is ignored and the
    prompt is shown again instead of raising ValueError.

    Returns
    -------
    list of int
        The chosen ``[start, end]`` move.
    """
    moves=valid_moves(state,player)
    print('Valid moves are: ',moves)
    legal=[list(m) for m in moves]

    while True:
        move_input=input('enter your move as start,end: ')
        try:
            start,end=move_input.split(',')
            move=[int(start),int(end)]
        except ValueError:
            # wrong number of fields or non-numeric text: ask again
            continue
        if move in legal:
            return move

In [15]:
def random_move(state,player):
    """Return one of `player`'s legal moves chosen uniformly at random.

    The standard-library module is imported locally under a private name
    because this file does ``from random import ... random``, which binds
    `random` to the function; ``random.choice`` then only works if a later
    star import happens to rebind the name to the module.

    Raises IndexError (from `choice`) if the player has no legal moves.
    """
    import random as _random
    return _random.choice(valid_moves(state,player))


In [16]:
# Wrap the move functions in Agent objects so they can be passed to Game.run.
random_agent=Agent(random_move)
human_agent=Agent(human_move)
   # state the percentage of wins, ties, etc...

## Minimax

In [18]:
from Game.minimax import *

In [19]:
def minimax_move(state, player):
    """Choose a move with a depth-limited minimax search (maxdepth=4).

    Author's note on depth: the cost grows quickly with depth; a depth of 8
    was about the longest acceptable wait, yet the computer still played
    terribly.
    """
    scores, candidates = minimax_values(state, player, maxdepth=4, display=False)
    return top_choice(candidates, scores)
minimax_agent = Agent(minimax_move)

## Skittles

In [21]:
def skittles_move(state, player, info):
    """Pick a move by drawing from the per-state "skittles" counts.

    The first time a state is seen, every legal move is given 2 skittles.
    A move is then drawn with `weighted_choice`.  If that returns None
    (presumably because no skittles remain for this state), a random legal
    move is played instead and, when learning, the previous move that led
    here loses one skittle.

    Fix: the floor on the count was written as ``if x == 0: x == 0`` (a
    comparison with no effect); it is now ``if x < 0: x = 0`` so a count
    can never go negative.

    Parameters
    ----------
    state : Board
        Current position.
    player : int
        1 or 2.
    info : Agent
        Carries the table `T`, the `learning` flag and the agent's
        `last_state` / `last_action`.

    Returns
    -------
    The chosen move.
    """
    T=info.T
    learning=info.learning
    last_state=info.last_state
    last_action=info.last_action

    # initializing state:
    if state not in T:
        T[state]=Table()
        for action in valid_moves(state,player):
            T[state][action]=2

    move=weighted_choice(T[state])

    if move is None:
        move=random_move(state,player)

        # the previous move led to a dead-end state: penalise it
        if learning and last_state:
            T[last_state][last_action]-=1
            if T[last_state][last_action]<0:
                T[last_state][last_action]=0

    return move

def skittles_after(status,player,info):
    """Adjust the skittle count of the agent's last move once a game ends.

    When `info.learning` is true:
    * 'lose'      -> count drops by 2, floored at 0
    * 'win'       -> count rises by 1, capped at 20
    * 'stalemate' -> count drops by 1, floored at 0
    Any other status leaves the table untouched.

    Fix: the floor used to test ``== 0`` (and then assign 0), so a count of
    1 followed by a loss became -1 and stayed negative.  It now tests
    ``< 0``.

    Parameters
    ----------
    status : str
        Game outcome for this agent.
    player : int
        Unused.
    info : Agent
        Carries `T`, `learning`, `last_state` and `last_action`.
    """
    T=info.T
    learning=info.learning
    last_state=info.last_state
    last_action=info.last_action

    if not learning:
        return

    if status=='lose':
        T[last_state][last_action]-=2
        if T[last_state][last_action]<0:
            T[last_state][last_action]=0

    elif status=='win':
        T[last_state][last_action]+=1
        if T[last_state][last_action]>20:
            T[last_state][last_action]=20

    elif status=='stalemate':
        T[last_state][last_action]-=1
        if T[last_state][last_action]<0:
            T[last_state][last_action]=0

In [22]:
# Stand-alone skittles agent: starts with an empty table, learns after each
# game through skittles_after, with learning switched on.
skittles_agent = Agent(skittles_move)
skittles_agent.T=Table()
skittles_agent.post=skittles_after
skittles_agent.learning=True

In [23]:
# Experiment size: number of train/test rounds, and games per test and
# per training phase within each round.
epoch_number=50
N_test=300
N_train=50

In [24]:
from tqdm import tqdm

In [25]:
# Two independent skittles learners (separate tables) that will be trained
# by playing against each other.
agent1 = Agent(skittles_move)
agent1.T=Table()
agent1.post=skittles_after
agent1.learning=True

agent2 = Agent(skittles_move)
agent2.T=Table()
agent2.post=skittles_after
agent2.learning=True

In [26]:
# Accumulators filled by the training loop: total training games so far and
# the per-epoch win percentages of each player in the test games.
iteration_count=0
percentage_won_player1=[]
percentage_won_player2=[]
number_of_iterations=[]

In [27]:
from matplotlib import pyplot as plt

In [None]:
plt.close('all')
# NOTE(review): fig/ax are created here but not used inside this cell.
fig,ax=plt.subplots(figsize=(6,4))

# Each epoch: N_train self-play games with learning on, then N_test games
# with learning off; the share of test games won by each player is recorded.
for i in tqdm(range(epoch_number)):
    try:
        #train
        agent1.learning=True
        agent2.learning=True
    
        g=Game(number_of_games=N_train)
        g.display=False
        g.max_move_count=50
        result=g.run(agent1,agent2)
    
        #test
        agent1.learning=False
        agent2.learning=False
    
        # NOTE(review): unlike the training games, no max_move_count is set
        # for the test games -- confirm this is intended, since uncapped
        # games may account for much of the run time.
        g=Game(number_of_games=N_test)
        g.display=False
        result=g.run(agent1,agent2)

        iteration_count+=N_train
        
        # result is presumably a list with one winner id per game -- confirm
        percentage_won_player1.append(result.count(1)/N_test*100)
        percentage_won_player2.append(result.count(2)/N_test*100)
        number_of_iterations.append(iteration_count)

    # Ctrl-C / kernel interrupt stops training but keeps the data so far
    except KeyboardInterrupt:
        break

 88%|██████████████████████████████████████████████████████████████████▉         | 44/50 [21:44:54<1:00:44, 607.42s/it]

In [None]:
# Win percentage in the test games versus cumulative training games.
plt.plot(number_of_iterations,percentage_won_player1, '-o',label='Player 1')
plt.plot(number_of_iterations,percentage_won_player2, '-x',label='Player 2')
plt.legend()

In [None]:
result

In [None]:
# Persist agent1's learned table (presumably as JSON, per the file name).
SaveTable(agent1.T, 'CheckersSkittles.json')

In [None]:
# Ten games of minimax against skittles.
# NOTE(review): skittles_agent was created with its own empty Table and is
# not one of the agents (agent1/agent2) trained in the loop above, so this
# match uses an untrained table unless it was trained elsewhere.
g=Game(number_of_games=10)
g.display=False
result=g.run(minimax_agent, skittles_agent)
print(result)

discuss skittles performance, including successes, failures, limitations
    The current code takes 100 hours to run, so I cannot say much beyond the fact that it does not seem to converge (so far). I doubt that it will converge at 300,000.

how long it takes for minimax to function for many different search (depth and breadth) sizes:
    how long does it take for the last 2 moves, 3 moves, etc.?
    2-10 seconds per move on max depth 8, less time for the last moves, but 15 mins per game because of the wandering around
    
    how long does it take for different size boards
    the size greatly affects the game time because the wandering is limited on smaller boards
    
be able to predict how long it would take for a board or game at least twice as large as you can practically run. show your calculations!
    if it is twice as large (12x12), I'd square the time: at about 15 minutes per game on the 6x6 board, that is 15 × 15 = an estimated 225 minutes (if there is still no move limit or incentive to win quickly)

Is Skittles the Same as Menace? What are the similarities and differences?
    They are sort of the same thing, because both use "skittles" as counts that weight each move. The differences: Skittles is currently playing against itself instead of a human player, it takes A LOT longer than Menace, and its skittles are integers in a table rather than the physical tokens Menace would have.

    

## Heuristic

In [None]:
%%time
# Ten games of minimax against itself, with the game's repeated-state check
# switched on.
g=Game(number_of_games=10)
g.display=False
g.check_repeated_states=True
result=g.run(minimax_agent, minimax_agent)
print(result)

In [None]:
def material(state,player):
    """Score the material balance from `player`'s point of view.

    Men count 0.5 and kings count 1.  The result is
    ``(own - opponent) / (own + opponent)``, which lies in [-1, 1]; it is
    0.0 on an empty board or for a player other than 1 or 2.

    Fix: the numerator previously subtracted ``your_kings + opp_kings``
    instead of the opponent's material ``0.5*opp_pcs + opp_kings``, so the
    opponent's men were ignored and the player's own kings cancelled out.

    Parameters
    ----------
    state : indexable of int
        36 cells; 0 empty, 1/3 player-1 man/king, 2/4 player-2 man/king.
    player : int
        1 or 2.

    Returns
    -------
    float
    """
    if player==1:
        own_man, own_king, opp_man, opp_king = 1, 3, 2, 4
    elif player==2:
        own_man, own_king, opp_man, opp_king = 2, 4, 1, 3
    else:
        return 0.0

    # tally how many cells hold each piece value
    counts={1: 0, 2: 0, 3: 0, 4: 0}
    for i in range(36):
        piece=state[i]
        if piece in counts:
            counts[piece]+=1

    own=0.5*counts[own_man] + counts[own_king]
    opp=0.5*counts[opp_man] + counts[opp_king]

    denom=own + opp
    if denom==0:
        return 0.0

    return (own - opp)/denom
        

In [None]:
def mobility(state,player):
    """Score how many more legal moves `player` has than the opponent.

    Returns ``(mine - theirs) / (mine + theirs)``, or 0.0 when neither side
    can move.  Any player value other than 1 is paired with opponent 1.
    """
    opponent = 2 if player==1 else 1

    mine = len(valid_moves(state,player))
    theirs = len(valid_moves(state,opponent))
    total = mine + theirs

    return (mine - theirs)/total if total else 0.0
    

In [None]:
def heuristic (state,player):
    """Blend material (80%) and mobility (20%) into one position score.

    The result is kept within [-0.99, 0.99].
    """
    limit = 0.99
    blended = 0.8*material(state,player) + 0.2*mobility(state,player)

    if blended > limit:
        return limit
    if blended < -limit:
        return -limit
    return blended

## Q

In [None]:
# Toy demonstration of the incremental update Q += α*(reward - Q):
# the reward is 1 when rand() < 0.9 and 0 otherwise, and Q is tracked over
# 1000 steps.  (rand, Storage and plot are not imported by name in this
# file -- presumably they come from one of the star imports.)
Q=0
t=0

# learning rate
α=0.1

# record (t, Q) after every step
_S=Storage()
_S+= t,Q
for i in range(1000):
    r=rand()
    if r<0.9:
        reward=1

    else:
        reward=0
        
    Q+=α*(reward-Q)
    t+=1
    _S+=t,Q

# NOTE: from here on t and Q are rebound to the recorded histories
t,Q=_S.arrays()
plot(t,Q)

In [None]:
def Q_move(state, player, info):
    """Choose a move epsilon-greedily and apply the in-game Q-learning update.

    The first time a state is seen, every legal move gets value 0.0.  While
    learning, a random legal move is played with probability `epsilon`;
    otherwise `top_choice` picks from the state's Q values.  If this is not
    the agent's first move and learning is on, the previous (state, action)
    value is moved toward ``reward + gamma * max_a Q[state][a]`` with the
    in-game reward fixed at 0.

    Fixes over the previous version:
    * the chosen move is no longer overwritten by `random_move` on every
      move after the first (which made the policy purely random);
    * a state with no recorded actions no longer makes `max` raise on an
      empty sequence -- its best value is taken as 0.0;
    * the standard `random` module is imported locally, because this file
      binds the name `random` to the function via
      ``from random import ... random``.

    Parameters
    ----------
    state : Board
        Current position.
    player : int
        1 or 2.
    info : Agent
        Carries `Q`, `learning`, `last_state`, `last_action`, `alpha`
        (learning rate), `gamma` (discount) and `epsilon` (exploration rate).

    Returns
    -------
    The chosen move.
    """
    import random as _random

    Q=info.Q
    learning=info.learning
    last_state=info.last_state
    last_action=info.last_action
    alpha=info.alpha #learning rate
    gamma=info.gamma #discount
    epsilon=info.epsilon #random

    if state not in Q: #initialize
        Q[state]=Table()
        for action in valid_moves(state,player):
            Q[state][action]=0.0

    if learning and _random.random()<epsilon:
        move=random_move(state,player)
    else:
        move=top_choice(Q[state])

    # Not the first move of the game: update the previous decision.
    if learning and last_action is not None and last_state is not None:
        reward=0
        best_next=max((Q[state][a] for a in Q[state]), default=0.0)
        Q[last_state][last_action] += alpha*(reward +
                                             gamma*best_next -
                                             Q[last_state][last_action])

    return move

In [None]:
def Q_after(status,player,info):
    """Apply the end-of-game Q update to the agent's last move.

    The terminal reward is -1 for 'lose', +1 for 'win' and 0 for anything
    else.  When `info.learning` is true, the last (state, action) value is
    moved toward that reward by a fraction `info.alpha`; there is no
    discounted next-state term.  `player` is unused.
    """
    terminal_reward = {'lose': -1, 'win': 1}.get(status, 0)

    if not info.learning:
        return

    step = info.alpha
    row = info.Q[info.last_state]
    key = info.last_action
    row[key] += step*(terminal_reward - row[key])

In [None]:
# First Q-learning agent: empty Q table, end-of-game update via Q_after.
Q1_agent= Agent(Q_move)
Q1_agent.post=Q_after
Q1_agent.Q=Table()
Q1_agent.learning=True

# learning rate, discount factor, exploration probability
Q1_agent.alpha=0.3
Q1_agent.gamma=0.9
Q1_agent.epsilon=0.1

In [None]:
# Second Q-learning agent, configured identically but with its own Q table.
Q2_agent= Agent(Q_move)
Q2_agent.post=Q_after
Q2_agent.Q=Table()
Q2_agent.learning=True

# learning rate, discount factor, exploration probability
Q2_agent.alpha=0.3
Q2_agent.gamma=0.9
Q2_agent.epsilon=0.1

In [None]:
# Rebind agent1/agent2 (previously the skittles learners) to the Q agents
# so the training loop below can use the same names.
agent1=Q1_agent
agent2=Q2_agent

In [None]:
# Reset the accumulators before the Q-learning run.
iteration_count=0
percentage_won_player1=[]
percentage_won_player2=[]
number_of_iterations=[]

In [None]:
# Experiment size for the Q-learning run: number of train/test rounds, and
# games per test and per training phase within each round.
epoch_number=100
N_test=500
N_train=300

In [None]:
plt.close('all')
# NOTE(review): fig/ax are created here but not used inside this cell.
fig,ax=plt.subplots(figsize=(6,4))

# Each epoch: N_train self-play games with learning on, then N_test games
# with learning off; the share of test games won by each player is recorded.
for i in tqdm(range(epoch_number)):
    try:
        #train
        agent1.learning=True
        agent2.learning=True
    
        g=Game(number_of_games=N_train)
        g.display=False
        g.max_move_count=50
        result=g.run(agent1,agent2)
    
        #test
        agent1.learning=False
        agent2.learning=False
    
        # NOTE(review): unlike the training games, no max_move_count is set
        # for the test games -- confirm this is intended.
        g=Game(number_of_games=N_test)
        g.display=False
        result=g.run(agent1,agent2)

        iteration_count+=N_train
        
        # result is presumably a list with one winner id per game -- confirm
        percentage_won_player1.append(result.count(1)/N_test*100)
        percentage_won_player2.append(result.count(2)/N_test*100)
        number_of_iterations.append(iteration_count)

    # Ctrl-C / kernel interrupt stops training but keeps the data so far
    except KeyboardInterrupt:
        break

In [None]:
# Persist Q1_agent's learned table (presumably as JSON, per the file name).
SaveTable(Q1_agent.Q, 'CheckersQ.json')

Q performance, including successes, failures, limitations