In [5]:
import random
import numpy as np
import pandas as pd
import time
import ast
from copy import deepcopy

The code is divided as it follows:

First section shows the training and final Q-states Reinforcement Learning player code.
Second section shows the code for both player with Alpha-Beta algorithms.
Third section shows the code for the player with Monte Carlo Tree Search.
Final section shows the code for the competition between the players that is explained in the final report.

As all players were developed by different programmers, the strategies followed were different. However, compatibility was guaranteed to allow the run of the competition.

# Q-states Reinforcement Learning player

## Training of RL player using a random player

In [None]:
#For training, a cards class was used. This function initiate a deck and randomly dealt two hands
def init_cards():
    #Call global variables of deck to play in a game and player and computer hands
    global deck, p_hand, c_hand
    
    #Create iteratively cards using Card class and appending it to the deck
    for suit_id in range(1, 5):
        for rank_id in range(1, 14):
            new_card = Card(suit_id, rank_id)
            if new_card.rank == 8:
                new_card.value = 50
            deck.append(new_card)
    
    # Deal 5 random cards for player and computer, eliminating those cards from the deck
    for card in range(5):
        p_card = random.choice(deck)
        deck.remove(p_card)
        p_hand.append(p_card)
    for card in range(5):
        c_card = random.choice(deck)
        deck.remove(c_card)
        c_hand.append(c_card)

In [None]:
# Class that allow to assign value, short name, and suit to a card with two numerical identifiers: suit and rank
class Card():
    def __init__ (self, suit_id, rank_id):
        self.suit_id = suit_id
        self.rank_id = rank_id
        #Rank assignation from id
        if self.rank_id == 1:
            self.rank = "Ace"
            self.value = 1
        elif self.rank_id == 11:
            self.rank = "Jack"
            self.value = 10
        elif self.rank_id == 12:
            self.rank = "Queen"
            self.value = 10
        elif self.rank_id == 13:
            self.rank = "King"
            self.value = 10
        elif 2 <= self.rank_id <= 10:
            self.rank = str(self.rank_id)
            self.value = self.rank_id
        else:
            self.rank = "RankError"
            self.value = -1

        #Suit assignation from id    
        if self.suit_id == 1:
            self.suit = "Diamonds"
        elif self.suit_id == 2:
            self.suit = "Hearts"
        elif self.suit_id == 3:
            self.suit = "Spades"
        elif self.suit_id == 4:
            self.suit = "Clubs"
        else:
            self.suit = "SuitError"
        
        #Short and long name definitions
        self.short_name = self.rank[0] + self.suit[0]
        if self.rank == '10':
            self.short_name = self.rank + self.suit[0]
        self.long_name = self.rank + " of " + self.suit

    #When printing, return long name of the card    
    def __str__(self):
        return  self.long_name

In [None]:
# Function that identify all possible states of the game
def states_t():
    # Different values that features can have in our environment, according to rules and cap defined
    suits = ["H", "D", "C", "S"]
    cards_hand = range(1,9)
    cards_opp = range(1,9)
    eights_hand = range(0,5)
    cards_deck = range(0,41)
    cards_same_rank = range(0,5)
    cards_same_suit = range(0,5)

    #Create a unique state for every possible combination of features
    states = []
    states_all = []
    for suit in suits:
        for card_h in cards_hand:
            for card_opp in cards_opp:
                for eights in eights_hand:
                    for card_deck in cards_deck:
                        for card_rank in cards_same_rank:
                            for card_suit in cards_same_suit:
                                states.append((suit, card_h, card_opp, eights, card_deck, card_rank, card_suit))

    #Discard unfeasible states, returning all possible feasible combinations of features
    for state in states:
        if  (state[1] >= state[3]) and (state[1] >= state[5]) and (state[1] >= state[6]):
            states_all.append(state)  
    return states_all
    
#Function that return all possible actions in a state
def actions_t():
    actions_all = ["H", "D", "C", "S", "Eight"]
    return actions_all

#Initiate Rewards for our player, assigning 1 to actions that lead to a win and 0 to the rest
def rewards_t(total_states, total_actions):
    
    #Start with a matrix with value zero for all state-action pair
    R = np.zeros((len(total_states), len(total_actions)))
    
    #Assign value of 1 for all actions that obtain wins
    for i in range(len(total_states)):
        if total_states[i][1] == 1:
            for c in range(len(total_actions)):
                if (total_states[i][0] == total_actions[c]) or (total_actions[c] == "Eight"): 
                    R[i][c] = 1
            if total_states[i][3] == 1 or total_states[i][5] == 1 or total_states[i][6] == 1:
                R[i] = 1
    
    #Transform the matrix to a dataframe and return
    R = pd.DataFrame(
        data=R, 
        columns=total_actions, 
        index=total_states) 
    return R

#Find all possible actions a player can do in a play
def possible_actions_t(p_hand, active_suit, up_card):
    
    #look up to all cards in the hand and see if they are playable by the rank, the suit, or being an 8
    options = []
    for card in p_hand:
        if (card.rank == "8") and ("Eight" not in options):
            options.append("Eight")
            
        elif (card.rank == up_card.rank) and (card.suit not in options):
            options.append(card.suit[0])
        
        elif (card.suit == active_suit) and (card.suit not in options):
            options.append(card.suit[0])
    
    #Return all different actions that the player can do
    return options

#define a state by perceiving the environment
def identify_state_t(p_hand, deck, active_suit, up_card, card_opp):
    
    #check features of number of cards, and following the cap
    card_opp = min(card_opp, 8)
    cards_hand = min(len(p_hand), 8)
    card_deck = min(len(deck), 40)
    
    #look up cards in the hand, card on top and suit being played to obtain the rest of the features
    eights = 0
    card_rank = 0
    card_suit = 0
    for card in p_hand:
        if card.rank == "8":
            eights += 1
            continue
        if card.rank == up_card.rank:
            card_rank += 1
        if card.suit == active_suit and card_suit < 4:
            card_suit += 1
            
    #return a tuple with the state 
    state = (active_suit[0], cards_hand, card_opp, eights, card_deck, card_rank, card_suit)
    return state

#Greedy-epsilon move
def step_t(possible_actions, state, epsilon, q):
    
    #If random number lower than our treshold, move randomly between possible actions
    if random.random() < epsilon:
        final_action = random.choice(possible_actions)
    #In other case, choose action with a higher q value. 
    else:
        #Actions are shuffled to ensure that, when q values are equal, increase exploration opportunities
        random.shuffle(possible_actions)
        max_value = 0     
        for action in possible_actions:
            value = q.loc[[state],action][0]
            if value >= max_value:
                max_value = value
                final_action = action
    
    return final_action         

#Update Bellman Equation after a move
def update_t(state, action, previous_state, previous_action, q, rewards, step_size, visit):
    
    #Follow Bellman equation when a previous state exists    
    if previous_state != 0:
        prev_q = q.loc[[previous_state], previous_action][0]
        this_q = q.loc[[state], action][0]
        reward = rewards.loc[[state], action][0]
        
        #Bellman equation update
        #When rewards are zero, it can be ignored, per definition, when they are 1 this_q = 0, so also ignored 
        if reward == 0:
            q.loc[[previous_state], previous_action] = prev_q + step_size * (this_q - prev_q) 
        else:
            q.loc[[previous_state], previous_action] = prev_q + step_size * (reward - prev_q)

        #Update number of visits to each state
        visit.loc[[previous_state], previous_action] += 1
            
    #Save and return action/state for next move
    previous_state  = state
    previous_action = action
    return q, visit, previous_state, previous_action
    

#Choosing active suit when player plays an eight
def choose_color_t(p_hand):
    
    #Count total number of cards in each suit
    suit_totals = [0, 0, 0, 0]
    for suit in range(1, 5):
        for card in p_hand:
            if card.suit_id == suit:
                suit_totals[suit - 1] += 1
    
    #Determine suit most repeated in the hand
    long_suit = 0
    for i in range(4):
        if suit_totals[i] > long_suit:
            long_suit = i
            
    #Return suit most repeated
    if long_suit == 0:
        active_suit = "Diamonds"
    elif long_suit == 1:
        active_suit = "Hearts"
    elif long_suit == 2:
        active_suit = "Spades"
    elif long_suit == 3:
        active_suit = "Clubs"        
    return active_suit

In [None]:
# Computer player that plays a strategy of allways play bigger card. Adaptation from Warren Sande and Carter Sande's
# Hello world!: computer programming for kids and other beginners (2019)
def computer_turn_t():
    #Reading the environment
    global c_hand, up_card, active_suit, deck, blocked
    
    #Analise possible moves
    options = []
    for card in c_hand:
        if card.rank == '8':
            #If the player has an 8, play inmediately
            c_hand.remove(card)
            up_card = card
            
            #choose suits. First count total number of cards in each suit
            suit_totals = [0, 0, 0, 0]
            for suit in range(1, 5):
                for card in c_hand:
                    if card.suit_id == suit:
                        suit_totals[suit - 1] += 1
                        
            #Determine suit most repeated in the hand
            long_suit = 0
            for i in range(4):
                if suit_totals[i] > long_suit:
                    long_suit = i
            
            #Return suit most repeated
            if long_suit == 0:
                active_suit = "Diamonds"
            elif long_suit == 1:
                active_suit = "Hearts"
            elif long_suit == 2:
                active_suit = "Spades"
            elif long_suit == 3:
                acitve_suit = "Clubs"
            return
        else:
            #If not an eight, append a list with all cards that can be played
            if card.suit == active_suit:
                options.append(card)
            elif card.rank == up_card.rank:
                options.append(card)
                
    #When there are options, play card with higher value
    if len(options) > 0:
        best_play = options[0]
        for card in options:
            if card.value > best_play.value:
                best_play = card

        c_hand.remove(best_play)
        up_card = best_play
        active_suit = up_card.suit
    
    #If there are no options, try to draw from deck
    else:
        if len(deck) > 0:
            next_card = random.choice(deck)
            c_hand.append(next_card)
            deck.remove(next_card)
        else:
            #When deck is empty, player cannot play and is blocked
            blocked += 1

In [None]:
# Player with RL move for training, both decide a move and update the state of the game
def Reinforcement_player(p_hand, deck, active_suit, up_card, blocked,n_opponent, q, previous_state, previous_action, rewards, visit):
    #Parameters for epsilon-greedy search and Bellman Equation update
    epsilon = 0.4
    step_size = 0.2
        
    #Identify the state of the game and possible actions to play
    state = identify_state_t(p_hand, deck, active_suit, up_card, n_opponent)
    actions = possible_actions_t(p_hand, active_suit, up_card)
    
    #If there are no possible action, either draw or be blocked, and return the updated state
    if len(actions) == 0:
        if len(deck) > 0:
            new_card = random.choice(deck)
            p_hand.append(new_card)
            deck.remove(new_card)
        else:
            blocked += 1
        
        return p_hand, deck, active_suit, up_card, blocked, q, previous_state, previous_action, visit      
    
    #Select move through epsilon-greedy strategy
    move = step_t(actions, state, epsilon, q)
    
    #Use move to update hand, card on top, and active suit
    if move == "Eight":
        for card in p_hand:
            if card.rank == '8':
                p_hand.remove(card)
                up_card = card
                active_suit = choose_color_t(p_hand)
                break
    elif move != active_suit[0]:
        for card in p_hand:
            if (card.suit[0] == move) and  (card.rank == up_card.rank):
                p_hand.remove(card)
                up_card = card
                active_suit = card.suit
                break   
    else:
        for card in p_hand:
            if card.suit[0] == move:
                p_hand.remove(card)
                up_card = card
                active_suit = card.suit
                break
    
    #update q-values
    q, visit, previous_state, previous_action = update_t(state, move, previous_state, previous_action, q, rewards, step_size, visit)
    
    # return new state for rival player and updated q values
    return p_hand, deck, active_suit, up_card, blocked, q, previous_state, previous_action, visit
    
    

In [None]:
#Train of RL player with 600000 simulations

#Initial considerations, including rewards and q values.
p_total = c_total = 0
game = 0
total_states = states_t()
total_actions = actions_t()
R = rewards_t(total_states, total_actions)
q = pd.DataFrame(
            data    = np.zeros((len(total_states), len(total_actions))), 
            columns = total_actions, 
            index   = total_states
        )
visit = q.copy()

#6000 games are played

start_time = time.time()
while game < 600000:
    #variable that change to True once a game is finished
    game_done = False
    
    #Cards initialization
    deck = []
    p_hand = []
    c_hand = []
    init_cards()
    
    #Game start without blocked players, no previous actions, and selecting a card at random
    blocked = 0
    up_card = random.choice(deck)
    active_suit = up_card.suit
    previous_state = 0
    previous_action = 0
    
    while not game_done:
        #RL player plays first
        p_hand, deck, active_suit, up_card, blocked, q, previous_state, previous_action, visit = Reinforcement_player(p_hand, deck, active_suit, up_card, blocked,len(c_hand), q, previous_state, previous_action, R, visit)
        
        #Check if game is done due to player winning
        if len(p_hand) == 0:
            game_done = True
            p_total += 1
        
        #If RL player didn't win, computer plays    
        if not game_done:
            computer_turn_t()
            
        #Check if game is done due to computer winning
        if len(c_hand) == 0:
            game_done = True
            c_total += 1
        
        #Check if game is done due to tie
        if blocked >= 2:
            game_done = True
        
        #Go to new game
        if game_done == True:
            game += 1
    
end_time = time.time()   

#Time analysis and number of games won
print(end_time - start_time)
print((end_time - start_time)/game)
print(p_total, c_total)

#Saving updated q states and visits for player to play in competition
q.to_excel("q_states.xlsx")
visit.to_excel("visits.xlsx")


## Trained player to be called in tournament

In [None]:
#Function that create a global variable to be used by the RL player, uploading the trained q-states
def initiate_rl():
    global q
    q = pd.read_excel("q_states.xlsx")
    q.iloc[:,0] = q.iloc[:,0].apply(ast.literal_eval)
    q.set_index(q.columns[0], inplace=True)


#Find all possible actions a player can do in a play
def possible_actions(p_hand, active_suit, up_card):
    
    #look up to all cards in the hand and see if they are playable by the rank, the suit, or being an 8
    options = []
    for card in p_hand:
        if (card[:-1] == "8") and ("Eight" not in options):
            options.append("Eight")
            
        elif (card[:-1] == up_card[:-1]) and (card[-1] not in options):
            options.append(card[-1])
        
        elif (card[-1] == active_suit) and (card[-1] not in options):
            options.append(card[-1])
    
    #return all different moves that player can play
    return options

#define a state by perceiving the environment adapted to the tournament environment
def identify_state(p_hand, deck, active_suit, up_card, card_opp):
    
    #check features of number of cards, and following the cap
    card_deck = min(len(deck), 40)
    card_opp = min(card_opp, 8)
    cards_hand = min(len(p_hand), 8)
    
    #look up cards in the hand, card on top and suit being played to obtain the rest of the features
    eights = 0
    card_rank = 0
    card_suit = 0
    for card in p_hand:
        if card[:-1] == "8":
            eights += 1
            continue
        if card[:-1] == up_card[:-1]:
            card_rank += 1
        if card[:-1] == active_suit and card_suit < 4:
            card_suit += 1
            
    #return a tuple with the state         
    state = (active_suit, cards_hand, card_opp, eights, card_deck, card_rank, card_suit)
    return state


#Greedy-epsilon move equal to training
def step(possible_actions, state, epsilon, q):
    
    #If random number lower than our treshold, move randomly between possible actions
    if random.random() < epsilon:
        final_action = random.choice(possible_actions)
    #In other case, choose action with a higher q value. 
    else:
        #Actions are shuffled to ensure that, when q values are equal, increase exploration opportunities
        random.shuffle(possible_actions)
        max_value = 0
        for action in possible_actions:
            value = q.loc[[state],action][0]
            if value >= max_value:
                max_value = value
                final_action = action
    
    return final_action  

#Choosing active suit when player plays an eight adapted to tournament environment
def choose_color(p_hand):
    
    #Count total number of cards in each suit
    suits =["D","H", "S", "C"]    
    suit_totals = [0, 0, 0, 0]
    for suit in range(0, 4):
        for card in p_hand:
            if card[-1] == suits[suit]:
                suit_totals[suit] += 1
                
    #Determine suit most repeated in the hand
    long_suit = 0
    for i in range(4):
        if suit_totals[i] > long_suit:
            long_suit = i
            
    #Return suit most repeated
    if long_suit == 0:
        active_suit = "D"
    elif long_suit == 1:
        active_suit = "H"
    elif long_suit == 2:
        active_suit = "S"
    elif long_suit == 3:
        active_suit = "C"
            
    return active_suit

#Movement from RL player. Different than from training, there is no update of state, and q states remain fixed
#The function returns the move an the active suit
def rl_move(p_hand, cards_opponent, up_card, deck, active_suit):
    #Parameters for the movement of the player under tournament conditions
    epsilon = 0.4
    global q
    n_opponent = len(cards_opponent)
    
    #Identify the state of the game and possible actions to play
    state = identify_state(p_hand, deck, active_suit, up_card, n_opponent)
    actions = possible_actions(p_hand, active_suit, up_card)
    
    #If there are no possible actions, return draw
    if len(actions) == 0:
        return ["DRAW", None]
    
    #Find move following epsilon-greedy strategy
    move = step(actions, state, epsilon, q)
    
    #return card and active suit that gives the respective move
    if move == "Eight":
        for card in p_hand:
            if card[:-1] == '8':
                active_suit = choose_color(p_hand)
                return [card, active_suit]
    elif move != active_suit:
        for card in p_hand:
            if (card[-1] == move) and  (card[:-1] == up_card[:-1]):
                active_suit = card[-1]
                return [card, active_suit]    
    else:
        for card in p_hand:
            if card[-1] == move:
                active_suit = card[-1]
                return [card, active_suit]



# ALPHA BETA PRUNING

### APLHA BETA PLAYER 1

In [None]:
#THIS PLAYER USES THE ALPHA BETA PRUNING TECHNIQUE TO GENERATE THE MOVE
#THIS FUNCTION USES THE UTILITY FUNCTION OF THE POINTS WON AT THE END OF THE FINAL MOVE

import random
import numpy as np
import pandas as pd

def legal_possible_moves(rank,suit,m): 
    #THIS FUNCTION LISTS ALL THE LEGAL MOVES BASED ON THE CARDS AVAILABLE TO THE PLAYER
    subset=[]
    for move in m: 
        #WE SPLIT THE LAST CHARACTER AS THE SUIT AND THE REST OF THE CHARACTERS AS THE RANK        
        if move[:-1]==rank: subset.append(move)
        elif move[-1]==suit: subset.append(move)
        elif move[:-1]=='8': subset.append(move)
    return subset

def total_points(hand,value):
    #THIS FUNCTIONS CALCULATES THE TOTAL POINTS BASED IN A PLAYERS HAND BASED ON THE RANK OF EACH CARD
    total = 0
    for card in hand:
        total = total + value[card[:-1]]
    return total

def updated_state(move,card_on_top,hand,deck,rank_cot,suit_cot): 
    #THIS FUNCTION UPDATES THE GAME STATE BASED ON THE MOVE PLAYED BY A PLAYER
    if move == 'DRAW':
        #IN CASE DRAW IS OPTED A CARD IS DRAWN FROM THE DECK
        move = random.choice(deck)
        deck.remove(move)
        hand.append(move)
        return (card_on_top,hand,deck,rank_cot,suit_cot)
    elif move[:-1] == '8':
        #IF A CARD WITH RANK 8 IS PLAYED THIS UPDATES THE SUIT WITH THE HIGHEST FREQUENCY OF REMAINING CARDS
        card_on_top = move
        hand.remove(move)
        suit_cot = case_eight(hand)
        return (card_on_top,hand,deck,rank_cot,suit_cot)
    else:
        #IN ANY OTHER CASE THE CARD IS PLACED ON TOP AND REMOVED FROM THE HAND
        card_on_top = move
        hand.remove(move)
        
        #hand.remove(move); 
        rank_cot = card_on_top[:-1]; suit_cot = card_on_top[-1]       
        return (card_on_top,hand,deck,rank_cot,suit_cot)
        

def case_eight(hand):
    #THIS FUNCTION HELPS TO UPDATE THE EVALUATE THE FREQUENCY OF SUITS OF THE REMAINING CARDS IN A PLAYER'S HAND
    suit_counts = {'D': 0, 'H': 0, 'S': 0, 'C': 0}
    for move in hand:
        if move[-1] in suit_counts:
            suit_counts[move[-1]] += 1
    max_suit = max(suit_counts, key=suit_counts.get)
    return max_suit        

    
def min_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot,alpha,beta):
    #THIS IS A FUNCTION THAT TRIES TO EVALUATE THE BEST POSSIBLE MOVE FOR THE OPPONENT BASED ON OUR PLAYER'S CARDS    
    value={'A':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':50,'9':9,'10':10,'J':10,'Q':10,'K':10}
    
    if len(p1_hand)==0: 
        #THIS CONDITIONAL STATEMENT RETURNS THE POINTS OBTAINED IF OUR PLAYER LOSES
        points = total_points(p2_hand,value)*-1
        return(points,None)
    if len(deck) == 0:
        return(0,None)
    
    #INITIALLY WE SET THE VALUE OF BETA TO BE INIFNITY 
    v = float('inf')
    
    moves = p2_hand.copy()
    moves = legal_possible_moves(rank_cot,suit_cot,moves)
    
    if len(moves)==0:
        if len(deck)!=0:
            moves.append("DRAW")
        else:
            return(0,None)       
        
    p1_hand_copy = p1_hand.copy(); p2_hand_copy = p2_hand.copy(); deck_copy = deck.copy()
    rank_cot_copy = rank_cot; suit_cot_copy = suit_cot; card_on_top_copy = card_on_top 
    
    for move in moves:
        p1_hand = p1_hand_copy ; p2_hand = p2_hand_copy; deck = deck_copy
        rank_cot = rank_cot_copy; suit_cot = suit_cot_copy; card_on_top = card_on_top_copy        
        
        card_on_top, p2_hand, deck, rank_cot, suit_cot = updated_state(move,card_on_top,p2_hand,deck,rank_cot,suit_cot)
        #WE TRY TO SIMULATE UNTIL THE END OF THE GAME PLAYING THE BEST MOVES FOR THE OTHER PLAYER
        v2,a2 = max_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot,alpha,beta)
        #THIS IS THE PART WHERE PRUNING TAKE PLACE, IF THE POSSIBILITY OF WINNING IS FOUND IT UPDATES THE BEST MOVE
        if v2 < v:
            v = v2
            best_move = move
        #THIS SECTION TERMINATES THE FUNCTION AND RETURNS THE BEST MOVE IF IT KNOWS IT LEADS TO A VICTORY
        if v<=alpha:
            return(v,best_move)
    
    return(v,best_move)
        
def max_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot,alpha,beta):
    
    value={'A':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':50,'9':9,'10':10,'J':10,'Q':10,'K':10}
    
    if len(p2_hand)==0:
        #THIS CONDITIONAL STATEMENT RETURNS THE POINTS OBTAINED IF OUR PLAYER LOSES
        points = total_points(p1_hand,value)
        return(points,None)
    if len(deck) == 0:
        return(0,None)
    
    #INITIALLY WE SET THE VALUE OF ALPHA TO BE INFINITY BEFORE EVERY ROUND
    v = float('-inf')
    
    moves = p1_hand.copy()
    moves = legal_possible_moves(rank_cot,suit_cot,moves)
        
    if len(moves)==0:
        if len(deck)!=0:
            moves.append("DRAW")
        else:
            return(0,None)
                 
    p1_hand_copy = p1_hand.copy(); p2_hand_copy = p2_hand.copy(); deck_copy = deck.copy()
    rank_cot_copy = rank_cot; suit_cot_copy = suit_cot; card_on_top_copy = card_on_top 
    
    for move in moves:
        p1_hand = p1_hand_copy ; p2_hand = p2_hand_copy; deck = deck_copy
        rank_cot = rank_cot_copy; suit_cot = suit_cot_copy; card_on_top = card_on_top_copy

        card_on_top, p1_hand, deck, rank_cot, suit_cot = updated_state(move,card_on_top,p1_hand,deck,rank_cot,suit_cot)
        #WE TRY TO SIMULATE UNTIL THE END OF THE GAME PLAYING THE BEST MOVES FOR THE OPPOSITE PLAYER        
        v2,a2 = min_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot,alpha,beta)
        #THIS IS THE PART WHERE PRUNING TAKE PLACE, IF THE POSSIBILITY OF WINNING IS FOUND IT UPDATES THE BEST MOVE
        if v2 > v:
            v = v2
            best_move = move
        #THIS SECTION TERMINATES THE FUNCTION AND RETURNS THE BEST MOVE IF IT KNOWS IT LEADS TO A VICTORY
        if v >= beta:
            return (v,best_move)

    return (v,best_move)
            
def p1_move(p1_hand,p2_hand,card_on_top,deck,suit):
    #THIS IS THE MAIN FUNCTION THAT ACCEPTS BOTH THE PLAYERS' CARDS, DECK AND SUIT OF THE CARD ON TOP
    rank_cot = card_on_top[:-1] 
    suit_cot = suit        
    moves = p1_hand.copy()
    moves = legal_possible_moves(rank_cot,suit_cot,moves) #CONTAINS ALL THE LEGAL MOVES AVAILABLE
    if len(moves)==0:
        if len(deck)!=0:
            moves.append("DRAW") #APPENDS DRAW IF THERE IS NO POSSIBLE MOVES
        else:
            return('DRAW',suit_cot)
    
    #SETTING THE VALUE ALPHA TO BE INFINITY INITIALLY
    best_score = float('inf')
    beta = float('-inf')
    best_move=''
    
    #WE SIMULATE UNTIL THE END PLAYING THE BEST MOVES FOR BOTH THE PLAYERS AND TRY TO PICK THE BEST MOVE AVAILABLE TO US
    value,best_move = max_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot,best_score,beta)
    
    if (best_move[:-1] == '8'):
        #IN CASE THE BEST MOVE CONTAINS RANK 8 WE ALSO RETURN THE SUIT WITH THE HIGHEST FREQUENCY IN ALL OF OUR PLAYER'S CARDS
        p1_hand_copy = p1_hand.copy()
        p1_hand_copy.remove(best_move)
        best_suit = case_eight(p1_hand_copy)
        return(best_move,best_suit)
    elif (best_move == 'DRAW'):
        return(best_move,suit_cot)
    
    else:
        return(best_move,best_move[-1])
#THE FUNCTION RETURNS THE BEST POSSIBLE MOVES AND THE SUIT THAT NEEDS TO BE FOLLOWED

### ALPHA BETA PLAYER 2

In [None]:
#THIS PLAYER USES THE ALPHA BETA PRUNING TECHNIQUE TO GENERATE THE MOVE
#THIS FUNCTION USES A SIMPLE UTILITY FUNCTION OF 1000 IF OUR PLAYER WINS, -1000 IF THE OTHER PLAYER WINS AND 0 DURING A TIE

import numpy as np
import pandas as pd
import random

def legal_possible_moves(rank,suit,m):
    #THIS FUNCTION LISTS ALL THE LEGAL MOVES BASED ON THE CARDS OF THE PLAYER
    subset=[]
    for move in m: 
        #WE SPLIT THE LAST CHARACTER AS THE SUIT AND THE REST OF THE CHARACTERS AS THE RANK        
        if move[:-1]==rank: subset.append(move)
        elif move[-1]==suit: subset.append(move)
        elif move[:-1]=='8': subset.append(move)
    return subset

def updated_state(move,card_on_top,hand,deck,rank_cot,suit_cot): 
    #THIS FUNCTION UPDATES THE GAME STATE BASED ON THE MOVE PLAYED BY A PLAYER
    if move == 'DRAW':
        #IN CASE DRAW IS OPTED A CARD IS DRAWN FROM THE DECK
        move = random.choice(deck)
        deck.remove(move)
        hand.append(move)
        return (card_on_top,hand,deck,rank_cot,suit_cot)
    elif move[:-1] == '8':
        #IF A CARD WITH RANK 8 IS PLAYED THIS UPDATES THE SUIT WITH THE HIGHEST FREQUENCY OF REMAINING CARDS
        card_on_top = move
        hand.remove(move)
        suit_cot = case_eight(hand)
        return (card_on_top,hand,deck,rank_cot,suit_cot)
    else:
        #IN ANY OTHER CASE THE CARD IS PLACED ON TOP AND REMOVED FROM THE HAND
        card_on_top = move
        hand.remove(move)
        
        rank_cot = card_on_top[:-1]; suit_cot = card_on_top[-1]       
        return (card_on_top,hand,deck,rank_cot,suit_cot)
        

def case_eight(hand):
    #THIS FUNCTION HELPS TO UPDATE THE EVALUATE THE FREQUENCY OF SUITS OF THE REMAINING CARDS IN A PLAYER'S HAND
    suit_counts = {'D': 0, 'H': 0, 'S': 0, 'C': 0}
    for move in hand:
        if move[-1] in suit_counts:
            suit_counts[move[-1]] += 1
    max_suit = max(suit_counts, key=suit_counts.get)
    return max_suit  

    
def min_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot):
    #THIS IS A FUNCTION THAT TRIES TO EVALUATE THE BEST POSSIBLE MOVE FOR THE OPPONENT BASED ON OUR PLAYER'S CARDS 
    moves = p2_hand.copy()
    moves = legal_possible_moves(rank_cot,suit_cot,moves)
        
    if len(moves)==0:
        if len(deck)!=0:  
            moves.append("DRAW") #APPENDS DRAW IF THERE IS NO POSSIBLE MOVES
        else:
            #print("Deck is empty")
            return(0)
        
    move_values={}  #THIS DICTIONARY WILL STORE THE VALUES OF ALL THE POSSIBLE MOVES         
    p1_hand_copy = p1_hand.copy(); p2_hand_copy = p2_hand.copy(); deck_copy = deck.copy()
    rank_cot_copy = rank_cot; suit_cot_copy = suit_cot; card_on_top_copy = card_on_top 
    
    for move in moves:
        #FOR EVERY POSSIBLE MOVE WE UPDATE THE STATE AND FIND THE BEST POSSIBLE MOVE FOR THE OPPOSITE PLAYER
        p1_hand = p1_hand_copy ; p2_hand = p2_hand_copy; deck = deck_copy
        rank_cot = rank_cot_copy; suit_cot = suit_cot_copy; card_on_top = card_on_top_copy                
        card_on_top, p2_hand, deck, rank_cot, suit_cot = updated_state(move,card_on_top,p2_hand,deck,rank_cot,suit_cot)                
        if len(p2_hand)==0:             
            return -1000
        #THE NEXT PART STORES THE VALUES OF THE MOVE BASED ON THE SERIES OF OUTCOME OF BEST POSSIBLE MOVES        
        move_values[move] = max_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot)        
        if move_values[move] == -1000:  # THIS PART PRUNES AND RETURNS THE MOVE IF THE OUTCOME ENDS IN A WIN FOR OPPOSITE PLAYER
            return -1000
        
    return min(move_values.values())
    
def max_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot):
    moves = p1_hand.copy()
    moves = legal_possible_moves(rank_cot,suit_cot,moves)
    if len(moves)==0:
        if len(deck)!=0:
            moves.append("DRAW") #APPENDS DRAW IF THERE IS NO POSSIBLE MOVES
        else:
            #print("Deck is empty")
            return(0)
            
    move_values={}        
    p1_hand_copy = p1_hand.copy(); p2_hand_copy = p2_hand.copy(); deck_copy = deck.copy()
    rank_cot_copy = rank_cot; suit_cot_copy = suit_cot; card_on_top_copy = card_on_top 
    
    for move in moves:
        #FOR EVERY POSSIBLE MOVE WE UPDATE THE STATE AND FIND THE BEST POSSIBLE MOVE FOR OUR PLAYER
        p1_hand = p1_hand_copy ; p2_hand = p2_hand_copy; deck = deck_copy
        rank_cot = rank_cot_copy; suit_cot = suit_cot_copy; card_on_top = card_on_top_copy        
        card_on_top, p1_hand, deck, rank_cot, suit_cot = updated_state(move,card_on_top,p1_hand,deck,rank_cot,suit_cot)                
        if len(p1_hand)==0:             
            return 1000
        #THE NEXT PART STORES THE VALUES OF THE MOVE BASED ON THE SERIES OF OUTCOME OF BEST POSSIBLE MOVES 
        move_values[move] = min_value(card_on_top,p1_hand,p2_hand,deck,rank_cot,suit_cot)
        if move_values[move] == 1000: # THIS PART PRUNES AND RETURNS THE MOVE IF THE OUTCOME ENDS IN A WIN FOR OUR PLAYER
            return 1000
    
    return max(move_values.values())
    
def p2_move(p1_hand,p2_hand,card_on_top,deck,suit):
    #THIS IS THE MAIN FUNCTION THAT ACCEPTS BOTH THE PLAYERS' CARDS, DECK AND SUIT OF THE CARD ON TOP
    # WE TAKE A COPY OF ALL THE LISTS BEFORE WE MODIFY THE ACTIONN STATE FOR EACH MOVE
    p1 = p1_hand.copy(); p2 = p2_hand.copy(); cot = card_on_top;  d = deck.copy()
    
    rank_cot = cot[:-1];   suit_cot = str(suit)       
    moves = p1.copy();  moves = legal_possible_moves(rank_cot,suit_cot,moves)
    
    if len(moves)==0:
        if len(d)!=0:
            moves.append("DRAW") #APPENDS DRAW IF THERE IS NO POSSIBLE MOVES
            
    move_values={};  best_move=''
    for move in moves:
        #FOR EVERY POSSIBLE MOVE WE UPDATE THE STATE AND FIND THE BEST POSSIBLE MOVE FOR OUR PLAYER
        cot, p1, d, rank_cot, suit_cot = updated_state(move,cot,p1,d,rank_cot,suit_cot)
        
        #TAKING A COPY
        p1_hand_copy = p1.copy(); p2_hand_copy = p2.copy(); deck_copy = d.copy()
        rank_cot_copy = rank_cot; suit_cot_copy = suit_cot; card_on_top_copy = cot
        #RETURN THE BEST MOVE IF IT LEADS TO A WINNING STATE
        if len(p1_hand_copy) == 0:
            best_move = move
            break
        #THE NEXT PART STORES THE VALUES OF THE MOVE BASED ON THE SERIES OF OUTCOME OF BEST POSSIBLE MOVES
        move_values[move] = min_value(card_on_top_copy,p1_hand_copy,p2_hand_copy,deck_copy,rank_cot_copy,suit_cot_copy)

    if best_move == '': 
        best_move = max(move_values, key=move_values.get) # IF THERE IS NO WINNING MOVE RETURNS THE MOVE WITH MAX VALUE
        if best_move[:-1] == '8':
            #IN CASE THE BEST MOVE CONTAINS RANK 8 WE ALSO RETURN THE SUIT WITH THE HIGHEST FREQUENCY IN ALL OF OUR PLAYER'S CARDS
            best_suit = case_eight(p1_hand)
            return(best_move,best_suit)
        elif (best_move == 'DRAW'):
            return(best_move,suit_cot)
        else:
            return(best_move,best_move[-1])  
    else:
        if best_move[:-1] == '8':
            #IN CASE THE BEST MOVE CONTAINS RANK 8 WE ALSO RETURN THE SUIT WITH THE HIGHEST FREQUENCY IN ALL OF OUR PLAYER'S CARDS
            best_suit = case_eight(p1_hand)
            return(best_move,best_suit)
        elif (best_move == 'DRAW'):
            return(best_move,suit_cot)
        else:
            return(best_move,best_move[-1])  
#THE FUNCTION RETURNS THE BEST POSSIBLE MOVES AND THE SUIT THAT NEEDS TO BE FOLLOWED 

# MCTS PLAYER

In [None]:
class CrazyEights:
    """
    Play a game of Crazy Eights between two players.

    A state is represented by a dictionary of:
        - the cards in the hand of each player (0, 1)
        - the top card on the table (2)
        - the cards in the deck (3)
        - the player to move (4)
        - the suit of the game (only relevant if the top card is an eight) (5)

    state = {'hand1': hand1, 'hand2': hand2, 'top_card': top_card, 'deck': deck, 'player': player, 'suit': suit}

    Each card is represented by a tuple of (suit, rank).
    The deck is represented by a list of cards.    
    """

    full_deck = [rank+suit for rank in ['A','2','3','4','5','6','7','8','9','10','J','Q','K'] for suit in ['D','H','S','C']]

    # the initial state is a random card on the table, random hands and a random player to move
    # 8 is not allowed as the top card
    top_card = random.choice(full_deck)

    # exclude the top card from the deck
    deck = full_deck
    deck.remove(top_card)

    # hands
    my_sample = random.sample(deck, 10)
    hand1, hand2 = my_sample[:5], my_sample[5:10]

    # exclude the hands from the deck
    for card in my_sample:
        deck.remove(card)

    initial = {'hand1': hand1, 'hand2': hand2, 'top_card': top_card, 'deck': deck, 'player': random.choice([1, 2]), 'suit': None}

    def actions(self, state):
        """Return a list of the allowable moves at this point."""
        # hand of the player to move
        hand = state['hand' + str(state['player'])]
        possible_moves = []

        # if eight is the top card, the next player needs to follow the declared suit
        if state['top_card'][0] == '8':
            for card in hand:
                if card[1] == state['suit'] or card[0] == '8':
                    possible_move = [card, card[-1]]
                    possible_moves.append(possible_move)

        # otherwise, available moves are the cards in the hand of the player to move that match the suit 
        # or the rank of the top card on the table or any 8
        else:
            for card in hand:
                if card[0] == state['top_card'][0] or card[1] == state['top_card'][1] or card[0] == '8':
                    possible_move = [card, card[-1]]
                    possible_moves.append(possible_move)

        # for each 8, add the four possible suits
        for card in hand:
            if card[0] == '8':
                # delete the card from the possible moves
                possible_move = [card, card[-1]]
                possible_moves.remove(possible_move)
                # add the four suits
                for suit in ['D','H','S','C']:
                    possible_moves.append([card, suit])

        # if no valid moves and the deck is not empty, draw
        if len(possible_moves) == 0:
            possible_moves = [['DRAW']]

        return possible_moves

    def result(self, state, move):
        """Return the state that results from making a move from a state."""
        # copy the state so that we don't change the original state
        new_state = deepcopy(state)

        if move[0] == 'DRAW' and len(state['deck']) > 0:
            # take a random card from the current deck
            card = random.choice(new_state['deck'])
            new_state['deck'].remove(card)
            # add the card to the hand of the player to move
            new_state['hand' + str(new_state['player'])].append(card)

        elif move[0] == 'DRAW' and len(state['deck']) == 0:
            new_state = state
            
        else:
            # remove the card from the hand of the player to move
            new_state['hand' + str(new_state['player'])].remove(move[0])
            # change the top card
            new_state['top_card'] = move[0]
            # change the suit
            new_state['suit'] = move[1]

        # change the player to move
        new_state['player'] = 3 - new_state['player']
        return new_state

    def utility(self, state, player):
        """Return the value of this final state to player."""
        # if the player has no cards left, he wins
        if len(state['hand' + str(player)]) == 0:
            return 1
        # if the opponent has no cards left, the player loses
        elif len(state['hand' + str(3 - player)]) == 0:
            return -1
        else:
            return 0

    def terminal_test(self, state):
        """Return True if this is a final state for the game."""
        # if one of the players has no cards left, the game is over
        if len(state['hand1']) == 0 or len(state['hand2']) == 0:
            return True
        # if the deck is empty and both players still have cards, the game is over
        elif len(state['deck']) == 0 and len(state['hand1']) > 0 and len(state['hand2']) > 0:
            return True
        else:
            return False


    def to_move(self, state):
        """Return the player whose move it is in this state."""
        return state['player']

    def display(self, state):
        """Print or otherwise display the state."""
        print(state)

    def __repr__(self):
        return '<{}>'.format(self.__class__.__name__)

    def play_game(self, player1, player2):
        """Play an 2-person, move-alternating game."""
        state = self.initial
        print("GAME START")
        while True:
            # the player to move makes a move
            if self.to_move(state) == 1:
                move = player1.generate_move(state, self)
            else:
                move = player2.generate_move(state, self)
            # the move is applied to the state
            state = self.result(state, move)

            # check if the game is over
            if self.terminal_test(state):
                print('Game over')
                print("Final state: ", state)
                break

In [None]:
# this part of the script is borrowed from the AIMA repository
class MCT_Node:
    """Node in the Monte Carlo search tree, keeps track of the children states."""

    def __init__(self, parent=None, state=None, U=0, N=0):
        self.__dict__.update(parent=parent, state=state, U=U, N=N)
        self.children = {}
        self.actions = None

def ucb(n, C=1.4):
    return np.inf if n.N == 0 else n.U / n.N + C * np.sqrt(np.log(n.parent.N) / n.N)

def monte_carlo_tree_search(state, game, N=1000):
    def select(n):
        """select a leaf node in the tree"""
        if n.children:
            return select(max(n.children.keys(), key=ucb))
        else:
            return n

    def expand(n):
        """expand the leaf node by adding all its children states"""
        if not n.children and not game.terminal_test(n.state):
            n.children = {MCT_Node(state=game.result(n.state, action), parent=n): action
                          for action in game.actions(n.state)}
        return select(n)

    def simulate(game, state):
        """simulate the utility of current state by random picking a step"""
        player = game.to_move(state)
        while not game.terminal_test(state):
            action = random.choice(list(game.actions(state)))
            state = game.result(state, action)
        v = game.utility(state, player)
        return -v

    def backprop(n, utility):
        """passing the utility back to all parent nodes"""
        if utility > 0:
            n.U += utility
        # if utility == 0:
        #     n.U += 0.5
        n.N += 1
        if n.parent:
            backprop(n.parent, -utility)

    root = MCT_Node(state=state)

    for _ in range(N):
        leaf = select(root)
        child = expand(leaf)
        result = simulate(game, child.state)
        backprop(child, result)

    try: 
        max_state = max(root.children, key=lambda p: p.N)
    except:
        return ['DRAW']

    return root.children.get(max_state)

In [None]:
# player definition
game = CrazyEights()
def mcts_move(p1_hand, p2_hand, card_on_top, deck, suit_cot):
    # assume always player 1
    state = {'hand1': p1_hand, 'hand2': p2_hand, 'top_card': card_on_top, 'deck': deck, 'suit': suit_cot, 'player': 1}
    return monte_carlo_tree_search(state, game)

# TOURNAMENT

In this section, the different players compete to evaluate their result playing the game

In [34]:
def updated_state(moves,card_on_top,hand,deck,rank_cot,suit_cot): 
    # THIS FUNCTION UPDATES THE GAMES STATE AFTER A MOVE IS PLAYED
    if moves[0].upper() == 'DRAW':
        move_draw = random.choice(deck)
        deck.remove(move_draw)
        hand.append(move_draw)
        return (card_on_top,hand,deck,rank_cot,suit_cot)
    elif moves[0][0] == '8':
        card_on_top = moves[0]
        m = str(moves[0])
        hand.remove(m)
        rank_cot = card_on_top[:-1] ; suit_cot = moves[1]
        return (card_on_top,hand,deck,rank_cot,suit_cot)
    else:
        card_on_top = moves[0]
        m = str(moves[0])
        hand.remove(m)
        rank_cot = card_on_top[:-1]; suit_cot = card_on_top[-1]  
        return (card_on_top,hand,deck,rank_cot,suit_cot)

def total_points(hand):
    #THIS FUNCTION HELPS TO DETERMINE THE TOTAL VALUE OF POINTS FROMT A PLAYERS AVAILABLE CARDS
    value={'A':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':50,'9':9,'10':10,'J':10,'Q':10,'K':10}
    total = 0
    for card in hand:
        total = total + value[card[:-1]]
    return total

def Crazy_8_game(p1_move,p2_move,games):
    #THIS IS THE FUNCTION THAT SIMULATES THE CRAZY 8 GAME BETWEEN 2 PLAYERS WITH N ROUNDS OF GAME
    
    suits=['D','H','S','C']
    rank=['A','2','3','4','5','6','7','8','9','10','J','Q','K']
    full_deck=[]
    #CREATING A DECK OF 52 CARDS
    for s in suits:
        for r in rank:
            values=r+s
            full_deck.append(values)
    
    #INITIALISING POINTS AND WINS FOR BOTH THE PLAYERS
    p1_points = p2_points = 0;  p1_wins = p2_wins = ties = 0; rounds = 0;   blocked = 0
    while rounds < games: #THIS WHILE LOOP WILL ENABLE TO PLAY N ROUNDS OF GAME
        rounds = rounds + 1
        deck = full_deck.copy() #A COPY OF THE FULL DECK IS TAKEN EVERY TIME
        value={'A':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':50,'9':9,'10':10,'J':10,'Q':10,'K':10}   
        p1_hand=[];   p2_hand=[]
        #INITIALISING PLAYER 1 AND 2 WITH RANDOM CARDS FROM THE DECK
        for i in range(5):
            r1 = random.choice(deck)
            p1_hand.append(r1)
            deck.remove(r1)
            r2 = random.choice(deck)
            p2_hand.append(r2)
            deck.remove(r2) 
        #SELECTING THE CARD ON TOP FROM THE DECK
        card_on_top = random.choice(deck)
        deck.remove(card_on_top)     
        rank_cot = card_on_top[:-1] 
        suit_cot = card_on_top[-1]
        Game = False
        count = 1
        while not Game:
            if len(p1_hand)==0: #PLAYER 1 WINS WHEN HE HAS NO CARDS 
                Game = True
                p1_points = p1_points + total_points(p2_hand) #PLAYER 1 GETS THE VALUE OF REMAINING CARDS FROM PLAYER 2
                p1_wins+=1   

            elif len(p2_hand)==0: #PLAYER 2 WINS WHEN HE HAS NO CARDS
                Game = True
                p2_points = p2_points + total_points(p1_hand) #PLAYER 2 GETS THE VALUE OF REMAINING CARDS FROM PLAYER 1
                p2_wins+=1

            elif len(deck) == 0: #IT ENDS IN A TIE WHEN THE DECK HAS NO CARDS
                Game = True 
                p1_points = p1_points + total_points(p2_hand)
                p2_points = p2_points + total_points(p1_hand)
                ties+=1 

            if not Game:
                if (count%2) == 1:
                    count+=1
                    #PLAYER 1 PLAYS HIS MOVE
                    move = p1_move(p1_hand,p2_hand,card_on_top,deck,suit_cot)
                    #THE GAME STATES ARE UPDATED AFTER EVERY MOVE
                    card_on_top, p1_hand, deck, rank_cot, suit_cot = updated_state(move,card_on_top,p1_hand,deck,rank_cot,suit_cot)
                else:
                    count+=1
                    #PLAYER 2 PLAYS HIS MOVE
                    move_2 = p2_move(p2_hand,p1_hand,card_on_top,deck,suit_cot)
                    #THE GAME STATES ARE UPDATED AFTER EVERY MOVE
                    card_on_top, p2_hand, deck, rank_cot, suit_cot = updated_state(move_2,card_on_top,p2_hand,deck,rank_cot,suit_cot)

    #THE FINAL RESULTS ARE PRINTED AFTER A SIMULATION OF N GAMES
    avg_points_per_game_p1 = p1_points / games if games > 0 else 0
    avg_points_per_game_p2 = p2_points / games if games > 0 else 0
    
    print("\n" + "="*55)
    print("{:^55}".format("CRAZY 8 GAME RESULTS"))
    print("="*55 + "\n")
    print("{:^25} | {:^25} |".format("Player 1", "Player 2"))
    print("-"*55)
    print("{:<25} | {:<25} |".format(f"Total Points: {p1_points}", f"Total Points: {p2_points}"))
    print("{:<25} | {:<25} |".format(f"Total Wins: {p1_wins}", f"Total Wins: {p2_wins}"))
    print("{:<25} | {:<25} |".format(f"Ties: {ties}", ""))
    print("ADDITIONAL RESULTS")
    print("{:<25} | {:<25} |".format(f"Avg. Points/Game: {avg_points_per_game_p1:.2f}", f"Avg. Points/Game: {avg_points_per_game_p2:.2f}"))
    print("-"*55)

In [None]:
#GLOBAL VARIABLE THAT UPLOADS Q STATES, MUSTE BE UPLOADED ONLY ONCE TO INITIATE RL
initiate_rl()

In [7]:
#AB1 vs RL
Crazy_8_game(p1_move,rl_move,100)
Crazy_8_game(rl_move,p1_move,100)


                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1210        | Total Points: 1123        |
Total Wins: 55            | Total Wins: 43            |
Ties: 2                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 12.10   | Avg. Points/Game: 11.23   |
-------------------------------------------------------

                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1285        | Total Points: 875         |
Total Wins: 54            | Total Wins: 44            |
Ties: 2                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 12.85   | Avg. Points/Game: 8.75    |
-------------------------------------------------------


In [8]:
#AB2 vs RL
Crazy_8_game(p2_move,rl_move,100)
Crazy_8_game(rl_move,p2_move,100)


                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1091        | Total Points: 912         |
Total Wins: 55            | Total Wins: 45            |
Ties: 0                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 10.91   | Avg. Points/Game: 9.12    |
-------------------------------------------------------

                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 922         | Total Points: 1234        |
Total Wins: 43            | Total Wins: 54            |
Ties: 3                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 9.22    | Avg. Points/Game: 12.34   |
-------------------------------------------------------


In [9]:
#AB1 vs mcts
Crazy_8_game(p1_move,mcts_move,100)
Crazy_8_game(mcts_move,p1_move,100)


                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1881        | Total Points: 1007        |
Total Wins: 58            | Total Wins: 36            |
Ties: 6                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 18.81   | Avg. Points/Game: 10.07   |
-------------------------------------------------------

                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1044        | Total Points: 1648        |
Total Wins: 47            | Total Wins: 47            |
Ties: 6                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 10.44   | Avg. Points/Game: 16.48   |
-------------------------------------------------------


In [10]:
#AB2 vs mcts
Crazy_8_game(p2_move,mcts_move,100)
Crazy_8_game(mcts_move,p2_move,100)


                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1750        | Total Points: 1280        |
Total Wins: 48            | Total Wins: 41            |
Ties: 11                  |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 17.50   | Avg. Points/Game: 12.80   |
-------------------------------------------------------

                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1018        | Total Points: 1577        |
Total Wins: 48            | Total Wins: 48            |
Ties: 4                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 10.18   | Avg. Points/Game: 15.77   |
-------------------------------------------------------


In [11]:
#mcts vs RL
Crazy_8_game(mcts_move,rl_move,100)
Crazy_8_game(rl_move,mcts_move,100)


                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1192        | Total Points: 1585        |
Total Wins: 44            | Total Wins: 46            |
Ties: 10                  |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 11.92   | Avg. Points/Game: 15.85   |
-------------------------------------------------------

                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1401        | Total Points: 1263        |
Total Wins: 43            | Total Wins: 51            |
Ties: 6                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 14.01   | Avg. Points/Game: 12.63   |
-------------------------------------------------------


In [12]:
#AB1 vs AB2
Crazy_8_game(p1_move,p2_move,100)
Crazy_8_game(p2_move,p1_move,100)


                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1024        | Total Points: 1331        |
Total Wins: 49            | Total Wins: 48            |
Ties: 3                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 10.24   | Avg. Points/Game: 13.31   |
-------------------------------------------------------

                 CRAZY 8 GAME RESULTS                  

        Player 1          |         Player 2          |
-------------------------------------------------------
Total Points: 1024        | Total Points: 1007        |
Total Wins: 53            | Total Wins: 47            |
Ties: 0                   |                           |
ADDITIONAL RESULTS
Avg. Points/Game: 10.24   | Avg. Points/Game: 10.07   |
-------------------------------------------------------
