# Final Project: Euchre
Tyler Gibson & Carter Shavitz | CSC 5661

https://en.wikipedia.org/wiki/Euchre

https://cardgames.io/euchre/

TODO
- include trump and lead suit in state

In [1]:
import random
from player import Player
from score import Score

import numpy as np
import re
import time

import tqdm

## Euchre Environment

In [4]:
class Euchre:
    """
    A class to manage the euchre board.

    The environment tracks the deck, which cards have been played in the
    current hand (``card_state``) and in the current trick (``trick_state``),
    both team scores, the dealer seat and the turned-up card.

    ``self.deck`` is kept in a fixed canonical order for the lifetime of the
    object because it defines the column order of the state vectors and the
    indices stored in ``self.card_values``.
    """

    def __init__(self, config: dict) -> None:
        """
        The constructor for the class.

        Parameters
        ----------
        config : dict
            The configuration parameters for euchre. The keys
            ``'score_to_win'`` and ``'values'`` are read.
        """
        self.config = config
        self.score_to_win = self.config['score_to_win']  # 5, 7, or 10
        self.values = self.config['values']  # 9 - A that can include 7's and/or 8's

        self.num_players = 4
        self.suits = ['♠', '♥', '♦', '♣']
        # Canonical card order; never shuffled in place (see deal_cards).
        self.deck = [f"{value}{suit}" for suit in self.suits for value in self.values]
        self.card_state = self._blank_card_flags()
        self.trick_state = self._blank_card_flags()

        self.score = Score()

        self.team_1_score = 0
        self.team_2_score = 0

        self.top_card = None
        self.trump = None
        self.dealer = 1
        self.lead_suit = ''
        self.card_values = {card: index for index, card in enumerate(self.deck)}

    def _blank_card_flags(self) -> dict:
        """Return a ``{card: False}`` mapping covering every card in the deck."""
        return {f"{value}{suit}": False for suit in self.suits for value in self.values}

    def deal_cards(self, distribution_pattern=(3, 2)):
        """
        Deal the cards for a new hand.

        A shuffled *copy* of the deck is dealt in two passes around the table.
        Seats alternate between the two packet sizes, and the sizes swap on
        the second pass, so every seat ends up with
        ``sum(distribution_pattern)`` cards.

        Parameters
        ----------
        distribution_pattern : tuple
            The two packet sizes used while dealing.

        Returns
        -------
        tuple
            ``(players_hands, top_card)`` where ``players_hands`` maps the
            seat number as a string (``'1'``, ``'2'``, ...) to that seat's
            list of cards, and ``top_card`` is the first card left undealt.
        """
        # Shuffle a copy: self.deck is the ordering used to encode states, so
        # shuffling it in place would permute the state vector every hand.
        shuffled = list(self.deck)
        random.shuffle(shuffled)

        players_hands = {str(seat): [] for seat in range(1, self.num_players + 1)}
        first_packet, second_packet = distribution_pattern

        deck_index = 0
        for round_index in range(2):
            for seat_index, seat in enumerate(players_hands):
                # Even seats get the first packet size on pass one and the
                # second size on pass two; odd seats get the reverse.
                take = first_packet if (seat_index + round_index) % 2 == 0 else second_packet
                players_hands[seat].extend(shuffled[deck_index:deck_index + take])
                deck_index += take

        return players_hands, shuffled[deck_index]

    def reset_game(self, players: "list[Player]"):
        """
        Reset the game.

        Swaps every player's team id between 1 and 2, zeroes both team
        scores, picks a random dealer seat and deals a fresh hand.

        Parameters
        ----------
        players : list[Player]
            The players taking part in the game.

        Returns
        -------
        list[Player]
            The same players after ``reset_hand`` has been applied.
        """
        for player in players:
            player.team_id = 2 if player.team_id == 1 else 1

        self.team_1_score = 0
        self.team_2_score = 0

        self.top_card = None
        self.trump = None

        self.dealer = np.random.choice(range(self.num_players)) + 1

        players = self.reset_hand(players=players)

        return players

    def reset_hand(self, players: "list[Player]"):
        """
        Reset the hand.

        Clears the played-card flags, zeroes each player's trick points,
        deals new cards, records the turned-up card, passes the deal to the
        next seat and clears the trick state.

        Parameters
        ----------
        players : list[Player]
            The players to deal to; the player at list position ``i``
            receives the hand dealt to seat ``i + 1``.

        Returns
        -------
        list[Player]
            The same list, with hands and points updated in place.
        """
        self.card_state = self._blank_card_flags()

        for player in players:
            player.points = 0

        players_hands, top_card = self.deal_cards(distribution_pattern=(3, 2))

        for count, player in enumerate(players):
            player.set_hand(players_hands[str(count + 1)])

        self.top_card = top_card

        # The deal passes to the next seat once per hand. Doing this in
        # reset_trick advanced the dealer after every trick for callers that
        # reset between tricks.
        self.dealer = self.dealer % self.num_players + 1

        self.reset_trick()

        return players

    def reset_trick(self):
        """
        Reset the trick.

        Only the per-trick played-card flags are cleared; the dealer is left
        untouched so this is safe to call after every trick.
        """
        self.trick_state = self._blank_card_flags()

    def reward(self, won_trick, teammate_won) -> float:
        """
        Compute the reward for one player after a trick.

        Parameters
        ----------
        won_trick : bool
            Whether this player personally won the trick.
        teammate_won : bool
            Whether this player's team won the trick.

        Returns
        -------
        float
            5 if the player won the trick, 1 if only their team won it,
            otherwise -1.
        """
        if won_trick:
            return 5
        elif teammate_won:
            return 1
        else:
            return -1

    def step(self, actions: dict) -> dict:
        """
        Score a completed trick and produce rewards and next states.

        Parameters
        ----------
        actions : dict
            Maps a player id to ``{'action': card, 'team': team_id}`` for
            every card played in the trick.

        Returns
        -------
        dict
            ``'state'`` maps each player id to a 0/1 list (hand-level played
            flags followed by trick-level played flags, both in ``self.deck``
            order), ``'rewards'`` maps each player id to its reward, and
            ``'highest_player_id'`` is the id returned by
            ``self.score.score_trick``.
        """
        # self.score.suits is used as a regex pattern here; presumably it
        # matches a suit symbol - confirm against Score.
        trump_suit = re.findall(self.score.suits, self.top_card)[0]  # determine the trump suit

        # find the player id who won the trick
        highest_player_id = self.score.score_trick(
            actions=actions,
            trump_suit=trump_suit,
            lead_suit=self.lead_suit
        )
        highest_player_team_id = actions[highest_player_id]['team']  # get team that won trick

        rewards = {}  # store rewards for players
        states = {}
        for player_id, action in actions.items():
            rewards[player_id] = self.reward(
                won_trick=player_id == highest_player_id,
                teammate_won=action['team'] == highest_player_team_id
            )

            # update state space
            self.card_state[action['action']] = True
            self.trick_state[action['action']] = True
            # Snapshot taken right after this player's card is marked, so it
            # does not yet include cards of players later in `actions`.
            states[player_id] = (
                [1 if self.card_state[card] else 0 for card in self.deck]
                + [1 if self.trick_state[card] else 0 for card in self.deck]
            )

        return {
            'state': states,
            'rewards': rewards,
            'highest_player_id': highest_player_id
        }

In [3]:
# Environment settings: play to 10 points with a 24-card (9 through ace) deck.
env_config = dict(
    score_to_win=10,
    values=['9', '10', 'J', 'Q', 'K', 'A'],
)

# Build the environment and a standalone scorer from those settings.
euchre = Euchre(env_config)
score = Score()

In [11]:
# Seat the players, alternating teams: odd ids on team 1, even ids on team 2.
# NOTE(review): the recorded output of this cell is a TypeError because
# Player requires an `agent_config` argument that is not passed here.
players = []
for seat in range(euchre.num_players):
    team = 2 if seat % 2 else 1
    players.append(Player(id=seat + 1, team_id=team))

# Deal a fresh hand to everyone.
euchre.reset_hand(players=players)

# Show what each player was dealt.
for player in players:
    print(player.hand, player.id, player.team_id)

# Display the turned-up card and the current dealer seat.
euchre.top_card, euchre.dealer

TypeError: __init__() missing 1 required positional argument: 'agent_config'

In [None]:
# Hyperparameters handed to every player's agent.
agent_config = dict(
    gamma=0.9,  # the discount factor
    epsilon=0.1,  # the epsilon-greedy parameter
    alpha=0.001,  # the learning rate
    input_size=25,
    hidden_size=128,  # the hidden layer size
    A=euchre.deck,  # the action set
    M=100000,  # set the memory size
    B=64,  # set the batch size
    C=500,  # when to update the target approximator
    n_steps=100,  # the number of steps to use to update
    epsilon_burnin=10000,
    double_dqn=True,
    prioritized_replay=True,
    multi_step=True,
    omega=0.05,
    beta=0.5,
    n_step_return=20,
)

# One player per seat, alternating teams (odd ids -> team 1, even ids -> team 2).
players = []
for seat in range(euchre.num_players):
    team = 2 if seat % 2 else 1
    players.append(Player(id=seat + 1, team_id=team, agent_config=agent_config))

In [5]:
# Play one full game with randomly chosen cards, printing the score after
# every hand.
players = euchre.reset_game(players=players)
players_by_id = {player.id: player for player in players}  # direct lookup instead of rescanning the list

hands = 1  # keep track of which hand we are on
while euchre.team_1_score < euchre.score_to_win and euchre.team_2_score < euchre.score_to_win:  # while game not complete
    current_player_id = 1 if euchre.dealer == 4 else euchre.dealer + 1  # get the player to the left of the dealer to start the hand
    player_choose_trump = players_by_id[current_player_id]  # assuming the player to the left of dealer chooses trump and starts (not based off of actual rules)

    # the team of the player who chooses trump are the makers, the other team defends
    makers = player_choose_trump.team_id
    for player in players:
        player.trick_team = 'makers' if player.team_id == makers else 'defenders'

    # the turned-up card is fixed for the whole hand, so trump is too
    trump_suit = re.findall(score.suits, euchre.top_card)[0]

    # loop through 5 tricks (each player should have 5 cards)
    for _ in range(5):
        lead_suit = ''  # init lead suit to empty

        # loop through each player for their turn
        for x in range(euchre.num_players):
            current_player = players_by_id[current_player_id]  # get current player based on id
            current_player.current_card = current_player.choose_card(random=True)  # set the current card of the player

            # if it is the first player, determine the lead suit
            if x == 0:
                lead_suit = re.findall(score.suits, current_player.current_card)[0]  # use regex

            current_player_id = 1 if current_player_id == 4 else current_player_id + 1  # update the current player id to the next player

        # find the player id who won the trick
        # NOTE(review): Euchre.step calls score_trick with `actions=`; confirm
        # that Score.score_trick still accepts `players=`.
        highest_player_id = score.score_trick(
            players=players,
            trump_suit=trump_suit,
            lead_suit=lead_suit
        )
        highest_player = players_by_id[highest_player_id]  # get player who won the trick
        highest_player.points += 1  # update their points by 1 for the hand
        current_player_id = highest_player_id  # the player who won the trick will lead the next trick

    hand_scores = score.score_hand(players, solo_call=False)  # calculate the hand scores by looking at each players points for all tricks

    # update team scores
    if player_choose_trump.team_id == 1:
        euchre.team_1_score += hand_scores['makers']
        euchre.team_2_score += hand_scores['defenders']
    else:
        euchre.team_2_score += hand_scores['makers']
        euchre.team_1_score += hand_scores['defenders']

    print(f'Hand {hands}\t\t Team 1 Score: {euchre.team_1_score}\t\tTeam 2 Score: {euchre.team_2_score}')
    hands += 1  # update hand counter
    players = euchre.reset_hand(players=players)  # reset hand

# compare against the configured target rather than a hard-coded 10
print('Team 1 Wins' if euchre.team_1_score >= euchre.score_to_win else 'Team 2 Wins')

NameError: name 'players' is not defined

## Agent Creation

In [5]:
# Environment settings: play to 10 points with a 24-card (9 through ace) deck.
env_config = dict(
    score_to_win=10,
    values=['9', '10', 'J', 'Q', 'K', 'A'],
)

# Build the environment and a standalone scorer from those settings.
euchre = Euchre(env_config)
score = Score()

In [21]:
# Hyperparameters handed to every player's agent.
agent_config = dict(
    gamma=0.9,  # the discount factor
    epsilon=0.1,  # the epsilon-greedy parameter
    alpha=0.001,  # the learning rate
    input_size=49,
    hidden_size=128,  # the hidden layer size
    A=euchre.deck,  # the action set
    M=100000,  # set the memory size
    B=64,  # set the batch size
    C=500,  # when to update the target approximator
    n_steps=100,  # the number of steps to use to update
    epsilon_burnin=10000,
    double_dqn=False,
    prioritized_replay=False,
    multi_step=False,
    omega=0.05,
    beta=0.5,
    n_step_return=20,
    card_values=euchre.card_values,
)

# One non-training player per seat, alternating teams (odd ids -> team 1, even ids -> team 2).
players = []
for seat in range(euchre.num_players):
    team = 2 if seat % 2 else 1
    players.append(Player(id=seat + 1, team_id=team, agent_config=agent_config, train=False))
# players[0].train = True
# players[2].train = True

In [None]:
# Training loop: play many games, record who wins each trick and each game,
# and update the agent of every player whose `train` flag is set.
count = 0  # number of hands played so far; drives the update schedules below
debug = False

results = {
    'team_wins': [],
    'individual_wins': []
}
for epi in tqdm.tqdm(range(25000)):
    players = euchre.reset_game(players=players)
    players_by_id = {player.id: player for player in players}  # direct lookup instead of rescanning the list
    hands = 1  # keep track of which hand we are on
    while euchre.team_1_score < euchre.score_to_win and euchre.team_2_score < euchre.score_to_win:  # while game not complete
        count += 1

        current_player_id = 1 if euchre.dealer == 4 else euchre.dealer + 1  # get the player to the left of the dealer to start the hand
        player_choose_trump = players_by_id[current_player_id]  # assuming the player to the left of dealer chooses trump and starts (not based off of actual rules)

        # the team of the player who chooses trump are the makers, the other team defends
        makers = player_choose_trump.team_id
        for player in players:
            player.trick_team = 'makers' if player.team_id == makers else 'defenders'

        # loop through 5 tricks (each player should have 5 cards)
        for _ in range(5):
            euchre.lead_suit = ''  # init lead suit to empty

            # local copies are updated card by card; euchre.step updates the real state
            round_state = euchre.card_state.copy()
            trick_state = euchre.trick_state.copy()
            actions = {}
            # loop through each player for their turn
            for x in range(euchre.num_players):
                data_t = {}  # init the data for this turn
                data_t['s_t'] = (
                    [1 if round_state[card] else 0 for card in euchre.deck]
                    + [1 if trick_state[card] else 0 for card in euchre.deck]
                )

                current_player = players_by_id[current_player_id]  # get current player based on id
                current_player.agent.config['A'] = current_player.hand  # restrict the action set to the cards in hand
                current_player.current_card = current_player.agent.pi(s_t=data_t['s_t'], epsilon=1)  # set the current card of the player
                data_t['a_t'] = euchre.card_values[current_player.current_card]  # record the action at t
                data_t['d_s_a'] = data_t['s_t'] + [data_t['a_t']]  # make the state action pair

                # if it is the first player, determine the lead suit; it must be
                # stored on the environment because euchre.step reads it from there
                if x == 0:
                    euchre.lead_suit = re.findall(score.suits, current_player.current_card)[0]  # use regex

                current_player_id = 1 if current_player_id == 4 else current_player_id + 1  # update the current player id to the next player
                current_player.data_t = data_t

                # update card state based on current card chosen
                round_state[current_player.current_card] = True
                trick_state[current_player.current_card] = True

                actions[str(current_player.id)] = {
                    'action': current_player.current_card,
                    'team': current_player.team_id
                }

            next_step = euchre.step(actions)  # step

            winner_id = int(next_step['highest_player_id'])
            highest_player = players_by_id[winner_id]  # get player who won the trick
            highest_player.points += 1  # update their points by 1 for the hand
            current_player_id = winner_id  # the player who won the trick will lead the next trick

            results['individual_wins'].append(next_step['highest_player_id'])
            for player_id in next_step['state'].keys():
                current_player = players_by_id[int(player_id)]  # player whose transition is being stored
                if current_player.train:
                    current_player.data_t['r_t+1'] = next_step['rewards'][player_id]
                    current_player.data_t['s_t+1'] = next_step['state'][player_id]
                    current_player.agent.D.append(current_player.data_t)
                    current_player.agent.p.append(5 ** current_player.agent.config['omega'])
                    if len(current_player.agent.D) > current_player.agent.config['M']:  # if we've exceeded memory...
                        current_player.agent.D.pop(0)  # drop the oldest transition
                        current_player.agent.p.pop(0)  # drop oldest priority

                    if count % current_player.agent.config['n_steps'] == 0:  # if it is time for a new update...
                        batch = current_player.agent.make_batch()  # make a batch for training from the memory buffer
                        X = batch[0]  # pull out the features
                        y = batch[1]  # pull out the target
                        current_player.agent.update_Q(X, y)  # update the MLP modeling Q

                    if count % current_player.agent.config['C'] == 0:  # if it is time for an update...
                        current_player.agent.update_Q_prime()  # overwrite the target approximator

            euchre.reset_trick()

        hand_scores = score.score_hand(players, solo_call=False)  # calculate the hand scores by looking at each players points for all tricks

        # update team scores
        if player_choose_trump.team_id == 1:
            euchre.team_1_score += hand_scores['makers']
            euchre.team_2_score += hand_scores['defenders']
        else:
            euchre.team_2_score += hand_scores['makers']
            euchre.team_1_score += hand_scores['defenders']

        if debug: print(f'Hand {hands}\t\t Team 1 Score: {euchre.team_1_score}\t\tTeam 2 Score: {euchre.team_2_score}')
        hands += 1  # update hand counter
        players = euchre.reset_hand(players=players)  # reset hand

    # compare against the configured target rather than a hard-coded 10
    team_1_won = euchre.team_1_score >= euchre.score_to_win
    if debug: print('Team 1 Wins' if team_1_won else 'Team 2 Wins')
    results['team_wins'].append(1 if team_1_won else 2)

 77%|███████▋  | 19321/25000 [02:22<01:01, 92.45it/s] 

 94%|█████████▍| 23469/25000 [02:56<00:11, 132.89it/s]


KeyboardInterrupt: 

In [25]:
# Tally trick wins per player id, then game wins per team.
for result_key in ('individual_wins', 'team_wins'):
    tally = np.unique(np.array(results[result_key]), return_counts=True)
    print(tally)

(array(['1', '2', '3', '4'], dtype='<U1'), array([273620, 273121, 274017, 274172], dtype=int64))
(array([1, 2]), array([11770, 11699], dtype=int64))


Simulate Some Games

In [30]:
# Evaluation loop: play games with greedy action selection (epsilon=0) and
# record who wins each trick and each game. No agent updates happen here.
count = 0  # number of hands played so far
debug = False

results_test = {
    'team_wins': [],
    'individual_wins': []
}
for epi in tqdm.tqdm(range(1000)):
    players = euchre.reset_game(players=players)
    players_by_id = {player.id: player for player in players}  # direct lookup instead of rescanning the list
    hands = 1  # keep track of which hand we are on
    while euchre.team_1_score < euchre.score_to_win and euchre.team_2_score < euchre.score_to_win:  # while game not complete
        count += 1

        current_player_id = 1 if euchre.dealer == 4 else euchre.dealer + 1  # get the player to the left of the dealer to start the hand
        player_choose_trump = players_by_id[current_player_id]  # assuming the player to the left of dealer chooses trump and starts (not based off of actual rules)

        # the team of the player who chooses trump are the makers, the other team defends
        makers = player_choose_trump.team_id
        for player in players:
            player.trick_team = 'makers' if player.team_id == makers else 'defenders'

        # loop through 5 tricks (each player should have 5 cards)
        for _ in range(5):
            euchre.lead_suit = ''  # init lead suit to empty

            # local copies are updated card by card; euchre.step updates the real state
            round_state = euchre.card_state.copy()
            trick_state = euchre.trick_state.copy()
            actions = {}
            # loop through each player for their turn
            for x in range(euchre.num_players):
                s_t = (
                    [1 if round_state[card] else 0 for card in euchre.deck]
                    + [1 if trick_state[card] else 0 for card in euchre.deck]
                )

                current_player = players_by_id[current_player_id]  # get current player based on id
                current_player.agent.config['A'] = current_player.hand  # restrict the action set to the cards in hand
                current_player.current_card = current_player.agent.pi(s_t=s_t, epsilon=0)  # set the current card of the player

                # if it is the first player, determine the lead suit; it must be
                # stored on the environment because euchre.step reads it from there
                if x == 0:
                    euchre.lead_suit = re.findall(score.suits, current_player.current_card)[0]  # use regex

                current_player_id = 1 if current_player_id == 4 else current_player_id + 1  # update the current player id to the next player

                # update card state based on current card chosen
                round_state[current_player.current_card] = True
                trick_state[current_player.current_card] = True

                actions[str(current_player.id)] = {
                    'action': current_player.current_card,
                    'team': current_player.team_id
                }

            next_step = euchre.step(actions)  # step

            winner_id = int(next_step['highest_player_id'])
            highest_player = players_by_id[winner_id]  # get player who won the trick
            highest_player.points += 1  # update their points by 1 for the hand
            current_player_id = winner_id  # the player who won the trick will lead the next trick
            results_test['individual_wins'].append(next_step['highest_player_id'])
            euchre.reset_trick()

        hand_scores = score.score_hand(players, solo_call=False)  # calculate the hand scores by looking at each players points for all tricks

        # update team scores
        if player_choose_trump.team_id == 1:
            euchre.team_1_score += hand_scores['makers']
            euchre.team_2_score += hand_scores['defenders']
        else:
            euchre.team_2_score += hand_scores['makers']
            euchre.team_1_score += hand_scores['defenders']

        if debug: print(f'Hand {hands}\t\t Team 1 Score: {euchre.team_1_score}\t\tTeam 2 Score: {euchre.team_2_score}')
        hands += 1  # update hand counter
        players = euchre.reset_hand(players=players)  # reset hand

    # compare against the configured target rather than a hard-coded 10
    team_1_won = euchre.team_1_score >= euchre.score_to_win
    if debug: print('Team 1 Wins' if team_1_won else 'Team 2 Wins')
    results_test['team_wins'].append(1 if team_1_won else 2)

100%|██████████| 1000/1000 [00:32<00:00, 30.82it/s]


In [31]:
# Tally trick wins per player id, then game wins per team, for the test games.
for result_key in ('individual_wins', 'team_wins'):
    tally = np.unique(np.array(results_test[result_key]), return_counts=True)
    print(tally)

(array(['1', '2', '3', '4'], dtype='<U1'), array([11069,  7051, 10439,  8271], dtype=int64))
(array([1, 2]), array([491, 509], dtype=int64))


Save Players

In [16]:
import pickle

# Serialize the current list of players to players.pkl in the working directory.
with open('players.pkl', 'wb') as pickle_file:
    pickle.dump(players, pickle_file)

Load Players

In [28]:
# Imported here as well so this cell runs on its own after a kernel restart,
# without first executing the save cell.
import pickle

# Restore the list of players written by the save cell.
# NOTE: unpickling can execute arbitrary code; only load a players.pkl that
# was produced by this notebook or comes from a trusted source.
with open('players.pkl', 'rb') as f:
    players = pickle.load(f)