# Final Project: Euchre
Tyler Gibson & Carter Shavitz | CSC 5661

https://en.wikipedia.org/wiki/Euchre

https://cardgames.io/euchre/

In [1]:
import random
from player import Player
from score import Score

import numpy as np
import re
import itertools

from tqdm import tqdm
import time

## Euchre Environment

In [2]:
class Euchre:
    """
    A class to manage the euchre board.
    """

    def __init__(self, config: dict) -> None:
        """
        The constructor for the class.

        Parameters
        ----------
        config : dict
            The configuration parameters for euchre.
        """
        self.config = config
        self.score_to_win = self.config['score_to_win']  # 5, 7, or 10
        self.values = self.config['values']  # 9 - A that can include 7's and/or 8's

        self.num_players = 4
        self.suits = ['♠', '♥', '♦', '♣']
        self.deck = [f"{value}{suit}" for suit in self.suits for value in self.values]

        self.players = [Player(id=id + 1, team_id=1 if id % 2 == 0 else 2) for id in range(self.num_players)]
        self.score = Score()

        self.team_1_score = 0
        self.team_2_score = 0
        
        self.top_card = None
        self.trump = None
        self.dealer = 1

    def deal_cards(self, distribution_pattern=(3, 2)):
        """
        Deal the cards for a new 

        Parameters
        ----------
        

        Returns
        -------

        """
        random.shuffle(self.deck)
    
        players_hands = {
            '1': [],
            '2': [],
            '3': [],
            '4': []
        }

        deck_index = 0
        for count, player in enumerate(list(players_hands.keys()) * 2):
            if (count % 2 == 0 and count < len(players_hands)) or (count % 2 == 1 and count >= len(players_hands)):
                players_hands[player].append(self.deck[deck_index:deck_index+distribution_pattern[0]])
                deck_index += distribution_pattern[0] 
            else:
                players_hands[player].append(self.deck[deck_index:deck_index+distribution_pattern[1]])
                deck_index += distribution_pattern[1]

        for key in players_hands.keys():
            players_hands[key] =  [item for sublist in players_hands[key] for item in sublist]
                    
        return players_hands, self.deck[deck_index]
    
    def value_hand(self, hand, lead_suit=None):
        scored_hand = []
        print(self.trump)
        print(self.score.left_bower.get('♠'))
        left_bower_suit = self.score.left_bower.get(self.trump)
        print(left_bower_suit)
        for card in hand:
            rank = re.split(self.score.suits, card)[0] # get rank
            suit = re.findall(self.score.suits, card)[0]  # get suit

            rank_value = self.score.rank_order[rank]
            #print(rank_value)
            if suit == self.trump or (rank == 'J' and suit == left_bower_suit):  # is trump suite
                # add highest rank card to rank to ensure it is higher than other cards
                card_rank = rank_value + 6
                #print(card_rank)
                # see if right or left bower
                if rank == 'J':
                    card_rank = card_rank + 5 if suit == self.trump else card_rank + 4
                    #print(card_rank)
            elif lead_suit == None or suit == lead_suit:  # non trump card that is lead suit
                card_rank = rank_value  
            else:  # non trump card and not lead suit
                card_rank = 0
            scored_hand.append(card_rank)
            if card_rank == 9:
                #print(rank)
                #print(suit)
                #print(self.trump)
                pass
        scored_hand.sort()
        return scored_hand
    
    def reset_game(self):
        """
        Reset the game.
        """
        self.players = [Player(id=id + 1, team_id=1 if id % 2 == 0 else 2) for id in range(self.num_players)]
        
        self.team_1_score = 0
        self.team_2_score = 0

        self.top_card = None
        self.trump = None

        self.dealer = np.random.choice(range(self.num_players)) + 1

        self.reset_hand()

    def reset_hand(self):
        """
        Reset the hand.
        """
        for player in self.players:
            player.points = 0
            
        players_hands, top_card = self.deal_cards(distribution_pattern=(3, 2))

        for count, player in enumerate(self.players):
            player.set_hand(players_hands[str(count + 1)])

        self.top_card = top_card
        self.trump = re.findall(self.score.suits, self.top_card)[0]

        self.reset_trick()

    def reset_trick(self):
        """
        Reset the trick.
        """
        self.dealer = 1 if self.dealer == 4 else self.dealer + 1  

    def reward(self, won_trick, teammate_won) -> float:
        """
        TODO

        Returns
        -------
        float
            The reward based on the action.
        """
        if won_trick:
            return 1
        elif teammate_won:
            return 0.5
        else:
            return -0.1

    def hand_step(self, agent) -> dict:
        """
        TODO

        Parameters
        ----------
        a : location of card to play

        Returns
        -------
        dict
            TODO
        """
        done = False
        states = []
        rewards = []
        actions = []

        current_player_id = 1 if self.dealer == 4 else self.dealer + 1  # get the player to the left of the dealer to start the hand
        player_choose_trump = [player for player in self.players if player.id == current_player_id][0]  # assuming the player to the left of dealer chooses trump and starts (not based off of actual rules)
        player_choose_trump.trick_team = 'makers'  # assign the first player as the Makers

        # loop through 5 tricks (each player should have 5 cards)
        for _ in range(5):
            lead_suit = ''  # init lead suit

            # loop through each player for their turn
            for x in range(self.num_players):
                current_player = [player for player in self.players if player.id == current_player_id][0]  # get current player based on id
                if current_player.id == 1:
                    s = self.value_hand(current_player.hand)
                    a = agent.pi(str(s))
                    current_player.current_card = current_player.hand[a] #TODO fix indexing issue
                    self.trick_step(current_player, a)
                    r_pr = 1#update['reward']
                    s_pr = self.value_hand(current_player.hand)
                    states.append(s)
                    actions.append(a)
                    rewards.append(r_pr)
                    s = s_pr    #set the current state to the new state
                    #TODO update states and actons and rewards with the agent doing a step here, trick step will allow us to batch update with the s,r,a for the whole hand.
                    #current_player.current_card = current_player.trick_step(agent)  # set the current card of the player  
                else:
                    current_player.current_card = current_player.choose_card(random=True)  # set the current card of the player			current_player.current_card = current_player.trick_step(agent)  # set the current card of the player
                # if it is the first player, determine the lead suit
                if x == 0:
                    lead_suit = re.findall(self.score.suits, current_player.current_card)[0] # use regex

                current_player_id = 1 if current_player_id == 4 else current_player_id + 1  # update the current player id to the next player
            trump_suit =  re.findall(self.score.suits, self.top_card)[0]  # determine the trump suit

            # find the player id who won the trick
            highest_player_id = self.score.score_trick(
                players=self.players,
                trump_suit=trump_suit,
                lead_suit=lead_suit
            )
            highest_player = [player for player in self.players if player.id == highest_player_id][0]  # get player with won trick
            highest_player.points += 1  # update their points by 1 for the hand
            current_player_id = highest_player_id  # the player who won the trick will lead the next trick

        hand_scores = self.score.score_hand(self.players, solo_call=False)  # calculate the hand scores by looking at each players points for all tricks
        
        # update team scores
        if player_choose_trump.team_id == 1:
            self.team_1_score += hand_scores['makers']
            self.team_2_score += hand_scores['defenders']
        else:
            self.team_2_score += hand_scores['makers']
            self.team_1_score += hand_scores['defenders']

        self.reset_hand()  # reset hand

        if self.team_1_score >= 10 or self.team_2_score >= 10:
            done = True


        return {'states':states,'rewards':rewards,'actions':actions,'done':done}


    def trick_step(self, player, a): #TODO fix indexing of card to remove
        player.remove_card(a)

    def step(self, a) -> dict:
        """
        TODO

        Parameters
        ----------
        a : location of card to play

        Returns
        -------
        dict
            TODO
        """
        done = False
        if a == "order_it_up":
            pass
        elif a == "pass":
            pass
        if self.team_1_score >= 10 or self.team_2_score >= 10:
            done = True


        return {'state':[],'reward':1,'done':done}

In [3]:
#Create 15^5 + 15^4 + 15^3 + 15^2 (7093) states of possible vales in all 5 spots of the hand sorted.
def compute_hands():
	hands = {}
	values = [0,1,2,3,4,5,6,7,8,10,11,12,13,14]
	for i in [0,1,2,3,4,5]:
		states = itertools.product(values, repeat=i)
		count = 0
		for state in states:
			state = list(state)
			valid = True
			for j in [1,2,3,4,5,6]: # remove states that are not possible i.e. more than 3 of non-trump same numbers, or more than 1 of trump same numbers
				if state.count(j) > 3:
					valid = False
					break
			if valid:
				for j in [7,8,10,11,12,13,14]:
					if state.count(j) > 1:
						valid = False
						break
			state.sort()
			if valid and str(state) not in hands.keys(): # if state not already in boards, add
				count += 1
				hands[str(state)] = 0
	return hands

all_hands = compute_hands()
print(len(all_hands.keys())) # confirm 7093 states

7093


In [4]:
class Classical_Agents:
	def __init__(self, config):
		self.A = range(5)
		self.config = config
		self.Q = {} 
		self.Z = {}
		self.shots = {}
		for s in all_hands.keys(): #give Q and Z dictionarys to keep track of related values for future use
			self.Q[str(s)] = {}
			self.Z[str(s)] = {}
			for a in self.A:
				self.Q[str(s)][str(a)] = round(np.random.uniform(),2) #pick random starting Q for each s,a
				self.Z[str(s)][str(a)] = []

	
	def pi(self, s):
		probs = []
		a = list(self.Q[s].keys())[np.argmax(list(self.Q[s].values()))]
		for action in self.A:
			action = str(action)
			if action == a: # if the action is the best action, give it the highest prob
				probs.append(1-self.config['epsilon']+self.config['epsilon']/len(self.A))
			else:
				probs.append(self.config['epsilon']/len(self.A))
		a = np.random.choice(self.A, p=probs) # make a random choice based on the probabilities.
		return a
	
	def update_Monte_Carlo(self,s,a):
		self.Q[str(s)][str(a)] = np.average(self.Z[str(s)][str(a)])
	
	def update_Q_SARSA(self,s,a,r_pr,s_pr,a_pr):
		self.Q[str(s)][str(a)] = (self.Q[str(s)][str(a)] + self.config['alpha']*(r_pr + self.config['gamma']*self.Q[str(s_pr)][str(a_pr)] - self.Q[str(s)][str(a)]))    #perform the SARSA update
	
	def update_Q_learning(self,s,a,r_pr,s_pr):
		self.Q[str(s)][str(a)] = (self.Q[str(s)][str(a)] + self.config['alpha']*(r_pr + self.config['gamma']*max(list(self.Q[str(s_pr)].values())) - self.Q[str(s)][str(a)]))    #perform the Q-learning update
	
	def update_Expected_SARSA(self,s,a,r_pr,s_pr):
		pi = [self.config['epsilon']/2]*(len(self.Q[str(s_pr)].values()))
		pi[np.argmax(list(self.Q[str(s_pr)].values()))] = 1 - self.config['epsilon'] + self.config['epsilon']/2
		expectation = sum(np.array(pi)*np.array(list(self.Q[str(s_pr)].values())))
		self.Q[str(s)][str(a)] = (self.Q[str(s)][str(a)] 
							+ self.config['alpha']
								*(r_pr + self.config['gamma']*expectation - self.Q[str(s)][str(a)]))    #perform the expected SARSA update

In [5]:
class Agent:
    """
    A class to manage the euchre board.
    """

    def __init__(self, config: dict) -> None:
        """
        The constructor for the class.

        Parameters
        ----------
        config : dict
            The configuration parameters for euchre.
        """
        self.config = config

In [6]:
env_config = {
    'score_to_win': 10,
    'values': ['9','10','J','Q','K','A']
}

classical_agent_config = {
    'alpha':0.1,
    'gamma':0.9,
	'epsilon':0.05,
	}

euchre = Euchre(env_config)
agent = Classical_Agents(classical_agent_config)

In [7]:
euchre.reset_hand()

for player in euchre.players:
    print(player.hand, player.team_id)

euchre.top_card, euchre.dealer

['9♥', 'J♥', 'A♦', 'K♠', '9♣'] 1
['10♠', '9♦', 'K♣', '9♠', 'A♥'] 2
['10♥', '10♣', 'K♦', 'Q♠', 'K♥'] 1
['Q♦', 'J♦', 'A♣', '10♦', 'J♣'] 2


('Q♣', 2)

In [8]:
def play_game(euchre, agent=None):
    hands = 1  # keep track of which game we are on
    while euchre.team_1_score < euchre.score_to_win and euchre.team_2_score < euchre.score_to_win:  # while game not complete
        current_player_id = 1 if euchre.dealer == 4 else euchre.dealer + 1  # get the player to the left of the dealer to start the hand
        player_choose_trump = [player for player in euchre.players if player.id == current_player_id][0]  # assuming the player to the left of dealer chooses trump and starts (not based off of actual rules)
        player_choose_trump.trick_team = 'makers'  # assign the first player as the Makers

        # loop through 5 tricks (each player should have 5 cards)
        for _ in range(5):
            lead_suit = ''  # init lead suit

            # loop through each player for their turn
            for x in range(euchre.num_players):
                current_player = [player for player in euchre.players if player.id == current_player_id][0]  # get current player based on id
                if agent == None:
                    current_player.current_card = current_player.choose_card(random=True)  # set the current card of the player
                else:
                    current_player.current_card = current_player.choose_card_agent(agent)  # set the current card of the player
                # if it is the first player, determine the lead suit
                if x == 0:
                    lead_suit = re.findall(euchre.score.suits, current_player.current_card)[0] # use regex

                current_player_id = 1 if current_player_id == 4 else current_player_id + 1  # update the current player id to the next player

            trump_suit =  re.findall(euchre.score.suits, euchre.top_card)[0]  # determine the trump suit

            # find the player id who won the trick
            highest_player_id = euchre.score.score_trick(
                players=euchre.players,
                trump_suit=trump_suit,
                lead_suit=lead_suit
            )
            highest_player = [player for player in euchre.players if player.id == highest_player_id][0]  # get player with won trick
            highest_player.points += 1  # update their points by 1 for the hand
            current_player_id = highest_player_id  # the player who won the trick will lead the next trick

        hand_scores = euchre.score.score_hand(euchre.players, solo_call=False)  # calculate the hand scores by looking at each players points for all tricks
        
        # update team scores
        if player_choose_trump.team_id == 1:
            euchre.team_1_score += hand_scores['makers']
            euchre.team_2_score += hand_scores['defenders']
        else:
            euchre.team_2_score += hand_scores['makers']
            euchre.team_1_score += hand_scores['defenders']

        hands += 1  # update hand counter
        euchre.reset_hand()  # reset hand


In [9]:
euchre.reset_game()
play_game(euchre)
print(f'Team 1 Score: {euchre.team_1_score}\t\tTeam 2 Score: {euchre.team_2_score}')

Team 1 Score: 10		Team 2 Score: 6


In [10]:
def q_learning_training(agent, euchre, N):
	euchre.reset_game()
	for i in tqdm(range(N)):    #loop over games
		s = euchre.value_hand(euchre.players[0].hand)    #choose an initial state -- center of the walk
		print(s)
		update = {'state':[],'done':False}
		count = 0
		while not update['done']:    #while the episode is ongoing...
			update = euchre.hand_step(agent)
			update = euchre.step()   #choose a new state
			count += 4
			r_pr = update['rewards']
			s = update['states']
			a = update['actions']
			for i in range(5):
				if i != 5:
					agent.update_Q_learning(str(s[i]),str(a[i]),r_pr[i],str(s[i+1]))
				else:
					agent.update_Q_learning(str(s[i]),str(a[i]),r_pr[i],[])
			euchre.reset_hand()
		euchre.reset()
		agent.shots[str(i)] = count
	return agent

In [11]:
q_learning_training(agent, euchre, 1)

  0%|          | 0/1 [00:00<?, ?it/s]


♦
♣
♥
[2, 4, 6, 8, 13]
♦
♣
♥
♦
♣
♥
♦
♣
♥


IndexError: list index out of range

In [None]:
print(euchre.score.left_bower.get("♣"))

In [None]:
print(euchre.score.left_bower)