# DL4G - Jass Introduction

In this exercise we will look at some properties of the jass kit environment that can be used to develop your own jass agent.

You will need to have numpy installed, as well as the jass-kit environment.

In [62]:
from jass.agents.agent_rule_based import AgentRuleBased
from jass.game.game_util import *
from jass.game.game_sim import GameSim
from jass.game.game_observation import GameObservation
from jass.game.const import *
from jass.game.rule_schieber import RuleSchieber
from jass.agents.agent import Agent
from jass.agents.agent_random_schieber import AgentRandomSchieber
from jass.arena.arena import Arena
# import agent_rule_based.py
from jass.agents.agent_rule_based import AgentRuleBased
import math
import random
import copy


Information about the cards is stored as one-hot encoded arrays, there are several tools available to access the information in the cards. 

Lets deal some random cards first.

In [63]:
# Lets set the seed of the random number generater, so that we get the same results
np.random.seed(1)

# This distributes the cards randomly among the 4 players.
hands = deal_random_hand()
print(hands.shape)

(4, 36)


In [64]:
# There is an entry for each player, to access the cards of the first player
cards = hands[0,:]
print(cards)

[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 0]


In [65]:
# This should be 9 cards
assert(cards.sum() == 9)

# The cards can be converted to other formats for easier reading or processing
print(convert_one_hot_encoded_cards_to_str_encoded_list(cards))

# Each card is encoded as a value between 0 and 35.
print(convert_one_hot_encoded_cards_to_int_encoded_list(cards))


[np.str_('DJ'), np.str_('H6'), np.str_('SK'), np.str_('SJ'), np.str_('S9'), np.str_('CK'), np.str_('CQ'), np.str_('CJ'), np.str_('C7')]
[3, 17, 19, 21, 23, 28, 29, 30, 34]


In [66]:
# There is a method to count colors too
colors = count_colors(cards)
print(colors)

[1 1 3 4]


There is a common jass "rule" to select trump, when you have the "Puur" (Jack of trump) and 3 or more other cards of the same color. 

Task 1: Write a function that returns an array of 4 values that contains a 1 for each color that fulfills the rule or 0 otherwise, i.e. [0 0 0 0] is returned, if you do not have any color with Jack and 3 other cards.


In [67]:
def havePuurWithFour(hand: np.ndarray) -> np.ndarray:
    result = np.zeros(4, dtype=int)
    # add your code here
    for i in range(4):
        if colors[i] > 3:
            # checks if there is a buur
            if hand[3 + 9 * i] == 1:
                result[i] = 1
    return result
        

In [68]:
assert (havePuurWithFour(cards) == [0, 0, 0, 1]).all()
cards_2 = hands[1,:]
assert (havePuurWithFour(cards_2) == [0, 0, 0, 0]).all()

Another possibility to select trump is by assigning a value to each card, depending on whether the color is trump or not. This table is from the Maturawork of Daniel Graf from 2009: "Jassen auf Basis der Spieltheorie".

In [69]:
# Score for each card of a color from Ace to 6

# score if the color is trump
trump_score = [15, 10, 7, 25, 6, 19, 5, 5, 5]
# score if the color is not trump
no_trump_score = [9, 7, 5, 2, 1, 0, 0, 0, 0]
# score if obenabe is selected (all colors)
obenabe_score = [14, 10, 8, 7, 5, 0, 5, 0, 0,]
# score if uneufe is selected (all colors)
uneufe_score = [0, 2, 1, 1, 5, 5, 7, 9, 11]

Task 2: Implement a function that evaluates a hand that is given as a list of 9 cards and with a given trump value and returns a score depending on the table above. For example the score of our hand ['DJ', 'H6', 'SK', 'SJ', 'S9', 'CK', 'CQ', 'CJ', 'C7'] when Club is trump should be:

2 + 0 + 7 + 2 + 0 + 10 + 7 + 25 + 5 = 58

while the score is 70 if Spade is selected, which is better as you have both the jack and the nine.

You can use the arrays offset_of_card and color_of_card to get the offset (Ace, King, etc.) and color of a card.

In [70]:
class Node:
    def __init__(self, game_obs, parent=None, move=None):
        self.game_obs = game_obs
        self.parent = parent
        self.move = move
        self.children = []
        self.visits = 0
        self.total_reward = 0
        self.ruleset = RuleSchieber()

    def add_child(self, child_node):
        self.children.append(child_node)

    def is_leaf(self):
        return len(self.children) == 0

    def is_fully_expanded(self):
        valid_moves = self.ruleset.get_valid_cards_from_obs(self.game_obs)
        return len(valid_moves) == len(self.children)

    def best_child(self, exploration_param=1.41):
        # print("selecting best child")
        # print(self.children)
        best = max(self.children, key=lambda child:
                   (child.total_reward / child.visits) +
                   exploration_param * math.sqrt(math.log(self.visits) / child.visits))
        # print(f"best_cild: {best}\ntotal reward: {best.total_reward}")
        return best

In [71]:
class MCTS:
    def __init__(self, player_id, max_depth=15):
        self.player_id = player_id
        self.ruleset = RuleSchieber()
        self.max_depth = max_depth
    def selection(self, node):
        # tree policy
        while not node.game_obs.nr_tricks == 9 and node.is_fully_expanded():
            node = node.best_child()
        return node

    def simulate_move(self, game_obs, move):
        game_obs_copy = copy.deepcopy(game_obs)
        game_sim = GameSim(RuleSchieber())
        # simulated cards
        hands = self.generate_opponent_hands(game_obs_copy)
        game_sim.init_from_cards(hands, dealer=game_obs_copy.dealer)
        game_sim.action_play_card(move)
        # print(f"Move {move} simulated")
        return game_sim.get_observation()

    def expansion(self, node):
        if node.is_fully_expanded():
            return None

        valid_moves = convert_one_hot_encoded_cards_to_int_encoded_list(node.ruleset.get_valid_cards_from_obs(node.game_obs))

        tried_moves = [child.move for child in node.children]
        # print(f"tried_moves: \n{tried_moves}")
        # print(f"valid_moves: \n{valid_moves}")
        untried_moves = [move for move in valid_moves if move not in tried_moves]

        if not untried_moves:
            return None

        move = random.choice(untried_moves)
        # print(f"chosen_move:\n{move}")

        new_game_obs = self.simulate_move(node.game_obs, move)

        child_node = Node(game_obs=new_game_obs, parent=node, move=move)
        node.add_child(child_node)

        return child_node
    def evaluate_outcome(self, game_obs):
        #print(f"evaluating outcome: \n{game_obs.points}")
        if game_obs.points[0] == max(game_obs.points):
            return 1  # Win
        else:
            return -1  # Loss

    def backpropagate(self, node, outcome):
        while node is not None:
            node.visits += 1
            if node.game_obs.player_view == self.player_id:
                node.total_reward += outcome  #
            else:
                node.total_reward += -outcome  # Opponent's perspective: MCTS agent loses

            node = node.parent

    def generate_opponent_hands(self, obs):
        possible_cards = [i for i in range(36)]

        possible_cards = set(possible_cards) ^ set(convert_one_hot_encoded_cards_to_int_encoded_list(obs.hand))
        if len(obs.tricks[obs.tricks > 0]):
            possible_cards = set(possible_cards) ^ set(obs.tricks[obs.tricks != -1])
        unplayed_cards = np.full((1,4), 9 - obs.nr_tricks)[0]

        # iterate backwards through players starting from the person who started the trick
        for i in range(obs.trick_first_player[obs.nr_tricks], obs.trick_first_player[obs.nr_tricks] - obs.nr_cards_in_trick, -1):
            unplayed_cards[i % 4] -= 1 # modulo to loop back to the end, e.g. 1, 0, 3, 2
        # print(f"unplayed_cards: {unplayed_cards}")

        hands = np.zeros(shape=[4, 36], dtype=np.int32)
        hands[obs.player_view] = obs.hand

        for player_id in range(4):

            for i in range(unplayed_cards[player_id]):
                if player_id == obs.player_view:
                    continue
                int_possible_cards = list(possible_cards)
                card_choice = random.choice(int_possible_cards)
                possible_cards.remove(card_choice)

                hands[player_id] += get_cards_encoded(card_choice)
        return hands


    def simulate(self, game_obs):
        # copy the current game state to avoid modifying the original state
        simulated_state = copy.deepcopy(game_obs)

        for _ in range(self.max_depth):
            # Check if the game has ended
            if simulated_state.nr_tricks >= 9:
                break

            valid_moves = convert_one_hot_encoded_cards_to_int_encoded_list(self.ruleset.get_valid_cards_from_obs(game_obs))
            if np.sum(valid_moves) == 0:
                break

            move = random.choice(valid_moves)
            

            simulated_state = self.simulate_move(simulated_state, move)

        outcome = self.evaluate_outcome(simulated_state)

        return outcome


In [72]:
def calculate_trump_selection_score2(cards, trump: int) -> int:
    # add your code here
    trump_selection_score = 0
    print(trump_score)
    print("cards:", cards, "trump:", trump) 
    if trump < 4:
        for i in range(9):
            print("i:", i)
            if trump * 9 -1 < cards[i] < (trump + 1) * 9 -1:
                if cards[i] % 9 == 0:
                    trump_selection_score += trump_score[(cards[i] % 9)]
                trump_selection_score += trump_score[(cards[i] % 9)]
                print("score:", trump_selection_score, "cardID:", cards[i], "i", (cards[i] % 9) )
                
    return trump_selection_score    

In [73]:
def calculate_trump_selection_score(cards, trump: int) -> int:
    # Initialize trump selection score
    trump_selection_score = 0
    
    # Print debug information
    #print("cards:", cards, "trump:", trump)
    
    for i in range(9):  # Loop through 9 cards
        card_suit = cards[i] // 9  # Get the suit of the card (0 to 3)
        card_rank = cards[i] % 9   # Get the rank of the card (0 to 8)
        
        # If the card suit matches the trump suit, use trump_score
        if trump <= 3:
            if card_suit == trump:
                trump_selection_score += trump_score[card_rank]
            else:
                # Use a different score array when the card is not from the trump suit
                trump_selection_score += no_trump_score[card_rank]  # Example for handling non-trump scores
        elif trump == 4:
             trump_selection_score += obenabe_score[card_rank]
        elif trump == 5:
            trump_selection_score += uneufe_score[card_rank]
            

        # Debug output to trace computation
        #print(f"Card {i}: suit={card_suit}, rank={card_rank}, current score={trump_selection_score}")
    
    return trump_selection_score


In [74]:
card_list = convert_one_hot_encoded_cards_to_int_encoded_list(cards)
assert calculate_trump_selection_score(card_list, CLUBS) == 58
assert calculate_trump_selection_score(card_list, SPADES) == 70

## Agents

In order to play a game you have to program an agent that decides on the action. For that you have to override the methods action_trump and action_play_card.

Task 3: Use the function implemented above to select the best trump value. If the calculated trump value is below a threshold (for example let us take 68, as suggested in the work by Daniel Graf) you should "Schiebe", i.e. pass to your partner if you are still allowed to do that.

The game observation allows you to access the information about your card, and if you are the first or second player to select trump.

For playing a card, we just take a random action.

In [75]:
class MyAgent(Agent):
    def __init__(self):
        super().__init__()
        # we need a rule object to determine the valid cards
        self._rule = RuleSchieber()
        
    def calculate_trump_selection_score(self, cards, trump: int) -> int:
        # score if the color is trump
        trump_score = [15, 10, 7, 25, 6, 19, 5, 5, 5]
        # score if the color is not trump
        no_trump_score = [9, 7, 5, 2, 1, 0, 0, 0, 0]
        # score if obenabe is selected (all colors)
        obenabe_score = [14, 10, 8, 7, 5, 0, 5, 0, 0, ]
        # score if uneufe is selected (all colors)
        uneufe_score = [0, 2, 1, 1, 5, 5, 7, 9, 11]
        # Initialize trump selection score
        trump_selection_score = 0
    
        # Print debug information
        # print("cards:", cards, "trump:", trump)
    
        for i in range(9):  # Loop through 9 cards
            card_suit = cards[i] // 9  # Get the suit of the card (0 to 3)
            card_rank = cards[i] % 9  # Get the rank of the card (0 to 8)
    
            # If the card suit matches the trump suit, use trump_score
            if trump <= 3:
                if card_suit == trump:
                    trump_selection_score += trump_score[card_rank]
                else:
                    # Use a different score array when the card is not from the trump suit
                    trump_selection_score += no_trump_score[card_rank]  # Example for handling non-trump scores
            elif trump == 4:
                trump_selection_score += obenabe_score[card_rank]
            elif trump == 5:
                trump_selection_score += uneufe_score[card_rank]
    
            # Debug output to trace computation
            # print(f"Card {i}: suit={card_suit}, rank={card_rank}, current score={trump_selection_score}")
    
        return trump_selection_score
        
    def action_trump(self, obs: GameObservation) -> int:
        """
        Determine trump action for the given observation
        Args:
            obs: the game observation, it must be in a state for trump selection
        
        Returns:
            selected trump as encoded in jass.game.const or jass.game.const.PUSH
        """
        # add your code here using the function above
        if obs.forehand == -1:
        # if forehand is not yet set, we are the forehand player and can select trump or push
            trump_scores = [0] * 6 
            for i in range(6):
                trump_scores[i] = calculate_trump_selection_score(obs.hand, i)

    # if not push or forehand, select a trump
            result = trump_scores.index(max(trump_scores))
            print("result:", result)
            if result < 50:
                return PUSH
            return result

    def action_play_card(self, obs: GameObservation) -> int:
        """
        Determine the card to play.

        Args:
            obs: the game observation

        Returns:
            the card to play, int encoded as defined in jass.game.const
        """
        valid_cards = self._rule.get_valid_cards_from_obs(obs)
        # we use the global random number generator here
        return np.random.choice(np.flatnonzero(valid_cards))

In [76]:
class MCTSAgent(Agent):
    def __init__(self, max_iterations=100):
        super().__init__()
        self.max_iterations = max_iterations
        self._rule = RuleSchieber()


    def action_trump(self, obs: GameObservation) -> int:
        """
        Determine trump action for the given observation
        Args:
            obs: the game observation, it must be in a state for trump selection

        Returns:
            selected trump as encoded in jass.game.const or jass.game.const.PUSH
        """
        hand = obs.hand

        d = calculate_trump_selection_score(hand, DIAMONDS)
        s = calculate_trump_selection_score(hand, SPADES)
        h = calculate_trump_selection_score(hand, HEARTS)
        c = calculate_trump_selection_score(hand, CLUBS)
        max_points = max((d, s, h, c))

        if d == max_points:
            return DIAMONDS
        elif s == max_points:
            return SPADES
        elif h == max_points:
            return HEARTS
        else:
            return CLUBS



    def action_play_card(self, obs: GameObservation) -> int:
        """
        Determine the card to play using MCTS.
        Args:
            obs: the game observation

        Returns:
            the card to play, int encoded as defined in jass.game.const
        """

        mcts = MCTS(obs.player_view)

        root = Node(game_obs=obs)

        for _ in range(self.max_iterations):
            leaf = mcts.selection(root)
            child = mcts.expansion(leaf)
            if child:
                outcome = mcts.simulate(child.game_obs)
                mcts.backpropagate(child, outcome)
            else:
                outcome = mcts.simulate(leaf.game_obs)
                mcts.backpropagate(leaf, outcome)
                
        print("played cards", obs.nr_played_cards)

        best_move = root.best_child(exploration_param=0).move
        return best_move
        #return np.random.choice(np.flatnonzero(valid_cards))



We can use the game simulation to play a game. We will use that to test our implementation, and then use the arena class to play against other agents

In [80]:
rule = RuleSchieber()
game = GameSim(rule=rule)
agent = AgentRuleBased()
mcts_agent = MCTSAgent(max_iterations=50)


np.random.seed(1)
game.init_from_cards(hands=deal_random_hand(), dealer=NORTH)

In [78]:
obs = game.get_observation()

In [79]:
cards = convert_one_hot_encoded_cards_to_str_encoded_list(obs.hand)
print(cards)
print(obs.hand)
trump = agent.action_trump(obs)
print(f"Selected trump: {trump}", "Hearts:",HEARTS)  # Debugging output
assert trump == HEARTS

[np.str_('DA'), np.str_('DK'), np.str_('D9'), np.str_('D6'), np.str_('HA'), np.str_('HQ'), np.str_('HJ'), np.str_('H8'), np.str_('H7')]
[1 1 0 0 0 1 0 0 1 1 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Trump selection scores: [64, 72, 20, 20, 73, 41]
Selected trump result: 4
Selected trump: 4 Hearts: 1


AssertionError: 

In [None]:
# tell the simulation the selected trump
game.action_trump(trump)

In [None]:
# play the game to the end and print the result
while not game.is_done():
    game.action_play_card(agent.action_play_card(game.get_observation()))

print(game.state.points)

Another possibility to test agents locally is to use the arena. Let us play 100 games against the Random Agent and see if our trump methods makes any difference.


In [81]:
arena = Arena(nr_games_to_play=1)
arena.set_players(mcts_agent, AgentRandomSchieber(), mcts_agent, AgentRandomSchieber())

In [82]:
arena.play_all_games()

played cards 1
played cards 3
played cards 4
played cards 6
played cards 8
played cards 10
played cards 13
played cards 15
played cards 17
played cards 19
played cards 20
played cards 22
played cards 25
played cards 27
played cards 29
played cards 31
played cards 33
played cards 35



In [40]:
print(arena.points_team_0.sum(), arena.points_team_1.sum())

8101.0 7599.0


Now you can continue with a rule based implemenation of the card play. Also look at the flask implementation of the service to see how you can get your agent online.