In [117]:
import logging
from collections import defaultdict
import numpy as np

from jass.game.game_util import *
from jass.game.game_observation import GameObservation
from jass.game.const import *
from jass.game.rule_schieber import RuleSchieber
from jass.agents.agent import Agent
from jass.arena.arena import Arena
from jass.agents.agent_random_schieber import AgentRandomSchieber
from jass.game.game_rule import GameRule
from jass.game.game_sim import GameSim
from jass.game.game_state_util import *

In [202]:
# A single GameSim instance is shared by all tree nodes.
# Every node stores a GameState.
# Moves are picked at random when simulating a game to its end.
ruleSchieber = RuleSchieber()
gameSim = GameSim(RuleSchieber())


class MonteCarloTreeSearchNode():
    """Node of a Monte Carlo search tree over card-play decisions.

    Each node stores the game state reached after ``parent_action`` was played
    in the parent's state. Playout outcomes are recorded from the point of
    view of team 0 (``+1`` when ``points[0] > points[1]``, ``-1`` otherwise);
    ``best_child`` flips the sign when the player to move belongs to the
    other team.

    The class relies on the module-level ``ruleSchieber`` and ``gameSim``
    objects.
    """

    def __init__(self, gameState, parent=None, parent_action=None):
        """Create a node.

        Args:
            gameState: state of the game at this node.
            parent: node this one was expanded from (``None`` for the root).
            parent_action: card played in ``parent`` to reach this node.
        """
        self.gameState = gameState
        self.parent = parent
        self.parent_action = parent_action
        self.children = []
        self._number_of_visits = 0
        # Outcome counters keyed by playout result (+1 win / -1 loss for team 0).
        self._results = defaultdict(int)
        self._results[1] = 0
        self._results[-1] = 0
        self._untried_actions = self.untried_actions()

    def untried_actions(self):
        """(Re)compute the cards playable in this node's state.

        Returns:
            Array of card indices; empty when the game is over at this node.
        """
        if self.is_terminal_node():
            # Nobody is to move once all cards are played, so skip the rule lookup.
            self._untried_actions = np.array([], dtype=np.intp)
            return self._untried_actions

        valid_cards = ruleSchieber.get_valid_cards(self.gameState.hands[self.gameState.player, :],
                                                   self.gameState.current_trick, self.gameState.nr_cards_in_trick,
                                                   self.gameState.trump)
        # get_valid_cards yields a per-card mask; keep the indices of the set entries.
        self._untried_actions = np.flatnonzero(valid_cards)
        return self._untried_actions

    def differenceWinLoss(self):
        """Return (team-0 wins) minus (team-0 losses) recorded at this node."""
        wins = self._results[1]
        loses = self._results[-1]
        return wins - loses

    def numberOfVisits(self):
        """Return how many playouts were backpropagated through this node."""
        return self._number_of_visits

    def expand(self):
        """Play one untried card and attach the resulting state as a new child.

        Returns:
            The newly created child node.
        """
        action, self._untried_actions = self._untried_actions[-1], self._untried_actions[:-1]
        action = int(action)
        # Initialise the simulator with this node's state and advance it by one card.
        gameSim.init_from_state(self.gameState)
        gameSim.action_play_card(action)
        # The child must hold the state AFTER the card was played (the original
        # passed self.gameState, so every child was a copy of its parent).
        # NOTE(review): assumes init_from_state() copies its argument, so the
        # state object handed to the child is not mutated by later simulations.
        child_node = MonteCarloTreeSearchNode(gameSim.state, parent=self, parent_action=action)
        self.children.append(child_node)
        return child_node

    def is_terminal_node(self):
        """Return True when all 36 cards have been played."""
        return self.gameState.nr_played_cards == 36

    def rollout(self):
        """Play the game to its end with random valid cards.

        Returns:
            ``1`` if team 0 ends with more points than team 1, ``-1`` otherwise.
        """
        gameSim.init_from_state(self.gameState)

        while not gameSim.is_done():
            obs = gameSim.get_observation()
            valid_cards = ruleSchieber.get_valid_cards(obs.hand,
                                                       obs.current_trick,
                                                       obs.nr_cards_in_trick,
                                                       obs.trump)
            # Convert the mask to card indices before sampling; sampling the
            # mask itself would return 0/1 instead of a card.
            possible_moves = np.flatnonzero(valid_cards)
            action = self.rollout_policy(possible_moves)
            gameSim.action_play_card(action)

        points = gameSim.state.points
        # -1 (not 0) so that the loss lands in the bucket differenceWinLoss() reads.
        return 1 if points[0] > points[1] else -1

    def backpropagate(self, result):
        """Record ``result`` at this node and at all of its ancestors."""
        self._number_of_visits += 1
        self._results[result] += 1
        if self.parent:
            self.parent.backpropagate(result)

    def is_fully_expanded(self):
        """Return True when every playable card already has a child node."""
        return len(self._untried_actions) == 0

    def best_child(self, c_param=0.1):
        """Select the child with the highest UCT score.

        Args:
            c_param: weight of the exploration term.

        Returns:
            The child maximising exploitation + exploration, where the
            exploitation term is seen from the team of the player to move.
        """
        # Results are stored from team 0's view; negate them when the player
        # choosing here is on the other team.
        # NOTE(review): assumes even player ids form team 0 — confirm against jass.game.const.
        perspective = 1 if self.gameState.player % 2 == 0 else -1
        log_visits = np.log(self.numberOfVisits())
        choices_weights = [
            perspective * (c.differenceWinLoss() / c.numberOfVisits())
            + c_param * np.sqrt(2 * log_visits / c.numberOfVisits())
            for c in self.children
        ]
        return self.children[np.argmax(choices_weights)]

    def rollout_policy(self, possible_moves):
        """Pick one entry of ``possible_moves`` uniformly at random."""
        return possible_moves[np.random.randint(len(possible_moves))]

    def _tree_policy(self):
        """Descend via ``best_child`` until a node can be expanded or is terminal.

        Returns:
            A freshly expanded child, or the terminal node that was reached.
        """
        current_node = self
        while not current_node.is_terminal_node():
            if not current_node.is_fully_expanded():
                return current_node.expand()
            else:
                current_node = current_node.best_child()
        return current_node

    def best_action(self, simulation_no=100):
        """Run the search from this node and return its most promising child.

        Args:
            simulation_no: number of select/expand/rollout/backpropagate iterations.

        Returns:
            The selected child node; its ``parent_action`` is the card to play.
        """
        for _ in range(simulation_no):
            v = self._tree_policy()
            reward = v.rollout()
            v.backpropagate(reward)

        return self.best_child(c_param=0.1)

In [196]:
def calculate_trump_selection_score(cards, trump: int) -> int:
    """Rate a hand for a given trump choice by summing per-card heuristic values.

    Args:
        cards: iterable of card indices (looked up in ``color_of_card`` and
            ``offset_of_card``).
        trump: trump candidate; 4 selects the obenabe table, 5 the uneufe
            table, any other value is compared against each card's color.

    Returns:
        Sum of the table values of all cards.
    """
    # Value tables, indexed by a card's offset within its color (Ace down to 6).
    value_as_trump = [15, 10, 7, 25, 6, 19, 5, 5, 5]
    value_as_plain = [9, 7, 5, 2, 1, 0, 0, 0, 0]
    value_obenabe = [14, 10, 8, 7, 5, 0, 5, 0, 0, ]
    value_uneufe = [0, 2, 1, 1, 5, 5, 7, 9, 11]

    total = 0
    for card in cards:
        suit = color_of_card[card]
        rank = offset_of_card[card]

        # Choose the table that applies to this card under the given trump.
        if trump == 4:
            table = value_obenabe
        elif trump == 5:
            table = value_uneufe
        elif suit == trump:
            table = value_as_trump
        else:
            table = value_as_plain

        total = total + table[rank]

    return total

In [197]:
def generate_random_hands(player_hand, player_view=0):
    """Deal the cards not in ``player_hand`` randomly to the three other seats.

    The original body read the seat from ``self.game_obs.player_view``, which
    does not exist in a free function; the seat is now an explicit argument.

    Args:
        player_hand: one-hot array of length 36 with the known hand.
        player_view: seat index (0..3) that receives ``player_hand``.

    Returns:
        Integer array of shape (4, 36). Row ``player_view`` equals
        ``player_hand``; each other row gets up to 9 of the remaining cards
        (exactly 9 when ``player_hand`` holds 9 cards).
    """
    player_hand = np.asarray(player_hand)

    # Cards the player does not hold, in random order.
    cards = np.flatnonzero(player_hand == 0).astype(np.int32)
    np.random.shuffle(cards)

    hands = np.zeros(shape=[4, 36], dtype=np.int32)

    i = 0  # counts the other seats served so far
    for x in range(0, 4):
        if x == player_view:
            hands[x, :] = player_hand
        else:
            # Each other seat receives the next block of 9 shuffled cards.
            hands[x, cards[9 * i:9 * (i + 1)]] = 1
            i += 1
    return hands

In [198]:
def get_hands(obs):
    """Build one random full deal that is consistent with an observation.

    The observing player keeps ``obs.hand``. All cards that are neither in
    that hand nor already played are shuffled and dealt to the other players.
    The original pool only removed the own hand, so cards that had already
    been played could be dealt again.

    Args:
        obs: observation providing ``hand``, ``player``, ``tricks``,
            ``nr_tricks``, ``nr_cards_in_trick`` and ``trick_first_player``.
            NOTE(review): assumes ``obs.tricks`` holds card indices with
            negative entries for slots not played yet — confirm against
            GameObservation.

    Returns:
        Integer array of shape (4, 36) with one one-hot hand per player.
    """
    hands = np.zeros([4, 36], dtype=np.int32)
    hands[obs.player, :] = obs.hand[:]

    # Cards whose location is known: the own hand plus everything already played.
    known = np.array(obs.hand, dtype=bool)
    played = np.asarray(obs.tricks).ravel()
    known[played[played >= 0]] = True

    # The remaining cards are hidden in the other hands; shuffle them for dealing.
    cards = np.flatnonzero(~known).astype(np.int32)
    np.random.shuffle(cards)

    # determine players in current trick to find out the number of cards for each player
    trick_players = []
    if obs.nr_cards_in_trick > 0:
        player = obs.trick_first_player[obs.nr_tricks]
        for _ in range(obs.nr_cards_in_trick):
            trick_players.append(player)
            player = next_player[player]
    assert len(trick_players) == obs.nr_cards_in_trick

    # distribute unknown cards: as many as the current player holds, or one less
    # for players that already played in the current trick
    len_hand = np.count_nonzero(obs.hand)
    for p in range(hands.shape[0]):
        if p != obs.player:
            pn = len_hand if p not in trick_players else len_hand - 1
            hands[p][cards[0:pn]] = 1
            cards = cards[pn:]
    return hands


In [214]:
class DMCTSAgent(Agent):
    """Agent that picks trump by a hand-scoring heuristic and plays cards by
    running a Monte Carlo tree search on one randomly determinized deal."""

    # Minimum heuristic score required to declare trump instead of pushing.
    PUSH_THRESHOLD = 68

    def __init__(self):
        self._logger = logging.getLogger("DMTSAgent")
        super().__init__()

        self._rule = RuleSchieber()
        # Counter of card decisions taken, kept modulo 9.
        self.round = 0

    def action_trump(self, obs: GameObservation) -> int:
        """Choose trump from the heuristic score of the hand.

        Args:
            obs: observation of the game at trump selection.

        Returns:
            The highest-scoring trump (0..5), or ``PUSH`` when
            ``obs.forehand == -1`` and that score is below ``PUSH_THRESHOLD``.
        """
        player_cards = convert_one_hot_encoded_cards_to_int_encoded_list(obs.hand)
        trump_scores = {
            trump: calculate_trump_selection_score(player_cards, trump)
            for trump in range(0, 6)
        }

        highest_trump = max(trump_scores, key=trump_scores.get)
        highest_value = trump_scores[highest_trump]

        if obs.forehand == -1:
            # if forehand is not set yet, we are the forehand player
            if highest_value < self.PUSH_THRESHOLD:
                return PUSH

        return highest_trump

    def action_play_card(self, obs: GameObservation) -> int:
        """Choose a card by running MCTS on a sampled deal.

        Args:
            obs: observation of the game for the player to move.

        Returns:
            Index of the card to play.
        """
        self.round += 1
        self.round = self.round % 9

        # Sample the hidden hands once and search on exactly that deal
        # (the original sampled twice and discarded the first deal).
        hands = get_hands(obs)
        gameState = state_from_observation(obs, hands)
        mcts = MonteCarloTreeSearchNode(gameState)
        best_action = mcts.best_action().parent_action

        print(convert_one_hot_encoded_cards_to_str_encoded_list(obs.hand))

        print(card_strings[best_action])
        print("-------------------------------------------------")

        return best_action

In [210]:
def main(nr_games=1):
    """Play DMCTSAgent (seats 1 and 3 of ``set_players``) against random
    agents and print the mean points of both teams.

    Args:
        nr_games: number of games the arena plays.
    """
    # setup the arena
    arena = Arena(nr_games_to_play=nr_games)
    player = AgentRandomSchieber()
    my_player = DMCTSAgent()

    arena.set_players(my_player, player, my_player, player)
    print('Playing {} games'.format(arena.nr_games_to_play))
    arena.play_all_games()
    # The original format strings ended in an unmatched ')'.
    print('Average Points Team 0: {:.2f}'.format(arena.points_team_0.mean()))
    print('Average Points Team 1: {:.2f}'.format(arena.points_team_1.mean()))

In [215]:
# Run the demo match when this cell/script is executed directly.
if __name__ == '__main__':
    main()


Playing 1 games
['D9', 'D8', 'HA', 'HJ', 'H9', 'H7', 'H6', 'S7', 'C10']
D9
-------------------------------------------------
['DQ', 'DJ', 'D10', 'H8', 'SA', 'SQ', 'S10', 'CA', 'C6']
DJ
-------------------------------------------------
['DQ', 'D10', 'H8', 'SA', 'SQ', 'S10', 'CA', 'C6']
CA
-------------------------------------------------
['D8', 'HA', 'HJ', 'H9', 'H7', 'H6', 'S7', 'C10']
C10
-------------------------------------------------
['DQ', 'D10', 'H8', 'SA', 'SQ', 'S10', 'C6']
C6
-------------------------------------------------
['D8', 'HA', 'HJ', 'H9', 'H7', 'H6', 'S7']
HJ
-------------------------------------------------
['DQ', 'D10', 'H8', 'SA', 'SQ', 'S10']
H8
-------------------------------------------------
['D8', 'HA', 'H9', 'H7', 'H6', 'S7']
H6
-------------------------------------------------
['D8', 'HA', 'H9', 'H7', 'S7']
S7
-------------------------------------------------
['DQ', 'D10', 'SA', 'SQ', 'S10']
SQ
-------------------------------------------------
['D8', 'HA'