In [1]:
import pyspiel
import numpy as np
import random

In [2]:
# Load the Goofspiel game through OpenSpiel; no parameter overrides are passed,
# so whatever defaults the library defines for "goofspiel" apply.
game = pyspiel.load_game("goofspiel")

Layout of `state.information_state_tensor(player_id)` in Goofspiel:

 The first 92 values are a one-hot vector encoding player 0's score.
 
 The following 92 values are a one-hot vector encoding player 1's score.
 
 The following 13 values are a one-hot vector encoding the 1st point card if it has been opened; otherwise they are all 0.
 
 The next 12*13 values encode the remaining 12 point cards in the same way, 13 values per card.
 
 The final 26 values encode the players' hands.


In the cell below, a deterministic agent, which always bids the card whose value equals the point card currently revealed in the middle, plays against a random agent, which always bids one of its remaining cards uniformly at random. A total of `num_episodes` games are played, and the win rate of the deterministic agent is reported.

In [5]:
# Play `num_episodes` games: player 0 always bids the card whose value equals
# the point card currently being bid on, player 1 bids a uniformly random
# legal card. The fraction of games with a positive return for player 0 is
# printed at the end.
#
# NOTE: this cell relies on `game` and on the helper `current_point_card`;
# both must already be defined in the kernel before this cell is run.
num_episodes = 1000

# Only positive returns are accumulated, so dividing by `num_episodes` gives
# the reported win rate (assumes a win is returned as +1 -- TODO confirm).
deterministic_agent_returns = 0
random_agent_returns = 0

for episode in range(num_episodes):
    state = game.new_initial_state()
    turn = 1  # 1-based index of the point card currently being bid on

    while not state.is_terminal():

        if state.is_chance_node():
            # Chance node: sample an outcome according to its probability.
            outcomes = state.chance_outcomes()
            action_list, prob_list = zip(*outcomes)
            action = np.random.choice(action_list, p=prob_list)
            state.apply_action(action)

        elif state.is_simultaneous_node():
            # Simultaneous node: start from a uniformly random legal action
            # for every player ...
            chosen_actions = [
                random.choice(state.legal_actions(pid))
                for pid in range(game.num_players())
            ]
            # ... then override player 0 with the deterministic policy. The
            # action passed is the card value minus one.
            chosen_actions[0] = current_point_card(state.information_state_tensor(0), turn) - 1
            state.apply_actions(chosen_actions)

            turn += 1

        else:
            # Neither branch above advanced the state; without this guard the
            # loop would never terminate.
            raise RuntimeError(
                "Unexpected node type: state is neither terminal, chance nor simultaneous."
            )

    returns = state.returns()
    deterministic_agent_returns += max(0, returns[0])
    random_agent_returns += max(0, returns[1])

print("deterministic agent win rate = {}".format(deterministic_agent_returns/num_episodes))


deterministic agent win rate = 0.957


In [4]:
def current_point_card(info_state, turn, num_cards=13, score_offset=184):
    '''
    Returns the value of the point (prize) card opened on the given turn.

    The information-state tensor is read according to the layout described
    in this notebook: two 92-value one-hot score vectors (184 values in
    total) followed by one 13-value one-hot slot per point card, in the
    order in which the cards are opened.

    Parameters
    ----------
    info_state : sequence of numbers
        Flat information-state tensor supporting slicing.
    turn : int
        1-based index of the point card to read.
    num_cards : int, optional
        Width of each point-card slot (default 13).
    score_offset : int, optional
        Number of values preceding the first point-card slot (default 184).

    Returns
    -------
    Card value in the range 1..num_cards (NumPy integer).

    Raises
    ------
    ValueError
        If `turn` is smaller than 1, if the tensor is too short to contain
        the requested slot, or if the slot is all zeros (card not opened).
    '''
    if turn < 1:
        raise ValueError("turn must be >= 1, got {}".format(turn))

    start = score_offset + (turn - 1) * num_cards
    one_hot = np.asarray(info_state[start:start + num_cards])

    if len(one_hot) != num_cards:
        raise ValueError(
            "info_state is too short to hold the point card of turn {}".format(turn)
        )
    # argmax of an all-zero slot is 0, which would be misreported as card 1.
    if not one_hot.any():
        raise ValueError(
            "the point card of turn {} has not been opened yet".format(turn)
        )

    return np.argmax(one_hot) + 1