# basics

In [1]:
# Import the OpenSpiel Python bindings and list the short names of all registered games.
import pyspiel
pyspiel.registered_names()

['2048',
 'add_noise',
 'amazons',
 'backgammon',
 'bargaining',
 'battleship',
 'blackjack',
 'blotto',
 'breakthrough',
 'bridge',
 'bridge_uncontested_bidding',
 'catch',
 'checkers',
 'chess',
 'cliff_walking',
 'clobber',
 'coin_game',
 'colored_trails',
 'connect_four',
 'coop_box_pushing',
 'coop_to_1p',
 'coordinated_mp',
 'crazy_eights',
 'cursor_go',
 'dark_chess',
 'dark_hex',
 'dark_hex_ir',
 'deep_sea',
 'dou_dizhu',
 'efg_game',
 'euchre',
 'first_sealed_auction',
 'gin_rummy',
 'go',
 'goofspiel',
 'havannah',
 'hearts',
 'hex',
 'kriegspiel',
 'kuhn_poker',
 'laser_tag',
 'leduc_poker',
 'lewis_signaling',
 'liars_dice',
 'liars_dice_ir',
 'maedn',
 'mancala',
 'markov_soccer',
 'matching_pennies_3p',
 'matrix_bos',
 'matrix_brps',
 'matrix_cd',
 'matrix_coordination',
 'matrix_mp',
 'matrix_pd',
 'matrix_rps',
 'matrix_rpsw',
 'matrix_sh',
 'matrix_shapleys_game',
 'mfg_crowd_modelling',
 'mfg_crowd_modelling_2d',
 'mfg_dynamic_routing',
 'mfg_garnet',
 'misere',
 'mor

In [2]:
# Load a game by its short name; displaying the game object shows its game string.
game = pyspiel.load_game("tic_tac_toe")
game

tic_tac_toe()

In [6]:
game.num_players()  # tic-tac-toe is a two-player game

2

In [7]:
game.max_utility(), game.min_utility()  # largest and smallest utility a player can receive

(1.0, -1.0)

In [9]:
game.num_distinct_actions()  # 9 action ids, one per cell of the 3x3 board

9

In [15]:
# Create the state at the start of the game; displaying it draws the (still empty) board.
state = game.new_initial_state()
state

...
...
...

In [16]:
state.current_player()  # player 0 moves first

0

In [17]:
state.is_terminal(), state.returns()  # game not over yet, so both players' returns are 0.0

(False, [0.0, 0.0])

In [18]:
state.legal_actions()  # all nine action ids are legal on the empty board

[0, 1, 2, 3, 4, 5, 6, 7, 8]

In [19]:
# apply_action mutates the state in place: action 1 puts player 0's x in the top-middle cell.
state.apply_action(1)
state

.x.
...
...

In [20]:
# Players alternate (o, x, o, x); the last move, 7, completes x's middle column.
for move in (2, 4, 0, 7):
    state.apply_action(move)
state

oxo
.x.
.x.

In [21]:
state.is_terminal()  # x holds the whole middle column, so the game is over

True

In [22]:
state.player_return(0)  # return for player 0 (x), who won: 1.0

1.0

In [23]:
state.current_player()  # in a terminal state this is a special negative id (-4), not a real player

-4

In [24]:
# Switch to Breakthrough, loaded without any explicit parameters.
game = pyspiel.load_game("breakthrough")
game

breakthrough()

In [25]:
# Initial position: the board is drawn with rank numbers on the left and file letters below.
state = game.new_initial_state()
state

8bbbbbbbb
7bbbbbbbb
6........
5........
4........
3........
2wwwwwwww
1wwwwwwww
 abcdefgh

In [26]:
# Game parameters go inside the game string; here they shrink the board to 6x6.
# Note that the displayed game string lists columns before rows.
game = pyspiel.load_game("breakthrough(rows=6,columns=6)")
game

breakthrough(columns=6,rows=6)

In [27]:
# Initial position on the smaller 6x6 board.
state = game.new_initial_state()
state

6bbbbbb
5bbbbbb
4......
3......
2wwwwww
1wwwwww
 abcdef

In [28]:
game.num_distinct_actions()  # size of the action id space on the 6x6 board; not every id is legal in a given state

432

In [29]:
# Show each legal action id next to its human-readable move string.
for move in state.legal_actions():
    label = state.action_to_string(move)
    print(move, label)

74 a5a4
76 a5b4
84 b5a4
86 b5b4
88 b5c4
96 c5b4
98 c5c4
100 c5d4
108 d5c4
110 d5d4
112 d5e4
120 e5d4
122 e5e4
124 e5f4
132 f5e4
134 f5f4


# normal-form games and evolutionary dynamics in openspiel

In [1]:
import pyspiel

In [2]:
# Build a 2x2 matrix game from one payoff matrix per player.
game = pyspiel.create_matrix_game(
    [[1, -1], [-1, 1]],  # payoffs of the row player (player 0)
    [[-1, 1], [1, -1]],  # payoffs of the column player (player 1)
)
print(game)

short_name()


In [3]:
# The printed state lists each player's actions and the utility matrix as row,col payoff pairs.
state = game.new_initial_state()
print(state)

Terminal? false
Row actions: row0 row1 
Col actions: col0 col1 
Utility matrix:
1,-1 -1,1 
-1,1 1,-1 



In [4]:
state.current_player()  # special id -2 rather than 0 or 1 — presumably because both players move simultaneously

-2

In [5]:
state.legal_actions(0)  # legal actions of player 0 (the player must be named at a simultaneous node)

[0, 1]

In [6]:
state.legal_actions(1)  # legal actions of player 1

[0, 1]

In [7]:
state.is_terminal()  # no joint action has been played yet

False

In [10]:
# Apply one action per player at once (both choose action 0); the game ends after this single joint move.
state.apply_actions([0, 0])
state.is_terminal()

True

In [11]:
state.returns()  # the (row0, col0) entry of the utility matrix: 1 for player 0, -1 for player 1

[1.0, -1.0]

## evolutionary dynamics in rock, paper, scissors

In [12]:
from open_spiel.python.egt import dynamics
from open_spiel.python.egt.utils import game_payoffs_array
import numpy as np

In [13]:
# Load the registered rock-paper-scissors matrix game.
game = pyspiel.load_matrix_game("matrix_rps")
game

matrix_rps()

In [14]:
# Payoffs as a single array: one 3x3 matrix per player, stacked along the first axis.
payoff_matrix = game_payoffs_array(game)
payoff_matrix

array([[[ 0., -1.,  1.],
        [ 1.,  0., -1.],
        [-1.,  1.,  0.]],

       [[ 0.,  1., -1.],
        [-1.,  0.,  1.],
        [ 1., -1.,  0.]]])

In [15]:
# Single-population dynamics on the payoff array, using the replicator update rule.
dyn = dynamics.SinglePopulationDynamics(payoff_matrix, dynamics.replicator)
dyn

<open_spiel.python.egt.dynamics.SinglePopulationDynamics at 0x10a919eb0>

In [16]:
# Starting population mix over the three strategies; dyn(x) evaluates the dynamics at x.
x = np.array([0.2, 0.2, 0.6])
dyn(x)

array([ 8.00000000e-02, -8.00000000e-02, -2.33146835e-18])

In [17]:
# step size and apply
alpha = 0.01
x += alpha * dyn(x)
x

array([0.2008, 0.1992, 0.6   ])

In [18]:
# Take a second step of the same size.
x += alpha * dyn(x)
x

array([0.20160481, 0.19840479, 0.5999904 ])

In [19]:
# Take four more steps of size alpha, each updating x in place.
for step in range(4):
    x += alpha * dyn(x)
x

array([0.20487215, 0.19527183, 0.59985601])

# chance nodes and partially-observable games

In [21]:
import pyspiel
game = pyspiel.load_game("kuhn_poker")  # simplified poker with 3-card deck
game

kuhn_poker()

In [22]:
game.num_distinct_actions()  # two actions: pass and bet

2

In [23]:
# New game; the state displays as an empty string because no card has been dealt yet.
state = game.new_initial_state()
state



In [31]:
state.current_player()  # special chance player id

-1

In [25]:
state.is_chance_node()  # True: the next move belongs to chance, not to a player

True

In [26]:
state.chance_outcomes()  # distribution over outcomes (outcome, probability) pairs

[(0, 0.3333333333333333), (1, 0.3333333333333333), (2, 0.3333333333333333)]

In [27]:
state.apply_action(0)  # choose first card (jack)

In [28]:
state  # the state string now shows the first dealt card (0)

0

In [29]:
state.is_chance_node()  # still chance node (player 2's card)

True

In [30]:
state.chance_outcomes()  # jack no longer a possible outcome

[(1, 0.5), (2, 0.5)]

In [32]:
state.apply_action(1)  # second player gets queen
state

0 1

In [33]:
state.current_player()  # no longer chance node, time to play

0

In [34]:
state  # unchanged: cards 0 and 1 are dealt, no player action yet

0 1

In [35]:
state.legal_actions()  # both action ids are available to player 0

[0, 1]

In [36]:
# Print the human-readable name of every legal action, one per line.
for move in state.legal_actions():
    name = state.action_to_string(move)
    print(name)

Pass
Bet


In [37]:
state.information_state_string()  # only current player's information

'0'

In [38]:
# Player 0 passes (action 0); the state string gains a trailing "p".
state.apply_action(0)
state

0 1 p

In [39]:
state.current_player()  # player 1 acts after player 0's pass

1

In [41]:
state.information_state_string()  # now contains second player's card and public action sequence

'1p'

In [43]:
state.information_state_tensor()  # the information state encoded as a flat list of floats

[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]

## leduc poker
* 6 cards, two suits
* 3 actions: fold, check/call, raise

In [50]:
# Load Leduc poker without any explicit parameters.
game = pyspiel.load_game("leduc_poker")
game

leduc_poker()

In [51]:
game.num_distinct_actions()  # three action ids

3

In [52]:
# Initial state: the display shows round, player to act, pot, money and cards.
# Player -1 is the chance player; -10000 is shown for cards that have not been dealt yet.
state = game.new_initial_state()
state

Round: 1
Player: -1
Pot: 2
Money (p1 p2 ...): 99 99
Cards (public p1 p2 ...): -10000 -10000 -10000 
Round 1 sequence: 
Round 2 sequence: 

In [53]:
state.apply_action(0)  # chance deals card 0 (the first jack) to the first player
state

Round: 1
Player: -1
Pot: 2
Money (p1 p2 ...): 99 99
Cards (public p1 p2 ...): -10000 0 -10000 
Round 1 sequence: 
Round 2 sequence: 

In [54]:
state.apply_action(1)  # chance deals card 1 (the second jack) to the second player
state

Round: 1
Player: 0
Pot: 2
Money (p1 p2 ...): 99 99
Cards (public p1 p2 ...): -10000 0 1 
Round 1 sequence: 
Round 2 sequence: 

In [57]:
state.current_player()  # both private cards are dealt, so player 0 acts

0

In [59]:
state.information_state_string()  # player 0's view: its own private card (0) appears, the opponent's does not

'[Observer: 0][Private: 0][Round 1][Player: 0][Pot: 2][Money: 99 99][Round1: ][Round2: ]'

In [60]:
state.information_state_tensor()  # the information state encoded as a flat list of 30 floats

[1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [63]:
state.legal_actions_mask()  # one 0/1 flag per action id: only actions 1 and 2 are legal here

[0, 1, 1]

In [65]:
state.apply_action(1)  # player 0 checks (logged as "Call" in the round 1 sequence)
state

Round: 1
Player: 1
Pot: 2
Money (p1 p2 ...): 99 99
Cards (public p1 p2 ...): -10000 0 1 
Round 1 sequence: Call
Round 2 sequence: 

In [66]:
state.apply_action(1)  # player 1 checks too; round 1 ends and chance (Player: -1) moves next
state

Round: 2
Player: -1
Pot: 2
Money (p1 p2 ...): 99 99
Cards (public p1 p2 ...): -10000 0 1 
Round 1 sequence: Call, Call
Round 2 sequence: 

In [67]:
state.chance_outcomes()  # cards 0 and 1 are already dealt; the four remaining cards are equally likely

[(2, 0.25), (3, 0.25), (4, 0.25), (5, 0.25)]

In [68]:
state.apply_action(2)  # chance reveals card 2 as the public card

In [69]:
state  # round 2: the public card is 2 and player 0 is to act

Round: 2
Player: 0
Pot: 2
Money (p1 p2 ...): 99 99
Cards (public p1 p2 ...): 2 0 1 
Round 1 sequence: Call, Call
Round 2 sequence: 

In [70]:
state.information_state_string()  # player 0's view now includes the public card and the round 1 actions

'[Observer: 0][Private: 0][Round 2][Player: 0][Pot: 2][Money: 99 99][Public: 2][Round1: 1 1][Round2: ]'

# basic rl: self-play q-learning in tic-tac-toe