In [1]:
import numpy as np
import tensorflow as tf

import pyspiel

from open_spiel.python.algorithms.deep_cfr_tf2 import DeepCFRSolver
from open_spiel.python.algorithms import exploitability
from open_spiel.python import policy

In [2]:
# Show the devices TensorFlow can see, so the device strings handed to the
# solver can be checked against what is actually available.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [3]:
# Load Kuhn poker through pyspiel, configured for two players.
game = pyspiel.load_game('kuhn_poker', {"players": 2})

In [4]:
# Build the Deep CFR solver for `game`. Inference and training are both
# pinned to GPU 0.
solver = DeepCFRSolver(
    game,
    policy_network_layers=(128, 128),
    advantage_network_layers=(128, 128),
    num_iterations=1024,
    num_traversals=128,
    learning_rate=1e-4,
    batch_size_advantage=2048,
    batch_size_strategy=2048,
    # The capacity is an element count, so pass an integer rather than the
    # float literal 1e7.
    memory_capacity=int(1e7),
    infer_device='/device:gpu:0',
    train_device='/device:gpu:0')

In [5]:
# NashConv of the solver's policy before solve() has been called; this is the
# baseline to compare the post-training value against.
untrained_policy = policy.tabular_policy_from_callable(
    game, solver.action_probabilities)
conv = exploitability.nash_conv(game, untrained_policy)
print("Deep CFR - NashConv:", conv)

Deep CFR - NashConv: 0.6035902977264429


In [6]:
%%time
# Run the solver and time the whole cell. solve() returns three values; the
# first is discarded here (presumably the trained policy network - confirm
# against DeepCFRSolver.solve).
# If this cell is interrupted before solve() returns, advantage_losses and
# policy_loss are never assigned, and any later cell that reads them fails
# with a NameError.
_, advantage_losses, policy_loss = solver.solve()

KeyboardInterrupt: 

In [7]:
# Summarize the training run. This cell needs the solve() cell to have run to
# completion: advantage_losses and policy_loss are only bound once solve()
# returns.
for player, losses in advantage_losses.items():
  # Show the first two and last two losses. Histories with four or fewer
  # entries are printed in full so no entry is repeated around the ellipsis.
  if len(losses) > 4:
    shown = losses[:2] + ["..."] + losses[-2:]
  else:
    shown = losses
  print("Advantage for player:", player, shown)
  print("Advantage Buffer Size for player", player,
        len(solver.advantage_buffers[player]))
print("Strategy Buffer Size:", len(solver.strategy_buffer))
print("Final policy loss:", policy_loss)

# NashConv of the solver's policy after training.
conv = exploitability.nash_conv(
    game,
    policy.tabular_policy_from_callable(game, solver.action_probabilities))
print("Deep CFR - NashConv:", conv)

NameError: name 'advantage_losses' is not defined