# Playing Simple Games with Neural Nets

In this notebook, we implement equilibria learning via self-play for simple games such as Battle of the Sexes and Matching Pennies.

In [1]:
import os
import sys
root_path = os.path.abspath(os.path.join('..'))
if root_path not in sys.path:
    sys.path.append(root_path)
    
import torch
from bnelearn.strategy import MatrixGameStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import TwoByTwoBimatrixGame
from bnelearn.optimizer import ES
from bnelearn.environment import Environment

In [2]:
from tensorboardX import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

In [3]:
## Experiment setup
n_players = 2

## Environment settings
batch_size = 2**10  # training batch size
input_length = 1

## Optimization parameters
epoch = 100
learning_rate = 1

# Learning-rate decay: when `lr_decay` is enabled, the training loop multiplies
# the learning rate by `lr_decay_factor` every `lr_decay_every` epochs.
lr_decay = False
lr_decay_every = 1000
lr_decay_factor = 0.8

## Parameters passed to the ES optimizer
sigma = 5  # ES noise parameter
n_perturbations = 8

## Logging: `namestr` is used as the SummaryWriter log directory
name = 13
namestr = './pb/{}'.format(name)

### Env

In [13]:
# Prisoner's dilemma payoffs.
# Layout looks like [row action][column action][payoff per player] -- TODO confirm
# against TwoByTwoBimatrixGame.
pd = torch.tensor([
    [[-1, -1], [-3,  0]],
    [[ 0, -3], [-2, -2]],
])
prisoners_dilemma = TwoByTwoBimatrixGame(pd)

# Battle of the sexes payoffs (note: stored under the name `mp`).
# Warning: this won't work, the game is not symmetric!
mp = torch.tensor([
    [[3, 2], [0, 0]],
    [[0, 0], [2, 3]],
])

In [14]:
def strat_to_bidder(strategy, batch_size):
    """Wrap ``strategy`` in a ``Bidder``; this closure is used by the optimizer.

    The bidder is only a dummy: it is built with ``Bidder.uniform(0, 0, ...)``
    because the valuation doesn't matter here.

    Args:
        strategy: the strategy the returned bidder should use.
        batch_size: forwarded to ``Bidder.uniform`` as ``batch_size``.

    Returns:
        The object returned by ``Bidder.uniform`` (created with ``n_players=2``).
    """
    dummy_bidder = Bidder.uniform(
        0, 0, strategy,
        batch_size=batch_size,
        n_players=2,
    )
    return dummy_bidder

In [15]:
# Strategy model with two actions (one per pure action of a 2x2 game).
# Move it to the GPU only when CUDA is actually available; an unconditional
# `.cuda()` raises on CPU-only machines.
# NOTE(review): on a CPU-only machine the remaining bnelearn components must
# also run on the CPU -- confirm they follow the model's device.
model = MatrixGameStrategy(n_actions=2)
if torch.cuda.is_available():
    model = model.cuda()

In [16]:
# Select the game handed to the environment: the prisoner's dilemma.
game = prisoners_dilemma

In [17]:
# Environment the optimizer evaluates the model in. It starts without any
# environment agents (presumably because the model plays against itself --
# TODO confirm against bnelearn.environment.Environment).
env = Environment(
    game,
    environment_agents=[],
    max_env_size=1,
    n_players=2,
    batch_size=batch_size,
    strategy_to_bidder_closure=strat_to_bidder,
)

In [18]:
# ES optimizer for `model`, evaluated in `env`, configured from the
# hyperparameters set at the top of the notebook.
optimizer = ES(
    model=model,
    environment=env,
    lr=learning_rate,
    sigma=sigma,
    n_perturbations=n_perturbations,
)

In [19]:
def log_hyperparams(writer):
    """Log the notebook's hyperparameters to ``writer`` as scalars.

    One ``writer.add_scalar(tag, value)`` call is made for each of
    ``batch_size``, ``learning_rate``, ``sigma`` and ``n_perturbations``.
    The values are read from the notebook-level variables at call time.

    Args:
        writer: object exposing ``add_scalar(tag, value)``.
    """
    scalars = (
        ('hyperparams/batch_size', batch_size),
        ('hyperparams/learning_rate', learning_rate),
        ('hyperparams/sigma', sigma),
        ('hyperparams/n_perturbations', n_perturbations),
    )
    for tag, value in scalars:
        writer.add_scalar(tag, value)
    
    

### Training

In [None]:
# Training run: `epoch + 1` optimizer steps, logged to the SummaryWriter.
torch.cuda.empty_cache()
writer = SummaryWriter(log_dir=namestr)

# try/finally so the writer is always closed and the CUDA cache released,
# even when a step raises or the cell is interrupted.
try:
    log_hyperparams(writer)

    for e in range(epoch + 1):

        # lr decay? Applied every `lr_decay_every` epochs, never at epoch 0.
        if lr_decay and e > 0 and e % lr_decay_every == 0:
            # NOTE: this rebinds the notebook-level `learning_rate`, so cells
            # run afterwards see the decayed value.
            learning_rate = learning_rate * lr_decay_factor
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            writer.add_scalar('hyperparams/learning_rate', learning_rate, e)

        # always: do optimizer step
        # The value returned by `optimizer.step()` is negated before logging
        # (presumably the step returns a loss, i.e. negative utility -- TODO confirm).
        utility = -optimizer.step()
        writer.add_scalar('eval/utility', utility, e)
        writer.add_scalar('eval/prob_action_0', model.distribution.probs[0], e)
        print(e)
finally:
    torch.cuda.empty_cache()
    writer.close()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
