# Playing Simple Games with Neural Nets

In this notebook, we implement equilibrium learning via self-play for simple matrix games such as the Prisoners' Dilemma, Battle of the Sexes, and Matching Pennies.

In [1]:
import os
import sys
root_path = os.path.abspath(os.path.join('..'))
if root_path not in sys.path:
    sys.path.append(root_path)
    
import torch
from bnelearn.strategy import MatrixGameStrategy
from bnelearn.bidder import Bidder
from bnelearn.mechanism import PrisonersDilemma, BattleOfTheSexes, MatchingPennies
from bnelearn.optimizer import ES
from bnelearn.environment import Environment

In [2]:
from tensorboardX import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

## Symmetric Game: Prisoners' Dilemma

In [3]:
## Experiment setup
n_players = 2

## Reproducibility
# Training is stochastic, so fix torch's global RNG to make a Restart & Run All
# reproduce the same run. Other RNGs (numpy, Python's random) are not seeded here.
# NOTE(review): assumes bnelearn draws its randomness from torch's global RNG -- confirm.
seed = 0
torch.manual_seed(seed)

## Environment settings
batch_size = 64      # training batch size
input_length = 1

## Optimization params
epoch = 10             # the training loop iterates over range(epoch+1)
learning_rate = 1
lr_decay = False       # when True, the learning rate is decayed during training
lr_decay_every = 1000  # decay period in epochs (only read when lr_decay is True)
lr_decay_factor = 0.8  # multiplicative factor applied at each decay

sigma = 5 #ES noise parameter
n_perturbations = 8

## Logging
# `name` identifies the run; it is only used to build the TensorBoard log directory.
name = 13
namestr = './pd/{}'.format(name)

In [4]:
def strat_to_bidder(strategy, batch_size):
    """Wrap ``strategy`` in a ``Bidder`` so that the optimizer can use it.

    The bidder is a dummy: the first two arguments to ``Bidder.uniform`` are
    both 0 (presumably the valuation bounds), because the valuation does not
    matter here. The player count is fixed at 2.
    """
    bidder_kwargs = dict(batch_size=batch_size, n_players=2)
    return Bidder.uniform(0, 0, strategy, **bidder_kwargs)

In [5]:
# Single strategy over n_actions=2 actions, moved to the GPU via .cuda() (needs a CUDA device).
model = MatrixGameStrategy(n_actions=2).cuda()

In [6]:
# Mechanism instance for this section; it is passed to the Environment constructed next.
game = PrisonersDilemma()

In [7]:
# Environment around `game`; strategies are turned into bidders through strat_to_bidder.
# No environment agents are supplied here (environment_agents=[]).
# NOTE(review): presumably the optimized model then plays against itself -- confirm against bnelearn.Environment.
env = Environment(game, environment_agents=[],
                 max_env_size =1,
                 n_players=2,
                 batch_size=batch_size,
                 strategy_to_bidder_closure=strat_to_bidder)

In [8]:
# ES optimizer for `model`, evaluated in `env`, configured from the hyperparameters set in the config cell.
# NOTE(review): the exact meaning of sigma / n_perturbations is defined by bnelearn.optimizer.ES -- not visible here.
optimizer = ES(model=model, environment = env, lr = learning_rate, sigma=sigma, n_perturbations=n_perturbations)

In [9]:
def log_hyperparams(writer):
    """Record the current global hyperparameters as scalars on ``writer``.

    Reads the notebook-level ``batch_size``, ``learning_rate``, ``sigma`` and
    ``n_perturbations`` at call time and logs each one under ``hyperparams/``.

    Args:
        writer: object exposing ``add_scalar(tag, value)``, e.g. a SummaryWriter.
    """
    scalars = {
        'hyperparams/batch_size': batch_size,
        'hyperparams/learning_rate': learning_rate,
        'hyperparams/sigma': sigma,
        'hyperparams/n_perturbations': n_perturbations,
    }
    for tag, value in scalars.items():
        writer.add_scalar(tag, value)
    
    

Training

In [10]:
# Train `model` with the ES optimizer and log progress to TensorBoard.
torch.cuda.empty_cache()
writer = SummaryWriter(log_dir=namestr)

# try/finally guarantees that the event file is closed and the CUDA cache is
# released even if an optimizer step raises or the cell is interrupted.
try:
    log_hyperparams(writer)

    for e in range(epoch+1):  # iterates e = 0..epoch inclusive

        # lr decay? (skipped entirely while lr_decay is False)
        if lr_decay and e % lr_decay_every == 0 and e > 0:
            learning_rate = learning_rate * lr_decay_factor
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            writer.add_scalar('hyperparams/learning_rate', learning_rate, e)

        # always: do optimizer step
        # The step result is negated before being logged as utility.
        # NOTE(review): presumably step() returns a loss (negative utility) -- confirm against bnelearn's ES.
        utility = -optimizer.step()
        writer.add_scalar('eval/utility', utility, e)
        writer.add_scalar('eval/prob_action_0', model.distribution.probs[0], e)
        print(e)
finally:
    torch.cuda.empty_cache()
    writer.close()

0
1
2
3
4
5
6
7
8
9
10


In [11]:
# Wrap the strategy in a bidder with batch size 100 so its actions can be inspected.
bidder = strat_to_bidder(model, 100)

In [12]:
# Mean of the bidder's actions after casting them to float.
# NOTE(review): presumably actions are 0/1 indices, making this the empirical frequency of action 1 -- confirm.
bidder.get_action().float().mean()

tensor(1., device='cuda:0')

## Asymmetric Games: BoS and Matching Pennies

In [13]:
## Experiment setup
n_players = 2

## Reproducibility
# Training is stochastic, so fix torch's global RNG to make this section
# reproducible when re-run from this cell. Other RNGs (numpy, Python's random)
# are not seeded here.
# NOTE(review): assumes bnelearn draws its randomness from torch's global RNG -- confirm.
seed = 0
torch.manual_seed(seed)

## Environment settings
batch_size = 64      # training batch size
input_length = 1

## Optimization params
epoch = 25             # the training loop iterates over range(epoch+1)
learning_rate = 1
lr_decay = False       # when True, the learning rate is decayed during training
lr_decay_every = 1000  # decay period in epochs (only read when lr_decay is True)
lr_decay_factor = 0.8  # multiplicative factor applied at each decay

sigma = 5 #ES noise parameter
n_perturbations = 8

## Logging
# `name` identifies the run; it is only used to build the TensorBoard log directory.
name = 'test1'
namestr = './BoS/{}'.format(name)

In [14]:
def strat_to_bidder(strategy, batch_size):
    """Wrap ``strategy`` in a ``Bidder`` so that the optimizer can use it.

    This rebinds the same wrapper that the Prisoners' Dilemma section defines.
    The bidder is a dummy: the first two arguments to ``Bidder.uniform`` are
    both 0 (presumably the valuation bounds), because the valuation does not
    matter here. The player count is fixed at 2.
    """
    bidder_kwargs = dict(batch_size=batch_size, n_players=2)
    return Bidder.uniform(0, 0, strategy, **bidder_kwargs)

In [15]:
# Rebind `game` to Battle of the Sexes for this section.
# NOTE(review): MatchingPennies is imported but not instantiated in the cells shown;
# presumably it can be substituted here -- confirm.
game = BattleOfTheSexes()

In [16]:
# Two separate strategy instances, each over n_actions=2 actions and moved to the GPU via .cuda().
# Each one gets its own ES optimizer further down.
model1 = MatrixGameStrategy(n_actions=2).cuda()
model2 = MatrixGameStrategy(n_actions=2).cuda()

In [17]:
# Rebind `env` for this section: both strategies are registered as environment agents
# and max_env_size is 2 (it was 1 with no agents in the Prisoners' Dilemma section).
# NOTE(review): how Environment matches the optimized model against these agents is not visible here -- confirm.
env = Environment(game, environment_agents=[model1, model2],
                 max_env_size =2,
                 n_players=2,
                 batch_size=batch_size,
                 strategy_to_bidder_closure=strat_to_bidder)

In [18]:
# One ES optimizer per strategy; both share the same environment and hyperparameters.
optimizer1 = ES(model=model1, environment = env, lr = learning_rate, sigma=sigma, n_perturbations=n_perturbations)
optimizer2 = ES(model=model2, environment = env, lr = learning_rate, sigma=sigma, n_perturbations=n_perturbations)

In [19]:
# Train both strategies in alternation (optimizer1 then optimizer2 each epoch)
# and log progress to TensorBoard.
torch.cuda.empty_cache()
writer = SummaryWriter(log_dir=namestr)

# try/finally guarantees that the event file is closed and the CUDA cache is
# released even if an optimizer step raises or the cell is interrupted.
try:
    log_hyperparams(writer)

    for e in range(epoch+1):  # iterates e = 0..epoch inclusive

        # lr decay? (skipped entirely while lr_decay is False)
        if lr_decay and e % lr_decay_every == 0 and e > 0:
            learning_rate = learning_rate * lr_decay_factor
            # The decay must reach optimizer1 and optimizer2 -- the optimizers that
            # are actually stepped in this loop -- not the `optimizer` left over
            # from the Prisoners' Dilemma section.
            for opt in (optimizer1, optimizer2):
                for param_group in opt.param_groups:
                    param_group['lr'] = learning_rate
            writer.add_scalar('hyperparams/learning_rate', learning_rate, e)

        # always: do optimizer step
        # The step results are negated before being logged as utilities.
        # NOTE(review): presumably step() returns a loss (negative utility) -- confirm against bnelearn's ES.
        utility1 = -optimizer1.step()
        writer.add_scalar('eval/p1_utility', utility1, e)
        writer.add_scalar('eval/p1_prob_action_0', model1.distribution.probs[0], e)

        utility2 = -optimizer2.step()
        writer.add_scalar('eval/p2_utility', utility2, e)
        writer.add_scalar('eval/p2_prob_action_0', model2.distribution.probs[0], e)
        print(e)
finally:
    torch.cuda.empty_cache()
    writer.close()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25


In [26]:
# Scratch value: a one-entry dict, later placed in the deque `d`.
x = dict(a=1)

In [27]:
# Scratch value: a second one-entry dict, later placed in the deque `d`.
y = dict(a=2)

In [30]:
# Scratch: bounded deque (at most 2 items) holding references to the dicts x and y.
from collections import deque
d = deque([x, y], maxlen=2)

In [32]:
# Scratch: set key 'a' to 3 on every dict held by the deque. The deque stores
# references, so the dict objects it was built from are modified as well.
for i in d:
    i.update({'a': 3})

In [21]:
# Scratch check of a conditional assignment: 1 == 0 is False, so x becomes 2.
# Note that this rebinds x, which the earlier scratch cell bound to a dict.
if 1 == 0:
    x = 1
else:
    x = 2

In [22]:
# Display the current value of x (the recorded output is 2).
x

2