# Playing Simple Games with Neural Nets

In this notebook, we implement equilibrium learning via self-play for simple games such as the Prisoners' Dilemma, Battle of the Sexes, and Matching Pennies.

In [None]:
import os
import sys
# Put the parent of the current working directory on the import path (at most
# once) -- presumably so that the `bnelearn` package imported below resolves.
root_path = os.path.abspath('..')
if root_path not in sys.path:
    sys.path.append(root_path)
    
import torch
from bnelearn.strategy import MatrixGameStrategy
from bnelearn.bidder import Bidder, Player, MatrixGamePlayer
from bnelearn.mechanism import PrisonersDilemma, BattleOfTheSexes, MatchingPennies
from bnelearn.optimizer import ES
from bnelearn.environment import Environment, AuctionEnvironment, MatrixGameEnvironment

In [None]:
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Check whether PyTorch can see a CUDA device; the strategy models created in
# this notebook are moved to the GPU with `.cuda()`.
torch.cuda.is_available()

## Symmetric Game: Prisoners' Dilemma

In [None]:
# Run identifier; its string form becomes the last component of the log path.
experiment_name = 5
# Log directory <root_path>/notebooks/pd/<experiment_name>, handed to the
# SummaryWriter in the training cell.
logdir = os.path.join(root_path, 'notebooks', 'pd', str(experiment_name))

In [None]:
# Display the resolved log directory.
logdir

In [None]:
# ---- Experiment setup ----
n_players = 2

# ---- Environment settings ----
batch_size = 64     # training batch size
input_length = 1    # not referenced elsewhere in the visible part of this notebook

# ---- Optimization parameters ----
epoch = 25          # the training loop iterates over range(epoch+1)
learning_rate = 1

# Learning-rate decay: when `lr_decay` is on, the training loop multiplies the
# learning rate by `lr_decay_factor` every `lr_decay_every` iterations
# (iteration 0 excluded).
lr_decay = False
lr_decay_every = 1000
lr_decay_factor = 0.8

# ---- ES optimizer parameters ----
sigma = 5           # ES noise parameter
n_perturbations = 8


In [None]:
def strat_to_player(strategy, batch_size, player_position=None):
    """Wrap a strategy in a ``MatrixGamePlayer``.

    Supplied to the environment as its strategy-to-bidder closure. Per the
    original author this is a dummy wrapper: valuations do not matter here.

    Args:
        strategy: forwarded as the first positional argument.
        batch_size: forwarded as the ``batch_size`` keyword.
        player_position: forwarded as the ``player_position`` keyword
            (``None`` when omitted).

    Returns:
        The newly constructed ``MatrixGamePlayer``.
    """
    player = MatrixGamePlayer(
        strategy,
        batch_size=batch_size,
        player_position=player_position,
    )
    return player

In [None]:
# The two-action strategy trained in this section, moved to the GPU.
# NOTE(review): `.cuda()` is unconditional -- presumably this fails on a machine
# without CUDA (see the availability check above); confirm before running on CPU.
model = MatrixGameStrategy(n_actions=2).cuda()

In [None]:
# Mechanism played in this section: the Prisoners' Dilemma.
game = PrisonersDilemma()

In [None]:
# Environment for the Prisoners' Dilemma section. It starts with no agents
# (`agents=[]`) and receives `strat_to_player` as its strategy-to-bidder closure.
# NOTE(review): the second section of this notebook builds a
# MatrixGameEnvironment with `strategy_to_player_closure` instead -- confirm that
# AuctionEnvironment is the intended class for this matrix game.
env = AuctionEnvironment(game, 
                 agents=[],
                 max_env_size =1,
                 n_players=2,
                 batch_size=batch_size,
                 strategy_to_bidder_closure=strat_to_player)

In [None]:
# ES optimizer (from bnelearn.optimizer) for `model` in `env`, configured with
# the learning rate, noise parameter `sigma` and perturbation count set above.
optimizer = ES(model=model, environment = env, lr = learning_rate, sigma=sigma, n_perturbations=n_perturbations)

In [None]:
def log_hyperparams(writer):
    """Record the current hyperparameters on ``writer`` as scalars.

    Reads the notebook-level variables ``batch_size``, ``learning_rate``,
    ``sigma`` and ``n_perturbations`` at call time and writes each under a
    ``hyperparams/...`` tag. No step argument is passed to ``add_scalar``.

    Args:
        writer: object exposing ``add_scalar(tag, value)``; the training cell
            passes a ``SummaryWriter``.
    """
    scalars = (
        ('hyperparams/batch_size', batch_size),
        ('hyperparams/learning_rate', learning_rate),
        ('hyperparams/sigma', sigma),
        ('hyperparams/n_perturbations', n_perturbations),
    )
    for tag, value in scalars:
        writer.add_scalar(tag, value)

Training

In [None]:
# Train `model` with the ES optimizer and log progress to TensorBoard.
with SummaryWriter(log_dir=logdir, flush_secs=30) as writer:
    torch.cuda.empty_cache()
    log_hyperparams(writer)

    # Note: range(epoch + 1) yields epoch + 1 iterations (0 .. epoch inclusive).
    for e in range(epoch + 1):

        # Optional learning-rate decay every `lr_decay_every` iterations,
        # skipping iteration 0.
        if lr_decay and e % lr_decay_every == 0 and e > 0:
            learning_rate *= lr_decay_factor
            # NOTE(review): assumes the ES optimizer exposes `param_groups`
            # like a torch optimizer -- confirm.
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            writer.add_scalar('hyperparams/learning_rate', learning_rate, e)

        # One optimizer step per iteration. The result of step() is negated to
        # obtain the utility -- presumably step() returns a loss; confirm
        # against the ES implementation.
        utility = -optimizer.step()
        writer.add_scalar('eval/utility', utility, e)
        writer.add_scalar(
            'eval/prob_action_0', model.distribution.probs[0], e
        )
        print(e)

In [None]:
# Wrap the strategy in a player with a batch size of 10 for inspection.
player = strat_to_player(model, 10)

In [None]:
# Display the player object.
player

In [None]:
# Mean of the actions returned by get_action(), cast to float. Presumably the
# actions are 0/1 indices, making this the fraction of action 1 -- confirm.
player.get_action().float().mean()

## Asymmetric Games: BoS and Matching Pennies

In [None]:
# ---- Experiment setup ----
n_players = 2

# ---- Environment settings ----
batch_size = 2 ** 5  # training batch size (32)
input_length = 1     # not referenced elsewhere in the visible part of this notebook

# ---- Optimization parameters ----
epoch = 200          # the training loop iterates over range(epoch+1)
learning_rate = 1

# Learning-rate decay: when `lr_decay` is on, the training loop multiplies the
# learning rate by `lr_decay_factor` every `lr_decay_every` iterations
# (iteration 0 excluded).
lr_decay = False
lr_decay_every = 100
lr_decay_factor = 0.8

# ---- ES optimizer parameters ----
sigma = 5            # ES noise parameter
n_perturbations = 10

# Mechanism played in this section. BattleOfTheSexes is also imported above --
# presumably swapped in here by hand for the BoS experiments.
game = MatchingPennies()
# Log directory <root_path>/notebooks/<directory_name>/<experiment_name>.
directory_name = 'matching_pennies'
# The name embeds the batch size as text; it is not derived from `batch_size`,
# so it has to be kept in sync manually.
experiment_name = '04-01-batch=32'
logdir = os.path.join(root_path, 'notebooks', directory_name, str(experiment_name))

In [None]:
def strat_to_player(strategy, batch_size, player_position=None):
    """Build a ``MatrixGamePlayer`` around ``strategy``.

    Passed to ``MatrixGameEnvironment`` as ``strategy_to_player_closure``. Per
    the original author this is a dummy wrapper: valuations do not matter here.

    Args:
        strategy: forwarded as the first positional argument.
        batch_size: forwarded as the ``batch_size`` keyword.
        player_position: forwarded as the ``player_position`` keyword
            (``None`` when omitted).

    Returns:
        The newly constructed ``MatrixGamePlayer``.
    """
    wrapped = MatrixGamePlayer(
        strategy,
        batch_size=batch_size,
        player_position=player_position,
    )
    return wrapped

In [None]:
# One separate two-action strategy per player, each moved to the GPU.
model1 = MatrixGameStrategy(n_actions=2).cuda()
model2 = MatrixGameStrategy(n_actions=2).cuda()

In [None]:
# Environment holding both players' strategies as agents; `strat_to_player` is
# supplied as the closure that turns a strategy into a player. The meaning of
# env_type='fixed' is defined inside bnelearn and is not visible here.
env = MatrixGameEnvironment(game, agents=[model1, model2],
                 n_players=2,
                 batch_size=batch_size,
                 strategy_to_player_closure=strat_to_player,
                 env_type = 'fixed'
                 )

In [None]:
# One ES optimizer per player. Both act on the same environment and differ only
# in the model they update and in the player position (0 for model1, 1 for
# model2) passed through `strat_to_bidder_kwargs`.
optimizers = [
    ES(
        model=player_model,
        environment=env,
        lr=learning_rate,
        sigma=sigma,
        n_perturbations=n_perturbations,
        env_type='fixed',
        strat_to_bidder_kwargs={'player_position': position},
    )
    for position, player_model in enumerate((model1, model2))
]
optimizer1, optimizer2 = optimizers

In [None]:
def log_hyperparams(writer):
    """Write the current hyperparameters to ``writer`` as scalars.

    Looks up the notebook-level variables ``batch_size``, ``learning_rate``,
    ``sigma`` and ``n_perturbations`` when called and logs each under a
    ``hyperparams/...`` tag. No step argument is passed to ``add_scalar``.

    Args:
        writer: object exposing ``add_scalar(tag, value)``; the training cell
            passes a ``SummaryWriter``.
    """
    tagged_values = [
        ('hyperparams/batch_size', batch_size),
        ('hyperparams/learning_rate', learning_rate),
        ('hyperparams/sigma', sigma),
        ('hyperparams/n_perturbations', n_perturbations),
    ]
    for tag, value in tagged_values:
        writer.add_scalar(tag, value)

In [None]:
# Inspect the first player's action probabilities (this cell precedes training).
model1.distribution.probs

In [None]:
# Inspect the second player's action probabilities (this cell precedes training).
model2.distribution.probs

In [None]:
# Train both players in alternation and log progress to TensorBoard.
with SummaryWriter(log_dir=logdir) as writer:
    torch.cuda.empty_cache()
    log_hyperparams(writer)

    # Note: range(epoch + 1) yields epoch + 1 iterations (0 .. epoch inclusive).
    for e in range(epoch + 1):

        # Optional learning-rate decay every `lr_decay_every` iterations,
        # skipping iteration 0; the new rate is applied to both optimizers.
        if lr_decay and e % lr_decay_every == 0 and e > 0:
            learning_rate *= lr_decay_factor
            writer.add_scalar('hyperparams/learning_rate', learning_rate, e)
            # NOTE(review): assumes the ES optimizer exposes `param_groups`
            # like a torch optimizer -- confirm.
            for optimizer in optimizers:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

        # Player 1 steps first. The result of step() is negated to obtain the
        # utility -- presumably step() returns a loss; confirm against ES.
        utility1 = -optimizer1.step()
        writer.add_scalar('eval/p1_utility', utility1, e)
        writer.add_scalar(
            'eval/p1_prob_action_0', model1.distribution.probs[0], e
        )

        # Player 2 steps after player 1 within the same iteration.
        utility2 = -optimizer2.step()
        writer.add_scalar('eval/p2_utility', utility2, e)
        writer.add_scalar(
            'eval/p2_prob_action_0', model2.distribution.probs[0], e
        )

        # Progress output every 50 iterations.
        if e % 50 == 0:
            print(e)

In [None]:
# Utility of the first player from the last training iteration.
utility1

In [None]:
# Utility of the second player from the last training iteration.
utility2