In [None]:
import os, sys, time
# Resolve the parent of the current working directory and put it on the import
# path once (presumably so the `bnelearn` imports below resolve when the
# notebook is run from its own folder -- confirm against the repo layout).
root_path = os.path.abspath(os.pardir)
if root_path not in sys.path:
    sys.path.append(root_path)
    
import torch
from bnelearn.mechanism import RockPaperScissors, PrisonersDilemma, MatchingPennies, BattleOfTheSexes, JordanGame
from bnelearn.environment import MatrixGameEnvironment
from bnelearn.bidder import MatrixGamePlayer
from bnelearn.strategy import MatrixGameStrategy,FictitiousPlayStrategy, FictitiousPlaySmoothStrategy, FictitiousPlayMixedStrategy
from bnelearn.optimizer import ES

from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt

torch.cuda.is_available()

In [None]:
# Registry translating the short keys used in `setting` into classes:
# the first three entries are learning rules, the remaining four are games.
options = {
    "FP": FictitiousPlayStrategy,
    "FPS": FictitiousPlaySmoothStrategy,
    "FPM": FictitiousPlayMixedStrategy,
    "PD": PrisonersDilemma,
    "MP": MatchingPennies,
    "BoS": BattleOfTheSexes,
    "JG": JordanGame,
}

# Experiment selection: [learning-rule key, game key], both looked up in `options`.
setting = ["FPM", "JG"]
# Temperature schedule, read further down as
# [tau_minimum, tau_update_interval, tau_update].
param_tau = [0.0001, 10, 0.9]  # alternatives tried: [0.5,10,0.99], [0.0001,10,0.9]
# Optional starting beliefs handed to the strategies; None leaves the choice to
# the strategy class. Example: torch.Tensor([[60,40],[40,60]]).to(device)
initial_beliefs = None

# TensorBoard output goes to <root>/notebooks/matrix/<game>/<rule>_<timestamp>.
# NOTE(review): the timestamp contains ':' characters, which are not allowed in
# Windows paths -- confirm this notebook is only run on POSIX systems.
game_name = setting[1]
run_name = time.strftime('{}_%Y-%m-%d %a %H:%M:%S'.format(setting[0]))
logdir = os.path.join(root_path, 'notebooks', 'matrix', game_name, run_name)
logdir

## Experiment setup
n_players, epoch = 3, 10000  # number of agents, number of learning iterations

## Environment settings
# Placeholder values only (the original author marks these as dummies).
batch_size, input_length = 1, 1

# optimization params
'''
All with 10,000 epochs
PD:
FP [] - Converges to 0 quickly
FPS [0.5, 10, 0.99] - Converges to [0.11,0.89]
FPS [0.0 ,10, 0.90] - Converges to 0 quickly
FPM [0.5, 10, 0.99] - Converges to [0.11,0.89] 
FPM [0.0, 10, 0.90] - Converges to 0 quickly 

MP:
FP [] - Cycles. Historical play cycles around 0.5
FPS [0.5, 10, 0.99] - Play converges to 0.5
FPS [0.0 ,10, 0.90] - Cycles. Historical play cycles around 0.5
FPM [0.5, 10, 0.99] - Play converges to 0.5
FPM [0.0, 10, 0.90] - Cycles. Historical play cycles around 0.5

BoS:
FP [] - Converges to PNE 0 or 1
FPS [0.0 ,10, 0.90] - Converges to PNE 0 or 1
FPS [0.5, 10, 0.99] - Converges slowly to PNE 0 or 1
FPM [0.0 ,10, 0.90] - Converges quickly to PNE 0 or 1
FPM [0.5, 10, 0.99] - Converges to PNE 0 or 1
FPS [0.0 ,10, 0.90], torch.Tensor([[60,40],[40,60]]).to(device)) - Converges to PNE 0 or 1
FPS [0.5, 10, 0.99], torch.Tensor([[60,40],[40,60]]).to(device)) - Converges very slowly to PNE 0 or 1
FPM [0.0 ,10, 0.90], torch.Tensor([[60,40],[40,60]]).to(device)) - Interesting. The play cycles a lot at first and plays MNE later. Hist. play is permanent in MNE.
FPM [0.5, 10, 0.99], torch.Tensor([[60,40],[40,60]]).to(device)) - Converges very fast to [0.57, 0.43]

Jordan Game:
FP [] - Very long cycles
FPS [0.0 ,10, 0.90] - Very long cycles
FPS [0.5, 10, 0.99] - Quickly converges to 0.5
FPM [0.0 ,10, 0.90] - Very long cycles
FPM [0.5, 10, 0.99] - Quickly converges to 0.5
'''
# Unpack the temperature schedule chosen in `param_tau`:
#   tau_minimum         -- tau is only updated while it is still >= this value
#   tau_update_interval -- number of epochs between two tau updates
#   tau_update          -- value passed to `strategy.update_tau(...)`
tau_minimum = param_tau[0]
tau_update_interval = param_tau[1]
tau_update = param_tau[2]

# Human-readable summary; printed and written to TensorBoard by the training cell.
param = "tau_minimum: {} \ntau_update_interval: {} \ntau_update: {}".format(tau_minimum,tau_update_interval,tau_update)

cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

# Index of the GPU to run on, or None to keep torch's current device.
# Compare against None explicitly: a plain truthiness test would treat GPU
# index 0 as "no GPU requested" and skip the call.
specific_gpu = 5
if cuda and specific_gpu is not None:
    torch.cuda.set_device(specific_gpu)

# Adapter from strategy to bidder, as required by the optimizer.
# The resulting player is a dummy: its valuation is irrelevant here.
def strat_to_player(strategy, batch_size, player_position=None):
    """Wrap ``strategy`` in a ``MatrixGamePlayer``.

    Args:
        strategy: the strategy object the player should act with.
        batch_size: forwarded unchanged to ``MatrixGamePlayer``.
        player_position: forwarded unchanged to ``MatrixGamePlayer``;
            defaults to ``None``.

    Returns:
        The newly constructed ``MatrixGamePlayer``.
    """
    player_kwargs = {'batch_size': batch_size, 'player_position': player_position}
    return MatrixGamePlayer(strategy, **player_kwargs)


# Instantiate the game and look up the learning rule selected in `setting`.
game = options[setting[1]]()
strategy_cls = options[setting[0]]

# Build one strategy entry per player. The player count comes from `n_players`
# instead of being repeated as three hand-written copies, so changing the
# number of agents only requires touching that one variable.
if strategy_cls in (FictitiousPlayStrategy, FictitiousPlaySmoothStrategy):
    # Plain and smooth fictitious play: every player gets its own instance.
    strat = [strategy_cls(game = game, initial_beliefs = initial_beliefs)
             for _ in range(n_players)]
else:
    # Any other rule (currently FictitiousPlayMixedStrategy): one instance is
    # shared by all players, i.e. every entry of `strat` is the same object.
    strat0 = strategy_cls(game = game, initial_beliefs = initial_beliefs)
    strat = [strat0] * n_players

# Wrap each strategy in a player; the list index doubles as player position.
player = [strat_to_player(s, batch_size = batch_size, player_position = position)
          for position, s in enumerate(strat)]

env = MatrixGameEnvironment(game = game,
                           agents = list(player),  # separate list object, as before
                           n_players = n_players,
                           batch_size = batch_size,
                           strategy_to_player_closure = strat_to_player)

In [None]:
# Parallel updating: in every epoch all players choose an action first, and only
# then every strategy is updated with the complete action profile.
print(param)
# Strategy classes for which the tau schedule is applied.
tau_strategy_types = (FictitiousPlaySmoothStrategy, FictitiousPlayMixedStrategy)
with SummaryWriter(log_dir=logdir, flush_secs=30) as writer:
    writer.add_text('hyperparams/hyperparameter', param, 0)
    torch.cuda.empty_cache()
    for e in range(epoch):
        # One action per player, in player order (no hard-coded player count).
        actions = [playr.get_action() for playr in player]

        if e%1000 == 0:
            print(actions)

        # The schedule check does not depend on the strategy, so do it once.
        tau_update_due = e > 0 and e % tau_update_interval == 0

        # NOTE(review): when the setup cell puts one shared strategy object into
        # `strat` several times (the mixed-strategy case), this loop updates that
        # same object once per entry, including update_tau -- confirm intended.
        for strategy in strat:
            strategy.update_observations(actions)
            strategy.update_beliefs()
            if tau_update_due and isinstance(strategy, tau_strategy_types) and strategy.tau >= tau_minimum:
                strategy.update_tau(tau_update)

        # Logging
        for i,playr in enumerate(player):
            # Use this player's own strategy rather than the variable left over
            # from the update loop above (which is always the last list entry).
            player_strategy = playr.strategy
            writer.add_histogram('eval/p{}_action_distribution'.format(i), actions[i].view(-1).cpu().numpy(), e)
            # Careful: With FPM this always shows only probs_self of player 1. TODO: Change when implementing a logger
            writer.add_scalar('eval_player_{}/prob_action_0'.format(i), player_strategy.probs_self[0], e)
            writer.add_scalar('eval_player_{}/hist_prob_action_0'.format(i), player_strategy.probs[i][0], e)
            if isinstance(player_strategy, FictitiousPlayMixedStrategy):
                # Careful: This is currently not working. It takes the exp. util of the correct player but the probs_self is always of the last player TODO: Fix!
                writer.add_scalar('eval_player_{}/utility'.format(i), (player_strategy.exp_util * player_strategy.probs_self[0]).sum(), e)
            else:
                writer.add_scalar('eval_player_{}/utility'.format(i), player_strategy.exp_util[actions[i]], e)
        
'''
# Sequential updating
for e in range(epoch):
    for i,playr in enumerate(player):
        actions = [None,None]
        actions[i] = playr.get_action()
        print(actions)
        for _,strategy in enumerate(strat):
            strategy.update(actions)
'''