In [1]:
--- Libraries:
arguments = require 'Settings.arguments'
constants = require 'Settings.constants'
card_to_string = require 'Game.card_to_string_conversion'
card_tools = require 'Game.card_tools'
game_settings = require 'Settings.game_settings'
Plot = require 'itorch.Plot'
nninit = require 'nninit'
require 'torch'
require 'math'
require 'Tree.tree_builder'
require 'Tree.tree_data_generation'
require 'Tree.tree_values'
require 'Tree.tree_visualiser'
require 'nn'
require 'Tree.tree_cfr'
require 'Tree.tree_cfr2'
require 'nngraph'
require 'optim'
require 'image'
require 'NN.strategy_net_builder'
require 'NN.strategy_net_trainer'
require 'Tree.tree_warm_start'
require 'Nn.StrategyCriterion'


require 'Adversarials.exploitabilityVS'


In [2]:
--- Build two warm-started trees from one short CFR run.
-- Solves a freshly built tree with CFR, trains a strategy network on the
-- training set extracted from it, then prepares two more trees:
-- one initialised through the trained network (plus regrets copied from the
-- solved tree) and one initialised directly from the solved tree.
-- Relies on the global `builder` being defined before this is called.
-- @param params tree parameters handed to `builder:build_tree`
-- @param starting_ranges ranges handed to `run_clean_cfr`
-- @param num_iter number of CFR iterations (default 200)
-- @param num_skip value stored in the solver's `cfr_skip` field (default 50)
-- @return treeNNWarm tree prepared via `generate_new_initial_strategies` and `warm_start_regrets`
-- @return treeTargetWarm tree prepared via `warm_start_targets_and_regrets`
function strategies_initializer(params, starting_ranges,num_iter,num_skip)
    num_iter = num_iter or 200
    num_skip = num_skip or 50
    local tree_train = builder:build_tree(params)
    local tree_data = TreeData(tree_train)

    --- Running CFR for num_iter iterations with cfr_skip = num_skip
    local solver_train = TreeCFR2()
    solver_train.cfr_skip = num_skip
    solver_train:run_clean_cfr(tree_train,starting_ranges,num_iter)

    --- Getting training set:
    --- NOTE(review): the meaning of the second argument (1) is not visible here
    tree_data:get_training_set(tree_train,1)
    print('training train tree with CFR: ')

    -- Building the neural net model
    local strategy_nn = StrategyNN()
    local nn_model = strategy_nn.model

    -- Building trainer:
    local nn_trainer = NNTrainer(tree_data,nn_model)
    -- Options:
    local opt = {}
    opt.learningRate = 0.001
    opt.momentum = 0.95
    opt.validate = false
    local current_criterion = StrategyLoss()---nn.SmoothL1Criterion()
    -- The returned losses are not needed here, so they are not captured.
    -- NOTE(review): 10000 is presumably the number of training epochs - confirm
    nn_trainer:train(nn_model,current_criterion,opt,10000)
    print("training neural network: ")

    --- NN warm start: initial strategies come from the trained network
    local treeNNWarm = builder:build_tree(params)
    local dataNN = TreeData(treeNNWarm)
    dataNN:get_training_set(treeNNWarm,1)
    dataNN:generate_new_initial_strategies(treeNNWarm,nn_trainer)

    --- Warm start the regrets for NN from the CFR-solved tree:
    dataNN:warm_start_regrets(treeNNWarm,tree_train)
    --- NOTE(review): this message prints after the NN warm start and before
    --- the target warm start; confirm which stage it is meant to label.
    print("target warm-start")

    --- Target warm start: targets and regrets come from the CFR-solved tree
    local treeTargetWarm = builder:build_tree(params)
    local tree_data_target = TreeData(treeTargetWarm)
    tree_data_target:warm_start_targets_and_regrets(treeTargetWarm,tree_train)
    print("Whole Process: ")
    return treeNNWarm,treeTargetWarm
end


--- Play tree1 against tree2 and report the evaluator's A2 statistics.
-- @param tree1 first tree handed to the evaluator
-- @param tree2 second tree handed to the evaluator
-- @param num_iter times each combination is played (default 100)
-- @return the evaluator's `A2_avg_pot_won`
-- @return the evaluator's `A2_winning_rate`
function get_returns(tree1,tree2,num_iter)
    local games_per_combination = num_iter or 100

    --- Poker match: every combination is played games_per_combination times
    local match = StrategyEvaluator(tree1,tree2)
    match:play_all_combinations_n_times(tree1,tree2,games_per_combination)

    --- Average pot won first, win rate second
    return match.A2_avg_pot_won, match.A2_winning_rate
end


--- Repeat `get_returns` several times and collect the results in tensors.
-- Prints the mean of both result tensors before returning.
-- @param tree1 first tree, forwarded to `get_returns`
-- @param tree2 second tree, forwarded to `get_returns`
-- @param number_of_tests how many times `get_returns` is called (default 10)
-- @param num_iter games per combination in each call (default 100)
-- @return 1 x number_of_tests FloatTensor of average gains
-- @return 1 x number_of_tests FloatTensor of win rates
function get_tensor_returns(tree1,tree2,number_of_tests,num_iter)
    local games = num_iter or 100
    local rounds = number_of_tests or 10

    local gains = torch.FloatTensor(1,rounds)
    local win_rates = torch.FloatTensor(1,rounds)

    --- One column per round; every column is overwritten below
    for round = 1,rounds do
        local gain,rate = get_returns(tree1,tree2,games)
        gains[{1,round}] = gain
        win_rates[{1,round}] = rate
    end

    local summary = 'avg gains: '..gains:mean()..' ,win rate :'..win_rates:mean()
    print(summary)
    return gains,win_rates
end

--- Solve tree2 with CFR from uniform ranges, then evaluate it against tree1.
-- The evaluation (repeated matches, summary print, result tensors) is the
-- same procedure as `get_tensor_returns`, so it is delegated to that function.
-- @param tree1 opponent tree; it is not modified by the CFR run here
-- @param tree2 tree solved in place by `run_clean_cfr` before the matches
-- @param num_tests how many match rounds are played (default 10)
-- @param num_games games per combination in each round (default 100)
-- @param cfr_iter number of CFR iterations (default 100)
-- @param cfr_skip value stored in the solver's `cfr_skip` field (default 50)
-- @return 1 x num_tests FloatTensor of average gains
-- @return 1 x num_tests FloatTensor of win rates
function get_CFR_tensor_returns(tree1,tree2,num_tests,num_games,cfr_iter,cfr_skip)
    --- CFR iterations and number of skips:
    cfr_iter = cfr_iter or 100
    cfr_skip = cfr_skip or 50

    --- CFR Solver:
    local solver = TreeCFR2()
    solver.cfr_skip = cfr_skip

    --- Both players start from the uniform range for tree2's board
    local starting_ranges = arguments.Tensor(constants.players_count, game_settings.card_count)
    starting_ranges[1]:copy(card_tools:get_uniform_range(tree2.board))
    starting_ranges[2]:copy(card_tools:get_uniform_range(tree2.board))

    --- Clean CFR : without exploitability computation
    solver:run_clean_cfr(tree2, starting_ranges,cfr_iter)

    --- Poker match; nil arguments fall back to the same defaults (10 rounds, 100 games)
    return get_tensor_returns(tree1,tree2,num_tests,num_games)
end


In [3]:
--- FUNCTIONS
--- Distance of every stored strategy of one run to the final strategy of another.
-- For each entry of `tree_cfr1.total_strategies`, computes the square root of
-- the summed squared difference to the LAST entry of
-- `tree_cfr2000.total_strategies`.
-- @param tree_cfr1 object whose `total_strategies` list is compared entry by entry
-- @param tree_cfr2000 object whose last `total_strategies` entry is the reference
-- @return torch.FloatTensor with one distance per entry of the first list
function compare_cfrs(tree_cfr1,tree_cfr2000)
    local strategies = tree_cfr1.total_strategies
    local reference_list = tree_cfr2000.total_strategies
    local reference = reference_list[#reference_list]

    local distances = {}
    for idx = 1, #strategies do
        local delta = strategies[idx] - reference
        distances[idx] = torch.sqrt(torch.sum(torch.pow(delta,2)))
    end
    return torch.FloatTensor(distances)
end
--- Distance of every stored strategy of a run to that same run's final strategy.
-- Entry i of the result is the square root of the summed squared difference
-- between `total_strategies[i]` and the last entry of the list, so the last
-- value compares the final strategy with itself.
-- @param tree_cfr object exposing a `total_strategies` list
-- @return torch.FloatTensor with one distance per list entry
function get_convergence(tree_cfr)
    local history = tree_cfr.total_strategies
    local final = history[#history]
    local gaps = {}

    for step = 1, #history do
        local squared = torch.pow((history[step] - final),2)
        gaps[step] = torch.sqrt(torch.sum(squared))
    end

    return torch.FloatTensor(gaps)
end

In [4]:
--- Tree builder shared by the cells below
builder = PokerTreeBuilder()

--- Root node of the tree: board parsed from the empty string, street 1,
--- P1 to act, bets of 200 for each player
params = {
    root_node = {
        board = card_to_string:string_to_board(''),
        street = 1,
        current_player = constants.players.P1,
        bets = arguments.Tensor{200, 200},
    },
}

--- Starting ranges: rows 1 and 2 each hold the uniform range for the root board
starting_ranges = arguments.Tensor(constants.players_count, game_settings.card_count)
for player = 1, 2 do
    starting_ranges[player]:copy(card_tools:get_uniform_range(params.root_node.board))
end

### 1. Exploitability for original CFR and modified CFR

### 2. Exploitability for different Strategies

In [24]:
--- Iteration count passed to every run_cfr call in the cells below
number_of_iterations = 100

In [25]:
--- Fresh tree built from the shared params; it is not referenced again in the visible cells
tree_bench = builder:build_tree(params)

In [26]:
--- Baseline run: tree, data holder and CFR solver.
--- The "800" in the names is historical; the run itself uses number_of_iterations.
tree800 = builder:build_tree(params)
tree_data800 = TreeData(tree800)
tree800_cfr = TreeCFR2()
--- NOTE(review): cfr_skip presumably sets how many initial iterations are skipped - confirm in TreeCFR2
tree800_cfr.cfr_skip = 1

In [27]:
--- Run CFR on the baseline tree, then extract the training set from the solved tree
tree800_cfr:run_cfr(tree800, starting_ranges,number_of_iterations)
--- NOTE(review): the meaning of the second argument (1) is not visible here
tree_data800:get_training_set(tree800,1)

### Training the Model

In [28]:
-- Strategy network wrapper and the model it exposes
strategy_nn = StrategyNN()
nn_model = strategy_nn.model
-- Trainer fed with the data extracted from the baseline CFR run
nn_trainer = NNTrainer(tree_data800,nn_model)
-- Training options, passed to nn_trainer:train
opt = {
    learningRate = 0.001,
    momentum = 0.95,
    validate = false,
}

In [29]:
--- Loss used for training (nn.SmoothL1Criterion is kept as the commented-out alternative)
current_criterion = StrategyLoss()---nn.SmoothL1Criterion()
--- Train the model and keep both returned loss values.
--- NOTE(review): 10000 is presumably the number of training epochs - confirm in NNTrainer
train_loss,test_loss = nn_trainer:train(nn_model,current_criterion,opt,10000)

In [30]:
--- Mean absolute difference between the model's output and the targets over all_data
pred_strat = nn_model:forward({nn_trainer.all_data.features,nn_trainer.all_data.masks})
true_strat =nn_trainer.all_data.targets
print(torch.mean(torch.abs(pred_strat- true_strat)))

0.0094617262737756


### No Warm Start

In [31]:
--- No warm start: run CFR directly on a freshly built tree
treeNoWarm = builder:build_tree(params)
treeNoWarm_cfr = TreeCFR2()
treeNoWarm_cfr.cfr_skip = 1
treeNoWarm_cfr:run_cfr(treeNoWarm, starting_ranges,number_of_iterations)

### Neural Net Start 

In [32]:
--- Neural-network warm start: initial strategies generated through nn_trainer
treeNNWarm = builder:build_tree(params)
dataNN = TreeData(treeNNWarm)
dataNN:get_training_set(treeNNWarm,1)
dataNN:generate_new_initial_strategies(treeNNWarm,nn_trainer)
--- Warm start the regrets from the baseline tree:
dataNN:warm_start_regrets(treeNNWarm,tree800)
--- Disabled alternative using TreeWarmStart:
---tree_warm_start = TreeWarmStart()
---tree_warm_start:run_warm_start(treeNNWarm, starting_ranges,80)
--- The CFR solver for this tree is built in the next cell


In [33]:
--- Run CFR on the network-warm-started tree
treeNNWarm_cfr = TreeCFR2()
treeNNWarm_cfr.cfr_skip = 1
treeNNWarm_cfr:run_cfr(treeNNWarm, starting_ranges,number_of_iterations)

In [34]:
--- Neural-network warm start WITHOUT regrets: only the initial strategies are generated
treeNN_NoRegWarm = builder:build_tree(params)
dataNN_NoReg = TreeData(treeNN_NoRegWarm)
dataNN_NoReg:get_training_set(treeNN_NoRegWarm,1)
dataNN_NoReg:generate_new_initial_strategies(treeNN_NoRegWarm,nn_trainer)
--- Regret warm start is intentionally disabled for this variant
--- (note: the disabled call below uses dataNN rather than dataNN_NoReg):
---dataNN:warm_start_regrets(treeNN_NoRegWarm,tree800)
--- Building CFR Solver
treeNN_NoRegWarm_cfr = TreeCFR2()
treeNN_NoRegWarm_cfr.cfr_skip = 1
treeNN_NoRegWarm_cfr:run_cfr(treeNN_NoRegWarm, starting_ranges,number_of_iterations)

### Target Warm Start 

In [35]:
--- Target warm start with regrets: both are taken from the baseline tree
treeTarRegWarm = builder:build_tree(params)
dataTargReg = TreeData(treeTarRegWarm)
--- Disabled earlier variants (they refer to dataTarg / treeTargWarm names not used in this cell):
---dataTarg:get_training_set(treeTargWarm,1)
---dataTarg:warm_start_targets(treeTargWarm,tree800)
---dataTarg:warm_start_regrets(treeTargWarm,tree800)
---tree_warm_start = TreeWarmStart()
---tree_warm_start:run_warm_start(treeTargWarm, starting_ranges,number_of_iterations)
dataTargReg:warm_start_targets_and_regrets(treeTarRegWarm,tree800)

In [36]:
--- Run CFR on the tree warm started with targets and regrets
treeTarRegWarm_cfr = TreeCFR2()
treeTarRegWarm_cfr.cfr_skip = 1
treeTarRegWarm_cfr:run_cfr(treeTarRegWarm, starting_ranges,number_of_iterations)

In [37]:
--- Target warm start WITHOUT regrets: only warm_start_targets is applied
treeTarWarm = builder:build_tree(params)
dataTarg = TreeData(treeTarWarm)
dataTarg:warm_start_targets(treeTarWarm,tree800)
--- Building CFR Solver
treeTarWarm_cfr = TreeCFR2()
treeTarWarm_cfr.cfr_skip = 1
treeTarWarm_cfr:run_cfr(treeTarWarm, starting_ranges,number_of_iterations)

In [38]:
--- Regret-only warm start: only warm_start_regrets is applied (no targets)
treeRegWarm = builder:build_tree(params)
dataReg = TreeData(treeRegWarm)
dataReg:warm_start_regrets(treeRegWarm,tree800)
--- Building CFR Solver
treeRegWarm_cfr = TreeCFR2()
treeRegWarm_cfr.cfr_skip = 1
treeRegWarm_cfr:run_cfr(treeRegWarm, starting_ranges,number_of_iterations)

### Plot different starts

In [39]:
--- Collect the exploitability_vec recorded by each solver so the plot cells can use short names
exploit_unif = tree800_cfr.exploitability_vec
exploit_NoW = treeNoWarm_cfr.exploitability_vec
exploit_NNW = treeNNWarm_cfr.exploitability_vec
exploit_NN_NoReg = treeNN_NoRegWarm_cfr.exploitability_vec
exploit_TarRegW = treeTarRegWarm_cfr.exploitability_vec
exploit_TarW = treeTarWarm_cfr.exploitability_vec
exploit_RegW = treeRegWarm_cfr.exploitability_vec

In [46]:
--- Plot exploitability for the no-warm-start, network and oracle initializations.
--- The x axis is simply 1..N, where N is the length of exploit_NoW.
local num_iter = torch.range(1,exploit_NoW:size(1))
local plot = Plot()

---plot:line(num_iter,exploit_unif,'black' ,'No Warm start CFR = Uniform warm start')
plot:line(num_iter,exploit_NoW,'black' ,'No Warm start = Random initialization')
plot:line(num_iter,exploit_NNW,'blue' ,'Neural Network warm start')
---plot:line(num_iter,exploit_NN_NoReg,'orange' ,'Neural Net warm start CFR no Regrets')
plot:line(num_iter,exploit_TarRegW,'green' ,'Oracle warm start')
--plot:line(num_iter,exploit_TarW,'brown' ,'Warm start only targets')
---plot:line(num_iter,exploit_RegW,'red' ,'Warm start with target values ')
--- The title reports the iteration count the runs were actually given,
--- instead of a hard-coded number that can drift from number_of_iterations.
plot:legend(true):title('Exploitability of different initializations, '..number_of_iterations..' iter.')
plot:xaxis('Scaled number of iterations'):yaxis('Exploitability'):redraw()


In [66]:
--- Display the exploitability values recorded for the no-warm-start run
exploit_NoW


 164.9537
 117.2761
  86.4621
  72.8915
  58.3179
  50.2712
  43.8656
  42.2188
  40.9673
  39.3922
  34.7872
  31.7053
  29.7355
  27.8908
  26.6914
  25.3558
  23.9800
  22.7686
  21.9448
  21.3121
  20.6796
  19.9596
  19.1915
  18.3132
  17.2323
  16.2894
  15.8036
  15.4565
  15.0492
  14.6929
  14.2757
  13.7188
  13.1431
  12.8652
  12.7097
  12.3721
  12.1430
  11.9503
  11.6885
  11.3222
  10.9847
  10.6921
  10.4499
  10.2059
  10.0190
   9.8856
   9.6589
   9.4314
   9.2289
   9.0422
   8.8607
[torch.FloatTensor of size 51]



In [69]:
--- Scratch arithmetic; what the two factors stand for is not stated in the notebook
8*1500000

12000000


In [45]:
--- Plot exploitability for warm starts with and without regrets.
--- The x axis is simply 1..N, where N is the length of exploit_NoW.
local num_iter = torch.range(1,exploit_NoW:size(1))
local plot = Plot()

---plot:line(num_iter,exploit_unif,'black' ,'No Warm start CFR = Uniform warm start')
plot:line(num_iter,exploit_NoW,'black' ,'No Warm start = Random initialization')
---plot:line(num_iter,exploit_NNW,'blue' ,'Neural Net warm start CFR')
plot:line(num_iter,exploit_NN_NoReg,'blue' ,'Neural Network warm start without regrets')
plot:line(num_iter,exploit_TarRegW,'brown' ,'Oracle warm start with regrets')
plot:line(num_iter,exploit_TarW,'green' ,'Oracle warm start without regrets')
---plot:line(num_iter,exploit_RegW,'red' ,'Warm start with target values')
--- The title reports the iteration count the runs were actually given,
--- instead of a hard-coded number that can drift from number_of_iterations.
plot:legend(true):title('Exploitability of different initializations, '..number_of_iterations..' iter.')
plot:xaxis('Scaled number of iterations'):yaxis('Exploitability'):redraw()
