In [None]:
--- Libraries:
arguments = require 'Settings.arguments'
constants = require 'Settings.constants'
card_to_string = require 'Game.card_to_string_conversion'
card_tools = require 'Game.card_tools'
game_settings = require 'Settings.game_settings'
Plot = require 'itorch.Plot'
nninit = require 'nninit'
require 'torch'
require 'math'
require 'Tree.tree_builder'
require 'Tree.tree_data_generation'
require 'Tree.tree_values'
require 'Tree.tree_visualiser'
require 'nn'
require 'Tree.tree_cfr'
require 'Tree.tree_cfr2'
require 'nngraph'
require 'optim'
require 'image'
require 'NN.strategy_net_builder'
require 'NN.strategy_net_trainer'
require 'Tree.tree_warm_start'
require 'Nn.StrategyCriterion'


require 'Adversarials.exploitabilityVS'


In [None]:
--- Trains a strategy network on a CFR-solved tree and returns two warm-started trees.
--
-- Pipeline, in order:
--   1. build a tree and run `TreeCFR2:run_clean_cfr` on it;
--   2. extract a training set from that tree and train a `StrategyNN` model
--      with `NNTrainer` and the `StrategyLoss` criterion;
--   3. build a second tree, generate its initial strategies from the trained
--      network and call `warm_start_regrets` against the solved tree;
--   4. build a third tree and call `warm_start_targets_and_regrets` against
--      the solved tree.
--
-- Relies on the global `builder` for `build_tree`.
--
-- @param params tree description forwarded to `builder:build_tree`
-- @param starting_ranges ranges passed to the CFR solver
-- @param num_iter CFR iteration count (default 200)
-- @param num_skip value stored in the solver's `cfr_skip` field (default 50)
-- @return treeNNWarm the tree initialised from the network
-- @return treeTargetWarm the tree initialised from the solved tree
function strategies_initializer(params, starting_ranges,num_iter,num_skip)
    num_iter = num_iter or 200
    num_skip = num_skip or 50

    --- Step 1: solve the training tree with CFR.
    local tree_train = builder:build_tree(params)
    local tree_data = TreeData(tree_train)

    local solver_train = TreeCFR2()
    solver_train.cfr_skip = num_skip
    solver_train:run_clean_cfr(tree_train, starting_ranges, num_iter)

    --- Step 2: extract the training set and fit the network.
    tree_data:get_training_set(tree_train, 1)
    print('training train tree with CFR: ')

    local nn_model = StrategyNN().model
    local nn_trainer = NNTrainer(tree_data, nn_model)
    local opt = {
        learningRate = 0.001,
        momentum = 0.95,
        validate = false,
    }
    -- The losses returned by train() are not needed here.
    nn_trainer:train(nn_model, StrategyLoss(), opt, 10000)
    print("training neural network: ")

    --- Step 3: tree whose initial strategies come from the network.
    local treeNNWarm = builder:build_tree(params)
    local dataNN = TreeData(treeNNWarm)
    dataNN:get_training_set(treeNNWarm, 1)
    dataNN:generate_new_initial_strategies(treeNNWarm, nn_trainer)
    dataNN:warm_start_regrets(treeNNWarm, tree_train)
    -- Progress message text is kept exactly as before.
    print("target warm-start")

    --- Step 4: tree warm-started directly from the solved tree.
    local treeTargetWarm = builder:build_tree(params)
    local tree_data_target = TreeData(treeTargetWarm)
    tree_data_target:warm_start_targets_and_regrets(treeTargetWarm, tree_train)
    print("Whole Process: ")

    return treeNNWarm, treeTargetWarm
end


--- Plays one evaluation match between two trees.
--
-- Builds a `StrategyEvaluator` for the pair, has it play every combination
-- `num_iter` times, and reads the evaluator's `A2_avg_pot_won` and
-- `A2_winning_rate` fields.
--
-- @param tree1 first tree handed to the evaluator
-- @param tree2 second tree handed to the evaluator
-- @param num_iter games per combination (default 100)
-- @return the evaluator's A2_avg_pot_won, then its A2_winning_rate
function get_returns(tree1,tree2,num_iter)
    local games_per_combination = num_iter or 100

    local match = StrategyEvaluator(tree1, tree2)
    match:play_all_combinations_n_times(tree1, tree2, games_per_combination)

    local rate = match.A2_winning_rate
    local pot = match.A2_avg_pot_won
    return pot, rate
end


--- Repeats `get_returns` several times and collects the results in tensors.
--
-- Each repetition stores its two results in column i of two 1 x N float
-- tensors. The mean of each tensor is printed before returning.
--
-- @param tree1 first tree forwarded to `get_returns`
-- @param tree2 second tree forwarded to `get_returns`
-- @param number_of_tests number of repetitions N (default 10)
-- @param num_iter games per combination in each repetition (default 100)
-- @return avg_gain_tensor, win_rate_tensor (both 1 x N torch.FloatTensor)
function get_tensor_returns(tree1,tree2,number_of_tests,num_iter)
    local games = num_iter or 100
    local trials = number_of_tests or 10

    local gains = torch.FloatTensor(1, trials)
    local win_rates = torch.FloatTensor(1, trials)

    for trial = 1, trials do
        gains[{1, trial}], win_rates[{1, trial}] = get_returns(tree1, tree2, games)
    end

    local mean_gain = gains:mean()
    local mean_win_rate = win_rates:mean()
    print('avg gains: '..mean_gain..' ,win rate :'..mean_win_rate)

    return gains, win_rates
end

--- Runs clean CFR on `tree2`, then benchmarks `tree1` against it.
--
-- A `TreeCFR2` solver is run on `tree2` with uniform starting ranges built
-- from `tree2.board`. The repeated evaluation is then delegated to
-- `get_tensor_returns`, which has the same defaults and prints the same
-- summary line as the loop that used to be duplicated here.
--
-- @param tree1 first tree forwarded to the evaluation
-- @param tree2 tree passed to `run_clean_cfr` and then evaluated
-- @param num_tests number of evaluation repetitions (default 10)
-- @param num_games games per combination in each repetition (default 100)
-- @param cfr_iter CFR iteration count (default 100)
-- @param cfr_skip value stored in the solver's `cfr_skip` field (default 50)
-- @return avg_gain_tensor, win_rate_tensor as returned by `get_tensor_returns`
function get_CFR_tensor_returns(tree1,tree2,num_tests,num_games,cfr_iter,cfr_skip)
    cfr_iter = cfr_iter or 100
    cfr_skip = cfr_skip or 50

    --- CFR solver (a single instance; no second, unused solver is created).
    local solver = TreeCFR2()
    solver.cfr_skip = cfr_skip

    --- Uniform starting ranges for both players on tree2's board.
    local starting_ranges = arguments.Tensor(constants.players_count, game_settings.card_count)
    starting_ranges[1]:copy(card_tools:get_uniform_range(tree2.board))
    starting_ranges[2]:copy(card_tools:get_uniform_range(tree2.board))

    --- Clean CFR: the variant called here is `run_clean_cfr`, not `run_cfr`.
    solver:run_clean_cfr(tree2, starting_ranges, cfr_iter)

    --- Poker matches: nil arguments fall back to get_tensor_returns' defaults.
    return get_tensor_returns(tree1, tree2, num_tests, num_games)
end


In [None]:
--- FUNCTIONS
--- Distance from each stored strategy of one solver to another solver's last one.
--
-- For every entry i of `tree_cfr1.total_strategies`, computes
-- sqrt(sum((entry_i - last)^2)), where `last` is the final entry of
-- `tree_cfr2000.total_strategies`.
--
-- @param tree_cfr1 solver whose `total_strategies` entries are measured
-- @param tree_cfr2000 solver whose last `total_strategies` entry is the reference
-- @return torch.FloatTensor with one distance per entry of the first list
function compare_cfrs(tree_cfr1,tree_cfr2000)
    local candidates = tree_cfr1.total_strategies
    local references = tree_cfr2000.total_strategies
    local distances = {}

    for step = 1, #candidates do
        local gap = candidates[step] - references[#references]
        local squared_sum = torch.sum(torch.pow(gap, 2))
        distances[step] = torch.sqrt(squared_sum)
    end

    return torch.FloatTensor(distances)
end
--- Distance from each stored strategy of a solver to its own last one.
--
-- For every entry i of `tree_cfr.total_strategies`, computes
-- sqrt(sum((entry_i - last)^2)), where `last` is the final entry of the
-- same list.
--
-- @param tree_cfr solver exposing a `total_strategies` list
-- @return torch.FloatTensor with one distance per stored strategy
function get_convergence(tree_cfr)
    local history = tree_cfr.total_strategies
    local final_strategy = history[#history]
    local distances = {}

    for step = 1, #history do
        local gap = history[step] - final_strategy
        local squared_sum = torch.sum(torch.pow(gap, 2))
        distances[step] = torch.sqrt(squared_sum)
    end

    return torch.FloatTensor(distances)
end

In [None]:
--- Tree builder shared by the cells below.
builder = PokerTreeBuilder()

--- Root-node description handed to `builder:build_tree`:
-- empty board string, street 1, P1 to act, bets of 200 for each entry.
params = {
    root_node = {
        board = card_to_string:string_to_board(''),
        street = 1,
        current_player = constants.players.P1,
        bets = arguments.Tensor{200, 200},
    },
}

--- Starting ranges for the CFR solvers: rows 1 and 2 each receive the
-- uniform range computed for the root board.
starting_ranges = arguments.Tensor(constants.players_count, game_settings.card_count)
for player = 1, 2 do
    starting_ranges[player]:copy(card_tools:get_uniform_range(params.root_node.board))
end

### 1. Exploitability for original CFR and modified CFR

In [None]:
--- Baseline run: the original TreeCFR solver on a freshly built tree.
-- Runs 10 iterations with cfr_skip = 0, then prints the `exploitability`
-- field read from the tree.
tree100 = builder:build_tree(params)
tree100_cfr = TreeCFR()
tree100_cfr.cfr_skip = 0
tree100_cfr:run_cfr(tree100, starting_ranges,10)

print(tree100.exploitability)

In [None]:
--- Modified solver run: TreeCFR2 on a freshly built tree.
-- Runs 10 iterations with cfr_skip = 1, then prints the `exploitability`
-- field read from the tree.
tree102 = builder:build_tree(params)
tree102_cfr = TreeCFR2()
tree102_cfr.cfr_skip = 1
tree102_cfr:run_cfr(tree102, starting_ranges,10)
print(tree102.exploitability)

In [None]:
--- Exploitability vectors recorded by the two solvers, kept for plotting.
exploit_cfr100 = tree100_cfr.exploitability_vec
exploit_cfr102 = tree102_cfr.exploitability_vec

In [None]:
--- Plot the exploitability of the original solver against the modified one.
-- The x axis runs from 1 to the length of the TreeCFR vector.
-- NOTE(review): the same x values are reused for exploit_cfr102; this assumes
-- both vectors have the same length, confirm (the two runs use different cfr_skip).
local num_iter = torch.range(1,exploit_cfr100:size(1))
local plot = Plot()

plot:line(num_iter,exploit_cfr100,'red' ,'Original')
plot:line(num_iter,exploit_cfr102,'blue' ,'New CFR')
-- NOTE(review): the title mentions 800 iterations, while the visible cells
-- that fill these vectors call run_cfr with 10 iterations; confirm.
plot:legend(true):title('Exploitability of different strategies, 800 iterations')
plot:xaxis('Scaled number of iterations'):yaxis('Exploitability'):redraw()


### 2. Exploitability for different Strategies

In [None]:
--- Iteration count passed to every run_cfr call in the cells below.
number_of_iterations = 200

In [None]:
--- Benchmark settings used by the get_tensor_returns calls below.
num_tests =10
num_games = 100

In [None]:
--- Freshly built tree (no solver is run on it in this cell), passed as the
-- first tree to the get_tensor_returns calls below.
tree_bench = builder:build_tree(params)

In [None]:
--- Reference CFR2 run: its training set feeds the neural network and its
-- tree is the source for the warm starts in later cells.
-- The run uses `number_of_iterations` iterations with cfr_skip = 1.
-- The tree is built here, as in every other cell, so that `tree800` is
-- defined before TreeData and run_cfr use it.
tree800 = builder:build_tree(params)
tree_data800 = TreeData(tree800)
tree800_cfr = TreeCFR2()
tree800_cfr.cfr_skip = 1
-- get_tensor_returns takes (tree1, tree2, number_of_tests, num_iter); the
-- stray third argument that shifted num_tests into the games slot is removed.
-- NOTE(review): treeTarRegWarm is only assigned in a later cell, so this
-- cell depends on notebook execution order; confirm the intended tree.
-- NOTE(review): the result of this first call is overwritten by the second.
gain_unif_after,win_unif_after = get_tensor_returns(tree_bench,treeTarRegWarm,num_tests,num_games)
tree800_cfr:run_cfr(tree800, starting_ranges,number_of_iterations)
tree_data800:get_training_set(tree800,1)
gain_unif_after,win_unif_after = get_tensor_returns(tree_bench,treeTarRegWarm,num_tests,num_games)

### Training the Model

In [None]:
-- Build the strategy network and keep a handle on its model.
strategy_nn = StrategyNN()
nn_model = strategy_nn.model
-- Build the trainer from the reference run's data (tree_data800) and the model.
nn_trainer = NNTrainer(tree_data800,nn_model)
-- Training options passed to nn_trainer:train.
opt = {}
opt.learningRate = 0.001
opt.momentum = 0.95
opt.validate = false

In [None]:
-- Train the model with the StrategyLoss criterion (nn.SmoothL1Criterion was
-- the previous choice); 10000 is the last argument passed to train().
current_criterion = StrategyLoss()---nn.SmoothL1Criterion()
train_loss,test_loss = nn_trainer:train(nn_model,current_criterion,opt,10000)

In [None]:
-- Run the model on the trainer's full data set (features and masks) and
-- print the mean absolute difference between prediction and targets.
pred_strat = nn_model:forward({nn_trainer.all_data.features,nn_trainer.all_data.masks})
true_strat =nn_trainer.all_data.targets
print(torch.mean(torch.abs(pred_strat- true_strat)))

### No Warm Start

In [None]:
--- No warm start: TreeCFR2 on a freshly built tree, cfr_skip = 1,
-- run for number_of_iterations iterations.
treeNoWarm = builder:build_tree(params)
treeNoWarm_cfr = TreeCFR2()
treeNoWarm_cfr.cfr_skip = 1
treeNoWarm_cfr:run_cfr(treeNoWarm, starting_ranges,number_of_iterations)

### Neural Net Start 

In [None]:
--- Neural-network warm start: build a fresh tree, extract its data set and
-- generate its initial strategies with the trained nn_trainer.
treeNNWarm = builder:build_tree(params)
dataNN = TreeData(treeNNWarm)
dataNN:get_training_set(treeNNWarm,1)
dataNN:generate_new_initial_strategies(treeNNWarm,nn_trainer)
--- Warm start the regrets, passing the reference tree (tree800) as source:
dataNN:warm_start_regrets(treeNNWarm,tree800)
--- Disabled alternative kept for reference:
---tree_warm_start = TreeWarmStart()
---tree_warm_start:run_warm_start(treeNNWarm, starting_ranges,80)
--- The CFR solver for this tree is created in a later cell.


In [None]:
-- Benchmark tree_bench against the NN-warm-started tree.
-- get_tensor_returns takes (tree1, tree2, number_of_tests, num_iter); the
-- stray third argument that shifted num_tests into the games slot is removed.
gain_unif_after,win_unif_after = get_tensor_returns(tree_bench,treeNNWarm,num_tests,num_games)

In [None]:
--- Run TreeCFR2 (cfr_skip = 1) on the NN-warm-started tree.
treeNNWarm_cfr = TreeCFR2()
treeNNWarm_cfr.cfr_skip = 1
treeNNWarm_cfr:run_cfr(treeNNWarm, starting_ranges,number_of_iterations)

In [None]:
-- Benchmark tree_bench against the NN-warm-started tree again.
-- get_tensor_returns takes (tree1, tree2, number_of_tests, num_iter); the
-- stray third argument that shifted num_tests into the games slot is removed.
gain_unif_after,win_unif_after = get_tensor_returns(tree_bench,treeNNWarm,num_tests,num_games)

In [None]:
--- Neural-network warm start without regrets: initial strategies come from
-- nn_trainer; the warm_start_regrets call is deliberately left disabled.
treeNN_NoRegWarm = builder:build_tree(params)
dataNN_NoReg = TreeData(treeNN_NoRegWarm)
dataNN_NoReg:get_training_set(treeNN_NoRegWarm,1)
dataNN_NoReg:generate_new_initial_strategies(treeNN_NoRegWarm,nn_trainer)
--- Regret warm start (disabled in this variant):
---dataNN:warm_start_regrets(treeNN_NoRegWarm,tree800)
--- Run TreeCFR2 (cfr_skip = 1) on the resulting tree.
treeNN_NoRegWarm_cfr = TreeCFR2()
treeNN_NoRegWarm_cfr.cfr_skip = 1
treeNN_NoRegWarm_cfr:run_cfr(treeNN_NoRegWarm, starting_ranges,number_of_iterations)

### Target Warm Start 

In [None]:
--- Target warm start with regrets: a fresh tree is initialised by
-- warm_start_targets_and_regrets with the reference tree (tree800) as source.
treeTarRegWarm = builder:build_tree(params)
dataTargReg = TreeData(treeTarRegWarm)
--- Disabled earlier variants kept for reference:
---dataTarg:get_training_set(treeTargWarm,1)
---dataTarg:warm_start_targets(treeTargWarm,tree800)
---dataTarg:warm_start_regrets(treeTargWarm,tree800)
---tree_warm_start = TreeWarmStart()
---tree_warm_start:run_warm_start(treeTargWarm, starting_ranges,number_of_iterations)
dataTargReg:warm_start_targets_and_regrets(treeTarRegWarm,tree800)
--- Run TreeCFR2 (cfr_skip = 1) on the warm-started tree.
treeTarRegWarm_cfr = TreeCFR2()
treeTarRegWarm_cfr.cfr_skip = 1
treeTarRegWarm_cfr:run_cfr(treeTarRegWarm, starting_ranges,number_of_iterations)

In [None]:
--- Target warm start without regrets: only warm_start_targets is called,
-- with the reference tree (tree800) as source.
treeTarWarm = builder:build_tree(params)
dataTarg = TreeData(treeTarWarm)
dataTarg:warm_start_targets(treeTarWarm,tree800)
--- Run TreeCFR2 (cfr_skip = 1) on the warm-started tree.
treeTarWarm_cfr = TreeCFR2()
treeTarWarm_cfr.cfr_skip = 1
treeTarWarm_cfr:run_cfr(treeTarWarm, starting_ranges,number_of_iterations)

In [None]:
--- Regret-only warm start: only warm_start_regrets is called, with the
-- reference tree (tree800) as source.
treeRegWarm = builder:build_tree(params)
dataReg = TreeData(treeRegWarm)
dataReg:warm_start_regrets(treeRegWarm,tree800)
--- Run TreeCFR2 (cfr_skip = 1) on the warm-started tree.
treeRegWarm_cfr = TreeCFR2()
treeRegWarm_cfr.cfr_skip = 1
treeRegWarm_cfr:run_cfr(treeRegWarm, starting_ranges,number_of_iterations)

### Plot different starts

In [None]:
--- Exploitability vectors of every solver run above, kept for plotting.
exploit_unif = tree800_cfr.exploitability_vec
exploit_NoW = treeNoWarm_cfr.exploitability_vec
exploit_NNW = treeNNWarm_cfr.exploitability_vec
exploit_NN_NoReg = treeNN_NoRegWarm_cfr.exploitability_vec
exploit_TarRegW = treeTarRegWarm_cfr.exploitability_vec
exploit_TarW = treeTarWarm_cfr.exploitability_vec
exploit_RegW = treeRegWarm_cfr.exploitability_vec

In [None]:
--- Plot exploitability for three initialisations: no warm start, neural
-- network warm start and target ("oracle") warm start with regrets.
-- The x axis runs from 1 to the length of exploit_NoW.
-- Commented-out plot:line calls are curves disabled for this figure.
local num_iter = torch.range(1,exploit_NoW:size(1))
local plot = Plot()

---plot:line(num_iter,exploit_unif,'black' ,'No Warm start CFR = Uniform warm start')
plot:line(num_iter,exploit_NoW,'black' ,'No Warm start = Random initialization')
plot:line(num_iter,exploit_NNW,'blue' ,'Neural Network warm start')
---plot:line(num_iter,exploit_NN_NoReg,'brown' ,'Neural Net warm start CFR no Regrets')
plot:line(num_iter,exploit_TarRegW,'green' ,'Oracle warm start')
---plot:line(num_iter,exploit_TarW,'orange' ,'Warm start only targets')
---plot:line(num_iter,exploit_RegW,'red' ,'Warm start with target values ')
plot:legend(true):title('Exploitability of different initializations, 200 iter.')
plot:xaxis('Scaled number of iterations'):yaxis('Exploitability'):redraw()


In [None]:
--- Trains a strategy network on a CFR-solved tree and returns two warm-started trees.
--
-- Pipeline, in order:
--   1. build a tree and run `TreeCFR2:run_clean_cfr` on it;
--   2. extract a training set from that tree and train a `StrategyNN` model
--      with `NNTrainer` and the `StrategyLoss` criterion;
--   3. build a second tree, generate its initial strategies from the trained
--      network and call `warm_start_regrets` against the solved tree;
--   4. build a third tree and call `warm_start_targets_and_regrets` against
--      the solved tree.
--
-- Relies on the global `builder` for `build_tree`.
--
-- @param params tree description forwarded to `builder:build_tree`
-- @param starting_ranges ranges passed to the CFR solver
-- @param num_iter CFR iteration count (default 200)
-- @param num_skip value stored in the solver's `cfr_skip` field (default 50)
-- @return treeNNWarm the tree initialised from the network
-- @return treeTargetWarm the tree initialised from the solved tree
function strategies_initializer(params, starting_ranges,num_iter,num_skip)
    num_iter = num_iter or 200
    num_skip = num_skip or 50

    --- Step 1: solve the training tree with CFR.
    local tree_train = builder:build_tree(params)
    local tree_data = TreeData(tree_train)

    local solver_train = TreeCFR2()
    solver_train.cfr_skip = num_skip
    solver_train:run_clean_cfr(tree_train, starting_ranges, num_iter)

    --- Step 2: extract the training set and fit the network.
    tree_data:get_training_set(tree_train, 1)
    print('training train tree with CFR: ')

    local nn_model = StrategyNN().model
    local nn_trainer = NNTrainer(tree_data, nn_model)
    local opt = {
        learningRate = 0.001,
        momentum = 0.95,
        validate = false,
    }
    -- The losses returned by train() are not needed here.
    nn_trainer:train(nn_model, StrategyLoss(), opt, 10000)
    print("training neural network: ")

    --- Step 3: tree whose initial strategies come from the network.
    local treeNNWarm = builder:build_tree(params)
    local dataNN = TreeData(treeNNWarm)
    dataNN:get_training_set(treeNNWarm, 1)
    dataNN:generate_new_initial_strategies(treeNNWarm, nn_trainer)
    dataNN:warm_start_regrets(treeNNWarm, tree_train)
    -- Progress message text is kept exactly as before.
    print("target warm-start")

    --- Step 4: tree warm-started directly from the solved tree.
    local treeTargetWarm = builder:build_tree(params)
    local tree_data_target = TreeData(treeTargetWarm)
    tree_data_target:warm_start_targets_and_regrets(treeTargetWarm, tree_train)
    print("Whole Process: ")

    return treeNNWarm, treeTargetWarm
end


--- Plays one evaluation match between two trees.
--
-- Builds a `StrategyEvaluator` for the pair, has it play every combination
-- `num_iter` times, and reads the evaluator's `A2_avg_pot_won` and
-- `A2_winning_rate` fields.
--
-- @param tree1 first tree handed to the evaluator
-- @param tree2 second tree handed to the evaluator
-- @param num_iter games per combination (default 100)
-- @return the evaluator's A2_avg_pot_won, then its A2_winning_rate
function get_returns(tree1,tree2,num_iter)
    local games_per_combination = num_iter or 100

    local match = StrategyEvaluator(tree1, tree2)
    match:play_all_combinations_n_times(tree1, tree2, games_per_combination)

    local rate = match.A2_winning_rate
    local pot = match.A2_avg_pot_won
    return pot, rate
end


--- Repeats `get_returns` several times and collects the results in tensors.
--
-- Each repetition stores its two results in column i of two 1 x N float
-- tensors. The mean of each tensor is printed before returning.
--
-- @param tree1 first tree forwarded to `get_returns`
-- @param tree2 second tree forwarded to `get_returns`
-- @param number_of_tests number of repetitions N (default 10)
-- @param num_iter games per combination in each repetition (default 100)
-- @return avg_gain_tensor, win_rate_tensor (both 1 x N torch.FloatTensor)
function get_tensor_returns(tree1,tree2,number_of_tests,num_iter)
    local games = num_iter or 100
    local trials = number_of_tests or 10

    local gains = torch.FloatTensor(1, trials)
    local win_rates = torch.FloatTensor(1, trials)

    for trial = 1, trials do
        gains[{1, trial}], win_rates[{1, trial}] = get_returns(tree1, tree2, games)
    end

    local mean_gain = gains:mean()
    local mean_win_rate = win_rates:mean()
    print('avg gains: '..mean_gain..' ,win rate :'..mean_win_rate)

    return gains, win_rates
end

--- Runs clean CFR on `tree2`, then benchmarks `tree1` against it.
--
-- A `TreeCFR2` solver is run on `tree2` with uniform starting ranges built
-- from `tree2.board`. The repeated evaluation is then delegated to
-- `get_tensor_returns`, which has the same defaults and prints the same
-- summary line as the loop that used to be duplicated here.
--
-- @param tree1 first tree forwarded to the evaluation
-- @param tree2 tree passed to `run_clean_cfr` and then evaluated
-- @param num_tests number of evaluation repetitions (default 10)
-- @param num_games games per combination in each repetition (default 100)
-- @param cfr_iter CFR iteration count (default 100)
-- @param cfr_skip value stored in the solver's `cfr_skip` field (default 50)
-- @return avg_gain_tensor, win_rate_tensor as returned by `get_tensor_returns`
function get_CFR_tensor_returns(tree1,tree2,num_tests,num_games,cfr_iter,cfr_skip)
    cfr_iter = cfr_iter or 100
    cfr_skip = cfr_skip or 50

    --- CFR solver (a single instance; no second, unused solver is created).
    local solver = TreeCFR2()
    solver.cfr_skip = cfr_skip

    --- Uniform starting ranges for both players on tree2's board.
    local starting_ranges = arguments.Tensor(constants.players_count, game_settings.card_count)
    starting_ranges[1]:copy(card_tools:get_uniform_range(tree2.board))
    starting_ranges[2]:copy(card_tools:get_uniform_range(tree2.board))

    --- Clean CFR: the variant called here is `run_clean_cfr`, not `run_cfr`.
    solver:run_clean_cfr(tree2, starting_ranges, cfr_iter)

    --- Poker matches: nil arguments fall back to get_tensor_returns' defaults.
    return get_tensor_returns(tree1, tree2, num_tests, num_games)
end


In [None]:
--- Plot exploitability for four initialisations: no warm start, neural
-- network warm start without regrets, and target ("oracle") warm start with
-- and without regrets. The x axis runs from 1 to the length of exploit_NoW.
-- Commented-out plot:line calls are curves disabled for this figure.
local num_iter = torch.range(1,exploit_NoW:size(1))
local plot = Plot()

---plot:line(num_iter,exploit_unif,'black' ,'No Warm start CFR = Uniform warm start')
plot:line(num_iter,exploit_NoW,'black' ,'No Warm start = Random initialization')
---plot:line(num_iter,exploit_NNW,'blue' ,'Neural Net warm start CFR')
plot:line(num_iter,exploit_NN_NoReg,'blue' ,'Neural Network warm start without regrets')
plot:line(num_iter,exploit_TarRegW,'brown' ,'Oracle warm start with regrets')
plot:line(num_iter,exploit_TarW,'green' ,'Oracle warm start without regrets')
---plot:line(num_iter,exploit_RegW,'red' ,'Warm start with target values')
plot:legend(true):title('Exploitability of different initializations, 200 iter.')
plot:xaxis('Scaled number of iterations'):yaxis('Exploitability'):redraw()


### Graphical Comparison

### 3. Comparison with 2000 CFR2

In [None]:
--- Reference run for this section: TreeCFR2 on a fresh tree for
-- number_of_iterations iterations, followed by training-set extraction.
-- cfr_skip is not assigned here, so the solver keeps whatever value it
-- starts with.
tree802 = builder:build_tree(params)
tree_data802 = TreeData(tree802)
tree802_cfr = TreeCFR2()
tree802_cfr:run_cfr(tree802, starting_ranges,number_of_iterations)
tree_data802:get_training_set(tree802,1)

In [None]:
-- Build a second strategy network and keep a handle on its model.
strategy_nn2 = StrategyNN()
nn_model2 = strategy_nn2.model
-- Build its trainer from this section's reference data (tree_data802).
nn_trainer2 = NNTrainer(tree_data802,nn_model2)
-- Training options (this reassigns the global `opt`).
opt = {}
opt.learningRate = 0.001
opt.momentum = 0.95
opt.validate = false

In [None]:
-- Train the second model with the SmoothL1 criterion.
-- The trainer built for this model and data set (nn_trainer2) is used; the
-- call previously went through nn_trainer, which was built from tree_data800.
current_criterion2 = nn.SmoothL1Criterion()
train_loss,test_loss = nn_trainer2:train(nn_model2,current_criterion2,opt,10000)

In [None]:
--- No Warm Start
treeNoWarm2 = builder:build_tree(params)
treeNoWarm_cfr2 = TreeCFR2()
treeNoWarm_cfr2:run_cfr(treeNoWarm2, starting_ranges,number_of_iterations)

In [None]:
--- NNWarm Starting
treeNNWarm2 = builder:build_tree(params)
dataNN2 = TreeData(treeNNWarm2)
dataNN2:get_training_set(treeNNWarm2,1)
dataNN2:generate_new_initial_strategies(treeNNWarm2,nn_trainer2)

In [None]:
--- NNWarm CFR Solver
treeNNWarm_cfr2 = TreeCFR2()
treeNNWarm_cfr2:run_cfr(treeNNWarm2, starting_ranges,number_of_iterations)

In [None]:
--- Target Warm Starting
treeTargWarm2 = builder:build_tree(params)
dataTarg2 = TreeData(treeTargWarm2)
dataTarg2:get_training_set(treeTargWarm2,1)
dataTarg2:warm_start_targets(treeTargWarm2,tree802)

In [None]:
--- Building CFR Solver
treeTargWarm_cfr2 = TreeCFR2()
treeTargWarm_cfr2:run_cfr(treeTargWarm2, starting_ranges,number_of_iterations)

In [None]:
--- Exploitability vectors of this section's four solver runs, kept for plotting.
exploit_unif2 = tree802_cfr.exploitability_vec
exploit_NoW2 = treeNoWarm_cfr2.exploitability_vec
exploit_NNW2 = treeNNWarm_cfr2.exploitability_vec
exploit_TargW2 = treeTargWarm_cfr2.exploitability_vec

In [None]:
--- Plot exploitability of this section's four runs.
-- The x axis is sized from exploit_NoW2, one of the vectors plotted here.
-- It was previously sized from exploit_NoW, which belongs to the earlier
-- section whose runs set cfr_skip = 1 and may therefore differ in length.
local num_iter = torch.range(1,exploit_NoW2:size(1))
local plot = Plot()

plot:line(num_iter,exploit_unif2,'black' ,'Original CFR in the paper')
plot:line(num_iter,exploit_NoW2,'red' ,'No Warm start CFR')
plot:line(num_iter,exploit_NNW2,'blue' ,'Neural Net warm start CFR')
plot:line(num_iter,exploit_TargW2,'green' ,'Warm start with target values')
plot:legend(true):title('Exploitability of different strategoes')
plot:xaxis('Scaled number of iterations'):yaxis('Exploitability'):redraw()
