In [1]:
--- Libraries:
arguments = require 'Settings.arguments'
constants = require 'Settings.constants'
card_to_string = require 'Game.card_to_string_conversion'
card_tools = require 'Game.card_tools'
game_settings = require 'Settings.game_settings'
Plot = require 'itorch.Plot'
nninit = require 'nninit'
require 'torch'
require 'math'
require 'Tree.tree_builder'
require 'Tree.tree_data_generation'
require 'Tree.tree_visualiser'
require 'nn'
require 'Tree.tree_cfr'
require 'nngraph'
require 'optim'
require 'image'
require 'NN.strategy_net_builder'
require 'NN.strategy_net_trainer'



In [2]:
--- Replace every zero entry of an action vector with negative infinity.
-- The input is cloned first, so `actions_vector` itself is never modified.
-- @param actions_vector 1-D tensor (read as `v[i]`, sized with `v:size(1)`)
-- @return a clone of `actions_vector` where each entry equal to 0 is -math.huge
function convert_to_valid_actions(actions_vector)
    -- `local` keeps the working copy out of the global namespace.
    local valid_actions_vector = actions_vector:clone()
    for i = 1, valid_actions_vector:size(1) do
        if valid_actions_vector[i] == 0 then
            valid_actions_vector[i] = -math.huge
        end
    end
    return valid_actions_vector
end

In [3]:
--- Tree builder used for this notebook session.
builder = PokerTreeBuilder()

--- Root-node description handed to the builder: empty board string,
--- street 1, P1 to act, and a bet of 200 for each player.
params = {
    root_node = {
        board = card_to_string:string_to_board(''),
        street = 1,
        current_player = constants.players.P1,
        bets = arguments.Tensor{200, 200},
    },
}

--- Build the tree from the root description.
tree = builder:build_tree(params)
--- Wrap the tree in the object that later yields the training set.
game = TreeData(tree)


In [4]:
--- Run CFR on the tree, then extract the training set from it.
-- Each of the two players starts from the range that
-- card_tools:get_uniform_range returns for the root board.
local starting_ranges = arguments.Tensor(constants.players_count, game_settings.card_count)
for player = 1, 2 do
    starting_ranges[player]:copy(card_tools:get_uniform_range(params.root_node.board))
end

local tree_cfr = TreeCFR()
print("Solver")
tree_cfr:run_cfr(tree, starting_ranges)

print("geting training set")
game:get_training_set(tree, 1)

Solver


geting training set


In [5]:
--- Build a mask tensor with the same size as `actions_tensor`.
-- Every single-row slice of the input is passed to `find_mask`, and the
-- result is written into the matching row of the output.
-- @param actions_tensor 2-D tensor, one row per sample
-- @return a new tensor of masks; `actions_tensor` is not modified here
function convert_to_masks(actions_tensor)
    -- `local` keeps the result buffer from leaking into the global namespace.
    local masks_tensor = torch.Tensor(actions_tensor:size()):fill(0)
    for i = 1, actions_tensor:size(1) do
        masks_tensor[{{i}, {}}] = find_mask(actions_tensor[{{i}, {}}])
    end
    return masks_tensor
end

In [6]:
--- Turn a single-row action tensor into a mask row.
-- Entries below 1 become math.huge; every other entry becomes 0.
-- @param action tensor read as `action[1][i]`, with `action:size(2)` columns
-- @return a modified clone; `action` itself is left unchanged
function find_mask(action)
    local result = action:clone()
    for col = 1, action:size(2) do
        -- math.huge is truthy, so the and/or form is safe here.
        result[1][col] = (result[1][col] < 1) and math.huge or 0
    end
    return result
end

In [7]:
--- Train `model` with full-batch SGD and record the loss after every step.
-- The optimiser state is created fresh on every call, so `opt` is always
-- honoured and no state is shared between different models or runs.
-- @param features   first element of the {features, masks} input table
-- @param masks      second element of that input table
-- @param targets    targets handed to the criterion
-- @param model      module exposing getParameters / forward / backward
-- @param criterion  criterion exposing forward / backward
-- @param opt        options table: `learningRate` (forwarded to optim.sgd) and
--                   optional `iterations` (number of SGD steps, default 10000)
-- @return 1-D torch.Tensor with the loss measured after each update
function train(features, masks, targets, model, criterion, opt)
    local input = {features, masks}
    local params, gradParameters = model:getParameters()
    local loss_vector = {}
    local iterations = opt.iterations or 10000

    -- Closure in the shape optim expects: returns the loss and its gradient
    -- with respect to the flattened parameters.
    local function feval(_)
        gradParameters:zero()

        local predictions = model:forward(input)
        local loss = criterion:forward(predictions, targets)
        model:backward(input, criterion:backward(predictions, targets))

        return loss, gradParameters
    end

    local sgdState = {
        learningRate = opt.learningRate,
        learningRateDecay = 5e-5,
    }

    for _ = 1, iterations do
        optim.sgd(feval, params, sgdState)
        -- Re-evaluate after the update so the recorded value reflects the new weights.
        local loss = criterion:forward(model:forward(input), targets)
        loss_vector[#loss_vector + 1] = loss
    end
    return torch.Tensor(loss_vector)
end

--- Split a 2-D dataset into leading training rows and trailing test rows.
-- Rows keep their original order. A later cell in this notebook redefines
-- `split_data` with a shuffling variant.
-- @param dataset           2-D tensor, one sample per row
-- @param percentage_train  fraction of rows used for training (default 0.75)
-- @return train rows, test rows (both independent clones)
function split_data(dataset, percentage_train)
    percentage_train = percentage_train or 0.75
    local row_count = dataset:size(1)
    local num_train = math.floor(row_count * percentage_train)
    if num_train < 1 or num_train >= row_count then
        -- Either slice would be empty; fail with a readable message instead
        -- of an out-of-range indexing error.
        error(string.format(
            "split_data: cannot split %d rows with percentage_train=%s",
            row_count, tostring(percentage_train)), 2)
    end
    -- Locals: the original globals clobbered the notebook's own
    -- `data_train` / `data_test` variables.
    local data_train = dataset[{{1, num_train}, {}}]:clone()
    local data_test = dataset[{{num_train + 1, -1}, {}}]:clone()
    return data_train, data_test
end

In [8]:
--- Train `model` with full-batch SGD, tracking train and test loss per step.
-- The optimiser state is rebuilt on every call so that `opt` is always
-- honoured and nothing is shared between runs or models.
-- @param data_train  table with `features`, `masks`, `targets` used for fitting
-- @param data_test   table with the same fields, only evaluated
-- @param model       module exposing getParameters / forward / backward
-- @param criterion   criterion exposing forward / backward
-- @param opt         options table: `learningRate`, optional `momentum`
--                    (default 0), `nesterov` (default false), `dampening`,
--                    and `iterations` (number of SGD steps, default 10000)
-- @return two 1-D torch.Tensors: training loss and test loss after each update
function train_and_test(data_train, data_test, model, criterion, opt)
    -- torch.manualSeed(1)
    local params, gradParameters = model:getParameters()
    local training_loss_tensor = {}
    local test_loss_tensor = {}
    local train_input = {data_train.features, data_train.masks}
    local test_input = {data_test.features, data_test.masks}
    local iterations = opt.iterations or 10000

    -- Closure in the shape optim expects: loss and gradient on the training set.
    local function feval(_)
        gradParameters:zero()

        local predictions = model:forward(train_input)
        local loss = criterion:forward(predictions, data_train.targets)
        model:backward(train_input, criterion:backward(predictions, data_train.targets))

        return loss, gradParameters
    end

    -- The original read `opt.nesteov`, so the nesterov option was never applied.
    local nesterov = opt.nesterov or false
    local sgdState = {
        learningRate = opt.learningRate,
        momentum = opt.momentum or 0,
        nesterov = nesterov,
        -- optim.sgd defaults dampening to the momentum value but requires zero
        -- dampening for nesterov, so default it to 0 in that case.
        dampening = opt.dampening or (nesterov and 0 or nil),
        learningRateDecay = 5e-7,
    }

    for _ = 1, iterations do
        optim.sgd(feval, params, sgdState)

        -- Training loss after the update.
        local train_loss = criterion:forward(model:forward(train_input), data_train.targets)
        training_loss_tensor[#training_loss_tensor + 1] = train_loss

        -- Test loss with the same weights.
        local test_loss = criterion:forward(model:forward(test_input), data_test.targets)
        test_loss_tensor[#test_loss_tensor + 1] = test_loss
    end
    return torch.Tensor(training_loss_tensor), torch.Tensor(test_loss_tensor)
end

--- Shuffle the rows of a 2-D dataset and split them into train and test parts.
-- Every call draws its own random permutation, so separate calls on
-- row-aligned tensors (e.g. features and targets) do NOT stay aligned.
-- @param dataset           2-D tensor, one sample per row
-- @param percentage_train  fraction of rows used for training (default 0.70)
-- @return train rows, test rows (both independent clones)
function split_data(dataset, percentage_train)
    percentage_train = percentage_train or 0.70
    local row_count = dataset:size(1)

    -- Copy the rows into a fresh tensor in permuted order.
    local indexes = torch.randperm(row_count)
    local dataset_new_index = torch.Tensor(dataset:size()):fill(0)
    for i = 1, row_count do
        dataset_new_index[{{i}, {}}] = dataset[{{indexes[i]}, {}}]
    end

    local num_train = math.floor(row_count * percentage_train)
    if num_train < 1 or num_train >= row_count then
        -- Either slice would be empty; fail with a readable message instead
        -- of an out-of-range indexing error.
        error(string.format(
            "split_data: cannot split %d rows with percentage_train=%s",
            row_count, tostring(percentage_train)), 2)
    end
    -- Locals: the original globals clobbered the notebook's own
    -- `data_train` / `data_test` variables.
    local data_train = dataset_new_index[{{1, num_train}, {}}]:clone()
    local data_test = dataset_new_index[{{num_train + 1, -1}, {}}]:clone()
    return data_train, data_test
end

--- Bundle features, masks and targets into one table.
-- @param features  stored under the `features` key
-- @param masks     stored under the `masks` key
-- @param targets   stored under the `targets` key
-- @return a fresh table holding the three arguments (no copies are made)
function build_training_object(features, masks, targets)
    -- Built as a local value; the original assigned to a global named `data`.
    return {
        features = features,
        masks = masks,
        targets = targets,
    }
end

--- Create the training / testing structures from the generated game data.
-- Columns 1..30 of `gameData.input_tensor` are the features and columns
-- 31..34 the legal-action flags (converted to masks); `gameData.output_tensor`
-- holds the targets. One random permutation is applied to features, masks and
-- targets alike, so row i of each still describes the same sample after the
-- split. (Shuffling each tensor separately would break that correspondence.)
-- @param gameData          object exposing 2-D `input_tensor` and `output_tensor`
-- @param percentage_train  fraction of rows used for training (default 0.70)
-- @return train table, test table (each with `features`, `masks`, `targets`)
function create_data_structure(gameData, percentage_train)
    percentage_train = percentage_train or 0.70
    local input_tensor = gameData.input_tensor
    local targets = gameData.output_tensor

    -- Getting features and masks:
    local features = input_tensor[{{}, {1, 30}}]
    local legal_actions = input_tensor[{{}, {31, 34}}]
    local masks = convert_to_masks(legal_actions)

    local row_count = input_tensor:size(1)
    local num_train = math.floor(row_count * percentage_train)
    if num_train < 1 or num_train >= row_count then
        error(string.format(
            "create_data_structure: cannot split %d rows with percentage_train=%s",
            row_count, tostring(percentage_train)), 2)
    end

    -- Single shared shuffle for all three tensors.
    local order = torch.randperm(row_count)

    -- Copy rows order[first..last] of `source` into a fresh tensor, so the
    -- results never alias the game data.
    local function gather_rows(source, first, last)
        local picked = torch.Tensor(last - first + 1, source:size(2)):fill(0)
        for i = first, last do
            picked[{{i - first + 1}, {}}] = source[{{order[i]}, {}}]
        end
        return picked
    end

    -- Kept as globals on purpose: a later cell reads `train_data` directly.
    train_data = build_training_object(
        gather_rows(features, 1, num_train),
        gather_rows(masks, 1, num_train),
        gather_rows(targets, 1, num_train))
    test_data = build_training_object(
        gather_rows(features, num_train + 1, row_count),
        gather_rows(masks, num_train + 1, row_count),
        gather_rows(targets, num_train + 1, row_count))

    return train_data, test_data
end

In [9]:
--- Masked strategy network (working sequence).
-- Input is a table {features, masks}. The features branch runs
-- Linear(30,64) -> Sigmoid -> Linear(64,4); the masks branch is passed
-- through unchanged. The mask is then subtracted from the 4 outputs and the
-- result goes through SoftMax. With the masks produced by find_mask (0 or
-- math.huge), an entry with mask math.huge becomes -inf before the SoftMax.
--
-- Each pass-through slot gets its own nn.Identity() instance. Adding the bare
-- `nn.Identity` class makes every table share and mutate the class object.

-- First linear layer on the features; mask passes through.
layer1 = nn.ParallelTable()
layer1:add(nn.Linear(30, 64):init('weight', nninit.orthogonal))
layer1:add(nn.Identity())

-- Non-linearity on the features; mask passes through.
layer2 = nn.ParallelTable()
layer2:add(nn.Sigmoid())
layer2:add(nn.Identity())

-- Output layer: one value per action; mask passes through.
layer21 = nn.ParallelTable()
layer21:add(nn.Linear(64, 4):init('weight', nninit.orthogonal))
layer21:add(nn.Identity())

-- Bring both branches to the same 4x1 shape before combining them.
layer3 = nn.ParallelTable()
layer3:add(nn.Reshape(4, 1))
layer3:add(nn.Reshape(4, 1))

layer4 = nn.CSubTable()   -- network outputs minus mask
layer5 = nn.Reshape(4)
layer6 = nn.SoftMax()

mlp = nn.Sequential()
mlp:add(layer1)
mlp:add(layer2)
mlp:add(layer21)
mlp:add(layer3)
mlp:add(layer4)
mlp:add(layer5)
mlp:add(layer6)

In [10]:
-- Build the train / test tables (features, masks, targets) from the game data.
data_train, data_test = create_data_structure(game)

In [11]:
--- Loss function for the hand-built network.
criterion = nn.MSECriterion()

--- Training options: only the learning rate is active; the alternatives
--- below are kept disabled.
opt = { learningRate = 0.001 }
-- opt.momentum = 0.7
-- opt.nesterov = true
-- opt.learningRateDecay = 0.9
-- opt.weightDecay = 0.999

In [12]:
--- Repeat training 10 times on the same network and collect, per run, the
--- first and last train/test loss plus a marker whenever the summed training
--- loss is below the summed test loss.
number_ok = {}
train_loss_table = { first = {}, last = {} }
test_loss_table = { first = {}, last = {} }

for i = 1, 10 do
    -- `train_loss` / `test_loss` stay global: the plotting cell reads them.
    train_loss, test_loss = train_and_test(data_train, data_test, mlp, criterion, opt)
    table.insert(train_loss_table.first, train_loss[1])
    table.insert(train_loss_table.last, train_loss[-1])
    table.insert(test_loss_table.first, test_loss[1])
    -- The final test loss belongs in `.last`; the original appended it to `.first`.
    table.insert(test_loss_table.last, test_loss[-1])
    if torch.sum(train_loss) < torch.sum(test_loss) then
        table.insert(number_ok, 1)
    end
end
-- Number of runs in which the condition above held.
print(torch.sum((torch.Tensor(number_ok))))

0


In [15]:
--- Plot the train and test loss curves of the last training run.
-- x axis: one point per recorded loss value.
t2 = torch.range(1, train_loss:size(1))

local loss_plot = Plot()
loss_plot:line(t2, train_loss, 'red', 'train loss')
loss_plot:line(t2, test_loss, 'blue', 'test loss')
loss_plot:legend(true):title('Mean Square Error')
loss_plot:draw()

In [None]:
--- Criterion registered as nn.StrategyCriterion.
-- Both passes delegate to the THNN MSECriterion kernels, so as written it
-- computes the same loss and gradient as a mean-squared-error criterion.
local StrategyCriterion, parent = torch.class('nn.StrategyCriterion', 'nn.Criterion')

--- Constructor.
-- Defined on `StrategyCriterion`; the original used the undefined name
-- `Strategy`, which raised an error and left the class without its __init.
-- @param sizeAverage  forwarded to the THNN kernels; defaults to true when nil
function StrategyCriterion:__init(sizeAverage)
   parent.__init(self)
   if sizeAverage ~= nil then
     self.sizeAverage = sizeAverage
   else
     self.sizeAverage = true
   end
end

--- Forward pass: compute the loss for `input` against `target`.
-- @return the scalar loss (also stored in self.output)
function StrategyCriterion:updateOutput(input, target)
   -- One-element buffer that receives the scalar result from the C kernel.
   self.output_tensor = self.output_tensor or input.new(1)
   input.THNN.MSECriterion_updateOutput(
      input:cdata(),
      target:cdata(),
      self.output_tensor:cdata(),
      self.sizeAverage
   )
   self.output = self.output_tensor[1]
   return self.output
end

--- Backward pass: compute the gradient of the loss with respect to `input`.
-- @return self.gradInput, filled in by the C kernel
function StrategyCriterion:updateGradInput(input, target)
   input.THNN.MSECriterion_updateGradInput(
      input:cdata(),
      target:cdata(),
      self.gradInput:cdata(),
      self.sizeAverage
   )
   return self.gradInput
end

## Using the modules

In [None]:
-- Instantiate the project's StrategyNN wrapper and take the network it
-- exposes through its `model` field.
strategy_nn = StrategyNN()
nn_model = strategy_nn.model

In [None]:
--- Loss check: evaluate `nn_model` on the training data.
-- Reads the global `train_data` set by an earlier cell.
criterion = nn.MSECriterion()

local predictions = nn_model:forward({train_data.features, train_data.masks})
loss_x = criterion:forward(predictions, train_data.targets)
print(loss_x)

In [None]:
--- Training options: learning rate plus momentum.
opt = {
    learningRate = 0.001,
    momentum = 0.9,
}
-- opt.learningRateDecay = 0.9
-- opt.weightDecay = 0.999

-- Training (disabled):
-- train_loss, test_loss = train_and_test(train_data, test_data, nn_model, criterion, opt)

In [None]:
--- Build the network and the trainer that wraps it.
strategy_nn = StrategyNN()
nn_model = strategy_nn.model
nn_trainer = NNTrainer(game, nn_model)

--- Loss function.
criterion = nn.MSECriterion()

--- Use the trainer's own training data from here on.
train_data = nn_trainer.train_data

--- Loss before any training in this cell.
local predictions = nn_model:forward({train_data.features, train_data.masks})
loss_x = criterion:forward(predictions, train_data.targets)
print(loss_x)
-- print(nn_model:getParameters())

--- Training options.
-- NOTE(review): optim.sgd requires momentum > 0 and zero dampening when
-- nesterov is enabled. Confirm how NNTrainer forwards these options.
opt = {
    learningRate = 0.001,
    momentum = 0,
    nesterov = true,
}

-- Training (disabled):
-- train_loss = nn_trainer:train(nn_model, criterion, opt)
-- loss_x = criterion:forward(nn_model:forward({train_data.features, train_data.masks}), train_data.targets)
-- print(loss_x)


In [None]:
--- Train on the trainer's data; the returned losses are plotted in the next cell.
only_train_loss= nn_trainer:train(nn_model,criterion,opt)

In [None]:
--- Plot the training loss curve returned by nn_trainer:train.
-- x axis: one point per recorded loss value.
t1 = torch.range(1, only_train_loss:size(1))

local loss_plot = Plot()
loss_plot:line(t1, only_train_loss, 'red', 'train loss')
-- loss_plot:line(t1, test_loss, 'blue', 'test loss')
loss_plot:legend(true):title('Mean Square Error')
loss_plot:draw()

In [None]:
--- Try a new model: ask the trainer for a new model, then have it generate
--- its training and validation sets.
nn_model2 = nn_trainer:set_new_model()
nn_trainer:generate_training_and_validation_sets()

In [None]:
-- Train the new model with an MSE criterion, keeping both returned loss series for plotting.
train_loss2,test_loss2= nn_trainer:train_and_validate(nn_model2,nn.MSECriterion(),opt)

In [None]:
--- Plot the two loss curves returned by nn_trainer:train_and_validate.
-- x axis: one point per recorded loss value.
t2 = torch.range(1, train_loss2:size(1))

local loss_plot = Plot()
loss_plot:line(t2, train_loss2, 'red', 'train loss')
loss_plot:line(t2, test_loss2, 'blue', 'test loss')
loss_plot:legend(true):title('Mean Square Error')
loss_plot:draw()

In [None]:
--- Train `model` with SGD, tracking train and test loss per step.
-- NOTE(review): despite the name, every step uses the whole training set;
-- no mini-batching is implemented here yet.
-- The optimiser state is rebuilt on every call so that `opt` is always
-- honoured and nothing is shared between runs or models.
-- @param data_train  table with `features`, `masks`, `targets` used for fitting
-- @param data_test   table with the same fields, only evaluated
-- @param model       module exposing getParameters / forward / backward
-- @param criterion   criterion exposing forward / backward
-- @param opt         options table: `learningRate`, optional `momentum`
--                    (default 0), `nesterov` (default false), `dampening`,
--                    and `iterations` (number of SGD steps, default 10000)
-- @return two 1-D torch.Tensors: training loss and test loss after each update
function train_and_test_minibatch(data_train, data_test, model, criterion, opt)
    -- torch.manualSeed(1)
    local params, gradParameters = model:getParameters()
    local training_loss_tensor = {}
    local test_loss_tensor = {}
    local train_input = {data_train.features, data_train.masks}
    local test_input = {data_test.features, data_test.masks}
    -- The original declared an unused `epochs = 1000` while looping 10000
    -- times; the loop count is kept as the default.
    local iterations = opt.iterations or 10000

    -- Closure in the shape optim expects: loss and gradient on the training set.
    local function feval(_)
        gradParameters:zero()

        local predictions = model:forward(train_input)
        local loss = criterion:forward(predictions, data_train.targets)
        model:backward(train_input, criterion:backward(predictions, data_train.targets))

        return loss, gradParameters
    end

    -- The original read `opt.nesteov`, so the nesterov option was never applied.
    local nesterov = opt.nesterov or false
    local sgdState = {
        learningRate = opt.learningRate,
        momentum = opt.momentum or 0,
        nesterov = nesterov,
        -- optim.sgd defaults dampening to the momentum value but requires zero
        -- dampening for nesterov, so default it to 0 in that case.
        dampening = opt.dampening or (nesterov and 0 or nil),
        learningRateDecay = 5e-7,
    }

    for _ = 1, iterations do
        optim.sgd(feval, params, sgdState)

        -- Training loss after the update.
        local train_loss = criterion:forward(model:forward(train_input), data_train.targets)
        training_loss_tensor[#training_loss_tensor + 1] = train_loss

        -- Test loss with the same weights.
        local test_loss = criterion:forward(model:forward(test_input), data_test.targets)
        test_loss_tensor[#test_loss_tensor + 1] = test_loss
    end
    return torch.Tensor(training_loss_tensor), torch.Tensor(test_loss_tensor)
end