In [1]:
--- Libraries:
arguments = require 'Settings.arguments'
constants = require 'Settings.constants'
card_to_string = require 'Game.card_to_string_conversion'
card_tools = require 'Game.card_tools'
game_settings = require 'Settings.game_settings'
Plot = require 'itorch.Plot'

require 'torch'
require 'math'
require 'Tree.tree_builder'
require 'Tree.tree_data_generation'
require 'Tree.tree_visualiser'
require 'nn'
require 'Tree.tree_cfr'
require 'nngraph'
require 'optim'
require 'image'
require 'NN.strategy_net_builder'
require 'NN.strategy_net_trainer'

In [2]:
--- Turn a legal-action indicator vector into a mask of valid actions.
-- Every entry that equals 0 is replaced by -math.huge; all other entries are
-- kept as they are. The argument itself is left untouched because the work is
-- done on a clone.
-- @param actions_vector 1-D tensor of action indicators
-- @return a new tensor of the same size with the zeros replaced by -math.huge
function convert_to_valid_actions(actions_vector)
    -- `local` keeps the working copy out of the global namespace (the original
    -- assigned a global, which could silently clobber other notebook state).
    local valid_actions_vector = actions_vector:clone()
    for i = 1, valid_actions_vector:size(1) do
        if valid_actions_vector[i] == 0 then
            valid_actions_vector[i] = -math.huge
        end
    end
    return valid_actions_vector
end

In [3]:
--- Builder used to expand the game tree from the root description below.
builder = PokerTreeBuilder()

--- Root node description handed to the builder: empty board string,
--- street 1, P1 to act, and a bet of 200 recorded for each player.
params = {
    root_node = {
        board = card_to_string:string_to_board(''),
        street = 1,
        current_player = constants.players.P1,
        bets = arguments.Tensor{200, 200},
    },
}

--- Build the tree from the root description.
tree = builder:build_tree(params)

--- Wrap the tree in the data-generation helper used by the later cells.
game = TreeData(tree)


In [4]:
--- CFR Solver
-- Allocate one range row per player: players_count x card_count.
local starting_ranges = arguments.Tensor(constants.players_count, game_settings.card_count)
-- Both players start from the uniform range that card_tools returns for the root board.
starting_ranges[1]:copy(card_tools:get_uniform_range(params.root_node.board))
starting_ranges[2]:copy(card_tools:get_uniform_range(params.root_node.board))
local tree_cfr = TreeCFR()
print("Solver")
-- Run CFR on the tree built in the previous cell, starting from these ranges.
tree_cfr:run_cfr(tree, starting_ranges)
print("geting training set")
-- NOTE(review): presumably this fills game.input_tensor / game.output_tensor, which the
-- later cells read; the meaning of the second argument (1) is not visible here -- confirm
-- in Tree.tree_data_generation.
game:get_training_set(tree,1)

Solver


geting training set


In [5]:
--- Build the mask tensor for a batch of legal-action indicators.
-- Each row of `actions_tensor` is sliced out as a one-row tensor, passed to
-- `find_mask`, and the result is written to the same row of the output.
-- @param actions_tensor 2-D tensor with one row of action indicators per sample
-- @return a new torch.Tensor of the same size holding the per-row masks
function convert_to_masks(actions_tensor)
    -- `local`: the original assigned a global `masks_tensor`, leaking the
    -- working buffer into (and possibly clobbering) notebook state.
    local masks_tensor = torch.Tensor(actions_tensor:size()):fill(0)
    for i = 1, actions_tensor:size(1) do
        masks_tensor[{{i},{}}] = find_mask(actions_tensor[{{i},{}}])
    end
    return masks_tensor
end

In [6]:
--- Compute the mask for a single one-row action tensor.
-- Works on a clone of `action`, so the argument is not modified. Every entry
-- of the first row that is smaller than 1 becomes math.huge; every other
-- entry becomes 0.
-- @param action 2-D tensor whose first row holds the action indicators
-- @return a new tensor of the same size containing the mask
function find_mask(action)
    local result = action:clone()
    local row = result[1]
    local width = action:size(2)
    local col = 1
    while col <= width do
        -- math.huge is truthy, so the and/or form is safe here.
        row[col] = (row[col] < 1) and math.huge or 0
        col = col + 1
    end
    return result
end

In [7]:
--- Train `model` with full-batch SGD and record the loss after every step.
-- Runs 10000 optim.sgd steps on the whole {features, masks} batch and, after
-- each step, re-evaluates the criterion on the same batch.
--
-- The closure, the optimiser state and the per-step loss are all local. The
-- original kept them in globals (`feval`, `sgdState`, `loss`); in particular
-- `sgdState = sgdState or {...}` silently reused the optimiser state (learning
-- rate and any counters optim stored in it) of whatever ran before, even for a
-- different model.
-- @param features input features, first element of the model's input table
-- @param masks action masks, second element of the model's input table
-- @param targets targets handed to the criterion
-- @param model nn module that accepts the table {features, masks}
-- @param criterion nn criterion
-- @param opt options table; opt.learningRate is used as the SGD learning rate
-- @return torch.Tensor holding one loss value per iteration
function train(features,masks,targets,model,criterion,opt)
    -- Flattened parameters and their gradient buffer.
    local params, gradParameters = model:getParameters()
    local loss_vector = {}

    -- Returns the loss and the gradient w.r.t. the flattened parameters.
    local function feval(_)
        gradParameters:zero()

        -- Forward pass:
        model:forward({features,masks})
        local predictions = model.output

        -- Loss and its gradient w.r.t. the predictions:
        local loss = criterion:forward(predictions, targets)
        local gradCriterion = criterion:backward(predictions, targets)

        -- Backprop (accumulates into gradParameters):
        model:backward({features,masks}, gradCriterion)

        return loss, gradParameters
    end

    -- Fresh optimiser state for every call.
    local sgdState = {
        learningRate = opt.learningRate,
        learningRateDecay = 5e-5,
    }

    for _ = 1, 10000 do
        optim.sgd(feval, params, sgdState)
        -- Loss after the update, measured on the training batch.
        local loss = criterion:forward(model:forward({features,masks}), targets)
        loss_vector[#loss_vector + 1] = loss
    end
    return torch.Tensor(loss_vector)
end

--- Split a 2-D dataset into a leading training part and a trailing test part.
-- The rows keep their original order: the first
-- floor(rows * percentage_train) rows become the training set, the remaining
-- rows the test set. Both results are clones, not views.
-- @param dataset 2-D tensor, one sample per row
-- @param percentage_train fraction of rows used for training (default 0.75)
-- @return data_train, data_test
function split_data(dataset,percentage_train)
    percentage_train = percentage_train or 0.75
    -- All intermediates are local; the original leaked `num_train`,
    -- `data_train` and `data_test` as globals.
    local num_train = math.floor(dataset:size(1) * percentage_train)
    local data_train = dataset[{{1, num_train}, {}}]:clone()
    local data_test = dataset[{{num_train + 1, -1}, {}}]:clone()
    return data_train, data_test
end

In [13]:
--- Train `model` with full-batch SGD and track training and test loss.
-- Runs 10000 optim.sgd steps on the training batch; after every step the
-- criterion is evaluated on both the training and the test batch.
--
-- Fixes relative to the original:
--  * The original built an `sgdState` but then drove optim.sgd with
--    `adamState`, whose defaults (learningRateDecay = 0.9, weightDecay = 0.999)
--    look like Adam's beta1/beta2 placed in the wrong fields. Fed to SGD they
--    mean a very aggressive learning-rate decay and L2 penalty. SGD now gets
--    an SGD state; learningRateDecay / weightDecay are only applied when the
--    caller sets them in `opt`.
--  * The closure and the optimiser state are local, so state no longer leaks
--    between calls through the globals `feval`, `sgdState` and `adamState`.
-- @param data_train table with fields features, masks, targets
-- @param data_test table with fields features, masks, targets
-- @param model nn module that accepts the table {features, masks}
-- @param criterion nn criterion
-- @param opt options table: learningRate (default 0.001), momentum (default 0),
--        learningRateDecay (default 5e-7), weightDecay (default 0)
-- @return training losses, test losses (torch.Tensor, one entry per iteration)
function train_and_test(data_train,data_test,model,criterion,opt)
    -- Flattened parameters and their gradient buffer.
    local params, gradParameters = model:getParameters()
    local training_loss_tensor = {}
    local test_loss_tensor = {}

    -- Returns the training loss and the gradient w.r.t. the flattened parameters.
    local function feval(_)
        gradParameters:zero()

        -- Forward pass:
        model:forward({data_train.features,data_train.masks})
        local predictions = model.output

        -- Loss and its gradient w.r.t. the predictions:
        local loss = criterion:forward(predictions, data_train.targets)
        local gradCriterion = criterion:backward(predictions, data_train.targets)

        -- Backprop (accumulates into gradParameters):
        model:backward({data_train.features,data_train.masks}, gradCriterion)

        return loss, gradParameters
    end

    -- Fresh SGD state for every call.
    local sgdState = {
        learningRate = opt.learningRate or 0.001,
        momentum = opt.momentum or 0,
        learningRateDecay = opt.learningRateDecay or 5e-7,
        weightDecay = opt.weightDecay or 0,
    }

    for _ = 1, 10000 do
        optim.sgd(feval, params, sgdState)

        -- Training loss:
        local train_loss = criterion:forward(
            model:forward({data_train.features,data_train.masks}), data_train.targets)
        training_loss_tensor[#training_loss_tensor + 1] = train_loss

        -- Test loss:
        local test_loss = criterion:forward(
            model:forward({data_test.features,data_test.masks}), data_test.targets)
        test_loss_tensor[#test_loss_tensor + 1] = test_loss
    end
    return torch.Tensor(training_loss_tensor), torch.Tensor(test_loss_tensor)
end

--- Shuffle the rows of a 2-D dataset and split them into train and test parts.
-- The rows are reordered according to a permutation, then the first
-- floor(rows * percentage_train) rows become the training set and the rest the
-- test set. Both results are clones.
--
-- WARNING: without `indexes`, every call draws its own random permutation.
-- Splitting several row-aligned tensors (features, masks, targets) with
-- separate calls therefore destroys their row correspondence; pass the same
-- `indexes` to every call to keep them aligned.
-- @param dataset 2-D tensor, one sample per row
-- @param percentage_train fraction of rows used for training (default 0.80)
-- @param indexes optional permutation of 1..rows (as returned by
--        torch.randperm); a fresh random one is drawn when omitted
-- @return data_train, data_test
function split_data(dataset,percentage_train,indexes)
    indexes = indexes or torch.randperm(dataset:size(1))

    -- All intermediates are local; the original leaked `num_train`,
    -- `data_train` and `data_test` as globals.
    local dataset_new_index = torch.Tensor(dataset:size()):fill(0)
    for i = 1, indexes:size(1) do
        dataset_new_index[{{i},{}}] = dataset[{{indexes[i]},{}}]
    end

    percentage_train = percentage_train or 0.80
    local num_train = math.floor(dataset_new_index:size(1) * percentage_train)
    local data_train = dataset_new_index[{{1, num_train}, {}}]:clone()
    local data_test = dataset_new_index[{{num_train + 1, -1}, {}}]:clone()
    return data_train, data_test
end

--- Bundle features, masks and targets into one data table.
-- A fresh table is returned on every call; the original built it in the global
-- `data`, leaking it into (and possibly clobbering) notebook state.
-- @param features stored under the key `features`
-- @param masks stored under the key `masks`
-- @param targets stored under the key `targets`
-- @return table {features = ..., masks = ..., targets = ...}
function build_training_object(features,masks,targets)
    return {
        features = features,
        masks = masks,
        targets = targets,
    }
end

--- Create the training / testing structures from generated game data.
-- Columns 1-30 of gameData.input_tensor are used as features, columns 31-34 as
-- legal-action indicators (converted to masks with convert_to_masks), and
-- gameData.output_tensor as targets. The rows are shuffled and split 80% / 20%.
--
-- Fix: the original called split_data separately for features, masks and
-- targets. Each of those calls draws its own random permutation, so row i of
-- the features no longer belonged to row i of the masks or targets. Here a
-- single permutation is drawn once and applied to all three tensors, which
-- keeps the samples aligned.
-- @param gameData object exposing input_tensor and output_tensor
-- @return train_data, test_data (tables with features, masks, targets)
function create_data_structure(gameData)
    local input_tensor = gameData.input_tensor:clone()
    local targets = gameData.output_tensor:clone()

    -- Getting features and masks:
    local features = input_tensor[{{},{1,30}}]
    local legal_actions = input_tensor[{{},{31,34}}]
    local masks = convert_to_masks(legal_actions)

    -- One shared shuffle for all three tensors.
    local num_rows = features:size(1)
    local indexes = torch.randperm(num_rows)
    local num_train = math.floor(num_rows * 0.80)

    -- Reorder the rows of `dataset` by `indexes` and cut at `num_train`.
    local function shuffle_and_split(dataset)
        local shuffled = torch.Tensor(dataset:size()):fill(0)
        for i = 1, num_rows do
            shuffled[{{i},{}}] = dataset[{{indexes[i]},{}}]
        end
        local part_train = shuffled[{{1, num_train}, {}}]:clone()
        local part_test = shuffled[{{num_train + 1, -1}, {}}]:clone()
        return part_train, part_test
    end

    -- Splitting in train and test:
    local features_train, features_test = shuffle_and_split(features)
    local masks_train, masks_test = shuffle_and_split(masks)
    local targets_train, targets_test = shuffle_and_split(targets)

    -- Locals: the original leaked `train_data` / `test_data` as globals.
    local train_data = build_training_object(features_train, masks_train, targets_train)
    local test_data = build_training_object(features_test, masks_test, targets_test)

    return train_data, test_data
end

In [14]:
----- WORKING SEQUENCE
-- Two-branch network fed with the table {features, masks}: the first branch
-- transforms the 30 features down to 4 action scores, the second branch is
-- meant to pass the masks through unchanged. The two branches are then
-- combined with CSubTable and squashed with a Sigmoid.
--
-- Fix: the pass-through branches were added as `nn.Identity` (the class
-- itself) instead of `nn.Identity()` (an instance). Using the class as a
-- module makes every ParallelTable share one object and writes forward /
-- backward state onto the class table.
layer1 = nn.ParallelTable()
layer1:add(nn.Linear(30, 50))
layer1:add(nn.Identity())

layer2 = nn.ParallelTable()
layer2:add(nn.Sigmoid())
layer2:add(nn.Identity())

layer21 = nn.ParallelTable()
layer21:add(nn.Linear(50,4))
layer21:add(nn.Identity())

-- Bring both branches to the same 4x1 shape before combining them.
layer3 = nn.ParallelTable()
layer3:add(nn.Reshape(4,1))
layer3:add(nn.Reshape(4,1))

layer4 = nn.CSubTable()
layer5 = nn.Reshape(4)
layer6 = nn.Sigmoid()

mlp = nn.Sequential()
mlp:add(layer1)
mlp:add(layer2)
mlp:add(layer21)
mlp:add(layer3)
mlp:add(layer4)
mlp:add(layer5)
mlp:add(layer6)

In [15]:
-- Build the train / test tables from the generated game data; both are kept as
-- globals because later cells read train_data.features / .masks / .targets.
train_data,test_data = create_data_structure(game)

In [16]:
-- Instantiate the strategy network wrapper (the file requires
-- NN.strategy_net_builder above) and keep a handle on the nn model it exposes.
strategy_nn = StrategyNN()
nn_model = strategy_nn.model

In [17]:
-- Criterion: 
criterion =nn.BCECriterion()
-- Loss test
-- Evaluate the criterion once on the whole training batch (before any
-- training is run in this notebook) and print the value.
loss_x = criterion:forward(nn_model:forward({train_data.features,train_data.masks}), train_data.targets)
print(loss_x)

1.4307515621185


In [18]:
-- Optimiser options handed to the training helpers; only the learning rate is set.
opt = { learningRate = 0.001 }

-- Settings tried earlier, currently disabled:
--   opt.learningRateDecay = 0.9
--   opt.weightDecay = 0.999
--   opt.momentum = 0.05

-- Training call, currently disabled:
--   train_loss,test_loss = train_and_test(train_data,test_data,nn_model,criterion,opt)

In [19]:
-- Building the neural net model
-- (replaces the globals strategy_nn / nn_model created in an earlier cell)
strategy_nn = StrategyNN()
nn_model = strategy_nn.model

-- Building trainer:
nn_trainer = NNTrainer(game,nn_model)

-- Criterion definition:
-- (replaces the BCE criterion from the earlier cell with mean squared error)
criterion =nn.MSECriterion()
-- From here on the training data comes from the trainer, not from
-- create_data_structure.
train_data = nn_trainer.train_data

-- Loss on the whole training batch before training is run in the next cell.
loss_x = criterion:forward(nn_model:forward({train_data.features,train_data.masks}), train_data.targets)
print(loss_x)
---print(nn_model:getParameters())

-- Options:
opt = {}
opt.learningRate = 0.001
opt.momentum = 0

---Training:
---train_loss= nn_trainer:train(nn_model,criterion,opt)

---loss_x = criterion:forward(nn_model:forward({train_data.features,train_data.masks}), train_data.targets)

---print(loss_x)


0.14473561942577


In [20]:
-- Run the trainer; the returned value is plotted in the next cell, which
-- treats it as a tensor with one loss entry per step (train_loss:size(1)).
train_loss= nn_trainer:train(nn_model,criterion,opt)


In [21]:
-- x axis: one point per recorded loss value.
t1 = torch.range(1,train_loss:size(1))
local plot = Plot()
plot:line(t1,train_loss,'red' ,'train loss')
-- Test-loss curve disabled: no test_loss is produced by nn_trainer:train above.
---plot:line(t1,test_loss,'blue' ,'test loss')
plot:legend(true):title('Mean Square Error')
plot:draw()

In [22]:
-- NOTE(review): the recorded output of this cell is "attempt to call method
-- 'train_and_validate' (a nil value)", i.e. nn_trainer had no such method when
-- this was run, so train_loss / test_loss were not updated by this call.
-- Confirm the intended method name in NN.strategy_net_trainer.
train_loss,test_loss= nn_trainer:train_and_validate(nn_model,criterion,opt)


[string "train_loss,test_loss= nn_trainer:train_and_va..."]:1: attempt to call method 'train_and_validate' (a nil value)
stack traceback:
	/usr/local/share/lua/5.2/itorch/main.lua:167: in function 'train_and_validate'
	[string "train_loss,test_loss= nn_trainer:train_and_va..."]:1: in main chunk
	[C]: in function 'xpcall'
	/usr/local/share/lua/5.2/itorch/main.lua:210: in function </usr/local/share/lua/5.2/itorch/main.lua:174>
	(...tail calls...)
	/usr/local/share/lua/5.2/lzmq/poller.lua:75: in function 'poll'
	/usr/local/share/lua/5.2/lzmq/impl/loop.lua:307: in function 'poll'
	/usr/local/share/lua/5.2/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	/usr/local/share/lua/5.2/lzmq/impl/loop.lua:370: in function 'start'
	/usr/local/share/lua/5.2/itorch/main.lua:389: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: in ?: 

In [None]:
-- Plot training and test loss on a shared x axis (one point per recorded step).
-- NOTE(review): test_loss is only assigned by the train_and_validate cell above,
-- whose recorded run failed, so it may be undefined when this cell runs.
t1 = torch.range(1,train_loss:size(1))
local plot = Plot()
plot:line(t1,train_loss,'red' ,'train loss')
plot:line(t1,test_loss,'blue' ,'test loss')
plot:legend(true):title('Mean Square Error')
plot:draw()

In [None]:
-- Flattened parameters (x) and gradient buffer (dl_dx) of the hand-built mlp;
-- both are read by the feval closure defined in the next cell.
x, dl_dx = mlp:getParameters()
criterion = nn.MSECriterion()  
-- Loss before any optimisation step. Relies on the globals features, masks and
-- targets; features and masks are created in a later "Features" cell, while no
-- global assignment of targets is visible in this notebook.
loss_x = criterion:forward(mlp:forward({features,masks}), targets)
print(loss_x)

In [None]:
-- Loss history, filled by the optimisation loop in the following cell.
loss_vector = {}

--- Closure handed to optim: evaluates the loss and its gradient on the full batch.
-- Reads the globals mlp, criterion, dl_dx, features, masks and targets, and
-- deliberately leaves predictions, loss_x and gradCriterion behind as globals
-- (a later cell reads `predictions`).
-- @param x flattened parameters (unused; the model already owns them)
-- @return loss_x, dl_dx
function feval(x)
    -- Gradients accumulate, so clear them first.
    dl_dx:zero()

    local batch = {features, masks}

    -- Forward pass; the model keeps its result in mlp.output.
    mlp:forward(batch)
    predictions = mlp.output

    -- Loss and its gradient w.r.t. the predictions.
    loss_x = criterion:forward(predictions, targets)
    gradCriterion = criterion:backward(predictions, targets)

    -- Backprop into dl_dx.
    mlp:backward(batch, gradCriterion)

    return loss_x, dl_dx
end

In [None]:
opt = {}
opt.learningRate = 0.01

-- Perform SGD step:
-- NOTE: `sgdState or {...}` keeps an already existing global sgdState, in which
-- case the learning rate set just above is not applied.
sgdState = sgdState or {
learningRate = opt.learningRate,
learningRateDecay = 5e-7}

-- 10000 full-batch SGD steps on the hand-built mlp; after each step the loss is
-- re-evaluated on the same batch and appended to loss_vector.
for i = 1,10000 do
    optim.sgd(feval, x, sgdState)
    loss_x = criterion:forward(mlp:forward({features,masks}), targets)
    table.insert(loss_vector,loss_x)
end

In [None]:
-- `predictions` is the global left behind by the last feval call, so this is
-- the loss of the last forward pass done inside feval, not of a fresh one.
loss_x = criterion:forward(predictions, targets)
print(loss_x)

# Not very relevant


In [None]:
--- Features:
-- Columns 1-30 of the input tensor are the features, columns 31-34 the
-- legal-action indicators, which are turned into masks.
input_tensor = game.input_tensor:clone()
features = input_tensor[{{},{1,30}}]
legal_actions = input_tensor[{{},{31,34}}]
masks = convert_to_masks(legal_actions)

--- Targets:
output_tensor = game.output_tensor:clone()
-- `class` receives the index of the largest entry of each output row; it is
-- used as the target of the classification criterion in the cells below.
_,class = torch.max(output_tensor,2)

In [None]:
--- Build the two-branch masked network used for the classification experiment.
-- The network takes the table {features, masks}. Branch 1 maps the 30 features
-- through Linear(30,50) -> Sigmoid -> Linear(50,100) -> ReLU -> Linear(100,4);
-- branch 2 passes the masks through unchanged. The branches are combined with
-- CSubTable and reshaped to batch_size x 4. No SoftMax is applied inside the
-- model; a later cell applies nn.SoftMax() to the output by hand.
--
-- Fixes relative to the original:
--  * the pass-through branches are `nn.Identity()` instances, not the
--    `nn.Identity` class itself;
--  * every layer and the model are local, so building this network no longer
--    overwrites the globals layer1..layer6 / model used by other cells;
--  * the unused SoftMax layer object is gone;
--  * the batch size can be passed in instead of being read from the global
--    `features` (which remains the default).
-- @param batch_size number of rows per forward batch (default features:size(1))
-- @return the assembled nn.Sequential model
function build_nn (batch_size)
    batch_size = batch_size or features:size(1)

    local layer1 = nn.ParallelTable()
    layer1:add(nn.Linear(30, 50))
    layer1:add(nn.Identity())

    local layer2 = nn.ParallelTable()
    layer2:add(nn.Sigmoid())
    layer2:add(nn.Identity())

    local layer21 = nn.ParallelTable()
    layer21:add(nn.Linear(50,100))
    layer21:add(nn.Identity())

    local layer22 = nn.ParallelTable()
    layer22:add(nn.ReLU())
    layer22:add(nn.Identity())

    local layer23 = nn.ParallelTable()
    layer23:add(nn.Linear(100,4))
    layer23:add(nn.Identity())

    -- Bring both branches to the same shape before combining them.
    local layer3 = nn.ParallelTable()
    layer3:add(nn.Reshape(4,1))
    layer3:add(nn.Reshape(4,1))

    local layer4 = nn.CSubTable()
    local layer5 = nn.Reshape(batch_size,4)

    local model = nn.Sequential()
    model:add(layer1)
    model:add(layer2)
    model:add(layer21)
    model:add(layer22)
    model:add(layer23)
    model:add(layer3)
    model:add(layer4)
    model:add(layer5)
    return model
end

In [None]:
-- Build the classification network; relies on the global `features` existing,
-- because build_nn reads features:size(1).
model_test = build_nn()

In [None]:
-- Replaces the global criterion with a classification loss; its targets are the
-- class indices computed in the "Features" cell.
criterion =nn.CrossEntropyCriterion()

In [None]:
-- Loss of the untrained classification network on the whole batch.
loss_x = criterion:forward(model_test:forward({features,masks}), class)
print(loss_x)

In [None]:
--- Train `model` with full-batch SGD (learning rate 0.01) and record the loss.
-- Runs 10000 optim.sgd steps on the whole {features, masks} batch and, after
-- each step, re-evaluates the criterion on the same batch.
--
-- Everything the function needs is local. The original overwrote the global
-- `opt` table, kept the closure and per-step loss in the globals `feval` /
-- `loss`, and reused whatever global `sgdState` already existed (so the
-- learning rate set here could be silently ignored).
-- @param features input features, first element of the model's input table
-- @param masks action masks, second element of the model's input table
-- @param targets targets handed to the criterion
-- @param model nn module that accepts the table {features, masks}
-- @param criterion nn criterion
-- @return torch.Tensor holding one loss value per iteration
function train(features,masks,targets,model,criterion)
    -- Flattened parameters and their gradient buffer.
    local params, gradParameters = model:getParameters()
    local loss_vector = {}

    -- Returns the loss and the gradient w.r.t. the flattened parameters.
    local function feval(_)
        gradParameters:zero()

        -- Forward pass:
        model:forward({features,masks})
        local predictions = model.output

        -- Loss and its gradient w.r.t. the predictions:
        local loss = criterion:forward(predictions, targets)
        local gradCriterion = criterion:backward(predictions, targets)

        -- Backprop (accumulates into gradParameters):
        model:backward({features,masks}, gradCriterion)

        return loss, gradParameters
    end

    -- Fresh optimiser state for every call.
    local sgdState = {
        learningRate = 0.01,
        learningRateDecay = 5e-7,
    }

    for _ = 1, 10000 do
        optim.sgd(feval, params, sgdState)
        -- Loss after the update, measured on the training batch.
        local loss = criterion:forward(model:forward({features,masks}), targets)
        loss_vector[#loss_vector + 1] = loss
    end
    return torch.Tensor(loss_vector)
end

In [None]:
-- Train the classification network on the class indices; overwrites the global
-- loss_vector with the per-iteration losses returned by train.
loss_vector = train(features,masks,class,model_test,criterion)

In [None]:
-- The model built by build_nn has no SoftMax layer, so one is applied by hand
-- here to inspect the output of the network on the whole batch.
nn.SoftMax():forward(model_test:forward({features,masks}))

In [None]:
-- Bare expression used to display the global `targets` in the notebook.
-- NOTE(review): no global assignment of `targets` is visible in this notebook.
targets

In [None]:
--- Run one epoch of mini-batch training over `dataset`.
-- For every mini-batch the samples are copied into a batch tensor, a closure
-- computing loss and gradient (with optional L1 / L2 penalties) is built, and
-- one LBFGS or SGD step is taken depending on opt.optimization.
--
-- Relies on globals that are not defined anywhere in the visible notebook:
-- model, criterion, parameters, gradParameters, confusion, geometry, and on
-- opt.batchSize / opt.coefL1 / opt.coefL2 / opt.optimization / opt.maxIter.
-- The optimiser states (lbfgsState, sgdState) and the epoch counter are kept
-- in globals so that they persist from one epoch to the next.
--
-- Fixes relative to the original:
--  * the function was never closed (missing final `end`), so the cell could
--    not even be parsed;
--  * the epoch counter was initialised but never advanced;
--  * the last mini-batch may be shorter than opt.batchSize; the batch tensors
--    were always allocated with opt.batchSize rows, so the unused rows held
--    uninitialised data that still went through the loss and the confusion
--    matrix. The tensors are now sized to the real number of samples.
-- @param dataset indexable collection: dataset:size() samples, dataset[i][1]
--        the input and dataset[i][2] a score vector whose arg-max is the class
function train(dataset)
   -- epoch tracker
   epoch = epoch or 1
   -- do one epoch
   print('<trainer> on training set:')
   print("<trainer> online epoch # " .. epoch .. ' [batchSize = ' .. opt.batchSize .. ']')
   for t = 1,dataset:size(),opt.batchSize do
      -- create mini batch (sized to the samples that are actually available)
      local last = math.min(t+opt.batchSize-1,dataset:size())
      local batch_count = last - t + 1
      local inputs = torch.Tensor(batch_count,1,geometry[1],geometry[2])
      local targets = torch.Tensor(batch_count)
      local k = 1
      for i = t,last do
         -- load new sample
         local sample = dataset[i]
         local input = sample[1]:clone()
         -- the class index is the position of the largest target entry
         local _,target = sample[2]:clone():max(1)
         target = target:squeeze()
         inputs[k] = input
         targets[k] = target
         k = k + 1
      end

      -- create closure to evaluate f(X) and df/dX
      local feval = function(x)
         -- just in case:
         collectgarbage()

         -- get new parameters
         if x ~= parameters then
            parameters:copy(x)
         end

         -- reset gradients
         gradParameters:zero()

         -- evaluate function for complete mini batch
         local outputs = model:forward(inputs)
         local f = criterion:forward(outputs, targets)

         -- estimate df/dW
         local df_do = criterion:backward(outputs, targets)
         model:backward(inputs, df_do)

         -- penalties (L1 and L2):
         if opt.coefL1 ~= 0 or opt.coefL2 ~= 0 then
            -- locals:
            local norm,sign= torch.norm,torch.sign

            -- Loss:
            f = f + opt.coefL1 * norm(parameters,1)
            f = f + opt.coefL2 * norm(parameters,2)^2/2

            -- Gradients:
            gradParameters:add( sign(parameters):mul(opt.coefL1) + parameters:clone():mul(opt.coefL2) )
         end

         -- update confusion (only for the samples present in this batch)
         for i = 1,batch_count do
            confusion:add(outputs[i], targets[i])
         end

         -- return f and df/dX
         return f,gradParameters
      end

      -- optimize on current mini-batch
      if opt.optimization == 'LBFGS' then

         -- Perform LBFGS step:
         lbfgsState = lbfgsState or {
            maxIter = opt.maxIter,
            lineSearch = optim.lswolfe
         }
         optim.lbfgs(feval, parameters, lbfgsState)

         -- disp report:
         print('LBFGS step')
         print(' - progress in batch: ' .. t .. '/' .. dataset:size())
         print(' - nb of iterations: ' .. lbfgsState.nIter)
         print(' - nb of function evalutions: ' .. lbfgsState.funcEval)

      elseif opt.optimization == 'SGD' then

         -- Perform SGD step:
         sgdState = sgdState or {
            learningRate = opt.learningRate,
            momentum = opt.momentum,
            learningRateDecay = 5e-7
         }
         optim.sgd(feval, parameters, sgdState)

         -- disp progress
         xlua.progress(t, dataset:size())

      else
         error('unknown optimization method')
      end
   end

   -- next epoch
   epoch = epoch + 1
end

In [None]:
-- NOTE(review): mlp1 is not defined anywhere in the visible notebook (only mlp is).
print(mlp1)

In [None]:
--- Perform one manual gradient step on `perceptron` and print debug information.
-- The gradient w.r.t. the output is built from changeSignToArray(target), a
-- helper that is not defined in the visible notebook.
--
-- The arguments and the output gradient are mirrored into the globals
-- temp_dataset, temp_target, temp_perceptron and temp_gradient so that they
-- can be inspected from the notebook after the call (kept from the original).
-- All other working values are local; the original leaked predictionValue,
-- realTarget and gradientWrtOutput as globals.
-- @param perceptron nn module to update
-- @param dataset input handed to perceptron:forward / :backward
-- @param target target value(s) for this input
-- @param learningRate step size passed to updateParameters
-- @return the updated perceptron
function gradientUpdate(perceptron, dataset, target, learningRate)
  -- Debug handles, intentionally global.
  temp_dataset = dataset
  temp_target = target
  temp_perceptron = perceptron

  print("### new gradientUpdate() ###");

  print("#dataset "..#dataset);
  print("(#dataset[1][1])[1] "..(#dataset[1][1])[1]);
  print("#target "..#target);
  local predictionValue = (perceptron:forward(dataset)[1])[1]
  print('predictionValue '..predictionValue);

  --   if predictionValue*target < 1 then

  local realTarget = changeSignToArray(target)
  local gradientWrtOutput = torch.Tensor(realTarget)
  temp_gradient = gradientWrtOutput

  -- Clear old gradients, backpropagate, then apply the update.
  perceptron:zeroGradParameters()
  perceptron:backward(dataset, gradientWrtOutput)
  perceptron:updateParameters(learningRate)
  -- end

  return perceptron;
end


In [None]:
--- Stack a list of equally sized tensors into one batch tensor.
-- The result has size (#tbl, <size of tbl[1]>...) and batch[i] holds tbl[i].
--
-- Fixes relative to the original:
--  * an empty list returned an error (indexing tbl[1] == nil); it now yields
--    an empty tensor;
--  * `unpack` is a global only on Lua 5.1 / LuaJIT (and 5.2 built with
--    compatibility); `table.unpack` is used when available.
-- @param tbl array of tensors that all have the size of tbl[1]
-- @return the batch tensor (an empty torch.Tensor for an empty list)
function table_of_tensors_to_batch(tbl)
    if #tbl == 0 then
        return torch.Tensor()
    end
    local unpack_fn = table.unpack or unpack
    local batch = torch.Tensor(#tbl, unpack_fn(tbl[1]:size():totable()))
    for i = 1, #tbl do
        batch[i] = tbl[i]
    end
    return batch
end

In [None]:
--- Per-sample evaluation closure for optim: loss and gradient of one sample.
-- Every call advances the global cursor _nidx_ to the next row of `data`
-- (wrapping around after the last row), then evaluates model and criterion on
-- that single row. Reads the globals x, dl_dx, data, model and criterion.
-- @param x_new parameter vector supplied by the optimiser
-- @return loss_x, dl_dx
feval = function(x_new)
   -- The optimiser normally hands back x itself; copy only when it does not.
   if x ~= x_new then
      x:copy(x_new)
   end

   -- Move the cursor to the next training sample, wrapping at the end.
   _nidx_ = (_nidx_ or 0) + 1
   if _nidx_ > (#data)[1] then
      _nidx_ = 1
   end

   -- Element 1 of the row is the target, elements 2..3 are the inputs.
   local row = data[_nidx_]
   local target = row[{ {1} }]
   local inputs = row[{ {2,3} }]

   -- Gradients accumulate across calls, so clear them first.
   dl_dx:zero()

   -- Loss of this sample, followed by backprop of its gradient.
   local output = model:forward(inputs)
   local loss_x = criterion:forward(output, target)
   local grad_output = criterion:backward(model.output, target)
   model:backward(inputs, grad_output)

   return loss_x, dl_dx
end

In [None]:
-- Train with nn's built-in StochasticGradient trainer instead of a hand-written loop.
-- NOTE(review): neither mlp1 nor dataset2 is defined in the visible notebook.
trainer = nn.StochasticGradient(mlp1, criterion)
trainer.learningRate = 0.01
trainer:train(dataset2)

In [None]:
----- WORKING SEQUENCE
-- Variant of the two-branch network fed with the table {features, masks}: the
-- first branch maps the 30 features to 4 action scores followed by a SoftMax,
-- the second branch is meant to pass the masks through unchanged. The branches
-- are combined with CMulTable and passed through a final SoftMax.
--
-- Fix: the pass-through branches were added as `nn.Identity` (the class
-- itself) instead of `nn.Identity()` (an instance). Using the class as a
-- module makes every ParallelTable share one object and writes forward /
-- backward state onto the class table.
layer1 = nn.ParallelTable()
layer1:add(nn.Linear(30, 4))
layer1:add(nn.Identity())

layer2 = nn.ParallelTable()
layer2:add(nn.SoftMax())
layer2:add(nn.Identity())

-- Bring both branches to the same 4x1 shape before combining them.
layer3 = nn.ParallelTable()
layer3:add(nn.Reshape(4,1))
layer3:add(nn.Reshape(4,1))

layer4 = nn.CMulTable()
layer5 = nn.Reshape(4)
layer6 = nn.SoftMax()

mlp = nn.Sequential()
mlp:add(layer1)
mlp:add(layer2)
mlp:add(layer3)
mlp:add(layer4)
mlp:add(layer5)
mlp:add(layer6)

In [None]:

-- Re-assemble mlp from whatever the globals layer1..layer6 currently hold.
-- The layer objects are reused, not re-created, so their weights are shared
-- with any earlier mlp that was built from the same layer objects.
mlp = nn.Sequential()
mlp:add(layer1)
mlp:add(layer2)
mlp:add(layer3)
mlp:add(layer4)
mlp:add(layer5)
mlp:add(layer6)

In [None]:
--- One plain gradient-descent step on a single (x, y) example, printing the loss.
-- Order of operations: forward pass, criterion forward (the value is
-- printed), criterion backward, clear the accumulated gradients,
-- backpropagate, apply the update.
-- @param model nn module to update
-- @param x input example
-- @param y target for x
-- @param criterion nn criterion
-- @param learningRate step size passed to updateParameters
function gradientUpgrade(model, x, y, criterion, learningRate)
    local output = model:forward(x)
    -- Extra parentheses keep exactly one value, as the original local did.
    print((criterion:forward(output, y)))
    local grad_wrt_output = criterion:backward(output, y)
    model:zeroGradParameters()
    model:backward(x, grad_wrt_output)
    model:updateParameters(learningRate)
end

In [None]:
-- One gradient step per generated sample, in dataset order.
-- Each input row holds 30 feature columns followed by 4 legal-action columns.
--
-- Fixes relative to the original:
--  * the legal-action slice started at column 30 (narrow(1,30,4) = columns
--    30..33), overlapping the last feature and dropping the last action; it
--    now starts at column 31, matching the 1-30 / 31-34 split used elsewhere;
--  * the extra mlp:forward before gradientUpgrade was redundant
--    (gradientUpgrade does its own forward pass) and is gone;
--  * `input` is local instead of a leaked global.
for i = 1,game.input_tensor:size(1) do
    local row = game.input_tensor[i]
    local input1 = row:narrow(1,1,30)                             -- feature columns 1..30
    local input2 = convert_to_valid_actions(row:narrow(1,31,4))   -- action columns 31..34
    local output = game.output_tensor[i]
    local input = {input1,input2}

---    criterion:forward(mlp.output,output)
---    print(criterion:forward(mlp.output,output))
    gradientUpgrade(mlp, input, output, criterion, 0.0001)
end

In [None]:
-- Pick sample 20 and split it into the network's two inputs.
-- Fix: the action slice started at column 30 (narrow(1,30,4) = columns 30..33),
-- overlapping the last feature column; it now covers columns 31..34.
-- NOTE(review): assumes dataset[20][1] uses the same 30 feature + 4 action
-- column layout as game.input_tensor; `dataset` is not defined in the visible
-- notebook -- confirm.
x=dataset[20][1]
t1 = x:narrow(1,1,30)
t2 = x:narrow(1,31,4)
t2 = convert_to_valid_actions(t2)
input_test= {t1,t2}
output_test= dataset[20][2]

In [None]:
-- Network output for the single test sample prepared in the previous cell.
print(mlp:forward(input_test))

### test

In [None]:

--- One plain gradient-descent step on a single (x, y) example (silent version).
-- Order of operations: forward pass, criterion forward, criterion backward,
-- clear the accumulated gradients, backpropagate, apply the update.
-- @param model nn module to update
-- @param x input example
-- @param y target for x
-- @param criterion nn criterion
-- @param learningRate step size passed to updateParameters
function gradientUpgrade(model, x, y, criterion, learningRate)
    local output = model:forward(x)
    -- The loss value itself is not needed here; the call is kept as is.
    criterion:forward(output, y)
    local grad_wrt_output = criterion:backward(output, y)
    model:zeroGradParameters()
    model:backward(x, grad_wrt_output)
    model:updateParameters(learningRate)
end

-- Stand-alone sanity check of gradientUpgrade on a tiny linear model.
-- NOTE: this overwrites the globals `model` and `criterion` used by other cells.
model = nn.Sequential()
model:add(nn.Linear(5,1))

-- Two random 5-dimensional inputs with targets +1 and -1.
x1 = torch.rand(5)
y1 = torch.Tensor({1})
x2 = torch.rand(5)
y2 = torch.Tensor({-1})

criterion = nn.MarginCriterion(1)

-- Alternate gradient steps on the two examples.
for i = 1, 1000 do
    gradientUpgrade(model, x1, y1, criterion, 0.01)
    gradientUpgrade(model, x2, y2, criterion, 0.01)
end

-- with y1[1] we extract the first value in the tensor
print('prediction for x1 = ' .. model:forward(x1)[1] .. ' expected value ' .. y1[1])
print('prediction for x2 = ' .. model:forward(x2)[1] .. ' expected value ' .. y2[1])

print('loss after training for x1 = ' .. criterion:forward(model:forward(x1), y1))
print('loss after training for x2 = ' .. criterion:forward(model:forward(x2), y2))

In [None]:
-- One gradient step per generated sample, in dataset order, printing each loss.
-- Each input row holds 30 feature columns followed by 4 legal-action columns.
--
-- Fixes relative to the original:
--  * the legal-action slice started at column 30 (narrow(1,30,4) = columns
--    30..33), overlapping the last feature and dropping the last action; it
--    now starts at column 31, matching the 1-30 / 31-34 split used elsewhere;
--  * the forward pass and the loss were computed twice per sample (once
--    discarded, once printed); they are now computed once;
--  * `input` is local instead of a leaked global.
for i = 1,game.input_tensor:size(1) do
    local row = game.input_tensor[i]
    local input1 = row:narrow(1,1,30)                             -- feature columns 1..30
    local input2 = convert_to_valid_actions(row:narrow(1,31,4))   -- action columns 31..34
    local output = game.output_tensor[i]
    local input = {input1,input2}

    -- feed it to the neural network and the criterion
    local loss = criterion:forward(mlp:forward(input), output)
    print(loss)

    -- clear old gradients, backpropagate this sample, apply the update
    mlp:zeroGradParameters()
    mlp:backward(input, criterion:backward(mlp.output, output))
    mlp:updateParameters(0.00001)
end