In [62]:
-- Libs
local grad = require 'autograd'
local util = require 'autograd.util'
local lossFuns = require 'autograd.loss'
local optim = require 'optim'

In [63]:
grad.optimize(true)




In [64]:
-- define trainable parameters:
params = {
   W = {
      t.randn(100,50),
      t.randn(50,10),
   },
   b = {
      t.randn(50),
      t.randn(10),
   }
}

In [65]:
-- define model
neuralNet = function(params, x, y)
   local h1 = t.tanh(x * params.W[1] + params.b[1])
   local h2 = t.tanh(h1 * params.W[2] + params.b[2])
   local yHat = h2 - t.log(t.sum(t.exp(h2)))
   local loss = - t.sum(t.cmul(yHat, y))
   return loss
end 

In [66]:
-- get gradients:
dneuralNet = grad(neuralNet)

In [19]:
print(dneuralNet)

function: 0x0c9b74d0	


In [21]:
print(neuralNet)

function: 0x0c998dc8	


In [67]:
-- some data:
x = t.randn(1,100)
y = t.Tensor(1,10):zero() y[1][3] = 1

In [69]:
-- compute loss and gradients wrt all parameters in params:
dparams, loss = dneuralNet(params, x, y) 

In [71]:
print(dparams)

{
  b : 
    {
      1 : 
        {
          raw : DoubleTensor - size: 50
          type : tensor
          source : 
            {
              node : 
                {
                  forwardFn : table: 0x0ad88978
                  outputs : table: 0x0cd55770
                  gradientFn : table: 0x0c824478
                  inputs : table: 0x0cd55500
                  outputTargets : table: 0x0cd55798
                }
              index : 1
              type : computed
            }
        }
      2 : 
        {
          raw : DoubleTensor - size: 10
          type : tensor
          source : 
            {
              node : 
                {
                  forwardFn : table: 0x0ad88978
                  outputs : table: 0x0cd4f268
                  gradientFn : table: 0x0c824478
                  inputs : table: 0x0cd4eff8
                  outputTargets : table: 0x0cd4f290
                }
              index : 1
              type : computed
            }
     

In [13]:
-- in this case:
--> loss: is a scalar (Lua number)
--> dparams: is a table that mimics the structure of params; for
--  each Tensor in params, dparams provides the derivatives of the
--  loss wrt to that Tensor.
print(dparams["b"][2])

 8.7231e-09
 3.8310e-03
-1.5476e-01
 1.1676e-07
 4.9298e-06
 5.2273e-04
 6.4689e-06
 2.6721e-02
 1.5375e-08
 1.0435e-02
[torch.DoubleTensor of size 10]




In [8]:
for i,sample in datasetIterator() do
   -- estimate gradients wrt params:
   local grads, loss = dneuralNet(params, sample.x, sample.y)

   -- SGD step:
   for i = 1,#params.W do
      -- update params with an arbitrary learning rate:
      params.W[i]:add(-.01, grads.W[i])
      params.b[i]:add(-.01, grads.b[i])
   end
end

[string "for i,sample in datasetIterator() do..."]:1: attempt to call global 'datasetIterator' (a nil value)
stack traceback:
	[string "for i,sample in datasetIterator() do..."]:1: in main chunk
	[C]: in function 'xpcall'
	/Users/yutaro/torch/install/share/lua/5.1/itorch/main.lua:177: in function </Users/yutaro/torch/install/share/lua/5.1/itorch/main.lua:143>
	/Users/yutaro/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...rs/yutaro/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...rs/yutaro/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...rs/yutaro/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	/Users/yutaro/torch/install/share/lua/5.1/itorch/main.lua:344: in main chunk
	[C]: in function 'require'
	[string "arg={'/Users/yutaro/.ipython/profile_default/..."]:1: in main chunk: 

In [39]:
local d = require 'autograd'
d.optimize(true)
local innerFn = function(x)
   return x^2
end
local ddf = d(function(params)
   local grads = d(innerFn)(params.W + torch.cmul(params.r,params.v))
   return torch.sum(grads)
end)

local params = {
    W = torch.Tensor({3,3,3}),
    r = torch.Tensor({2,2,2}),
    v = torch.Tensor({5,5,5})
}
gradGrads = ddf(params)

In [49]:
gradGrads.v.raw

 0
 0
 0
[torch.DoubleTensor of size 3]



In [51]:
local d = require 'autograd'
d.optimize(true)
local innerFn = function(x)
   return x^2
end
local ddf = d(function(params)
   local grads = d(innerFn)(params.W + params.r * params.v)
   return grads
end)

local params = {
    W = 5,
    r = 3,
    v = 2
}
gradGrads = ddf(params)

In [57]:
print(gradGrads.r.raw)

4	


In [158]:
local d = require 'autograd'
local t = require 'torch'
d.optimize(true)

-- params = {
--    W = {
--       t.randn(20,15),
--       t.randn(15,10),
--    },
--    b = {
--       t.randn(15),
--       t.randn(10),
--    }
-- }

params = {
   W = {
      t.randn(20,15),
      t.randn(15,10),
   },
   b = {
      t.randn(15),
      t.randn(10),
   },
    r = {
            W = {
              t.randn(20,15),
              t.randn(15,10),
               },
            b = {
              t.randn(15),
              t.randn(10),
               }
    },
    v = {
           -- has to be the same vector as in Hv
            W = {
              t.randn(20,15),
              t.randn(15,10),
               },
            b = {
              t.randn(15),
              t.randn(10),
               }
    },
    Wb ={    W = {
      t.randn(20,15),
      t.randn(15,10),
   },
   b = {
      t.randn(15),
      t.randn(10),
   }}
}
-- define model
local innerFn = function(params, x, y)
   local h1 = t.tanh(x * params.W[1] + params.b[1])
   local h2 = t.tanh(h1 * params.W[2] + params.b[2])
   local yHat = h2 - t.log(t.sum(t.exp(h2)))
   local loss = - t.sum(t.cmul(yHat, y))
   return loss
end

--print(#params2.r.W)

--TO DO : change params2.Wb + t.cmul(params2.r, params2.v) so that it handles the nested params nicely. 
local outerFn = function(params)
    local product = clone(params.Wb)
    for i = 1, #params.r.W do -- has to change if I want to do a different param configuration
            product.W[i] = t.cmul(params.r.W[i], params.v.W[i])
            product.b[i] = t.cmul(params.r.b[i], params.v.b[i])
    end
    local addition = clone(params.Wb)
    for i = 1, #params.r.W do -- has to change if I want to do a different param configuration
            addition.W[i] = t.add(params.W[i], product.W[i])
            addition.b[i] = t.add(params.b[i], product.b[i])
    end   
    local grads, loss = d(innerFn)(addition, x, y)
    return loss
end

local ddf = d(outerFn)
--gradGrads = d(outerFn)(params)


-- local params2 = {
--     W = 5,
--     r = 3,
--     v = 2
-- }

gradGrads = ddf(params)


size mismatch, m1: [1 x 100], m2: [20 x 15] at /tmp/luarocks_torch-scm-1-6334/torch7/lib/TH/generic/THTensorMath.c:770
stack traceback:
	[C]: at 0x0c1780a0
	[C]: in function 'fn'
	...install/share/lua/5.1/autograd/runtime/codegen/Graph.lua:40: in function '__mul'
	...install/share/lua/5.1/autograd/runtime/codegen/Value.lua:182: in function 'fn'
	.../install/share/lua/5.1/autograd/runtime/codegen/Node.lua:72: in function 'evaluateForward'
	...install/share/lua/5.1/autograd/runtime/codegen/Graph.lua:25: in function '__mul'
	[string "local d = require 'autograd'..."]:57: in function 'fn'
	...install/share/lua/5.1/autograd/runtime/codegen/Graph.lua:353: in function 'protectedFn'
	...install/share/lua/5.1/autograd/runtime/codegen/Graph.lua:383: in function 'record'
	.../install/share/lua/5.1/autograd/runtime/codegen/init.lua:20: in function <.../install/share/lua/5.1/autograd/runtime/codegen/init.lua:19>
	[string "local d = require 'autograd'..."]:78: in function 'fn'
	...
	[string "local d = require 'autograd'..."]:92: in main chunk
	[C]: in function 'xpcall'
	/Users/yutaro/torch/install/share/lua/5.1/itorch/main.lua:177: in function </Users/yutaro/torch/install/share/lua/5.1/itorch/main.lua:143>
	/Users/yutaro/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...rs/yutaro/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...rs/yutaro/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...rs/yutaro/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	/Users/yutaro/torch/install/share/lua/5.1/itorch/main.lua:344: in main chunk
	[C]: in function 'require'
	[string "arg={'/Users/yutaro/.ipython/profile_default/..."]:1: in main chunk: 

In [81]:
function clone (t) -- deep-copy a table
    if type(t) ~= "table" then return t end
    local meta = getmetatable(t)
    local target = {}
    for k, v in pairs(t) do
        if type(v) == "table" then
            target[k] = clone(v)
        else
            target[k] = v
        end
    end
    setmetatable(target, meta)
    return target
end

In [93]:
-- Just checking if paramsss is a table of parameters 
-- We get something like this from autograd.functionalize
-- {
--   1 : DoubleTensor - size: 16x27
--   2 : DoubleTensor - size: 16
--   3 : DoubleTensor - size: 10x1024
--   4 : DoubleTensor - size: 10
-- }


-- require 'nn'
-- local autograd = require 'autograd'
-- local model = nn.Sequential()
-- model:add(nn.SpatialConvolutionMM(3, 16, 3, 3, 1, 1, 1, 1))
-- model:add(nn.Tanh())
-- model:add(nn.Reshape(16*8*8))
-- model:add(nn.Linear(16*8*8, 10))
-- model:add(nn.Tanh())
-- -- Note that this model could have been pre-trained, and reloaded from disk.

-- -- Functionalize the model:
-- local modelf, paramsss = autograd.functionalize(model)

-- print(paramsss)

In [96]:
local params = {
   W = {
      t.randn(100,50),
      t.randn(50,10),
   },
   b = {
      t.randn(50),
      t.randn(10),
   }
}

print(params.W[1]:size())
-- for i = 1,#params.W do
--       -- update params with an arbitrary learning rate:
--       params.W[i]:add(-.01, grads.W[i])
--       params.b[i]:add(-.01, grads.b[i])
--    end

 100
  50
[torch.LongStorage of size 2]



In [107]:
-- This function will do element wise addition on two tables of parameters. 

function addParams(paramsA, paramsB)
    for i = 1, 2 do
        paramsA.W[i]:add(paramsB.W[i])
        paramsA.b[i]:add(paramsB.b[i])
    end
    return paramsA
end

-- This function will do element wise multiplication on two tables of parameters.
function cmulParams(paramsA, paramsB)
    for i = 1, #paramsA.W do
        paramsA.W[i]:cmul(paramsB.W[i])
        paramsA.b[i]:cmul(paramsB.b[i])
    end
    return paramsA
end

In [125]:
params = {
   W = {
      t.Tensor(20,15):fill(1),
      t.Tensor(15,10):fill(2),
   },
   b = {
      t.randn(15),
      t.randn(10),
   }
}

params2 = {
   W = {
      t.Tensor(20,15):fill(10),
      t.Tensor(15,10):fill(12),
   },
   b = {
      t.randn(15),
      t.randn(10),
   }
}

for i = 1, #params.W do
    params.W[i]:add(params2.W[i])
    params.b[i]:add(params2.b[i])
end

--ans  = addParams(params1,params2)

-- local params2 = {
--     Wb = copy(params),
--     r = {
--             W = {
--               t.randn(20,15),
--               t.randn(15,10),
--                },
--             b = {
--               t.randn(15),
--               t.randn(10),
--                }
--           },
--     v = {
--            -- has to be the same vector as in Hv
--     }
-- }

params2 = {
    --Wb = copy(params),
    r = {
            W = {
              t.Tensor(20,15):fill(2),
              t.Tensor(15,10):fill(3),
               },
            b = {
              t.randn(15),
              t.randn(10),
               }
          },
    v = {
           -- has to be the same vector as in Hv
                    W = {
              t.Tensor(20,15):fill(3),
              t.Tensor(15,10):fill(4),
               },
            b = {
              t.randn(15),
              t.randn(10),
               }
    }
}

product = {
       W = {
      t.Tensor(20,15):fill(0),
      t.Tensor(15,10):fill(0),
   },
   b = {
      t.randn(15),
      t.randn(10),
   }
}
product = clone(params)
for i = 1, #params2.r.W do -- has to change if I want to do a different param configuration
    product.W[i] = torch.cmul(params2.r.W[i], params2.v.W[i])
    product.b[i] = torch.cmul(params2.r.b[i], params2.v.b[i])
    --product:add(torch.cmul(params2.r.b[i], params2.v.b[i]))
end


In [126]:
print(product.W[2])

 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
 12  12  12  12  12  12  12  12  12  12
[torch.DoubleTensor of size 15x10]



In [141]:
params2 = {
    Wb = clone(params),
    r = {
            W = {
              t.randn(20,15),
              t.randn(15,10),
               },
            b = {
              t.randn(15),
              t.randn(10),
               }
          },
    v = {
           -- has to be the same vector as in Hv
            W = {
              t.randn(20,15),
              t.randn(15,10),
               },
            b = {
              t.randn(15),
              t.randn(10),
               }
    }
}

print(#params2.r.W)

2	
